mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-17 11:30:13 +02:00
Compare commits
119 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b7824004db | |||
| c9c9a5262c | |||
| 9c5a4054f6 | |||
| 71a2ef4ce7 | |||
| 51f377f03d | |||
| 5ede669a12 | |||
| 8fcb01276c | |||
| 10dc9450be | |||
| bef462cdcf | |||
| 5135b771f5 | |||
| 7151563a41 | |||
| 52a28967a0 | |||
| 96182fe66d | |||
| 174031479c | |||
| f1cd9eb4b9 | |||
| c266c5ff5e | |||
| 52a0968092 | |||
| 89d6bb8fb9 | |||
| d48a0cdace | |||
| df76f6f147 | |||
| 776c89bfcf | |||
| d3006df57a | |||
| e78e4d186d | |||
| d1e1be4016 | |||
| 0afb85e241 | |||
| 039a0f9d0c | |||
| b9b99c1fa8 | |||
| a8fd33a758 | |||
| 7346129d0e | |||
| eb8f39f84e | |||
| 00f9e3f1fd | |||
| ffdfe0426b | |||
| 1583fd5715 | |||
| af9b3d08cc | |||
| b64b9e0962 | |||
| 76f4deb3a7 | |||
| 49d90eaf69 | |||
| 079ff7b737 | |||
| bd81a940ff | |||
| 9a0a9a116a | |||
| 80a01275ff | |||
| 3ac8442e4b | |||
| 5f322b0a79 | |||
| 363b5a49c8 | |||
| a3e5c98cd0 | |||
| 6a098e1c5f | |||
| f08781bdc9 | |||
| c3dd95f6a9 | |||
| 10a8c7b5be | |||
| f03ebbba11 | |||
| a16f22ed34 | |||
| 41e35e4da2 | |||
| be3ab5823a | |||
| ef52bd03d2 | |||
| 017f383096 | |||
| 41799f9891 | |||
| a1af9c3595 | |||
| c8a8fc56f8 | |||
| e6aba86ce1 | |||
| d5609ac02f | |||
| 1d7fa5185a | |||
| fb97042c01 | |||
| 2616a6c9e3 | |||
| a930497e14 | |||
| 2dc1fcc778 | |||
| 896d1ae938 | |||
| 8dfa6a7199 | |||
| ef6b8ec181 | |||
| dcea325fba | |||
| 03b8053617 | |||
| 20807a2d62 | |||
| 79fbf9741b | |||
| a2f5d62926 | |||
| 5e0b2c037e | |||
| 69ef231e5a | |||
| 7a5f47ca9e | |||
| 5cd49542bf | |||
| f14d4feb6d | |||
| 19a8560a80 | |||
| 0d0e009867 | |||
| febcce9125 | |||
| 31ebcb5cd9 | |||
| b3fca3dc18 | |||
| 401f114e4f | |||
| 79b39e8985 | |||
| c3e38621fc | |||
| 9ef02dd06f | |||
| ba39d3b9aa | |||
| f91ddcf38b | |||
| 49151d8b9f | |||
| 767a2f6c00 | |||
| 2da739c9e8 | |||
| eca7f24e2c | |||
| 7bfaad17f0 | |||
| e3efcfd476 | |||
| 32b8421a1c | |||
| bc70cc3527 | |||
| 44e9b38ac2 | |||
| b01a69c172 | |||
| b041b5e97c | |||
| c54ea7fd9f | |||
| a3aa7b4dec | |||
| 19fb7f0b1e | |||
| 35cd4e4c71 | |||
| 31f79fd8e2 | |||
| fd7d6fa401 | |||
| 49621824b1 | |||
| 76750caa92 | |||
| c3ef9f4b9e | |||
| 5e6bb8511a | |||
| 0fee36e8f7 | |||
| e125467721 | |||
| 2b03b808ac | |||
| 2e14e75a0e | |||
| 084e563412 | |||
| 9ef6213284 | |||
| fb11e0881f | |||
| 7f96151e56 | |||
| d0299fc0a0 |
+52
-2
@@ -10,6 +10,37 @@ OPENSKY_CLIENT_ID=
|
||||
OPENSKY_CLIENT_SECRET=
|
||||
AIS_API_KEY=
|
||||
|
||||
# Global Fishing Watch — fishing vessel activity events (Fishing Activity map layer).
|
||||
# Free API token from https://globalfishingwatch.org/our-apis/tokens
|
||||
# Without this the fishing_activity layer stays empty.
|
||||
# GFW_API_TOKEN=
|
||||
# Optional tuning — GFW can return 40k+ global events; defaults cap fetch for map paint.
|
||||
# GFW_EVENTS_PAGE_SIZE=500
|
||||
# GFW_EVENTS_MAX_PAGES=10
|
||||
# GFW_EVENTS_LOOKBACK_DAYS=7
|
||||
# GFW_EVENTS_TIMEOUT_S=90
|
||||
|
||||
# Windy Webcams global CCTV layer — free key from https://api.windy.com/webcams/docs
|
||||
# WINDY_API_KEY=
|
||||
|
||||
# Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token).
|
||||
# TELEGRAM_OSINT_ENABLED=true
|
||||
# TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor
|
||||
# TELEGRAM_OSINT_TRANSLATE=true
|
||||
# TELEGRAM_OSINT_TRANSLATE_TO=en
|
||||
|
||||
# Strategic Risk Analytics (experimental derived OSINT — off by default)
|
||||
# GT_ANALYTICS_ENABLED=false
|
||||
# GT_ANALYTICS_PROFILE=lean
|
||||
# On 1 vCPU nodes (fleet VPS), leave disabled or set profile=lean. Scheduled ingest
|
||||
# and Louvain clustering stay off until GT_ANALYTICS_ACK_LOW_CPU=true.
|
||||
# GT_ANALYTICS_ACK_LOW_CPU=false
|
||||
# GT_ANALYTICS_BASE_PRIOR=0.15
|
||||
# GT_ANALYTICS_HIGH_RISK_THRESHOLD=0.6
|
||||
# GT_ANALYTICS_SIGNAL_WEIGHTS=payroll_loan=3.0,purge=3.5,troop_movement=3.0
|
||||
# GT_ANALYTICS_WATCHED_CHANNELS=osintdefender,war_monitor,nexta_live
|
||||
# GT_ANALYTICS_LOUVAIN_INTERVAL_MINUTES=30
|
||||
|
||||
# Admin key to protect sensitive endpoints (settings, updates).
|
||||
# If blank, loopback/localhost requests still work for local single-host dev.
|
||||
# Remote/non-loopback admin access requires ADMIN_KEY, or ALLOW_INSECURE_ADMIN=true in debug-only setups.
|
||||
@@ -39,8 +70,8 @@ ADMIN_KEY=
|
||||
# NUFORC_MAPBOX_TOKEN=
|
||||
|
||||
# Optional startup-risk controls.
|
||||
# On Windows, external curl fallback and the Playwright LiveUAMap scraper are
|
||||
# disabled by default so blocked upstream feeds cannot interrupt start.bat.
|
||||
# On Windows, external curl fallback is off by default. LiveUAMap uses UI consent
|
||||
# when you enable Global Incidents (or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true).
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=false
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false
|
||||
# AIS starts by default when AIS_API_KEY is set. Set to 0/false to force-disable.
|
||||
@@ -77,6 +108,19 @@ ADMIN_KEY=
|
||||
# pip install earthengine-api
|
||||
# GEE_SERVICE_ACCOUNT_KEY=
|
||||
|
||||
# Copernicus CDSE — Sentinel-2 imagery (Settings → Imagery, or backend .env).
|
||||
# Free OAuth app at https://dataspace.copernicus.eu/
|
||||
# SENTINEL_CLIENT_ID=
|
||||
# SENTINEL_CLIENT_SECRET=
|
||||
|
||||
# Sentinel-2 road corridor freight trends (DrishX engine port — opt-in slow layer).
|
||||
# pip install -e backend[road-corridor] (or uv sync --extra road-corridor)
|
||||
# ROAD_CORRIDOR_SAT_ENABLED=false
|
||||
# ROAD_CORRIDOR_SCHEDULED_PRESETS=laredo_i35
|
||||
# ROAD_CORRIDOR_MONTHS=2
|
||||
# ROAD_CORRIDOR_MAX_FRAMES=6
|
||||
# ROAD_CORRIDOR_REFRESH_HOURS=24
|
||||
|
||||
# Override the backend URL the frontend uses (leave blank for auto-detect)
|
||||
# NEXT_PUBLIC_API_URL=http://192.168.1.50:8000
|
||||
|
||||
@@ -128,8 +172,14 @@ ADMIN_KEY=
|
||||
# MESH_DM_ROOT_TRANSPARENCY_LEDGER_READBACK_URI=backend/../ops/root_transparency_ledger.json
|
||||
|
||||
# ── Self Update ────────────────────────────────────────────────
|
||||
# Optional ZIP updater digest pin. The updater checks this first, then
|
||||
# backend/data/release_digests.json, then the release SHA256SUMS.txt asset.
|
||||
# MESH_UPDATE_SHA256=
|
||||
|
||||
# Optional strict nonce-only frontend CSP. Leave unset unless the exact build
|
||||
# has been verified to hydrate cleanly in your deployment.
|
||||
# SHADOWBROKER_STRICT_CSP=1
|
||||
|
||||
# ── Wormhole (Local Agent) ─────────────────────────────────────
|
||||
# WORMHOLE_URL=http://127.0.0.1:8787
|
||||
# WORMHOLE_TRANSPORT=direct
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
## Summary
|
||||
|
||||
<!-- What changed and why (1–3 bullets). -->
|
||||
|
||||
## Test plan
|
||||
|
||||
- [ ] <!-- How you verified the change -->
|
||||
|
||||
## Production hardening (data path / fetchers / unattended deploys only)
|
||||
|
||||
If this PR touches the data path, fetchers, or live-data APIs, walk through [docs/production-hardening.md](https://github.com/BigBodyCobain/Shadowbroker/blob/main/docs/production-hardening.md) and note any N/A items here.
|
||||
|
||||
- [ ] Checklist reviewed (or N/A — explain why)
|
||||
@@ -7,6 +7,28 @@ on:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
# CI flake mitigation:
|
||||
# ci.yml is triggered TWICE per PR on the same commit — once directly via
|
||||
# the `pull_request` trigger above ("Frontend Tests & Build" check) and once
|
||||
# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests &
|
||||
# Build" check). Both jobs land on the same Actions runner pool at the same
|
||||
# time and fight for CPU/RAM. Under contention, React's reconciliation in
|
||||
# `messagesViewFirstContact.test.tsx > removes an approved contact …`
|
||||
# overruns its 5s waitFor timeout — that's the single failure mode we've
|
||||
# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the
|
||||
# fd7d6fa push. Backend tests and every other frontend test pass under
|
||||
# the same conditions, which is what made this look random.
|
||||
#
|
||||
# Pinning a concurrency group on the SHA (PR head, or the pushed commit
|
||||
# for main) serializes the two invocations so neither starves the other.
|
||||
# We use cancel-in-progress: false so the second one queues instead of
|
||||
# cancelling — cancelling could leave the PR check stuck "Expected" if
|
||||
# only one of the two ever finishes. Total CI time grows by ~2 min in
|
||||
# exchange for deterministic outcomes.
|
||||
concurrency:
|
||||
group: ci-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
frontend:
|
||||
name: Frontend Tests & Build
|
||||
|
||||
+49
@@ -101,6 +101,17 @@ backend/data/*
|
||||
# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream
|
||||
# Let's Encrypt renewal failures without disabling TLS validation entirely.
|
||||
!backend/data/aisstream_spki_pins.json
|
||||
# Issue #231: pinned SHA-256 digests for known release archives. Used by
|
||||
# the self-updater as a second-line integrity check when the release's
|
||||
# SHA256SUMS.txt asset can't be fetched.
|
||||
!backend/data/release_digests.json
|
||||
# Issue #244/#245/#246: one-shot carrier-position seed shipped with each
|
||||
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||
# after that the cache reflects this install's own GDELT observations.
|
||||
!backend/data/carrier_seed.json
|
||||
# DrishX RF model weights (MIT — see backend/third_party/drishx/NOTICE.md)
|
||||
!backend/data/drishx/
|
||||
!backend/data/drishx/rf_model.pickle
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
@@ -166,6 +177,8 @@ frontend/eslint-report.json
|
||||
.git_backup/
|
||||
local-artifacts/
|
||||
release-secrets/
|
||||
release-staging/
|
||||
.tmp-release-inspect/
|
||||
shadowbroker_repo/
|
||||
frontend/src/components.bak/
|
||||
frontend/src/components/map/icons/backups/
|
||||
@@ -190,6 +203,8 @@ graphify-out/
|
||||
# Internal docs & brainstorming (never commit)
|
||||
# ========================
|
||||
docs/*
|
||||
!docs/OUTBOUND_DATA.md
|
||||
!docs/production-hardening.md
|
||||
!docs/mesh/
|
||||
docs/mesh/*
|
||||
!docs/mesh/threat-model.md
|
||||
@@ -248,8 +263,42 @@ frontend/.desktop-export-stash-*/
|
||||
backend/data/wormhole_stderr.log
|
||||
backend/data/wormhole_stdout.log
|
||||
|
||||
# Hermes Agent (operator-local runtime install — not project source)
|
||||
.hermes/
|
||||
**/.hermes/
|
||||
hermes-agent/
|
||||
|
||||
# Runtime caches already covered by the backend/data/* blanket
|
||||
# (these are caught by the wildcard but listing for clarity)
|
||||
|
||||
# Compressed snapshot archives (can be 100 MB+)
|
||||
*.json.gz
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# AI assistant / coding-agent scratch
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Per-tool config + scratch directories. These are private to whichever
|
||||
# coding agent the operator happens to be using and have no business in
|
||||
# the repo. If a tool's instructions need to be canonical for the project,
|
||||
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||
# repo root.
|
||||
|
||||
# OpenAI Codex CLI
|
||||
.codex/
|
||||
.codex-app-schema/
|
||||
.codex-app-ts/
|
||||
|
||||
# Per-agent instruction files dropped at repo root by various tools.
|
||||
# These are operator-side preferences, not part of the project contract.
|
||||
AGENTS.md
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
.github/copilot-instructions.md
|
||||
|
||||
# Stale AI-generated test file that referenced fields that don't exist in
|
||||
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||
# ignored so it doesn't accidentally get committed if it shows up again
|
||||
# from a tool that's working off an out-of-date understanding of the
|
||||
# module. If a real test for that function is needed, write it under a
|
||||
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||
backend/tests/test_carrier_tracker_region_centers.py
|
||||
|
||||
+42
-12
@@ -13,13 +13,22 @@
|
||||
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
||||
# on GitLab) so the two sources stay in sync.
|
||||
#
|
||||
# Pipelines on this repo were instant-failing for free-tier accounts until
|
||||
# identity verification was added — the May 2026 bump in this comment is
|
||||
# the marker commit that confirms runner allocation after verification.
|
||||
#
|
||||
# Auth notes:
|
||||
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
||||
# automatically. No credentials need to be configured.
|
||||
# - The reverse mirror requires a GitHub personal access token stored
|
||||
# as the GitLab CI/CD variable GITHUB_MIRROR_TOKEN (Protected + Masked).
|
||||
# Scope: public_repo (or repo for private). If the variable isn't
|
||||
# set the mirror job is skipped — image builds still run.
|
||||
# - The reverse mirror authenticates to GitHub via a per-repo SSH
|
||||
# deploy key. The private half is stored as the File-type GitLab
|
||||
# CI/CD variable GITHUB_MIRROR_SSH_KEY (Protected). The matching
|
||||
# public key is added to github.com/BigBodyCobain/Shadowbroker/
|
||||
# settings/keys with write access. This is a tighter-scoped
|
||||
# replacement for a personal access token: it can ONLY push to
|
||||
# Shadowbroker, never expires, and rotating it is a one-click
|
||||
# delete on GitHub's deploy-keys page. If the variable isn't set,
|
||||
# the mirror job is skipped — image builds still run.
|
||||
|
||||
stages:
|
||||
- build
|
||||
@@ -48,7 +57,11 @@ variables:
|
||||
- docker info
|
||||
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
||||
- docker run --privileged --rm tonistiigi/binfmt --install all
|
||||
- docker buildx create --use --name multiarch --driver docker-container
|
||||
# buildx --driver docker-container can't read TLS from the env vars
|
||||
# the GitLab dind service exports. Wrap them in a docker context and
|
||||
# bind buildx to it. See https://docs.gitlab.com/ee/ci/docker/using_docker_build.html#use-docker-buildx
|
||||
- docker context create tls-env
|
||||
- docker buildx create --use --name multiarch --driver docker-container tls-env
|
||||
|
||||
# ── Backend image ────────────────────────────────────────────────────────
|
||||
build-backend:
|
||||
@@ -93,18 +106,35 @@ build-frontend:
|
||||
- .gitlab-ci.yml
|
||||
|
||||
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker.
|
||||
# Fast-forward-only — if GitLab main and GitHub main have diverged, this
|
||||
# fails loudly rather than silently overwriting either side.
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker via SSH
|
||||
# using a per-repo deploy key. Fast-forward-only by default — if GitLab
|
||||
# main and GitHub main have diverged, the push fails loudly rather than
|
||||
# silently overwriting either side.
|
||||
#
|
||||
# Only runs if GITHUB_MIRROR_TOKEN is set as a CI/CD variable. See the
|
||||
# header comment of this file for setup instructions.
|
||||
# Only runs if GITHUB_MIRROR_SSH_KEY is set as a File-type CI/CD variable.
|
||||
# See the header comment of this file for setup instructions.
|
||||
mirror-to-github:
|
||||
stage: mirror
|
||||
image: alpine:3.20
|
||||
needs: []
|
||||
before_script:
|
||||
- apk add --no-cache git openssh-client ca-certificates
|
||||
- mkdir -p ~/.ssh
|
||||
- chmod 700 ~/.ssh
|
||||
# Install the deploy key. File-type CI variable exposes the path; copy
|
||||
# to ~/.ssh/id_ed25519 with restrictive perms so ssh accepts it.
|
||||
- cp "$GITHUB_MIRROR_SSH_KEY" ~/.ssh/id_ed25519
|
||||
- chmod 600 ~/.ssh/id_ed25519
|
||||
# Pin github.com's current host keys so we never trust a man-in-the-
|
||||
# middle. Sourced from https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
|
||||
# (rotated 2023-03-24 after the previous RSA key leak).
|
||||
- |
|
||||
cat > ~/.ssh/known_hosts <<'EOF'
|
||||
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
|
||||
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
|
||||
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
|
||||
EOF
|
||||
- chmod 644 ~/.ssh/known_hosts
|
||||
script:
|
||||
- git config --global user.email "ci-mirror@gitlab.com"
|
||||
- git config --global user.name "GitLab CI Mirror"
|
||||
@@ -115,7 +145,7 @@ mirror-to-github:
|
||||
- cd repo
|
||||
- >
|
||||
git push
|
||||
"https://x-access-token:${GITHUB_MIRROR_TOKEN}@github.com/BigBodyCobain/Shadowbroker.git"
|
||||
"git@github.com:BigBodyCobain/Shadowbroker.git"
|
||||
"${CI_COMMIT_SHA}:refs/heads/main"
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_TOKEN
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_SSH_KEY
|
||||
|
||||
+2
-1
@@ -44,7 +44,8 @@ These sources have their own terms; consult each link before redistributing.
|
||||
| aisstream.io | https://aisstream.io | Free-tier API terms (attribution required) | AIS vessel positions |
|
||||
| Global Fishing Watch | https://globalfishingwatch.org | CC BY 4.0 (for public data) | Fishing activity events |
|
||||
| Microsoft Planetary Computer | https://planetarycomputer.microsoft.com | Sentinel-2 / ESA Copernicus terms | Sentinel-2 imagery |
|
||||
| Copernicus CDSE (Sentinel Hub) | https://dataspace.copernicus.eu | ESA Copernicus open data terms | SAR + optical imagery |
|
||||
| Copernicus CDSE (Sentinel Hub) | https://dataspace.copernicus.eu | ESA Copernicus open data terms | SAR + optical imagery, optional road-corridor truck trends |
|
||||
| DrishX / Fisser et al. 2022 | https://github.com/sparkyniner/DRISH-X-Satellite-powered-freight-intelligence- | MIT (engine); research methodology attribution | Sentinel-2 motion-smear truck detection on major roads (opt-in) |
|
||||
| Shodan | https://www.shodan.io | Operator-supplied API key, Shodan ToS | Internet device search |
|
||||
| Smithsonian GVP | https://volcano.si.edu | Attribution required | Volcanoes |
|
||||
| OpenAQ | https://openaq.org | CC BY 4.0 | Air quality stations |
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
**ShadowBroker** is a decentralized intelligence platform that aggregates real-time, multi-domain OSINT telemetry from 60+ live intelligence feeds into a single dark-ops map interface. Aircraft, ships, satellites, conflict zones, CCTV networks, GPS jamming, internet-connected devices, police scanners, mesh radio nodes, and breaking geopolitical events — all updating in real time on one screen, alongside an obfuscated communications protocol and information-exchange infrastructure.
|
||||
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers, including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 40+ toggleable data layers, including SAR ground-change detection, **Telegram OSINT** (public channel previews geoparsed onto the map), a **server-side recon toolkit** (DNS, WHOIS, sanctions, BGP, IP sweep, and more), supply-chain risk overlays, and malware/C2 + CISA KEV cyber threat feeds. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, entity-graph expansion, and the latest Sentinel-2 satellite photo. ShadowBroker has no accounts, product telemetry, or analytics; the dashboard talks to your self-hosted backend. Sensitive recon and Shodan queries never hit third-party APIs from the browser — they are proxied through the backend with SSRF guards and local-operator auth. The **OpenClaw / agent command channel** exposes the same recon backends plus full telemetry search — no separate API integration required.
|
||||
|
||||
Designed for analysts, researchers, radio operators, and anyone who wants to see what the world looks like when every public signal is on the same map.
|
||||
|
||||
@@ -28,18 +28,20 @@ Designed for analysts, researchers, radio operators, and anyone who wants to see
|
||||
|
||||
A surprising amount of global telemetry is already public — aircraft ADS-B broadcasts, maritime AIS signals, satellite orbital data, earthquake sensors, mesh radio networks, police scanner feeds, environmental monitoring stations, internet infrastructure telemetry, and more. This data is scattered across dozens of tools and APIs. ShadowBroker combines all of it into a single interface.
|
||||
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. No user data is collected or transmitted — everything runs locally against a self-hosted backend. No telemetry, no analytics, no accounts.
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. ShadowBroker does not include product telemetry, analytics, or accounts. Operator-supplied keys stay in your local deployment, but live OSINT features necessarily make outbound requests to the public data providers you enable or query.
|
||||
|
||||
### Shodan Connector
|
||||
### Shodan & Recon (security-first)
|
||||
|
||||
ShadowBroker includes an optional Shodan connector for operator-supplied API access. Shodan results are fetched with your own `SHODAN_API_KEY`, rendered as a local investigative overlay (not merged into core feeds), and remain subject to Shodan’s terms of service.
|
||||
ShadowBroker includes an optional **Shodan connector** for operator-supplied API access (`SHODAN_API_KEY`) and a **Recon Toolkit** panel for keyless OSINT lookups. Both run **server-side only**: the browser calls your self-hosted `/api/osint/*` and `/api/tools/shodan/*` routes; outbound requests are made by the backend after SSRF validation. Recon requires **local-operator** access (same trust model as layer toggles and admin routes). Shodan results render as a separate map overlay and remain subject to Shodan’s terms of service.
|
||||
|
||||
> **Not included:** embedded live-news YouTube grids or a built-in Gemini AI analyst panel — use the **OpenClaw / agent channel** for AI-assisted analysis instead.
|
||||
|
||||
---
|
||||
|
||||
## Interesting Use Cases
|
||||
|
||||
* **Track Air Force One**, the private jets of billionaires and dictators, and every military tanker, ISR, and fighter broadcasting ADS-B. Air Force One and all of the accompanying Presidential/Vice Presidential planes are highlighted and monitored from the moment they leave the ground.
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 35+ data layers, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 40+ data layers, compact cross-layer search (`search_telemetry`, `search_news`), the full recon toolkit (`osint_lookup` for IP/DNS/WHOIS/sanctions/CVE/etc.), entity-graph expansion, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Communicate on the InfoNet testnet** — The first decentralized intelligence mesh built into an OSINT tool. Obfuscated messaging with gate personas, Dead Drop peer-to-peer exchange, and a built-in terminal CLI. No accounts, no signup. Privacy is not guaranteed yet — this is an experimental testnet — but the protocol is live and being hardened.
|
||||
* **Right-click anywhere on Earth** for a country dossier (head of state, population, languages), Wikipedia summary, and the latest Sentinel-2 satellite photo at 10m resolution
|
||||
* **Click a KiwiSDR node** and tune into live shortwave radio directly in the dashboard. Click a police scanner feed and eavesdrop in one click.
|
||||
@@ -55,6 +57,12 @@ ShadowBroker includes an optional Shodan connector for operator-supplied API acc
|
||||
* **Track trains** across the US (Amtrak) and Europe (DigiTraffic) in real time
|
||||
* **Estimate where US aircraft carriers are** using automated GDELT news scraping — no other open tool does this
|
||||
* **Search internet-connected devices worldwide** via Shodan — cameras, SCADA systems, databases — plotted as a live overlay on the map
|
||||
* **Run a full recon toolkit** from the left sidebar — IP geolocation, DNS, RDAP/WHOIS, certificate transparency, BGP/ASN, OFAC sanctions search, CVE lookup, Tor/OTX threat checks, and subnet sweeps (InternetDB proxied server-side)
|
||||
* **Expand an entity graph** when you select an aircraft, vessel, company, or IP — Wikidata + OFAC + live store cross-links rendered in the Entity Graph panel
|
||||
* **Monitor supply-chain risk** — Tier 1/2 semiconductor and battery fabs scored against nearby earthquakes, wildfires, and conflict events (SCM panel)
|
||||
* **Toggle malware C2 hotspots** — abuse.ch Feodo Tracker + URLhaus feeds mapped by country (opt-in layer)
|
||||
* **Monitor Telegram OSINT channels** — public `t.me/s` war/conflict feeds (OSINTdefender, NEXTA, etc.) scraped hourly, risk-scored, geoparsed to metro anchors, and plotted as clickable map pins with inline media
|
||||
* **Overlay global submarine cables** — static TeleGeography-derived cable routes (opt-in layer)
|
||||
|
||||
|
||||
---
|
||||
@@ -83,6 +91,8 @@ Both paths produce identical containers — same source, same CI, same images by
|
||||
|
||||
Open `http://localhost:3000` to view the dashboard! *(Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/) or Docker Engine)*
|
||||
|
||||
> **Join the private InfoNet swarm (sb-testnet-0):** Click **NODE** in the dashboard, or run `./meshnode.sh` for a headless participant. No manual peer list — fleet defaults discover the seed and pull the signed manifest automatically. Set `MESH_INFONET_FLEET_JOIN=false` in `.env` for a private solo node.
|
||||
|
||||
> **Backend port already in use?** The browser only needs port `3000`, but the backend API is also published on host port `8000` for local diagnostics. If another app already uses `8000`, create or edit `.env` next to `docker-compose.yml` and set `BACKEND_PORT=8001`, then run `docker compose up -d`.
|
||||
|
||||
> **Blank news/UAP/bases/wastewater after several minutes?** Check for backend OOM restarts with `docker events --since 30m --filter container=shadowbroker-backend --filter event=oom`. The default compose file gives the backend 4GB; if your host has less memory, reduce enabled feeds or set `BACKEND_MEMORY_LIMIT=3G` and expect slower/heavier layers to warm more gradually.
|
||||
@@ -113,6 +123,20 @@ That's it. `pull` grabs the latest images, `up -d` restarts the containers.
|
||||
>
|
||||
> Podman users should run the equivalent provider command, for example `podman-compose pull` and `podman-compose up -d`, or use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d` from a bash-compatible shell.
|
||||
|
||||
### Update Integrity
|
||||
|
||||
Docker updates are delivered through signed container registries. The legacy ZIP self-updater verifies release archives through this chain, in order:
|
||||
|
||||
* `MESH_UPDATE_SHA256` when an operator pins a digest explicitly.
|
||||
* `backend/data/release_digests.json` for bundled release pins.
|
||||
* The release `SHA256SUMS.txt` asset on GitHub when a bundled pin is not present.
|
||||
|
||||
Release maintainers should run `python backend/scripts/release_helper.py hash <ShadowBroker_vX.Y.Z.zip>` before publishing, then publish `SHA256SUMS.txt` and update `backend/data/release_digests.json` when shipping a ZIP updater target. The updater keeps the operator override path intact instead of failing closed on missing bundled digests, so existing installs do not get stranded by a release-process mistake.
|
||||
|
||||
### CSP Hardening
|
||||
|
||||
The production frontend ships with a hydration-compatible CSP and a strict nonce-only CSP in `Content-Security-Policy-Report-Only`. Set `SHADOWBROKER_STRICT_CSP=1` only after verifying the exact build hydrates correctly in your deployment. Runtime Google Fonts are not required; the bundled Next font pipeline serves the dashboard font from the app build.
|
||||
|
||||
### ⚠️ **Stuck on the old version?**
|
||||
|
||||
**If `git pull` fails or `docker compose up` keeps building from source instead of pulling images**, your clone predates a March 2026 repository migration that rewrote commit history. A normal `git pull` cannot fix this. Run:
|
||||
@@ -174,7 +198,7 @@ ShadowBroker v0.9.7 ships **InfoNet** (decentralized intelligence mesh + Soverei
|
||||
| Channel | Privacy Status | Details |
|
||||
|---|---|---|
|
||||
| **Meshtastic / APRS** | **PUBLIC** | RF radio transmissions are public and interceptable by design. |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden. |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden, even though the transport is designed to route through Tor and Reticulum (work in progress). |
|
||||
| **Dead Drop DMs** | **STRONGEST CURRENT LANE** | Token-based epoch mailbox with SAS word verification. Strongest lane in this build, but not yet confidently private. |
|
||||
| **Sovereign Shell governance** | **PUBLIC LEDGER** | Petitions, votes, upgrade hashes, and dispute stakes are signed events on a public hashchain. Pseudonymous via gate persona, but governance actions are intentionally observable. |
|
||||
| **Privacy primitives (RingCT / stealth / DEX)** | **NOT YET WIRED** | Locked Protocol contracts are in place, but the cryptographic scheme has not been chosen. The privacy-core Rust crate is the integration target for a future sprint. |
|
||||
@@ -199,7 +223,7 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Communication layer (since v0.9.6):**
|
||||
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay using Tor and Reticulum. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **Mesh Chat Panel** — Three-tab interface: **INFONET** (gate chat with obfuscated transport), **MESH** (Meshtastic radio integration), **DEAD DROP** (peer-to-peer message exchange with token-based epoch mailboxes — strongest current lane).
|
||||
* **Gate Persona System** — Pseudonymous identities with Ed25519 signing keys, prekey bundles, SAS word contact verification, and abuse reporting.
|
||||
* **Mesh Terminal** — Built-in CLI: `send`, `dm`, market commands, gate state inspection. Draggable panel, minimizes to the top bar. Type `help` to see all commands.
|
||||
@@ -219,17 +243,34 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Privacy primitive runway (NEW in v0.9.7):**
|
||||
|
||||
* **Function Keys — Anonymous Citizenship Proof** — A citizen proves "I am an Infonet citizen" without revealing their Infonet identity. 5 of 6 pieces shipped: nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, batched settlement. Issuance via blind signatures waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Function Keys — Anonymous Credential Scaffolding** — The plumbing is in place for nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, and batched settlement. Today's challenge-response is an HMAC-based placeholder for integration testing, not a production anonymous or zero-knowledge citizenship proof. True unlinkable issuance still waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Locked Protocol Contracts** — Stable interfaces in `services/infonet/privacy/contracts.py` for ring signatures, stealth addresses, Pedersen commitments, range proofs, and DEX matching. The `privacy-core` Rust crate is the integration target — no caller of the privacy module needs to know which scheme is active.
|
||||
* **Sprint 11+ Path** — When the cryptographic scheme is chosen, primitives wire into the locked Protocols without API churn.
|
||||
|
||||
> **Experimental Testnet — No Privacy Guarantee:** InfoNet messages are obfuscated but NOT end-to-end encrypted. The Mesh network (Meshtastic/APRS) is NOT private — radio transmissions are inherently public. The privacy primitive contracts are scaffolded but not yet wired. Do not send anything sensitive on any channel. Treat all channels as open and public for now.
|
||||
|
||||
### 🔍 Shodan Device Search (NEW in v0.9.6)
|
||||
### 🔍 Recon Toolkit & Shodan (Osiris-derived, security-first)
|
||||
|
||||
* **Internet Device Search** — Query Shodan directly from ShadowBroker. Search by keyword, CVE, port, or service — results plotted as a live overlay on the map
|
||||
Adapted from the [OSIRIS](https://github.com/simplifaisoul/osiris) recon stack (MIT) with ShadowBroker’s proxy model. Attribution: `backend/third_party/osiris/NOTICE.md`.
|
||||
|
||||
**Recon Toolkit** (left sidebar — local operator only):
|
||||
|
||||
* **IP / DNS / WHOIS** — ip-api.com geolocation, Google DNS-over-HTTPS, RDAP registrant data with optional HTTP security header scoring
|
||||
* **Certificates & BGP** — crt.sh subdomain discovery, bgpview.io ASN/prefix lookups
|
||||
* **Threat intel** — AlienVault OTX pulses, Tor exit-node checks, optional per-IP/domain reputation
|
||||
* **Sanctions** — OpenSanctions `us_ofac_sdn` index (CC-BY); cross-checks on WHOIS entities and IP ISP/org strings
|
||||
* **CVE / MAC / GitHub / leaks** — MITRE CVE API, MAC vendor lookup, GitHub profile recon, public breach checks
|
||||
* **IP sweep** — `/api/osint/sweep/scan` geolocates a target /24–/32 and proxies Shodan InternetDB host discovery server-side (browser never contacts InternetDB directly)
|
||||
* **SSRF guard** — Private, loopback, link-local, and metadata hostnames are blocked before any user-supplied fetch
|
||||
|
||||
**Entity graph** — Select any map entity to open the Entity Graph panel (`GET /api/entity/expand`). Resolves aircraft, vessels, companies, persons, IPs, and countries into a node/link graph (Wikidata SPARQL + OFAC + in-memory flight/ship store).
|
||||
|
||||
**OpenClaw / agent access** — The same recon backends are available on the HMAC command channel (no browser local-operator gate): `osint_lookup` (passive IP/DNS/WHOIS/certs/BGP/sanctions/CVE/MAC/GitHub/leaks/threats), `entity_expand` (relationship graph), and `osint_sweep` (active subnet scan — **full** access tier only). Call `osint_tools` to list supported lookup types. Skill package: `openclaw-skills/shadowbroker/` (`SKILL.md` + `sb_query.py`).
|
||||
|
||||
**Shodan overlay** (unchanged):
|
||||
|
||||
* **Internet Device Search** — Query Shodan with your own API key; results plotted as a live overlay
|
||||
* **Configurable Markers** — Shape, color, and size customization for Shodan results
|
||||
* **Operator-Supplied API** — Uses your own `SHODAN_API_KEY`; results rendered as a local investigative overlay
|
||||
|
||||
### 🛩️ Aviation Tracking
|
||||
|
||||
@@ -317,11 +358,12 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
### 📷 Surveillance
|
||||
|
||||
* **CCTV Mesh** — 11,000+ live traffic cameras from 13 sources across 6 countries:
|
||||
* **CCTV Mesh** — 22,000+ live traffic cameras from 21 ingestors across 10 countries (US, UK, Canada, Australia, Austria, Spain, Singapore, Netherlands when NDW feed is up, plus OSM):
|
||||
* 🇬🇧 Transport for London JamCams
|
||||
* 🇺🇸 NYC DOT, Austin TX (TxDOT)
|
||||
* 🇺🇸 California (12 Caltrans districts), Washington State (WSDOT), Georgia DOT, Illinois DOT, Michigan DOT
|
||||
* 🇪🇸 Spain DGT National (20 cities), Madrid City (357 cameras via KML)
|
||||
* 🇦🇹 Austria ASFINAG motorway webcams
|
||||
* 🇸🇬 Singapore LTA
|
||||
* 🌍 Windy Webcams
|
||||
* **Feed Rendering** — Automatic detection & rendering of video, MJPEG, HLS, embed, satellite tile, and image feeds
|
||||
@@ -342,6 +384,12 @@ The first decentralized intelligence communication and governance layer built di
|
||||
* **Data Center Mapping** — 2,000+ global data centers plotted from a curated dataset. Clustered purple markers with server-rack icons. Click for operator, location, and automatic internet outage cross-referencing by country.
|
||||
* **Military Bases** — Global military installation and missile facility database (NEW)
|
||||
* **Power Plants** — 35,000+ global power plants from the WRI database (NEW)
|
||||
* **Submarine Cables** — Global undersea cable routes from static TeleGeography-derived GeoJSON (`frontend/public/data/submarine-cables.json`). Opt-in line overlay.
|
||||
* **Malware C2 Layer** — Botnet C2 servers (Feodo Tracker) and recent malware URLs (URLhaus) from abuse.ch, refreshed on the slow tier when the layer is enabled.
|
||||
* **SCM Supplier Risk** — Tier 1/2 fabs and battery plants (TSMC, Samsung, CATL, etc.) cross-referenced against earthquakes, FIRMS fires, and GDELT conflict proximity. Alerts in the SCM panel; optional map layer.
|
||||
* **Cyber Threats Feed** — Recent CISA Known Exploited Vulnerabilities (KEV) entries exposed via `/api/cyber-threats` and the layer toggle.
|
||||
* **Country Risk Index** — Static geopolitical risk scores with USGS earthquake enrichment via `/api/country-risk`.
|
||||
* **Telegram OSINT** — Public channel web previews (`t.me/s/*`) from configurable war/OSINT feeds. Hourly incremental merge (no redundant re-scrape), keyword risk scoring, Cyrillic/Arabic place aliases, metro-anchor geocoding (separate from news centroids), inline photo/video via `/api/telegram/media` proxy. Layer key: `telegram_osint`.
|
||||
|
||||
### 🌐 Additional Layers & Tools
|
||||
|
||||
@@ -367,7 +415,9 @@ v0.9.7 turns ShadowBroker from a dashboard a human watches into an intelligence
|
||||
|
||||
**Capabilities:**
|
||||
|
||||
* **Full Telemetry Access** — The agent queries all 35+ data layers: flights, ships, satellites, SIGINT, conflict events, earthquakes, fires, wastewater, prediction markets, and more. Fast and slow tier endpoints return enriched data with geographic coordinates, timestamps, and source attribution.
|
||||
* **Full Telemetry Access** — The agent queries all 40+ data layers: flights, ships, satellites, SIGINT, conflict events, earthquakes, fires, wastewater, **Telegram OSINT**, malware/C2, **CISA KEV cyber threats**, SCM overlays, fishing activity (GFW), prediction markets, and more. Fast and slow tier endpoints return enriched data with geographic coordinates, timestamps, and source attribution.
|
||||
* **Compact Search (preferred over full dumps)** — `get_summary` → `get_layer_slice` with per-layer `since_layer_versions` (SSE `layer_changed` push tells the agent exactly which layers updated). `search_telemetry` is the Google-style cross-layer keyword index. `search_news` covers news, GDELT, CrowdThreat, LiveUAMap, frontlines, and Telegram posts. `entities_near`, `brief_area`, `find_flights`/`find_ships`/`find_entity`, and `correlate_entity` answer targeted questions without multi-megabyte pulls.
|
||||
* **Recon Toolkit on the Channel** — `osint_lookup` runs the same SSRF-guarded backends as the Recon panel (`ip`, `dns`, `whois`, `certs`, `bgp`, `sanctions`, `cve`, `mac`, `github`, `leaks`, `threats`, `sweep_init`). `entity_expand` builds Wikidata + OFAC relationship graphs. `osint_sweep` runs Shodan InternetDB subnet discovery (**full** tier). Layer aliases: `telegram`, `malware`/`botnet`, `cyber`/`cisa`/`kev`, `scm`/`suppliers`, `gfw`/`fishing`.
|
||||
* **AI Intel Pins** — Place color-coded investigation markers directly on the operator's map. 14 pin categories (threat, anomaly, military, maritime, aviation, SIGINT, infrastructure, etc.) with confidence scores, TTL expiry, source URLs, and batch placement up to 100 pins at once.
|
||||
* **Map Control** — Fly the operator's map view to any coordinate, trigger satellite imagery lookups, and open region dossiers. The agent can direct the operator's attention to specific locations in real time.
|
||||
* **SAR Ground-Change** — Query SAR anomaly feeds, inspect pin details, manage AOIs, and fly the map to watch areas. The agent can monitor for ground deformation, flood extent, or damage and promote anomalies to pins.
|
||||
@@ -380,7 +430,7 @@ v0.9.7 turns ShadowBroker from a dashboard a human watches into an intelligence
|
||||
* **Intelligence Reports** — Generate structured reports with summary stats, top military flights, correlations, earthquake activity, SIGINT counts, and pin inventories.
|
||||
* **Auditable** — Every channel call is logged; the operator can introspect what the agent has done.
|
||||
|
||||
**Connect an agent:** Open the AI Intel panel in the left sidebar, click **Connect Agent**, and copy the HMAC secret. From there, point any compatible agent at the channel — for OpenClaw, import `ShadowBrokerClient` from the OpenClaw skill package; for any other agent, use the same HMAC contract documented above (timestamp + nonce + body digest, tier-gated). The channel is the protocol, not the agent.
|
||||
**Connect an agent:** Open the AI Intel panel in the left sidebar, click **Connect Agent**, and copy the HMAC secret. From there, point any compatible agent at the channel — for OpenClaw, import `ShadowBrokerClient` from `openclaw-skills/shadowbroker/sb_query.py` (see `SKILL.md` for examples); for any other agent, use the same HMAC contract documented above (timestamp + nonce + body digest, tier-gated). Discovery: `GET /api/ai/tools` and `GET /api/ai/capabilities`. The channel is the protocol, not the agent.
|
||||
|
||||
### ⏱️ Time Machine — Snapshot Playback (NEW in v0.9.7)
|
||||
|
||||
@@ -529,9 +579,20 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes — the **Ope
|
||||
| [GDELT Project](https://www.gdeltproject.org) | Global conflict events | ~6h | No |
|
||||
| [DeepState Map](https://deepstatemap.live) | Ukraine frontline | ~30min | No |
|
||||
| [Shodan](https://www.shodan.io) | Internet-connected device search | On-demand | **Yes** |
|
||||
| [OpenSanctions](https://www.opensanctions.org) | OFAC SDN sanctions index (recon + entity graph) | 24h cache | No |
|
||||
| [abuse.ch Feodo + URLhaus](https://abuse.ch) | Malware C2 / distribution URLs | ~5min (opt-in layer) | No |
|
||||
| [CISA KEV](https://www.cisa.gov/known-exploited-vulnerabilities-catalog) | Known exploited CVEs | ~5min (opt-in layer) | No |
|
||||
| [ip-api.com](https://ip-api.com) | IP geolocation (recon, entity graph) | On-demand | No |
|
||||
| [Google Public DNS](https://dns.google) | DNS-over-HTTPS lookups (recon) | On-demand | No |
|
||||
| [RDAP.org](https://rdap.org) | Domain registration data (recon) | On-demand | No |
|
||||
| [crt.sh](https://crt.sh) | Certificate transparency (recon) | On-demand | No |
|
||||
| [bgpview.io](https://bgpview.io) | BGP/ASN routing (recon) | On-demand | No |
|
||||
| TeleGeography (static) | Submarine cable routes | Static | No |
|
||||
| [ASFINAG](https://www.asfinag.at) | Austria motorway webcams | ~10min | No |
|
||||
| [Amtrak](https://www.amtrak.com) | US train positions | ~60s | No |
|
||||
| [DigiTraffic](https://www.digitraffic.fi) | European rail positions | ~60s | No |
|
||||
| [Global Fishing Watch](https://globalfishingwatch.org) | Fishing vessel activity events | ~10min | No |
|
||||
| [Global Fishing Watch](https://globalfishingwatch.org) | Fishing vessel activity events | ~1hr | **Yes** (`GFW_API_TOKEN`) |
|
||||
| [Telegram public previews](https://t.me/s) | War/OSINT channel posts (`telegram_osint`) | ~1hr | No (optional `TELEGRAM_OSINT_CHANNELS`) |
|
||||
| Transport for London, NYC DOT, TxDOT | CCTV cameras (UK, US) | ~10min | No |
|
||||
| Caltrans, WSDOT, GDOT, IDOT, MDOT | CCTV cameras (5 US states) | ~10min | No |
|
||||
| Spain DGT, Madrid City | CCTV cameras (Spain) | ~10min | No |
|
||||
@@ -563,6 +624,8 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes — the **Ope
|
||||
| [OSM Nominatim](https://nominatim.openstreetmap.org) | Place name geocoding (LOCATE bar) | On-demand | No |
|
||||
| [CARTO Basemaps](https://carto.com) | Dark map tiles | Continuous | No |
|
||||
|
||||
**Outbound privacy & audit (#348–#366):** Each self-hosted install uses its own backend IP and per-install User-Agent handle. See [docs/OUTBOUND_DATA.md](docs/OUTBOUND_DATA.md) for what contacts third parties, opt-in/env controls, and accepted tradeoffs (CCTV Referer, basemap CDN, LiveUAMap, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Getting Started
|
||||
@@ -584,9 +647,16 @@ Open `http://localhost:3000` to view the dashboard.
|
||||
> **Deploying publicly or on a LAN?** No configuration needed for most setups.
|
||||
> The frontend proxies all API calls through the Next.js server to `BACKEND_URL`,
|
||||
> which defaults to `http://backend:8000` (Docker internal networking).
|
||||
> Host port `8000` is only published for local API/debug access. If it conflicts
|
||||
> with another service, set `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL`
|
||||
> as `http://backend:8000` because that is the Docker-internal port.
|
||||
> Host port `8000` is only published for local API/debug access (`127.0.0.1:8000`
|
||||
> in `docker-compose.yml`). If it conflicts with another service, set
|
||||
> `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL` as `http://backend:8000`
|
||||
> because that is the Docker-internal port.
|
||||
>
|
||||
> **Running the backend outside Docker** (`cd backend && python main.py`):
|
||||
> the dev server binds **loopback only** (`127.0.0.1:8000`) so other machines on
|
||||
> your LAN cannot hit admin/local-trust routes with an empty `ADMIN_KEY`. Set
|
||||
> `SHADOWBROKER_DEV_BIND_ALL=true` in `.env` only when you deliberately need
|
||||
> `0.0.0.0` and use a strong `ADMIN_KEY` for any non-local callers.
|
||||
> The backend memory cap is controlled by `BACKEND_MEMORY_LIMIT` and defaults
|
||||
> to `4G`. If Docker reports OOM events, the backend will restart and slow
|
||||
> layers can look empty until they repopulate.
|
||||
@@ -798,7 +868,7 @@ AIS-catcher decodes VHF radio signals on 161.975 MHz and 162.025 MHz and POSTs d
|
||||
|
||||
## 🎛️ Data Layers
|
||||
|
||||
All 37 layers are independently toggleable from the left panel:
|
||||
All 41 layers are independently toggleable from the left panel:
|
||||
|
||||
| Layer | Default | Description |
|
||||
|---|---|---|
|
||||
@@ -840,6 +910,24 @@ All 37 layers are independently toggleable from the left panel:
|
||||
| VIIRS Nightlights | ❌ OFF | Night-time light change detection |
|
||||
| Power Plants | ❌ OFF | 35,000+ global power plants |
|
||||
| Shodan Overlay | ❌ OFF | Internet device search results |
|
||||
| Road Freight Trends | ❌ OFF | Sentinel-2 truck-motion trends on major highways (Analyze Here) |
|
||||
| Submarine Cables | ❌ OFF | Global undersea cable routes (static GeoJSON) |
|
||||
| Malware C2 | ❌ OFF | abuse.ch Feodo + URLhaus threat points |
|
||||
| SCM Suppliers | ❌ OFF | Tier 1/2 supply-chain risk markers + panel alerts |
|
||||
| Cyber Threats | ❌ OFF | Recent CISA KEV entries (stats in slow-tier payload) |
|
||||
| Telegram OSINT | ✅ ON | Public war/OSINT Telegram channels — hourly scrape, geoparsed pins |
|
||||
| SAR | ✅ ON | Synthetic aperture radar catalog + anomaly alerts |
|
||||
|
||||
**Recon & entity tools** (not map layers — left sidebar / selection):
|
||||
|
||||
| Tool | Dashboard access | OpenClaw command | Description |
|
||||
|---|---|---|---|
|
||||
| Recon Toolkit | Local operator (`/api/osint/*`) | `osint_lookup`, `osint_sweep`† | IP, DNS, WHOIS, certs, BGP, sanctions, CVE, MAC, GitHub, leaks, threats, subnet sweep |
|
||||
| Entity Graph | Local operator (`/api/entity/expand`) | `entity_expand` | Wikidata + OFAC + live-store relationship graph |
|
||||
| SCM Risk panel | Local operator (`/api/scm-suppliers`) | `get_layer_slice(["scm_suppliers"])` | Supplier threat rollup + map markers |
|
||||
| Tool discovery | — | `osint_tools` | Lists recon lookup types and entity-expand schemas |
|
||||
|
||||
† `osint_sweep` (active InternetDB scan) requires `OPENCLAW_ACCESS_TIER=full`.
|
||||
|
||||
---
|
||||
|
||||
@@ -863,6 +951,7 @@ The platform is optimized for handling massive real-time datasets:
|
||||
|
||||
```
|
||||
Shadowbroker/
|
||||
├── openclaw-skills/shadowbroker/ # OpenClaw skill — SKILL.md, sb_query.py client, alerts/monitor helpers
|
||||
├── backend/
|
||||
│ ├── main.py # FastAPI app, middleware, API routes (~4,000 lines)
|
||||
│ ├── cctv.db # SQLite CCTV camera database (auto-generated)
|
||||
@@ -872,7 +961,18 @@ Shadowbroker/
|
||||
│ │ ├── data_fetcher.py # Core scheduler — orchestrates all data sources
|
||||
│ │ ├── ais_stream.py # AIS WebSocket client (25K+ vessels)
|
||||
│ │ ├── carrier_tracker.py # OSINT carrier position estimator (GDELT news scraping)
|
||||
│ │ ├── cctv_pipeline.py # 13-source CCTV camera ingestion pipeline
|
||||
│ │ ├── cctv_pipeline.py # 14-source CCTV camera ingestion pipeline
|
||||
│ │ ├── ssrf_guard.py # SSRF validation for operator recon fetches
|
||||
│ │ ├── sanctions/ofac.py # OpenSanctions OFAC SDN index
|
||||
│ │ ├── osint/lookups.py # Server-side recon lookups (Osiris port)
|
||||
│ │ ├── osint/openclaw_recon.py # OpenClaw dispatch for recon + entity_expand
|
||||
│ │ ├── osint_intel/resolve.py # Entity graph resolver (Wikidata + OFAC)
|
||||
│ │ ├── scm/suppliers.py # Supply-chain risk overlay
|
||||
│ │ ├── intel_feeds/ # Country risk index helpers
|
||||
│ │ ├── fetchers/malware.py # abuse.ch Feodo + URLhaus
|
||||
│ │ ├── fetchers/cyber_status.py # CISA KEV feed
|
||||
│ │ ├── fetchers/telegram_osint.py # Public Telegram channel scrape + geoparse
|
||||
│ │ ├── third_party/osiris/ # MIT attribution for Osiris-derived code
|
||||
│ │ ├── geopolitics.py # GDELT + Ukraine frontline + air alerts
|
||||
│ │ ├── region_dossier.py # Right-click country/city intelligence
|
||||
│ │ ├── radio_intercept.py # Police scanner feeds + OpenMHZ
|
||||
@@ -910,7 +1010,14 @@ Shadowbroker/
|
||||
│ │ ├── mesh_reputation.py # Node reputation scoring
|
||||
│ │ ├── mesh_oracle.py # Oracle consensus protocol
|
||||
│ │ └── mesh_secure_storage.py # Secure credential storage
|
||||
│ ├── routers/
|
||||
│ │ ├── osint.py # /api/osint/* recon routes (local operator)
|
||||
│ │ ├── entity_graph.py # /api/entity/expand
|
||||
│ │ ├── scm.py # /api/scm-suppliers
|
||||
│ │ └── intel_feeds.py # /api/malware, /api/cyber-threats, /api/telegram-feed, /api/country-risk
|
||||
├── frontend/
|
||||
│ ├── public/data/
|
||||
│ │ └── submarine-cables.json # Static undersea cable GeoJSON
|
||||
│ ├── src/
|
||||
│ │ ├── app/
|
||||
│ │ │ └── page.tsx # Main dashboard — state, polling, layout
|
||||
@@ -919,7 +1026,12 @@ Shadowbroker/
|
||||
│ │ ├── MeshChat.tsx # InfoNet / Mesh / Dead Drop chat panel
|
||||
│ │ ├── MeshTerminal.tsx # Draggable CLI terminal
|
||||
│ │ ├── NewsFeed.tsx # SIGINT feed + entity detail panels
|
||||
│ │ ├── WorldviewLeftPanel.tsx # Data layer toggles (35+ layers)
|
||||
│ │ ├── WorldviewLeftPanel.tsx # Data layer toggles (40+ layers)
|
||||
│ │ ├── ShodanPanel.tsx # Shodan device search overlay
|
||||
│ │ ├── ReconPanel.tsx # Server-side OSINT recon toolkit
|
||||
│ │ ├── ScmPanel.tsx # Supply-chain risk command panel
|
||||
│ │ ├── EntityGraphPanel.tsx # Entity graph on map selection
|
||||
│ │ ├── MaplibreViewer/popups/TelegramOsintPopup.tsx # Threat-intercept styled Telegram pin popups
|
||||
│ │ ├── WorldviewRightPanel.tsx # Search + filter sidebar
|
||||
│ │ ├── AdvancedFilterModal.tsx # Airport/country/owner filtering
|
||||
│ │ ├── MapLegend.tsx # Dynamic legend with all icons
|
||||
@@ -956,6 +1068,9 @@ MESH_SAR_EARTHDATA_TOKEN= # NASA Earthdata token (paired wit
|
||||
MESH_SAR_COPERNICUS_USER= # Copernicus Data Space user (SAR Mode B — EGMS / EMS)
|
||||
MESH_SAR_COPERNICUS_TOKEN= # Copernicus token (paired with user above)
|
||||
OPENCLAW_ACCESS_TIER=restricted # OpenClaw agent tier: "restricted" (read-only) or "full"
|
||||
GFW_API_TOKEN=your_gfw_token # Global Fishing Watch — fishing_activity layer (Settings → Maritime)
|
||||
TELEGRAM_OSINT_ENABLED=true # Telegram OSINT layer (default on)
|
||||
TELEGRAM_OSINT_CHANNELS=osintdefender,... # Comma-separated public channel slugs (see .env.example)
|
||||
|
||||
# Private-lane privacy-core pinning (required when Arti or RNS is enabled)
|
||||
PRIVACY_CORE_MIN_VERSION=0.1.0
|
||||
|
||||
+92
-12
@@ -11,6 +11,22 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
|
||||
# ── Optional ───────────────────────────────────────────────────
|
||||
|
||||
# AISHub REST fallback. Used when stream.aisstream.io is unreachable
|
||||
# (e.g. their cert expires or server goes offline). Free tier requires
|
||||
# registration at https://www.aishub.net/api. Poll cadence defaults to
|
||||
# 20 min to stay courteous; tunable via AISHUB_POLL_INTERVAL_MINUTES.
|
||||
# AISHUB_USERNAME=
|
||||
# AISHUB_POLL_INTERVAL_MINUTES=20
|
||||
|
||||
# `python main.py` (uvicorn reload) binds 127.0.0.1:8000 by default so LAN clients
|
||||
# cannot reach a dev server with empty ADMIN_KEY (#375). Set true only when you
|
||||
# intentionally need 0.0.0.0 and understand the local-trust implications.
|
||||
# SHADOWBROKER_DEV_BIND_ALL=false
|
||||
#
|
||||
# Thread pool for GDELT, LiveUAMap, CCTV ingest, and slow-tier refresh batches.
|
||||
# Keeps heavy jobs from starving fast flight/ship workers (default 2).
|
||||
# SHADOWBROKER_HEAVY_FETCH_WORKERS=2
|
||||
|
||||
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
||||
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
||||
|
||||
@@ -24,14 +40,24 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers.
|
||||
# Project-generic by default — does NOT include any personal contact info or
|
||||
# operator-specific identifier. Override only if you run a public relay and
|
||||
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
|
||||
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
|
||||
# Per-install operator handle. Round 7a: outbound third-party API calls send
|
||||
# this handle as the User-Agent (e.g. operator-7f3a92), not a shared app name,
|
||||
# so upstreams rate-limit one install instead of blocking every user.
|
||||
#
|
||||
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||
# Operators who want a meaningful handle (real name, org, GitHub login) can
|
||||
# set it here. Special characters are sanitized to dashes.
|
||||
# OPERATOR_HANDLE=
|
||||
|
||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
|
||||
# Full User-Agent override (replaces the operator handle entirely). Rare;
|
||||
# most installs should use OPERATOR_HANDLE only.
|
||||
# SHADOWBROKER_USER_AGENT=
|
||||
|
||||
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||
# use the per-install handle (default) — set only if you have a registered
|
||||
# Nominatim relay identity.
|
||||
# NOMINATIM_USER_AGENT=
|
||||
|
||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||
# These data sources phone home to politically/commercially sensitive
|
||||
@@ -45,20 +71,48 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# FIMI_ENABLED=false
|
||||
#
|
||||
# Polymarket + Kalshi — US political/election prediction markets.
|
||||
# Default off; enable from Global Threat Intercept (MKT toggle) or set true here.
|
||||
# PREDICTION_MARKETS_ENABLED=false
|
||||
# When enabled, polls use a jittered schedule (not the fixed 5-minute slow tier):
|
||||
# PREDICTION_MARKETS_INTERVAL_MINUTES=7
|
||||
# PREDICTION_MARKETS_SCHEDULER_JITTER_S=240
|
||||
# PREDICTION_MARKETS_INITIAL_DELAY_MAX_S=180
|
||||
# PREDICTION_MARKETS_PRE_FETCH_JITTER_S=90
|
||||
# PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S=45
|
||||
# MESH_POLYMARKET_PAGE_DELAY_JITTER_S=0.08
|
||||
# MESH_KALSHI_PAGE_DELAY_JITTER_S=0.2
|
||||
#
|
||||
# Finnhub fallback / yfinance — financial market data.
|
||||
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
|
||||
# the unauthenticated yfinance fallback to call Yahoo Finance.
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP sightings — huggingface.co dataset download.
|
||||
# NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days).
|
||||
# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs.
|
||||
# NUFORC_RECENT_DAYS=60
|
||||
# NUFORC_CACHE_TTL_HOURS=168
|
||||
# On Windows, live scrape uses Python requests by default; optional:
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true
|
||||
# NUFORC enrichment index (HF dataset) is separate — opt-in only:
|
||||
# NUFORC_ENABLED=false
|
||||
#
|
||||
# News RSS aggregator — defaults ON. Set to "false" to disable all
|
||||
# configured news feeds (kill switch for the news layer).
|
||||
# NEWS_ENABLED=true
|
||||
|
||||
# Global Fishing Watch — fishing vessel activity events (Fishing Activity map layer).
|
||||
# Free API token from https://globalfishingwatch.org/our-apis/tokens
|
||||
# Without this the fishing_activity layer stays empty.
|
||||
# GFW_API_TOKEN=
|
||||
# Optional tuning — GFW can return 40k+ global events; defaults cap fetch for map paint.
|
||||
# GFW_EVENTS_PAGE_SIZE=500
|
||||
# GFW_EVENTS_MAX_PAGES=10
|
||||
# GFW_EVENTS_LOOKBACK_DAYS=7
|
||||
# GFW_EVENTS_TIMEOUT_S=90
|
||||
|
||||
# Windy Webcams global CCTV layer — free key from https://api.windy.com/webcams/docs
|
||||
# WINDY_API_KEY=
|
||||
|
||||
# LTA Singapore traffic cameras — leave blank to skip this data source.
|
||||
# LTA_ACCOUNT_KEY=
|
||||
|
||||
@@ -66,6 +120,12 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Free MAP_KEY from https://firms.modaps.eosdis.nasa.gov/map/#d:24hrs;@0.0,0.0,3.0z
|
||||
# FIRMS_MAP_KEY=
|
||||
|
||||
# Ukraine frontline mirror (GitHub). Default follows cyterat/deepstate-map-data@main.
|
||||
# Pin an immutable commit SHA so ingest cannot silently change if main is force-pushed (#362).
|
||||
# Example (verify on GitHub before use): main @ b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_COMMIT=b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_REPO=cyterat/deepstate-map-data
|
||||
|
||||
# Ukraine air raid alerts from alerts.in.ua — free token from https://alerts.in.ua/
|
||||
# ALERTS_IN_UA_TOKEN=
|
||||
|
||||
@@ -95,12 +155,16 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# can identify per-install traffic instead of aggregated "ShadowBroker" hits.
|
||||
# Leave blank to send a generic UA. If you set MESHTASTIC_OPERATOR_CALLSIGN,
|
||||
# it can be included in outbound headers to meshtastic.org so they
|
||||
# can rate-limit per-operator. Set MESHTASTIC_SEND_CALLSIGN_HEADER=false to
|
||||
# suppress the callsign while still using it locally (e.g. for APRS).
|
||||
# can rate-limit per-operator. Callsign is NOT sent upstream unless you opt in
# by setting MESHTASTIC_SEND_CALLSIGN_HEADER=true.
|
||||
# MESHTASTIC_OPERATOR_CALLSIGN=
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=true
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=false
|
||||
# MESH_MQTT_PSK= # hex-encoded, empty = default LongFast key
|
||||
|
||||
# LiveUAMap Playwright scraper (#348). Linux/macOS: on by default when Global
|
||||
# Incidents layer is active. Windows: off until the operator enables Global
|
||||
# Incidents in the UI (consent dialog) or sets SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true.
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false forces off on all platforms.
|
||||
|
||||
# ── Mesh / Reticulum (RNS) ─────────────────────────────────────
|
||||
# Full-node / participant-node posture for public Infonet sync.
|
||||
# MESH_NODE_MODE=participant # participant | relay | perimeter
|
||||
@@ -163,7 +227,23 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# MESH_GATE_SESSION_STREAM_MAX_GATES=16
|
||||
# MESH_BOOTSTRAP_DISABLED=false
|
||||
# MESH_BOOTSTRAP_MANIFEST_PATH=data/bootstrap_peers.json
|
||||
# MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY=
|
||||
# Swarm discovery (signed peer manifest). Participants need only the public key;
|
||||
# the seed operator sets MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY (never commit it).
|
||||
# Generate a fleet keypair: uv run python backend/scripts/bootstrap_manifest_helper.py generate-keypair
|
||||
# Public sb-testnet fleet defaults (auto-used when MESH_INFONET_FLEET_JOIN=true).
|
||||
# MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY=ul1d0kj/ODPIp0OhHzX8eLAVXzJ3CVvzW1vn2IC6q3I=
|
||||
# MESH_INFONET_FLEET_JOIN=true
|
||||
# MESH_INFONET_FLEET_JOIN_DISABLED=false
|
||||
# MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY= # seed only
|
||||
# MESH_BOOTSTRAP_SIGNER_ID=shadowbroker-seed
|
||||
# MESH_PEER_REGISTRY_ENABLED=true # seed only (auto-enabled when private key is set)
|
||||
# Headless relay compose sets MESH_INFONET_RELAY_AUTO_WORMHOLE=true; seed nodes with
|
||||
# MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY also auto-enable Tor wormhole on startup.
|
||||
# MESH_INFONET_RELAY_AUTO_WORMHOLE=false
|
||||
# MESH_INFONET_RELAY_AUTO_WORMHOLE_DISABLED=false
|
||||
# MESH_SWARM_MANIFEST_TTL_S=14400
|
||||
# MESH_SWARM_MANIFEST_PULL_INTERVAL_S=300
|
||||
# MESH_PEER_REGISTRY_STALE_S=604800
|
||||
# Infonet/Wormhole fails closed to onion/RNS by default. Only enable clearnet
|
||||
# sync for local relay development or an explicitly public testnet.
|
||||
# MESH_INFONET_ALLOW_CLEARNET_SYNC=false
|
||||
|
||||
+3
-2
@@ -27,6 +27,7 @@ WORKDIR /app
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
tor \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y --no-install-recommends nodejs \
|
||||
@@ -45,7 +46,7 @@ COPY uv.lock /workspace/uv.lock
|
||||
COPY backend/pyproject.toml /workspace/backend/pyproject.toml
|
||||
|
||||
# Install Python dependencies using the lockfile
|
||||
RUN cd /workspace/backend && uv sync --frozen --no-dev \
|
||||
RUN cd /workspace/backend && uv sync --frozen --no-dev --extra road-corridor \
|
||||
&& playwright install --with-deps chromium
|
||||
|
||||
# Copy backend source code
|
||||
@@ -72,7 +73,7 @@ ENV PRIVACY_CORE_LIB=/app/libprivacy_core.so
|
||||
# Create a non-root user for security
|
||||
# Grant write access to /app so the auto-updater can extract files
|
||||
# Pre-create /app/data so mounted volumes inherit correct ownership
|
||||
RUN adduser --system --uid 1001 backenduser \
|
||||
RUN adduser --system --uid 1001 --home /app backenduser \
|
||||
&& mkdir -p /app/data \
|
||||
&& chown -R backenduser /app \
|
||||
&& chmod -R u+w /app
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
"""Strategic Risk Analytics — game-theoretic early warning layer."""
|
||||
|
||||
from analytics.backtest import (
|
||||
DEFAULT_BACKTEST_ALERT_THRESHOLD,
|
||||
BacktestReport,
|
||||
run_historical_backtest,
|
||||
tune_alert_threshold,
|
||||
)
|
||||
from analytics.gt_early_warning import GT_EarlyWarning
|
||||
from analytics.integration import get_gt_engine, process_feed_item, refresh_from_latest_data
|
||||
|
||||
__all__ = [
|
||||
"BacktestReport",
|
||||
"DEFAULT_BACKTEST_ALERT_THRESHOLD",
|
||||
"GT_EarlyWarning",
|
||||
"get_gt_engine",
|
||||
"process_feed_item",
|
||||
"refresh_from_latest_data",
|
||||
"run_historical_backtest",
|
||||
"tune_alert_threshold",
|
||||
]
|
||||
@@ -0,0 +1,287 @@
|
||||
"""Historical backtesting for Strategic Risk Analytics.
|
||||
|
||||
This is **benchmark validation**, not forward-weeks prediction on live feeds.
|
||||
|
||||
The suite scores whether costly-signal patterns + Bayesian updating correctly
|
||||
classify curated pre-crisis text snippets (positive cases) vs cheap-talk
|
||||
controls (negative cases) at a tuned alert threshold. A high accuracy on this
|
||||
labeled corpus does **not** imply the engine will score 100% on messy,
|
||||
adversarial, or weeks-ahead production telemetry — opponents adapt, labels are
|
||||
easier here than in the wild, and the window is retrospective.
|
||||
|
||||
Reports accuracy and a conservative Wilson 95% confidence lower bound on the
|
||||
benchmark only. Treat 100% here as "classifier fits the benchmark," not "ship
|
||||
it for multi-week forecasting." For live week-over-week scoring with delayed
|
||||
labels, see ``rolling_backtest.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from analytics.gt_early_warning import GT_EarlyWarning
|
||||
from analytics.historical_events import (
|
||||
HistoricalCase,
|
||||
default_historical_cases,
|
||||
expanded_historical_cases,
|
||||
)
|
||||
from analytics.settings import GTAnalyticsSettings
|
||||
|
||||
DomainName = Literal["financial", "unrest", "conflict"]
|
||||
|
||||
# Validated on expanded suite (82 cases, Wilson lower >= 0.95 at 100% accuracy).
|
||||
DEFAULT_BACKTEST_ALERT_THRESHOLD = 0.26
|
||||
MAX_BACKTEST_ALERT_THRESHOLD = 0.39
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CaseResult:
    """Outcome of replaying one labeled historical case through the engine."""

    # Identity and labels copied from the evaluated case.
    case_id: str
    name: str
    kind: str
    region: str
    domain: str
    # Expected label versus the decision taken at the alert threshold.
    expected_alert: bool
    alerted: bool
    correct: bool
    # Highest readings seen while the case's feed items were replayed.
    peak_domain_risk: float
    peak_composite_risk: float
    # Sorted names of the signals the engine reported for this case.
    costly_signals: list[str]
    tags: tuple[str, ...] = ()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BacktestReport:
    """Aggregate metrics for one backtest run plus the per-case results."""

    total_cases: int
    correct: int
    accuracy: float
    confidence_rate: float
    wilson_lower_95: float
    wilson_upper_95: float
    true_positives: int
    true_negatives: int
    false_positives: int
    false_negatives: int
    sensitivity: float
    specificity: float
    alert_threshold: float
    target_confidence: float
    meets_target: bool
    case_results: "tuple[CaseResult, ...]"

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with ratio metrics rounded to 4 decimals.

        Each case is reduced to a subset of its fields; ``region``,
        ``domain``, ``expected_alert`` and ``tags`` are not emitted.
        """
        case_rows = []
        for row in self.case_results:
            case_rows.append(
                {
                    "case_id": row.case_id,
                    "name": row.name,
                    "kind": row.kind,
                    "correct": row.correct,
                    "alerted": row.alerted,
                    "peak_domain_risk": round(row.peak_domain_risk, 4),
                    "peak_composite_risk": round(row.peak_composite_risk, 4),
                    "costly_signals": row.costly_signals,
                }
            )

        summary: dict[str, Any] = {
            "total_cases": self.total_cases,
            "correct": self.correct,
            "accuracy": round(self.accuracy, 4),
            "confidence_rate": round(self.confidence_rate, 4),
            "wilson_lower_95": round(self.wilson_lower_95, 4),
            "wilson_upper_95": round(self.wilson_upper_95, 4),
            "true_positives": self.true_positives,
            "true_negatives": self.true_negatives,
            "false_positives": self.false_positives,
            "false_negatives": self.false_negatives,
            "sensitivity": round(self.sensitivity, 4),
            "specificity": round(self.specificity, 4),
            "alert_threshold": self.alert_threshold,
            "target_confidence": self.target_confidence,
            "meets_target": self.meets_target,
        }
        summary["cases"] = case_rows
        return summary
|
||||
|
||||
|
||||
def wilson_interval(
    successes: int,
    total: int,
    z: float = 1.96,
) -> tuple[float, float]:
    """Return the Wilson score interval ``(lower, upper)`` for a proportion.

    ``z`` is the normal quantile (1.96 gives a 95% interval). A non-positive
    ``total`` yields ``(0.0, 0.0)``. Both bounds are clamped to ``[0, 1]``.
    """
    if total <= 0:
        return 0.0, 0.0

    p_hat = successes / total
    z_sq = z * z
    shrink = 1.0 + z_sq / total
    midpoint = (p_hat + z_sq / (2.0 * total)) / shrink
    spread = (p_hat * (1.0 - p_hat) + z_sq / (4.0 * total)) / total
    half_width = z * math.sqrt(spread) / shrink

    lower = max(0.0, midpoint - half_width)
    upper = min(1.0, midpoint + half_width)
    return lower, upper
|
||||
|
||||
|
||||
def _domain_risk(engine: "GT_EarlyWarning", region: str, domain: str) -> float:
    """Return the engine's prior for a known domain, else the composite risk."""
    known_domains = ("financial", "unrest", "conflict")
    if domain not in known_domains:
        return engine.composite_risk(region)
    return engine.get_prior(region, domain)
|
||||
|
||||
|
||||
def _evaluate_case(
    case: HistoricalCase,
    *,
    settings: GTAnalyticsSettings,
    alert_threshold: float,
) -> CaseResult:
    """Replay one case through a fresh engine and score it against its label.

    Every feed item of the case is processed in order while the peak domain
    and composite risks are tracked, both starting at ``settings.base_prior``.
    The case alerts when its score reaches ``alert_threshold``; cases whose
    ``kind`` is ``"positive"`` are the ones expected to alert.
    """
    engine = GT_EarlyWarning(settings)
    baseline = float(settings.base_prior)
    peak_domain = baseline
    peak_composite = baseline
    seen_signals: set[str] = set()

    for item in case.to_feed_dicts():
        outcome = engine.process_feed_item(item) or {}
        seen_signals.update(str(sig) for sig in (outcome.get("signals") or {}))
        peak_domain = max(peak_domain, _domain_risk(engine, case.region, case.domain))
        peak_composite = max(peak_composite, engine.composite_risk(case.region))

    # Domain-specific score for labeled events; for conflict cases a
    # discounted composite reading may raise the score.
    if case.domain == "conflict":
        score = max(peak_domain, peak_composite * 0.95)
    else:
        score = peak_domain

    did_alert = score >= alert_threshold
    should_alert = case.kind == "positive"

    return CaseResult(
        case_id=case.case_id,
        name=case.name,
        kind=case.kind,
        region=case.region,
        domain=case.domain,
        expected_alert=should_alert,
        alerted=did_alert,
        correct=did_alert == should_alert,
        peak_domain_risk=peak_domain,
        peak_composite_risk=peak_composite,
        costly_signals=sorted(seen_signals),
        tags=case.tags,
    )
|
||||
|
||||
|
||||
def run_historical_backtest(
    cases: tuple[HistoricalCase, ...] | None = None,
    *,
    settings: GTAnalyticsSettings | None = None,
    alert_threshold: float | None = None,
    target_confidence: float = 0.80,
    use_expanded_suite: bool = True,
) -> BacktestReport:
    """Score labeled historical cases and report accuracy with a Wilson 95% CI.

    When ``cases`` is None the expanded suite is used, or the default suite if
    ``use_expanded_suite`` is False. ``alert_threshold`` falls back to
    ``DEFAULT_BACKTEST_ALERT_THRESHOLD``.

    ``confidence_rate`` is the conservative Wilson lower bound, and it is the
    value compared with ``target_confidence`` to set ``meets_target``.
    """
    cfg = settings or GTAnalyticsSettings(enabled=True)

    if alert_threshold is None:
        threshold = float(DEFAULT_BACKTEST_ALERT_THRESHOLD)
    else:
        threshold = float(alert_threshold)

    if cases is not None:
        suite = cases
    else:
        suite = (
            expanded_historical_cases()
            if use_expanded_suite
            else default_historical_cases()
        )

    results = tuple(
        _evaluate_case(case, settings=cfg, alert_threshold=threshold)
        for case in suite
    )

    # Confusion-matrix tallies gathered in a single pass.
    tp = tn = fp = fn = 0
    for outcome in results:
        if outcome.expected_alert:
            if outcome.alerted:
                tp += 1
            else:
                fn += 1
        elif outcome.alerted:
            fp += 1
        else:
            tn += 1

    total = len(results)
    correct = tp + tn
    positives = tp + fn
    negatives = total - positives
    lower, upper = wilson_interval(correct, total)

    return BacktestReport(
        total_cases=total,
        correct=correct,
        accuracy=correct / total if total else 0.0,
        confidence_rate=lower,
        wilson_lower_95=lower,
        wilson_upper_95=upper,
        true_positives=tp,
        true_negatives=tn,
        false_positives=fp,
        false_negatives=fn,
        sensitivity=tp / positives if positives else 0.0,
        specificity=tn / negatives if negatives else 0.0,
        alert_threshold=threshold,
        target_confidence=target_confidence,
        meets_target=lower >= target_confidence,
        case_results=results,
    )
|
||||
|
||||
|
||||
def tune_alert_threshold(
    cases: tuple[HistoricalCase, ...] | None = None,
    *,
    settings: GTAnalyticsSettings | None = None,
    min_threshold: float = 0.20,
    max_threshold: float = 0.65,
    step: float = 0.01,
    target_confidence: float = 0.95,
) -> tuple[float, BacktestReport]:
    """Grid-search the alert threshold that maximizes the Wilson lower bound.

    Thresholds ``min_threshold + i * step`` are evaluated up to
    ``max_threshold``. Candidates are ranked by ``confidence_rate``, then by
    accuracy; when both tie (within 1e-9) the higher threshold wins.

    Args:
        cases: Suite to evaluate; the expanded suite is used when None.
        settings: Passed through to ``run_historical_backtest``.
        min_threshold: First grid point, and the result if nothing beats it.
        max_threshold: Upper end of the grid.
        step: Grid spacing. Zero raises ``ZeroDivisionError``.
        target_confidence: Passed through to each backtest run.

    Returns:
        ``(best_threshold, best_report)``.
    """
    suite = cases if cases is not None else expanded_historical_cases()

    def _run(threshold: float) -> BacktestReport:
        return run_historical_backtest(
            suite,
            settings=settings,
            alert_threshold=threshold,
            target_confidence=target_confidence,
        )

    best_threshold = min_threshold
    best_report = _run(min_threshold)

    steps = int(round((max_threshold - min_threshold) / step))
    # Grid point 0 is the baseline evaluated above; re-running it could never
    # replace the incumbent, so the scan starts at 1.
    for i in range(1, steps + 1):
        threshold = min_threshold + i * step
        report = _run(threshold)

        better_confidence = report.confidence_rate > best_report.confidence_rate
        tied_confidence = math.isclose(
            report.confidence_rate,
            best_report.confidence_rate,
            rel_tol=0.0,
            abs_tol=1e-9,
        )
        better_accuracy = report.accuracy > best_report.accuracy
        tied_accuracy = math.isclose(
            report.accuracy, best_report.accuracy, rel_tol=0.0, abs_tol=1e-9
        )
        prefer_higher_threshold = (
            tied_confidence and tied_accuracy and threshold > best_threshold
        )
        if (
            better_confidence
            or (tied_confidence and better_accuracy)
            or prefer_higher_threshold
        ):
            best_threshold = threshold
            best_report = report

    return best_threshold, best_report
|
||||
@@ -0,0 +1,140 @@
|
||||
"""Daily GT risk readings for micro rolling averages."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DAILY_DIR = Path(__file__).parent.parent / "data" / "gt_rolling" / "daily"
|
||||
_store_lock = threading.Lock()
|
||||
|
||||
|
||||
def daily_store_dir() -> Path:
    """Return the daily-store directory.

    A non-blank ``GT_DAILY_STORE_DIR`` environment variable overrides the
    module default.
    """
    configured = str(os.environ.get("GT_DAILY_STORE_DIR", "")).strip()
    return Path(configured) if configured else _DAILY_DIR
|
||||
|
||||
|
||||
def utc_today() -> date:
    """Return the current calendar date in UTC."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.date()
|
||||
|
||||
|
||||
def date_id(when: date | datetime | None = None) -> str:
|
||||
if when is None:
|
||||
when = utc_today()
|
||||
if isinstance(when, datetime):
|
||||
when = when.date()
|
||||
return when.isoformat()
|
||||
|
||||
|
||||
@dataclass
class DailyRegionReading:
    """One region's risk readings for a single day."""

    region: str
    composite_risk: float
    financial: float
    unrest: float
    conflict: float
    peak_score: float
    readings: int = 1
    last_captured_at: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the reading as a plain dict keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "DailyRegionReading":
        """Build a reading from a stored dict.

        Missing or falsy numeric values become 0.0; a missing or falsy
        ``readings`` becomes 1. The region key is stripped and lower-cased.
        """

        def _number(key: str) -> float:
            return float(raw.get(key) or 0.0)

        return cls(
            region=str(raw.get("region") or "").strip().lower(),
            composite_risk=_number("composite_risk"),
            financial=_number("financial"),
            unrest=_number("unrest"),
            conflict=_number("conflict"),
            peak_score=_number("peak_score"),
            readings=int(raw.get("readings") or 1),
            last_captured_at=str(raw.get("last_captured_at") or ""),
        )
|
||||
|
||||
|
||||
@dataclass
class DailySnapshot:
    """All region readings recorded for one day."""

    date: str
    regions: "dict[str, DailyRegionReading]" = field(default_factory=dict)
    last_updated_at: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as nested plain dicts."""
        serialized_regions = {
            name: reading.to_dict() for name, reading in self.regions.items()
        }
        return {
            "date": self.date,
            "last_updated_at": self.last_updated_at,
            "regions": serialized_regions,
        }

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "DailySnapshot":
        """Build a snapshot from a stored dict.

        Region entries that are not dicts are dropped; region keys are
        stripped and lower-cased.
        """
        stored = raw.get("regions") or {}
        parsed = {
            str(name).strip().lower(): DailyRegionReading.from_dict(entry)
            for name, entry in stored.items()
            if isinstance(entry, dict)
        }
        return cls(
            date=str(raw.get("date") or ""),
            regions=parsed,
            last_updated_at=str(raw.get("last_updated_at") or ""),
        )
|
||||
|
||||
|
||||
def _daily_path(day_id: str) -> Path:
    """Return the JSON file path for ``day_id`` inside the daily store.

    Path separators are replaced with ``-`` and ``..`` sequences are removed
    so the identifier cannot climb out of the store directory.
    """
    # Backslash is a path separator on Windows, so it is neutralised together
    # with the forward slash before the ".." removal.
    safe = day_id.replace("/", "-").replace("\\", "-").replace("..", "")
    return daily_store_dir() / f"{safe}.json"
|
||||
|
||||
|
||||
def _ensure_dir() -> None:
    """Create the daily-store directory, and any missing parents, if absent."""
    store = daily_store_dir()
    store.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def list_daily_ids(*, newest_first: bool = True, limit: int | None = None) -> list[str]:
    """Return the ids (file stems) of the stored ``*.json`` snapshots.

    Ids are sorted as strings, descending when ``newest_first`` is true, and
    cut to the first ``limit`` entries when a limit is given.
    """
    _ensure_dir()
    stems = [entry.stem for entry in daily_store_dir().glob("*.json")]
    stems.sort(reverse=newest_first)
    return stems if limit is None else stems[:limit]
|
||||
|
||||
|
||||
def load_daily(day: date | str | None = None) -> DailySnapshot | None:
    """Load the stored snapshot for ``day``.

    Args:
        day: A ``date``, an existing day-id string, or None for today's UTC
            date.

    Returns:
        The parsed snapshot, or None when the file is missing, does not hold
        a JSON object, or cannot be read or parsed (the failure is logged).
    """
    # String ids are used as-is; sending them through ``date_id`` would call
    # ``.isoformat()`` on a str and raise AttributeError.
    if isinstance(day, str):
        day_id = day.strip()
    else:
        day_id = date_id(day)

    path = _daily_path(day_id)
    if not path.is_file():
        return None
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            return None
        return DailySnapshot.from_dict(raw)
    except (OSError, json.JSONDecodeError, TypeError, ValueError, AttributeError):
        # AttributeError covers a malformed "regions" value (e.g. a list),
        # which has no ``.items()``.
        logger.exception("Failed to load GT daily reading %s", day_id)
        return None
|
||||
|
||||
|
||||
def save_daily(snapshot: DailySnapshot) -> None:
    """Write ``snapshot`` to its JSON file in the daily store.

    The payload is written to a sibling ``.json.tmp`` file and then moved
    over the final path, with both steps done under the module store lock.
    """
    _ensure_dir()
    final_path = _daily_path(snapshot.date)
    staging_path = final_path.with_suffix(".json.tmp")
    serialized = json.dumps(snapshot.to_dict(), indent=2, sort_keys=True)
    with _store_lock:
        staging_path.write_text(serialized, encoding="utf-8")
        staging_path.replace(final_path)
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
|
||||
@@ -0,0 +1,206 @@
|
||||
"""Normalize Shadowbroker feed records into GT analytics feed items."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Iterable
|
||||
|
||||
_DOMAIN_CONFLICT = "conflict"
|
||||
_DOMAIN_UNREST = "unrest"
|
||||
_DOMAIN_FINANCIAL = "financial"
|
||||
|
||||
# Keyword hints used to guess a feed item's domain from its text. Every
# alternative is matched as a whole word, case-insensitively.
# NOTE(review): plural or inflected forms ("protests", "troops", "sanctions")
# do not match the bare keywords below — confirm whether that is intended.
_CONFLICT_HINTS = re.compile(
    r"\b(war|missile|strike|attack|military|invasion|troop|shelling|drone|bomb|nuclear)\b",
    re.I,
)
# "mobiliz" and "demonstrat" are stems: they carry \w* so the closing \b still
# matches at the end of "mobilization", "demonstrators", and similar words.
_UNREST_HINTS = re.compile(
    r"\b(protest|rally|strike|riot|unrest|mobiliz\w*|demonstrat\w*|curfew|purge|coup)\b",
    re.I,
)
_FINANCIAL_HINTS = re.compile(
    r"\b(payroll|loan|default|bankruptcy|liquidity|sanction|supply\s+chain|delay|shortage)\b",
    re.I,
)
|
||||
|
||||
|
||||
def _clean_region(value: Any) -> str:
    """Return ``value`` as a stripped, lower-cased region key.

    Falsy or blank input yields ``"global"``.
    """
    normalized = str(value or "").strip().lower()
    if normalized:
        return normalized
    return "global"
|
||||
|
||||
|
||||
def _infer_domain(text: str, explicit: str | None = None) -> str:
    """Pick the domain for a feed item.

    A recognised ``explicit`` domain is returned unchanged. Otherwise the
    text is checked for conflict hints, then unrest hints. Anything else is
    classified as financial, whether or not a financial hint is present.
    """
    if explicit in {_DOMAIN_CONFLICT, _DOMAIN_UNREST, _DOMAIN_FINANCIAL}:
        return explicit

    ordered_hints = (
        (_CONFLICT_HINTS, _DOMAIN_CONFLICT),
        (_UNREST_HINTS, _DOMAIN_UNREST),
    )
    for pattern, domain in ordered_hints:
        if pattern.search(text):
            return domain
    # Financial is both the explicit-hint result and the fallback.
    return _DOMAIN_FINANCIAL
|
||||
|
||||
|
||||
def _text_from_record(
    record: dict[str, Any],
    *,
    prefer_translation: bool = False,
) -> str:
    """Join a record's text fields, one per line, skipping blank ones.

    With ``prefer_translation`` the translated title and description are used
    when at least one of them is non-blank. Otherwise, and as the fallback,
    the title, description, text and summary fields are joined.
    """

    def _joined(keys: tuple[str, ...]) -> str:
        values = [record.get(key) for key in keys]
        return "\n".join(
            str(value).strip() for value in values if value and str(value).strip()
        )

    if prefer_translation:
        translated = _joined(("title_translated", "description_translated"))
        if translated:
            return translated

    return _joined(("title", "description", "text", "summary"))
|
||||
|
||||
|
||||
_HASHTAG_REGION = re.compile(r"#([a-z][a-z0-9_-]{2,})", re.I)
|
||||
|
||||
|
||||
def _region_from_hashtags(text: str) -> str | None:
|
||||
"""Map common theater hashtags (#Ukraine) to dossier/heatmap region keys."""
|
||||
for match in _HASHTAG_REGION.finditer(text or ""):
|
||||
tag = match.group(1).lower()
|
||||
if tag in {
|
||||
"ukraine",
|
||||
"russia",
|
||||
"israel",
|
||||
"iran",
|
||||
"gaza",
|
||||
"syria",
|
||||
"taiwan",
|
||||
"china",
|
||||
"belfast",
|
||||
"uk",
|
||||
"usa",
|
||||
}:
|
||||
return tag
|
||||
return None
|
||||
|
||||
|
||||
def _region_from_record(record: dict[str, Any], *, text: str = "") -> str:
    """Resolve a region key for ``record``.

    Order of preference: the first truthy value among the ``geotag``,
    ``region``, ``country`` and ``location`` fields; a known region hashtag
    in ``text``; a ``"lat,lng"`` key built from ``coords`` at two decimals;
    and finally ``"global"``.
    """
    explicit_values = (
        record.get(key) for key in ("geotag", "region", "country", "location")
    )
    for value in explicit_values:
        if value:
            return _clean_region(value)

    from_tag = _region_from_hashtags(text)
    if from_tag:
        return from_tag

    pair = record.get("coords")
    if not isinstance(pair, (list, tuple)) or len(pair) < 2:
        return "global"
    try:
        first, second = float(pair[0]), float(pair[1])
    except (TypeError, ValueError):
        return "global"
    return f"{first:.2f},{second:.2f}"
|
||||
|
||||
|
||||
def _entities_from_record(record: dict[str, Any]) -> list[str]:
    """Collect entity labels from a record.

    ``entities``, ``tags`` and ``keywords`` may each be a list (items are
    stringified) or a comma-separated string; blank items are dropped.
    Non-blank ``channel`` and ``source`` values are appended as
    ``channel:<name>`` and ``source:<name>``.
    """
    collected: list[str] = []
    for field_name in ("entities", "tags", "keywords"):
        value = record.get(field_name)
        if isinstance(value, list):
            candidates = [str(item).strip() for item in value]
        elif isinstance(value, str):
            candidates = [piece.strip() for piece in value.split(",")]
        else:
            continue
        collected += [candidate for candidate in candidates if candidate]

    for prefix in ("channel", "source"):
        label = str(record.get(prefix) or "").strip()
        if label:
            collected.append(f"{prefix}:{label}")
    return collected
|
||||
|
||||
|
||||
def normalize_feed_item(record: dict[str, Any], *, source_type: str = "generic") -> dict[str, Any]:
    """Map a news/Telegram/GDELT record into the GT engine schema.

    Telegram OSINT records (``source_type == "telegram_osint"``) prefer their
    translated text. ``_text_from_record`` already falls back to the
    untranslated fields when no translation is present, so one call is enough.
    """
    text = _text_from_record(
        record, prefer_translation=source_type == "telegram_osint"
    )
    region = _region_from_record(record, text=text)
    domain = _infer_domain(text, record.get("domain"))

    # Keep coordinates only when both values parse as floats.
    position = None
    raw_coords = record.get("coords")
    if isinstance(raw_coords, (list, tuple)) and len(raw_coords) >= 2:
        try:
            position = [float(raw_coords[0]), float(raw_coords[1])]
        except (TypeError, ValueError):
            position = None

    return {
        "id": record.get("id") or record.get("link"),
        "text": text,
        "source": str(record.get("source") or source_type),
        "source_type": source_type,
        "region": region,
        "domain": domain,
        "entities": _entities_from_record(record),
        "coords": position,
        "published": record.get("published"),
        "risk_score": record.get("risk_score"),
    }
|
||||
|
||||
|
||||
def iter_telegram_posts(payload: dict[str, Any] | None) -> Iterable[dict[str, Any]]:
    """Yield normalized feed items for the posts in a Telegram payload.

    Entries that are not dicts, or that have neither a description nor a
    title, are skipped. When translation is enabled each post goes through
    ``apply_post_translation`` before it is normalized.
    """
    from services.telegram_translate import apply_post_translation, telegram_translate_enabled

    for post in list((payload or {}).get("posts") or []):
        if not isinstance(post, dict):
            continue
        has_content = post.get("description") or post.get("title")
        if not has_content:
            continue
        if telegram_translate_enabled():
            prepared = apply_post_translation(post)
        else:
            prepared = post
        yield normalize_feed_item(prepared, source_type="telegram_osint")
|
||||
|
||||
|
||||
def iter_news_items(payload: list[dict[str, Any]] | None) -> Iterable[dict[str, Any]]:
    """Yield normalized feed items for news entries and their nested articles.

    Each dict entry is emitted as ``news``, followed by every dict in its
    ``articles`` list as ``news_cluster``. Non-dict entries are skipped.
    """
    entries = list(payload or [])
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        yield normalize_feed_item(entry, source_type="news")
        nested = [
            article
            for article in list(entry.get("articles") or [])
            if isinstance(article, dict)
        ]
        for article in nested:
            yield normalize_feed_item(article, source_type="news_cluster")
|
||||
|
||||
|
||||
def iter_gdelt_features(payload: list[dict[str, Any]] | None) -> Iterable[dict[str, Any]]:
    """Yield normalized feed items for GDELT map features.

    Features without a title or description are skipped. One malformed
    feature (non-mapping ``properties``/``geometry`` or non-numeric
    coordinates) does not stop iteration: it is emitted without coordinates
    or with empty properties.
    """
    for feature in list(payload or []):
        if not isinstance(feature, dict):
            continue

        props = feature.get("properties")
        if not isinstance(props, dict):
            props = {}
        geometry = feature.get("geometry")
        if not isinstance(geometry, dict):
            geometry = {}

        coords = None
        if geometry.get("type") == "Point":
            raw = geometry.get("coordinates")
            if isinstance(raw, (list, tuple)) and len(raw) >= 2:
                # The two values are swapped because normalize_feed_item
                # reads coords as [lat, lng].
                try:
                    coords = [float(raw[1]), float(raw[0])]
                except (TypeError, ValueError):
                    coords = None

        record = {
            "title": props.get("name") or props.get("title"),
            "description": props.get("snippet") or props.get("description"),
            "source": props.get("source") or "gdelt",
            "coords": coords,
            "published": props.get("date") or props.get("published"),
            "region": props.get("location") or props.get("country"),
        }
        if record["title"] or record["description"]:
            yield normalize_feed_item(record, source_type="gdelt")
|
||||
@@ -0,0 +1,128 @@
|
||||
"""Top strategic-risk alerts — ranked regions with map coordinates."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from analytics.integration import get_gt_engine
|
||||
from analytics.settings import get_gt_settings
|
||||
|
||||
|
||||
def _peak_score(props: dict[str, Any]) -> float:
    """Return the largest of the composite, financial, unrest and conflict risks.

    Missing or falsy values count as 0.0.
    """
    return max(
        float(props.get(key) or 0.0)
        for key in ("risk", "financial", "unrest", "conflict")
    )
|
||||
|
||||
|
||||
def _valid_coords(coords: Any) -> tuple[float, float] | None:
|
||||
if not isinstance(coords, (list, tuple)) or len(coords) < 2:
|
||||
return None
|
||||
try:
|
||||
lng = float(coords[0])
|
||||
lat = float(coords[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if not (-90.0 <= lat <= 90.0 and -180.0 <= lng <= 180.0):
|
||||
return None
|
||||
if abs(lat) < 0.001 and abs(lng) < 0.001:
|
||||
return None
|
||||
return lat, lng
|
||||
|
||||
|
||||
def _region_label(region: str) -> str:
    """Return a display label for a region key.

    Blank keys become ``"unknown"``. A comma-separated key whose first and
    last pieces parse as floats is shown as a two-decimal degree pair. Any
    other key has its underscores replaced by spaces.
    """
    label = str(region or "").strip()
    if not label:
        return "unknown"

    pieces = [chunk.strip() for chunk in label.split(",") if chunk.strip()] if "," in label else []
    if len(pieces) >= 2:
        try:
            first = float(pieces[0])
            last = float(pieces[-1])
        except ValueError:
            pass
        else:
            return f"{first:.2f}°, {last:.2f}°"
    return label.replace("_", " ")
|
||||
|
||||
|
||||
def parse_heatmap_alerts(
    heatmap: dict[str, Any] | None,
    *,
    limit: int = 8,
) -> tuple[list[dict[str, Any]], int]:
    """Return ranked alerts and the count of regions plottable on the map.

    Only features with valid coordinates and a non-blank region are kept.
    Rows are ordered by ignition flag, then risk delta, then peak score, all
    descending. At least one row is returned whenever any row exists, even
    for a ``limit`` below 1.
    """

    def _metric(props: dict[str, Any], key: str) -> float:
        return round(float(props.get(key) or 0.0), 4)

    def _rank(row: dict[str, Any]) -> tuple[bool, float, float]:
        return (
            bool(row.get("ignition")),
            float(row.get("risk_delta") or 0.0),
            float(row.get("score") or 0.0),
        )

    candidates: list[dict[str, Any]] = []
    for feature in (heatmap or {}).get("features") or []:
        if not isinstance(feature, dict):
            continue
        point = _valid_coords((feature.get("geometry") or {}).get("coordinates"))
        if point is None:
            continue
        props = feature.get("properties") or {}
        region = str(props.get("region") or "").strip().lower()
        if not region:
            continue

        lat, lng = point
        score = _peak_score(props)
        candidates.append(
            {
                "region": region,
                "region_label": _region_label(region),
                "risk": _metric(props, "risk"),
                "financial": _metric(props, "financial"),
                "unrest": _metric(props, "unrest"),
                "conflict": _metric(props, "conflict"),
                "contagion": _metric(props, "contagion"),
                "score": round(score, 4),
                "lat": lat,
                "lng": lng,
                "ignition": bool(props.get("micro_ignition")),
                "risk_3d_avg": props.get("risk_3d_avg"),
                "risk_delta": props.get("risk_delta"),
                "updates": int(props.get("updates") or 0),
            }
        )

    ranked = sorted(candidates, key=_rank, reverse=True)
    return ranked[: max(1, limit)], len(ranked)
|
||||
|
||||
|
||||
def top_gt_alerts(*, limit: int = 8) -> dict[str, Any]:
    """Ranked top regions for API / OpenClaw.

    Without an engine the result is built from an empty feature collection
    and reports zero engine regions.
    """
    settings = get_gt_settings()
    engine = get_gt_engine()

    if engine is None:
        heatmap: dict[str, Any] = {"type": "FeatureCollection", "features": []}
        engine_regions = 0
    else:
        heatmap = engine.get_risk_heatmap()
        with engine._lock:  # noqa: SLF001 — intentional meta read
            engine_regions = len(engine._regions)

    alerts, plotted = parse_heatmap_alerts(heatmap, limit=limit)
    cap = settings.max_heatmap_features
    note = (
        "Layer count is tracked GT regions (cap "
        f"{cap}), not raw feed events. "
        "Only regions with valid coordinates appear on the map."
    )

    return {
        "alerts": alerts,
        "tracked_regions": len(heatmap.get("features") or []),
        "engine_regions": engine_regions,
        "plotted_regions": plotted,
        "max_regions": cap,
        "note": note,
    }
|
||||
@@ -0,0 +1,593 @@
|
||||
"""Game-theoretic early warning analytics with Bayesian updating and contagion graph."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, DefaultDict
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
|
||||
from analytics.settings import GTAnalyticsSettings, get_gt_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DomainName = str # financial | unrest | conflict
|
||||
|
||||
_DOMAINS: tuple[DomainName, ...] = ("financial", "unrest", "conflict")
|
||||
|
||||
_DEFAULT_LIKELIHOODS: dict[DomainName, dict[str, float]] = {
|
||||
"financial": {"distress": 0.75, "normal": 0.25},
|
||||
"unrest": {"distress": 0.82, "normal": 0.22},
|
||||
"conflict": {"distress": 0.78, "normal": 0.18},
|
||||
}
|
||||
|
||||
_DEFAULT_SIGNAL_WEIGHTS: dict[str, float] = {
|
||||
"payroll_loan": 3.0,
|
||||
"supply_delay": 2.2,
|
||||
"elite_relocation": 2.8,
|
||||
"purge": 3.5,
|
||||
"protest_mobilize": 2.5,
|
||||
"gps_jamming": 2.7,
|
||||
"troop_movement": 3.0,
|
||||
"bank_run": 3.2,
|
||||
"sanctions_escalation": 2.4,
|
||||
"ceasefire_break": 2.6,
|
||||
}
|
||||
|
||||
# Costly-signal regex patterns (cheap talk filtered by absence of match).
|
||||
_SIGNAL_PATTERNS: dict[str, list[re.Pattern[str]]] = {
|
||||
"payroll_loan": [
|
||||
re.compile(r"payroll\s+loan", re.I),
|
||||
re.compile(r"merchant\s+cash\s+advance", re.I),
|
||||
re.compile(r"working\s+capital\s+loan", re.I),
|
||||
],
|
||||
"supply_delay": [
|
||||
re.compile(r"supply\s+(chain\s+)?delay", re.I),
|
||||
re.compile(r"shipping\s+delay", re.I),
|
||||
re.compile(r"logistics\s+backlog", re.I),
|
||||
re.compile(r"port\s+congestion", re.I),
|
||||
],
|
||||
"elite_relocation": [
|
||||
re.compile(r"elite\s+(asset\s+)?relocation", re.I),
|
||||
re.compile(r"oligarch\s+jet", re.I),
|
||||
re.compile(r"private\s+jet\s+exodus", re.I),
|
||||
re.compile(r"capital\s+flight", re.I),
|
||||
],
|
||||
"purge": [
|
||||
re.compile(r"\bpurge\b", re.I),
|
||||
re.compile(r"political\s+purge", re.I),
|
||||
re.compile(r"security\s+apparatus\s+reshuffle", re.I),
|
||||
],
|
||||
"protest_mobilize": [
|
||||
re.compile(r"protest\s+mobil", re.I),
|
||||
re.compile(r"mass\s+rally", re.I),
|
||||
re.compile(r"general\s+strike", re.I),
|
||||
re.compile(r"\bstrike\b", re.I),
|
||||
re.compile(r"\brally\b", re.I),
|
||||
],
|
||||
"gps_jamming": [
|
||||
re.compile(r"gps\s+jam", re.I),
|
||||
re.compile(r"gnss\s+interference", re.I),
|
||||
re.compile(r"spoofing\s+spike", re.I),
|
||||
],
|
||||
"troop_movement": [
|
||||
re.compile(r"troop\s+movement", re.I),
|
||||
re.compile(r"military\s+mobil", re.I),
|
||||
re.compile(r"armored\s+convoy", re.I),
|
||||
re.compile(r"troop\s+buildup", re.I),
|
||||
],
|
||||
"bank_run": [
|
||||
re.compile(r"bank\s+run", re.I),
|
||||
re.compile(r"deposit\s+flight", re.I),
|
||||
re.compile(r"liquidity\s+crunch", re.I),
|
||||
],
|
||||
"sanctions_escalation": [
|
||||
re.compile(r"sanctions?\s+escalat", re.I),
|
||||
re.compile(r"new\s+sanctions?", re.I),
|
||||
re.compile(r"export\s+controls?\s+tighten", re.I),
|
||||
],
|
||||
"ceasefire_break": [
|
||||
re.compile(r"ceasefire\s+(broken|violated|collapse)", re.I),
|
||||
re.compile(r"truce\s+end", re.I),
|
||||
],
|
||||
}
|
||||
|
||||
_SIGNAL_DOMAINS: dict[str, DomainName] = {
|
||||
"payroll_loan": "financial",
|
||||
"supply_delay": "financial",
|
||||
"bank_run": "financial",
|
||||
"sanctions_escalation": "financial",
|
||||
"protest_mobilize": "unrest",
|
||||
"purge": "unrest",
|
||||
"elite_relocation": "financial",
|
||||
"gps_jamming": "conflict",
|
||||
"troop_movement": "conflict",
|
||||
"ceasefire_break": "conflict",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class RegionState:
    """Per-region Bayesian beliefs and metadata."""

    # Current distress probability per domain; overwritten by each Bayesian update.
    priors: dict[DomainName, float] = field(default_factory=lambda: defaultdict(float))
    # Two-element location of the region, or None when unknown.
    # NOTE(review): presumably [lat, lon] -- the heatmap emits coords[1], coords[0]
    # as the GeoJSON coordinate pair; confirm against the feed producers.
    coords: list[float] | None = None
    # Cumulative signal strength seen so far, keyed by domain name.
    signal_volume: DefaultDict[str, float] = field(default_factory=lambda: defaultdict(float))
    # Number of deviation-score updates applied to this region.
    update_count: int = 0
|
||||
|
||||
|
||||
@dataclass
class HistoryEntry:
    """One processed feed item as recorded in a region's signal history."""

    # ISO-8601 timestamp of when the entry was recorded.
    timestamp: str
    # Domain the entry is filed under.
    domain: DomainName
    # Detected signal names mapped to their weighted strengths.
    signals: dict[str, float]
    # Total signal strength of the item.
    strength: float
    # Belief before the update, as supplied by the recorder.
    prior: float
    # Belief after the update, as supplied by the recorder.
    posterior: float
    # Source identifier of the feed item.
    source: str
    # Deviation of this item's strength from the regional baseline.
    deviation_score: float
|
||||
|
||||
|
||||
class GT_EarlyWarning:
    """
    Game-Theoretic Early Warning System with Bayesian updating.

    Tracks distress probabilities per region/domain, classifies costly signals vs
    cheap talk, and propagates risk through an entity interaction graph.

    All mutable state (region beliefs, history, seen-item ids and the graph ``G``)
    is accessed under one re-entrant lock, so public methods may call each other
    while holding it.
    """

    def __init__(self, settings: GTAnalyticsSettings | None = None) -> None:
        """Create an empty engine, falling back to the global GT settings."""
        self.settings = settings or get_gt_settings()
        self.G: nx.Graph = nx.Graph()
        self._regions: dict[str, RegionState] = {}
        self._history: dict[str, list[HistoryEntry]] = defaultdict(list)
        self._seen_item_ids: set[str] = set()
        self._lock = threading.RLock()

        # Copy the module defaults so per-instance overrides never leak back.
        self.likelihoods = dict(_DEFAULT_LIKELIHOODS)
        self.signal_weights = dict(_DEFAULT_SIGNAL_WEIGHTS)
        self.signal_weights.update(self.settings.signal_weight_overrides)

        self._base_prior = float(self.settings.base_prior)

    @staticmethod
    def _region_key(region: Any) -> str:
        """Normalise a region name: stripped, lower-cased, ``"global"`` if empty."""
        return str(region or "global").strip().lower() or "global"

    def _utcnow(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _region_state(self, region: str) -> RegionState:
        """Return the state for ``region``, creating it at the base prior if new."""
        key = self._region_key(region)
        if key not in self._regions:
            state = RegionState()
            for domain in _DOMAINS:
                state.priors[domain] = self._base_prior
            self._regions[key] = state
        return self._regions[key]

    def get_prior(self, region: str, domain: DomainName) -> float:
        """Return the current belief for ``domain`` in ``region``."""
        with self._lock:
            return float(self._region_state(region).priors.get(domain, self._base_prior))

    def set_prior(self, region: str, domain: DomainName, value: float) -> None:
        """Set the belief for ``domain`` in ``region``, clipped to the configured range."""
        with self._lock:
            state = self._region_state(region)
            state.priors[domain] = float(
                np.clip(value, self.settings.min_prob, self.settings.max_prob)
            )

    def composite_risk(self, region: str) -> float:
        """Weighted composite across domains (conflict weighted highest)."""
        weights = {"financial": 0.25, "unrest": 0.35, "conflict": 0.40}
        with self._lock:
            state = self._region_state(region)
            total = 0.0
            weight_sum = 0.0
            for domain, weight in weights.items():
                total += float(state.priors.get(domain, self._base_prior)) * weight
                weight_sum += weight
            return float(total / weight_sum) if weight_sum else self._base_prior

    def classify_signals(self, text: str, source: str = "") -> dict[str, float]:
        """Return weighted costly-signal strengths detected in text."""
        text_lower = (text or "").lower()
        signals: dict[str, float] = {}

        for signal_name, patterns in _SIGNAL_PATTERNS.items():
            weight = float(self.signal_weights.get(signal_name, 1.0))
            if any(pattern.search(text_lower) for pattern in patterns):
                signals[signal_name] = weight

        # Many rally/strike mentions in one item boost the protest signal.
        rally_strike_count = text_lower.count("rally") + text_lower.count("strike")
        if rally_strike_count > 3:
            signals["protest_mobilize"] = signals.get("protest_mobilize", 0.0) + 1.5

        # Source credibility nudge (Telegram OSINT channels treated as moderate-cost signals).
        if source and "t.me/" in source.lower() and signals:
            for key in list(signals):
                signals[key] = round(signals[key] * 1.05, 3)

        return signals

    def _deviation_score(self, region: str, domain: DomainName, strength: float) -> float:
        """Deviation from rolling regional norm — herding/coordination detector input.

        Returns ``strength`` divided by the volume accumulated *before* this call
        (floored at 1.0), then adds ``strength`` to the running volume.
        """
        with self._lock:
            state = self._region_state(region)
            baseline = max(state.signal_volume[domain], 1.0)
            state.signal_volume[domain] += strength
            state.update_count += 1
            return float(strength / baseline)

    def bayesian_update(
        self,
        region: str,
        domain: DomainName,
        evidence_strength: float = 1.0,
    ) -> float:
        """
        Bayesian update: P(distress|evidence) from likelihood table and prior.

        evidence_strength scales how far belief moves toward the likelihood posterior.
        Unknown domains are treated as ``"financial"``. The clipped result is stored
        as the new belief and returned.
        """
        domain = domain if domain in _DOMAINS else "financial"
        lik = self.likelihoods.get(domain, self.likelihoods["financial"])

        with self._lock:
            state = self._region_state(region)
            prior = float(state.priors.get(domain, self._base_prior))

            p_e_given_d = lik["distress"]
            p_e_given_not_d = lik["normal"]
            p_e = (p_e_given_d * prior) + (p_e_given_not_d * (1.0 - prior))

            # A zero evidence probability would divide by zero; keep the belief.
            if p_e <= 0:
                posterior = prior
            else:
                posterior = (p_e_given_d * prior) / p_e

            scaled = prior + (posterior - prior) * float(evidence_strength)
            clipped = float(np.clip(scaled, self.settings.min_prob, self.settings.max_prob))
            state.priors[domain] = clipped
            return clipped

    def _update_graph(
        self,
        region: str,
        entities: list[str],
        strength: float,
        coords: list[float] | None,
    ) -> None:
        """Link ``region`` to each entity, and every entity pair, with ``strength``.

        Blank entity names are ignored. Re-adding an existing edge overwrites its
        weight and timestamp.
        """
        region_key = self._region_key(region)
        entity_keys = [key for key in (str(entity).strip() for entity in entities) if key]
        stamp = self._utcnow()
        weight = float(strength)

        # Readers of the graph hold the lock, so writers must hold it too.
        with self._lock:
            self.G.add_node(region_key, node_type="region", region=region_key)
            if coords and len(coords) >= 2:
                self.G.nodes[region_key]["coords"] = coords

            for entity_key in entity_keys:
                self.G.add_node(entity_key, node_type="entity", region=region_key)
                self.G.add_edge(region_key, entity_key, weight=weight, timestamp=stamp)

            for i, k1 in enumerate(entity_keys):
                for k2 in entity_keys[i + 1 :]:
                    self.G.add_edge(k1, k2, weight=weight, timestamp=stamp)

    def process_feed_item(self, item: dict[str, Any]) -> dict[str, Any]:
        """Process one normalized feed item and update beliefs + contagion graph.

        Returns a result dict. Items from unwatched channels and duplicates are
        returned with ``"skipped": True``; items with no detected signal leave all
        beliefs unchanged.
        """
        # Same normalisation as _region_state, so history and beliefs share one key.
        region = self._region_key(item.get("region") or item.get("geotag"))
        text = str(item.get("text") or "")
        source = str(item.get("source") or "unknown")
        explicit_domain = str(item.get("domain") or "").strip().lower()
        entities = list(item.get("entities") or [])
        coords = item.get("coords")
        item_id = str(item.get("id") or f"{source}|{hash(text)}")

        if self.settings.watched_channels:
            channel = ""
            for entity in entities:
                if str(entity).startswith("channel:"):
                    channel = str(entity).split(":", 1)[-1].lower()
                    break
            if channel and channel not in {c.lower() for c in self.settings.watched_channels}:
                return {
                    "region": region,
                    "skipped": True,
                    "reason": "channel_not_watched",
                    "risk_score": self.composite_risk(region),
                    "signals": {},
                }

        with self._lock:
            if item_id and item_id in self._seen_item_ids:
                return {
                    "region": region,
                    "skipped": True,
                    "reason": "duplicate",
                    "risk_score": self.composite_risk(region),
                    "signals": {},
                }
            if item_id:
                self._seen_item_ids.add(item_id)

        signals = self.classify_signals(text, source)
        total_strength = float(sum(signals.values()))

        if total_strength <= 0:
            return {
                "region": region,
                "risk_score": self.composite_risk(region),
                "signals": {},
                "contagion_potential": self._get_contagion_score(region),
            }

        # Only a valid explicit domain may stand in for an unmapped signal.
        fallback_domain = explicit_domain if explicit_domain in _DOMAINS else "financial"
        domains_touched: set[DomainName] = set()
        if explicit_domain in _DOMAINS:
            domains_touched.add(explicit_domain)
        for signal_name in signals:
            domains_touched.add(_SIGNAL_DOMAINS.get(signal_name, fallback_domain))
        # Fixed order: the running-max deviation boost below is order dependent.
        ordered_domains = sorted(domains_touched)

        evidence_strength = min(
            total_strength / max(self.settings.evidence_scale, 0.1),
            self.settings.evidence_cap,
        )

        # Composite belief before this item, recorded as the history entry's prior.
        prior_composite = self.composite_risk(region)

        posteriors: dict[str, float] = {}
        deviation = 0.0
        for domain in ordered_domains:
            deviation = max(deviation, self._deviation_score(region, domain, total_strength))
            posteriors[domain] = self.bayesian_update(
                region=region,
                domain=domain,
                evidence_strength=evidence_strength * (1.0 + 0.15 * deviation),
            )

        has_coords = isinstance(coords, (list, tuple)) and len(coords) >= 2
        if has_coords:
            with self._lock:
                state = self._region_state(region)
                try:
                    state.coords = [float(coords[0]), float(coords[1])]
                except (TypeError, ValueError):
                    # Non-numeric coordinates: keep whatever location was known.
                    pass

        # Tuples are accepted too, matching the region-state handling above.
        graph_coords = list(coords) if isinstance(coords, (list, tuple)) else None
        self._update_graph(region, entities, total_strength, graph_coords)

        composite = self.composite_risk(region)
        entry = HistoryEntry(
            timestamp=self._utcnow(),
            domain=explicit_domain if explicit_domain in _DOMAINS else ordered_domains[0],
            signals=signals,
            strength=total_strength,
            prior=prior_composite,
            posterior=composite,
            source=source,
            deviation_score=deviation,
        )
        with self._lock:
            history = self._history[region]
            history.append(entry)
            max_hist = max(10, int(self.settings.max_history_per_region))
            if len(history) > max_hist:
                self._history[region] = history[-max_hist:]

        logger.info(
            "GT update region=%s domains=%s composite=%.3f signals=%d deviation=%.2f",
            region,
            ",".join(ordered_domains),
            composite,
            len(signals),
            deviation,
        )

        return {
            "region": region,
            "domains": ordered_domains,
            "domain_posteriors": posteriors,
            "risk_score": composite,
            "signals": signals,
            "deviation_score": deviation,
            "contagion_potential": self._get_contagion_score(region),
            "interpretation": self._interpret_risk(composite),
        }

    def _interpret_risk(self, risk: float) -> str:
        """Return a one-line reading of ``risk`` against the high-risk threshold."""
        threshold = float(self.settings.high_risk_threshold)
        if risk >= threshold:
            return (
                f"Elevated strategic risk ({risk:.2f} ≥ {threshold:.2f}). "
                "Watch for costly-signal clustering and cross-region contagion."
            )
        if risk >= threshold * 0.7:
            return "Moderate risk — monitor for herding and repeated costly signals."
        return "Baseline risk — no strong costly-signal cluster detected."

    def _get_contagion_score(self, region: str) -> float:
        """Graph-based contagion: mean composite risk of graph neighbors.

        Neighbours that are tracked regions contribute their composite risk; any
        other neighbour (e.g. an entity node) contributes the base prior. Unknown
        neighbours are only looked up, never registered as regions. Returns 0.0
        when the region is not in the graph or has no neighbours.
        """
        region_key = self._region_key(region)
        with self._lock:
            if region_key not in self.G:
                return 0.0
            try:
                neighbors = list(self.G.neighbors(region_key))
            except nx.NetworkXError:
                return 0.0
            if not neighbors:
                return 0.0
            neighbor_risks = [
                self.composite_risk(key) if key in self._regions else self._base_prior
                for key in (self._region_key(node) for node in neighbors)
            ]
            return float(np.mean(neighbor_risks))

    def compute_herding_clusters(self) -> list[dict[str, Any]]:
        """Louvain community detection on entity graph (coordination/herding proxy).

        Edges lighter than ``settings.louvain_min_weight`` are ignored. Returns the
        clusters sorted by mean risk, highest first, or ``[]`` when there is nothing
        to cluster or clustering fails.
        """
        with self._lock:
            if self.G.number_of_edges() == 0:
                return []

            weighted = nx.Graph()
            for u, v, data in self.G.edges(data=True):
                weight = float(data.get("weight") or 0.0)
                if weight < self.settings.louvain_min_weight:
                    continue
                if weighted.has_edge(u, v):
                    weighted[u][v]["weight"] = weighted[u][v].get("weight", 0.0) + weight
                else:
                    weighted.add_edge(u, v, weight=weight)

        if weighted.number_of_edges() == 0:
            return []

        try:
            communities = list(nx.community.louvain_communities(weighted, weight="weight", seed=42))
        except Exception as exc:
            # Best effort: a clustering failure must not break the caller.
            logger.warning("Louvain clustering failed: %s", exc)
            return []

        clusters: list[dict[str, Any]] = []
        for idx, community in enumerate(communities):
            members = sorted(str(node) for node in community)
            region_members = [m for m in members if m in self._regions]
            risks = [self.composite_risk(r) for r in region_members]
            clusters.append(
                {
                    "cluster_id": idx,
                    "size": len(members),
                    "members": members[:50],
                    "mean_risk": float(np.mean(risks)) if risks else self._base_prior,
                    "regions": region_members,
                }
            )
        clusters.sort(key=lambda row: row["mean_risk"], reverse=True)
        return clusters

    def get_risk_heatmap(self) -> dict[str, Any]:
        """GeoJSON FeatureCollection for frontend risk overlay.

        At most ``settings.max_heatmap_features`` regions are emitted. Regions with
        no stored coordinates are placed at ``[0.0, 0.0]``.
        """
        features: list[dict[str, Any]] = []
        with self._lock:
            items = list(self._regions.items())[: max(1, self.settings.max_heatmap_features)]

        for region, state in items:
            coords = state.coords
            geometry: dict[str, Any]
            if coords and len(coords) >= 2:
                # Stored pair is emitted in swapped order as the GeoJSON coordinates.
                geometry = {"type": "Point", "coordinates": [float(coords[1]), float(coords[0])]}
            else:
                geometry = {"type": "Point", "coordinates": [0.0, 0.0]}

            composite = self.composite_risk(region)
            features.append(
                {
                    "type": "Feature",
                    "properties": {
                        "region": region,
                        "risk": round(composite, 4),
                        "financial": round(float(state.priors.get("financial", self._base_prior)), 4),
                        "unrest": round(float(state.priors.get("unrest", self._base_prior)), 4),
                        "conflict": round(float(state.priors.get("conflict", self._base_prior)), 4),
                        "contagion": round(self._get_contagion_score(region), 4),
                        "updates": state.update_count,
                    },
                    "geometry": geometry,
                }
            )

        return {"type": "FeatureCollection", "features": features}

    def get_dossier(self, region: str) -> dict[str, Any]:
        """Explainable GT rationale and recent signal history for a region.

        Includes the ten most recent history entries and the top five herding
        clusters.
        """
        region_key = self._region_key(region)
        with self._lock:
            state = self._region_state(region_key)
            recent = list(self._history.get(region_key, [])[-10:])

        composite = self.composite_risk(region_key)
        return {
            "region": region_key,
            "current_risk": round(composite, 4),
            "domain_risks": {
                domain: round(float(state.priors.get(domain, self._base_prior)), 4)
                for domain in _DOMAINS
            },
            "recent_signals": [
                {
                    "timestamp": entry.timestamp,
                    "domain": entry.domain,
                    "signals": entry.signals,
                    "strength": entry.strength,
                    "posterior": round(entry.posterior, 4),
                    "source": entry.source,
                    "deviation_score": round(entry.deviation_score, 3),
                }
                for entry in recent
            ],
            "contagion_risk": round(self._get_contagion_score(region_key), 4),
            "herding_clusters": self.compute_herding_clusters()[:5],
            "interpretation": self._interpret_risk(composite),
            "scenarios": self._build_scenarios(region_key, composite),
        }

    def _build_scenarios(self, region: str, composite: float) -> list[dict[str, str]]:
        """Return canned scenario blurbs for the risk band ``composite`` falls in."""
        threshold = float(self.settings.high_risk_threshold)
        if composite < threshold * 0.7:
            return [
                {
                    "name": "Status quo",
                    "summary": "Signals remain diffuse; no coordinated costly-signal cascade.",
                }
            ]
        if composite < threshold:
            return [
                {
                    "name": "Escalation watch",
                    "summary": "Rising costly-signal density — coordination risk within 4-8 weeks.",
                },
                {
                    "name": "False alarm",
                    "summary": "Cheap-talk amplification without follow-on costly signals.",
                },
            ]
        return [
            {
                "name": "Contagion spread",
                "summary": "High posterior + graph coupling — adjacent regions likely to update upward.",
            },
            {
                "name": "Localized shock",
                "summary": "Region-specific distress; contagion limited if graph neighbors stay quiet.",
            },
        ]

    def snapshot(self) -> dict[str, Any]:
        """Serialize engine state for debugging or persistence."""
        with self._lock:
            return {
                "regions": {
                    region: {
                        "priors": dict(state.priors),
                        "coords": state.coords,
                        "updates": state.update_count,
                    }
                    for region, state in self._regions.items()
                },
                "graph_nodes": self.G.number_of_nodes(),
                "graph_edges": self.G.number_of_edges(),
                "processed_items": len(self._seen_item_ids),
            }
|
||||
@@ -0,0 +1,649 @@
|
||||
"""Curated historical early-warning cases for GT backtesting.
|
||||
|
||||
Each positive case bundles pre-crisis costly-signal snippets drawn from documented
|
||||
precursors (financial, unrest, conflict). Negative cases are cheap-talk controls.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
CaseKind = Literal["positive", "negative"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BacktestFeed:
    """One immutable text snippet fed to the engine during a backtest."""

    # Snippet text to be scanned for signals.
    text: str
    # Source label attached to the generated feed item.
    source: str = "backtest"
    # Domain label for the snippet; HistoricalCase.to_feed_dicts falls back to
    # the case's own domain when this is empty.
    domain: str = "financial"
    # Lead time relative to the case's event; emitted under the "published" key.
    days_before_event: int = 30
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HistoricalCase:
    """Single labeled backtest scenario."""

    case_id: str
    name: str
    region: str
    domain: str
    kind: CaseKind
    event_date: str
    description: str
    feeds: tuple[BacktestFeed, ...] = field(default_factory=tuple)
    tags: tuple[str, ...] = field(default_factory=tuple)

    def to_feed_dicts(self) -> list[dict[str, Any]]:
        """Return the case's feeds as feed-item dicts, in order.

        Each item id is ``"<case_id>-<index>"``; a feed with an empty domain
        inherits the case's domain.
        """
        return [
            {
                "id": f"{self.case_id}-{position}",
                "text": snippet.text,
                "source": snippet.source,
                "region": self.region,
                "domain": snippet.domain or self.domain,
                "published": snippet.days_before_event,
            }
            for position, snippet in enumerate(self.feeds)
        ]
|
||||
|
||||
|
||||
def _variant_case(case: HistoricalCase, suffix: str, feeds: tuple[BacktestFeed, ...]) -> HistoricalCase:
    """Clone ``case`` with replacement ``feeds``, marking id, name and tags with ``suffix``."""
    variant_id = f"{case.case_id}__{suffix}"
    variant_name = f"{case.name} ({suffix})"
    variant_tags = case.tags + (f"variant:{suffix}",)
    return HistoricalCase(
        case_id=variant_id,
        name=variant_name,
        region=case.region,
        domain=case.domain,
        kind=case.kind,
        event_date=case.event_date,
        description=case.description,
        feeds=feeds,
        tags=variant_tags,
    )
|
||||
|
||||
|
||||
def expanded_historical_cases() -> tuple[HistoricalCase, ...]:
    """Base suite plus paraphrase variants for statistical confidence.

    The result is the default cases, then one ``vN`` variant per paraphrase set
    (in base-case order), then the extra cheap-talk negative controls.
    """
    base_cases = list(default_historical_cases())

    # Paraphrased feed sets, keyed by the id of the base case they vary.
    paraphrases: dict[str, tuple[tuple[BacktestFeed, ...], ...]] = {
        "fin_2008_us": (
            (
                BacktestFeed(
                    "Small businesses turn to payroll loan products as credit lines freeze.",
                    domain="financial",
                    days_before_event=100,
                ),
                BacktestFeed(
                    "FDIC monitors liquidity crunch; interbank spreads widen sharply.",
                    domain="financial",
                    days_before_event=60,
                ),
            ),
            (
                BacktestFeed(
                    "Merchant cash advance volumes spike; payroll loan demand at record highs.",
                    domain="financial",
                    days_before_event=80,
                ),
                BacktestFeed(
                    "Money market funds see inflows as deposit flight from regional banks continues.",
                    domain="financial",
                    days_before_event=40,
                ),
            ),
        ),
        "fin_2020_supply": (
            (
                BacktestFeed(
                    "Electronics firms report shipping delay and port congestion across Pearl River Delta.",
                    domain="financial",
                    days_before_event=45,
                ),
                BacktestFeed(
                    "Supply chain delay widens; logistics backlog hits automotive suppliers.",
                    domain="financial",
                    days_before_event=20,
                ),
            ),
            (
                BacktestFeed(
                    "Container shortage fuels shipping delay; supply chain delay indices jump.",
                    domain="financial",
                    days_before_event=35,
                ),
                BacktestFeed(
                    "Electronics assemblers warn of logistics backlog as port congestion spreads.",
                    domain="financial",
                    days_before_event=20,
                ),
                BacktestFeed(
                    "Automotive suppliers flag supply chain delay after factory shutdowns in Hubei.",
                    domain="financial",
                    days_before_event=10,
                ),
            ),
        ),
        "fin_2022_sanctions": (
            (
                BacktestFeed(
                    "Treasury drafts new sanctions escalation package on energy and finance sectors.",
                    domain="financial",
                    days_before_event=30,
                ),
                BacktestFeed(
                    "Capital flight accelerates; elite relocation flights depart Moscow airports.",
                    domain="financial",
                    days_before_event=14,
                ),
            ),
        ),
        "unrest_arab_spring_egypt": (
            (
                BacktestFeed(
                    "Cairo activists schedule mass rally; protest mobilization leaflets distributed.",
                    domain="unrest",
                    days_before_event=18,
                ),
                BacktestFeed(
                    "Labor federations call general strike; strike posters cover downtown.",
                    domain="unrest",
                    days_before_event=8,
                ),
            ),
        ),
        "conflict_2022_ukraine": (
            (
                BacktestFeed(
                    "Convoy of armored vehicles confirms troop movement near Sumy Oblast.",
                    source="t.me/war_monitor",
                    domain="conflict",
                    days_before_event=20,
                ),
                BacktestFeed(
                    "GNSS interference warnings follow GPS jamming spike along Belarus border.",
                    source="t.me/osintdefender",
                    domain="conflict",
                    days_before_event=10,
                ),
            ),
            (
                BacktestFeed(
                    "Military mobilization notices circulate; troop buildup confirmed by satellite firms.",
                    domain="conflict",
                    days_before_event=12,
                ),
            ),
        ),
        "neg_weather_us": (
            (
                BacktestFeed("Autumn foliage peaks in Vermont; pleasant hiking weather continues."),
                BacktestFeed("County fair announces pie contest and livestock exhibitions."),
            ),
            (
                BacktestFeed("Meteorologists predict mild hurricane season remainder for Gulf Coast."),
            ),
        ),
        "neg_sports_uk": (
            (
                BacktestFeed("Rugby Six Nations standings update after weekend fixtures."),
                BacktestFeed("Local marathon registration opens for charity runners."),
            ),
        ),
        "neg_tech_global": (
            (
                BacktestFeed("Chipmaker announces efficiency gains in next-generation processor."),
                BacktestFeed("Cloud provider opens new green datacenter in Nordic region."),
            ),
        ),
    }

    # Variants are numbered from v1 within each base case.
    variant_cases = [
        _variant_case(case, f"v{number}", feed_set)
        for case in base_cases
        for number, feed_set in enumerate(paraphrases.get(case.case_id, ()), start=1)
    ]

    # Additional cheap-talk controls to widen negative sample
    cheap_talk_regions = (
        ("australia", "Museum opens contemporary art exhibit to strong attendance."),
        ("spain", "Tomato harvest festival scheduled; regional trains add weekend service."),
        ("south_korea", "K-pop group announces world tour dates for autumn."),
        ("mexico", "Coastal cleanup volunteers restore beach habitats before holiday season."),
        ("sweden", "City council approves bike lane expansion along waterfront."),
        ("norway", "Salmon exports remain stable; fishing fleets report normal catch volumes."),
        ("italy", "Truffle festival returns; restaurants publish seasonal tasting menus."),
        ("poland", "University researchers release open-source astronomy software."),
        ("thailand", "Monsoon rains ease; rice planting proceeds on normal schedule."),
        ("vietnam", "Electronics assembly plants report steady export order books."),
        ("south_africa", "Wildlife reserve reports rising ecotourism bookings."),
        ("argentina", "Wine harvest festival opens; export cooperatives meet volume targets."),
        ("netherlands", "Cycling championship draws international teams to canal district."),
        ("belgium", "Chocolate exporters report stable holiday shipment schedules."),
        ("portugal", "Offshore wind auction attracts multiple renewable bidders."),
        ("greece", "Island ferry operators add routes ahead of summer travel season."),
        ("turkey", "Cotton harvest forecast unchanged; textile orders stable."),
        ("indonesia", "Volcano monitoring reports routine activity; tourism continues."),
        ("philippines", "Coconut processors report normal logistics to export markets."),
        ("malaysia", "Palm oil shipments on schedule; port throughput normal."),
        ("new_zealand", "Sheep shearing competition draws rural crowds."),
        ("ireland", "Tech conference highlights open-source database tooling."),
        ("finland", "Sauna culture festival celebrates heritage with local artisans."),
        ("denmark", "Wind turbine maintenance contracts renewed on prior terms."),
        ("austria", "Ski resorts prepare slopes after early snowfall."),
        ("switzerland", "Watchmakers unveil mechanical movement prototypes at trade fair."),
        ("czech_republic", "Glassmakers export decorative pieces ahead of holiday season."),
        ("romania", "Carpathian hiking trails reopen after spring maintenance."),
        ("hungary", "Thermal bath tourism bookings rise for winter wellness season."),
        ("peru", "Coffee cooperatives report stable harvest and export schedules."),
        ("colombia", "Flower exporters prepare Valentine's shipments on normal cadence."),
        ("morocco", "Citrus harvest meets forecasts; agricultural credit unchanged."),
        ("kenya", "Tea auction volumes steady; freight routes operate normally."),
        ("nigeria", "Nollywood studio announces family comedy release dates."),
        ("ethiopia", "Coffee ceremony festival highlights regional bean varieties."),
        ("saudi_arabia", "Desert conservation project plants drought-resistant shrubs."),
        ("uae", "Airport duty-free operators expand luxury retail concourse."),
        ("qatar", "Stadium operators prepare hospitality packages for sporting events."),
        ("singapore", "Port authority reports container throughput on seasonal trend."),
        ("hong_kong", "Art auction previews draw collectors to harborfront gallery."),
        ("chile", "Vineyard tours report strong bookings ahead of harvest festival weekend."),
        ("uruguay", "Beef exporters maintain steady shipment schedules to European buyers."),
        ("iceland", "Geothermal spa resorts report normal winter visitor volumes."),
        ("luxembourg", "Fund administrators publish routine quarterly disclosure filings."),
        ("slovakia", "Mountain lodges prepare ski season openings after early snowfall."),
        ("croatia", "Adriatic ferry operators add summer routes on prior timetable."),
        ("bulgaria", "Rose oil cooperatives report stable export volumes to fragrance buyers."),
        ("serbia", "Danube barge traffic proceeds on normal freight schedules."),
        ("latvia", "Timber mills export lumber on unchanged contract terms."),
        ("lithuania", "Baltic wind farms complete scheduled turbine maintenance rotations."),
        ("estonia", "Digital residency applications processed at routine monthly pace."),
        ("panama", "Canal transit volumes remain on seasonal trend; shipping fees unchanged."),
    )
    control_cases = [
        HistoricalCase(
            case_id=f"neg_extra_{position:02d}",
            name=f"Benign regional news ({region_name})",
            region=region_name,
            domain="financial",
            kind="negative",
            event_date="2020-01-01",
            description="Expanded cheap-talk control.",
            feeds=(BacktestFeed(headline),),
            tags=("control", "expanded"),
        )
        for position, (region_name, headline) in enumerate(cheap_talk_regions)
    ]

    return tuple(base_cases + variant_cases + control_cases)
|
||||
|
||||
|
||||
def default_historical_cases() -> tuple[HistoricalCase, ...]:
    """Benchmark suite — expand as new validated precursors are added.

    Returns a tuple of hand-written ``HistoricalCase`` records:

    * positive cases (``kind="positive"``) whose feeds carry precursor wording
      together with a ``domain`` and a ``days_before_event`` offset;
    * negative controls (``kind="negative"``, tagged ``"control"``) whose feeds
      are plain benign text with no domain or offset supplied.
    """
    return (
        # ── Financial distress ─────────────────────────────────────────────
        HistoricalCase(
            case_id="fin_2008_us",
            name="2008 US financial crisis",
            region="united_states",
            domain="financial",
            kind="positive",
            event_date="2008-09-15",
            description="Payroll-loan distress, liquidity crunch, and deposit flight precursors.",
            tags=("2008", "financial", "lehman"),
            feeds=(
                BacktestFeed(
                    "Franchise operators increasingly rely on payroll loan facilities as working capital tightens.",
                    domain="financial",
                    days_before_event=120,
                ),
                BacktestFeed(
                    "Regional banks report liquidity crunch; CFOs warn of merchant cash advance reliance.",
                    domain="financial",
                    days_before_event=90,
                ),
                BacktestFeed(
                    "Deposit flight accelerates at mid-size lenders; analysts flag bank run risk.",
                    domain="financial",
                    days_before_event=45,
                ),
            ),
        ),
        HistoricalCase(
            case_id="fin_2020_supply",
            name="COVID supply-chain shock",
            region="china",
            domain="financial",
            kind="positive",
            event_date="2020-02-01",
            description="Port congestion and logistics backlog ahead of global supply shock.",
            tags=("covid", "supply_chain", "financial"),
            feeds=(
                BacktestFeed(
                    "Major port congestion reported; shipping delay spreads to electronics suppliers.",
                    domain="financial",
                    days_before_event=60,
                ),
                BacktestFeed(
                    "Automakers warn of supply chain delay and logistics backlog across Wuhan corridor.",
                    domain="financial",
                    days_before_event=30,
                ),
                BacktestFeed(
                    "Factory restarts slip as supply delay and port congestion persist into Q1.",
                    domain="financial",
                    days_before_event=14,
                ),
            ),
        ),
        HistoricalCase(
            case_id="fin_2022_sanctions",
            name="Russia sanctions escalation",
            region="russia",
            domain="financial",
            kind="positive",
            event_date="2022-02-24",
            description="Sanctions escalation and capital flight ahead of invasion.",
            tags=("sanctions", "ukraine", "financial"),
            feeds=(
                BacktestFeed(
                    "Western allies prepare new sanctions escalation on major Russian banks.",
                    domain="financial",
                    days_before_event=45,
                ),
                BacktestFeed(
                    "Oligarch jet movements suggest elite relocation and capital flight from Moscow.",
                    domain="financial",
                    days_before_event=21,
                ),
                BacktestFeed(
                    "Central bank intervenes as new sanctions tighten export controls on finance sector.",
                    domain="financial",
                    days_before_event=10,
                ),
            ),
        ),
        # ── Civil unrest ─────────────────────────────────────────────────
        HistoricalCase(
            case_id="unrest_arab_spring_tunisia",
            name="Arab Spring — Tunisia",
            region="tunisia",
            domain="unrest",
            kind="positive",
            event_date="2010-12-17",
            description="Protest mobilization and strike waves before Jasmine Revolution.",
            tags=("arab_spring", "unrest"),
            feeds=(
                # NOTE(review): this feed is placed 14 days before event_date yet
                # its text describes the aftermath of the vendor self-immolation,
                # which historically appears to coincide with event_date itself —
                # confirm the offsets/event_date are intentional.
                BacktestFeed(
                    "Student groups announce protest mobilization after vendor self-immolation.",
                    domain="unrest",
                    days_before_event=14,
                ),
                BacktestFeed(
                    "Mass rally planned in Tunis; general strike called by labor unions.",
                    domain="unrest",
                    days_before_event=7,
                ),
            ),
        ),
        HistoricalCase(
            case_id="unrest_arab_spring_egypt",
            name="Arab Spring — Egypt",
            region="egypt",
            domain="unrest",
            kind="positive",
            event_date="2011-01-25",
            description="Mobilization spikes and security reshuffles before Tahrir.",
            tags=("arab_spring", "unrest"),
            feeds=(
                BacktestFeed(
                    "Opposition calls protest mobilization in Cairo; strike notices circulate online.",
                    domain="unrest",
                    days_before_event=21,
                ),
                BacktestFeed(
                    "Reports of political purge within interior ministry security apparatus reshuffle.",
                    domain="unrest",
                    days_before_event=10,
                ),
                BacktestFeed(
                    "Mass rally and strike coordination spreads; rally posters appear in Alexandria.",
                    domain="unrest",
                    days_before_event=5,
                ),
            ),
        ),
        HistoricalCase(
            case_id="unrest_2019_chile",
            name="Chile 2019 metro protests",
            region="chile",
            domain="unrest",
            kind="positive",
            event_date="2019-10-18",
            description="Transit fare protests escalate to general strike.",
            tags=("unrest", "latam"),
            feeds=(
                BacktestFeed(
                    "Students organize mass rally after metro fare hike; protest mobilization trending.",
                    domain="unrest",
                    days_before_event=10,
                ),
                BacktestFeed(
                    "Unions announce general strike; rally and strike hashtags spike nationwide.",
                    domain="unrest",
                    days_before_event=3,
                ),
            ),
        ),
        # ── Conflict / war ───────────────────────────────────────────────
        HistoricalCase(
            case_id="conflict_2022_ukraine",
            name="2022 Ukraine invasion buildup",
            region="ukraine",
            domain="conflict",
            kind="positive",
            event_date="2022-02-24",
            description="Troop movement and GPS jamming precursors on northern border.",
            tags=("ukraine", "conflict"),
            feeds=(
                BacktestFeed(
                    "OSINT reports troop movement and armored convoy near Belarus border.",
                    source="t.me/war_monitor",
                    domain="conflict",
                    days_before_event=30,
                ),
                BacktestFeed(
                    "GPS jamming spike reported along northern corridor; GNSS interference warnings issued.",
                    source="t.me/osintdefender",
                    domain="conflict",
                    days_before_event=14,
                ),
                BacktestFeed(
                    "Satellite imagery shows troop buildup; military mobilization near Kharkiv axis.",
                    domain="conflict",
                    days_before_event=7,
                ),
            ),
        ),
        HistoricalCase(
            case_id="conflict_2023_gaza",
            name="2023 Gaza conflict escalation",
            region="israel",
            domain="conflict",
            kind="positive",
            event_date="2023-10-07",
            description="Ceasefire breakdown and troop movement signals.",
            tags=("gaza", "conflict"),
            feeds=(
                BacktestFeed(
                    "Border units report troop movement near Gaza envelope; ceasefire broken overnight.",
                    domain="conflict",
                    days_before_event=14,
                ),
                BacktestFeed(
                    "Truce end announced; armored convoy repositioning reported by local observers.",
                    domain="conflict",
                    days_before_event=5,
                ),
            ),
        ),
        HistoricalCase(
            case_id="conflict_2020_nagorno",
            name="2020 Nagorno-Karabakh renewal",
            region="armenia",
            domain="conflict",
            kind="positive",
            event_date="2020-09-27",
            description="Artillery and troop buildup precursors.",
            tags=("caucasus", "conflict"),
            feeds=(
                BacktestFeed(
                    "Drone strikes reported on line of contact; troop movement on Armenian-Azeri border.",
                    domain="conflict",
                    days_before_event=21,
                ),
                BacktestFeed(
                    "GPS jamming spike reported in conflict zone; military mobilization notices leaked.",
                    domain="conflict",
                    days_before_event=7,
                ),
            ),
        ),
        # ── Recent financial / corporate distress pattern ────────────────
        HistoricalCase(
            case_id="fin_2023_banking",
            name="2023 regional banking stress",
            region="united_states",
            domain="financial",
            kind="positive",
            event_date="2023-03-10",
            description="Deposit flight and liquidity stress (SVB precursor pattern).",
            tags=("svb", "financial", "2023"),
            feeds=(
                BacktestFeed(
                    "Tech lenders face deposit flight; VC portfolio companies move payroll to money market funds.",
                    domain="financial",
                    days_before_event=21,
                ),
                BacktestFeed(
                    "Analysts warn liquidity crunch at regional banks holding long-duration bonds.",
                    domain="financial",
                    days_before_event=7,
                ),
            ),
        ),
        # ── Negative controls (cheap talk / benign) ─────────────────────
        HistoricalCase(
            case_id="neg_weather_us",
            name="Benign weather coverage",
            region="united_states",
            domain="financial",
            kind="negative",
            event_date="2019-06-01",
            description="No costly signals — should remain near baseline.",
            tags=("control",),
            feeds=(
                BacktestFeed("Sunny weekend expected across the Midwest with mild temperatures."),
                BacktestFeed("Local festival draws crowds; farmers market expands summer hours."),
            ),
        ),
        HistoricalCase(
            case_id="neg_sports_uk",
            name="Benign sports coverage",
            region="uk",
            domain="unrest",
            kind="negative",
            event_date="2018-07-01",
            description="Sports chatter without mobilization costly signals.",
            tags=("control",),
            feeds=(
                BacktestFeed("Premier league season review: top scorers and transfer rumors."),
                BacktestFeed("Cricket test match ends early due to rain delay at Lord's."),
            ),
        ),
        HistoricalCase(
            case_id="neg_tech_global",
            name="Benign tech product launch",
            region="global",
            domain="financial",
            kind="negative",
            event_date="2021-09-01",
            description="Corporate product news without distress markers.",
            tags=("control",),
            feeds=(
                BacktestFeed("Smartphone maker unveils new camera features at annual keynote."),
                BacktestFeed("Quarterly earnings beat expectations; dividend unchanged."),
            ),
        ),
        HistoricalCase(
            case_id="neg_tourism_france",
            name="Benign tourism recovery",
            region="france",
            domain="unrest",
            kind="negative",
            event_date="2022-08-01",
            description="Travel sector recovery without unrest signals.",
            tags=("control",),
            feeds=(
                BacktestFeed("Paris hotels report record summer bookings as tourism rebounds."),
                BacktestFeed("Airline adds routes to Nice and Marseille for holiday travelers."),
            ),
        ),
        HistoricalCase(
            case_id="neg_science_japan",
            name="Benign science news",
            region="japan",
            domain="conflict",
            kind="negative",
            event_date="2020-11-01",
            description="Research coverage without conflict markers.",
            tags=("control",),
            feeds=(
                BacktestFeed("Astronomy team publishes comet observations from Mount Fuji observatory."),
                BacktestFeed("Robotics lab demonstrates warehouse automation prototype."),
            ),
        ),
        HistoricalCase(
            case_id="neg_agriculture_brazil",
            name="Benign agriculture report",
            region="brazil",
            domain="financial",
            kind="negative",
            event_date="2017-03-01",
            description="Commodity harvest update without supply distress.",
            tags=("control",),
            feeds=(
                BacktestFeed("Soybean harvest forecast revised upward; export volumes steady."),
                BacktestFeed("Coffee cooperative reports normal shipping schedules to European buyers."),
            ),
        ),
        HistoricalCase(
            case_id="neg_culture_india",
            name="Benign culture coverage",
            region="india",
            domain="unrest",
            kind="negative",
            event_date="2016-11-01",
            description="Festival coverage without mobilization.",
            tags=("control",),
            feeds=(
                BacktestFeed("Diwali celebrations begin; cities decorate markets with lights."),
                BacktestFeed("Film festival opens in Mumbai with premiere screenings."),
            ),
        ),
        HistoricalCase(
            case_id="neg_infrastructure_canada",
            name="Benign infrastructure ribbon-cutting",
            region="canada",
            domain="financial",
            kind="negative",
            event_date="2015-05-01",
            description="Municipal news without financial stress.",
            tags=("control",),
            feeds=(
                BacktestFeed("New light-rail segment opens on schedule; commute times improve."),
                BacktestFeed("Municipal bond issuance funds library renovation at prior rates."),
            ),
        ),
    )
|
||||
@@ -0,0 +1,198 @@
|
||||
"""Singleton GT engine and feed-batch integration hooks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from analytics.feed_adapter import iter_gdelt_features, iter_news_items, iter_telegram_posts
|
||||
from analytics.gt_early_warning import GT_EarlyWarning
|
||||
from analytics.settings import gt_analytics_enabled, get_gt_settings, gt_engine_operational, gt_louvain_enabled, gt_scheduled_ingest_enabled
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
|
||||
logger = logging.getLogger(__name__)

# Process-wide engine instance: created lazily by get_gt_engine() and cleared
# by reset_gt_engine().
_engine: GT_EarlyWarning | None = None
# Held while _engine is created or reset.
_engine_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_gt_engine() -> GT_EarlyWarning | None:
    """Hand out the process-wide GT engine, building it on first use.

    Returns:
        ``None`` whenever ``gt_engine_operational()`` is false; otherwise the
        shared ``GT_EarlyWarning`` instance, constructed from
        ``get_gt_settings()`` the first time it is requested.
    """
    global _engine
    if gt_engine_operational():
        with _engine_lock:
            # Creation happens under the lock so callers share one instance.
            if _engine is None:
                _engine = GT_EarlyWarning(get_gt_settings())
                logger.info("Strategic Risk Analytics engine initialized")
            return _engine
    return None
|
||||
|
||||
|
||||
def reset_gt_engine() -> None:
    """Forget the cached settings and the shared engine (test helper).

    The settings cache is cleared first; the engine reference is then dropped
    under ``_engine_lock`` so the next ``get_gt_engine()`` call rebuilds it.
    """
    global _engine
    get_gt_settings.cache_clear()
    with _engine_lock:
        _engine = None
|
||||
|
||||
|
||||
def process_feed_item(item: dict[str, Any]) -> dict[str, Any] | None:
    """Run one normalized feed item through the shared engine, best-effort.

    Args:
        item: Normalized feed item passed straight to the engine.

    Returns:
        The engine's result, or ``None`` when no engine is available or the
        engine raised (the exception is logged, never propagated).
    """
    engine = get_gt_engine()
    if engine is not None:
        try:
            return engine.process_feed_item(item)
        except Exception:
            # Best-effort hook: a failing item must not break the caller.
            logger.exception("GT process_feed_item failed")
    return None
|
||||
|
||||
|
||||
def _persist_gt_snapshot(
    engine: GT_EarlyWarning,
    *,
    processed: int,
    sample: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Assemble the ``gt_risk`` payload and publish it to the shared store.

    Args:
        engine: Engine whose heatmap and herding clusters are snapshotted.
        processed: Number of items handled in the refresh being recorded.
        sample: Optional result sample; at most the first five are stored.

    Returns:
        The payload written to ``latest_data["gt_risk"]``.
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    heatmap = engine.get_risk_heatmap()

    micro_summary: dict[str, Any] = {}
    try:
        from analytics.micro_rolling import capture_daily_readings, enrich_heatmap_features

        micro_summary = capture_daily_readings(engine)
        heatmap = enrich_heatmap_features(heatmap)
    except Exception:
        # Micro rolling is optional: on failure the heatmap is kept as it was.
        logger.exception("GT micro rolling capture failed")

    clusters = engine.compute_herding_clusters()

    from analytics.gt_alerts import parse_heatmap_alerts

    _, plotted_regions = parse_heatmap_alerts(heatmap)

    with engine._lock:  # noqa: SLF001 — snapshot meta
        engine_regions = len(engine._regions)

    settings = get_gt_settings()
    feature_count = len(heatmap.get("features") or [])
    sample_head = list(sample)[:5] if sample else []

    payload = {
        "enabled": True,
        "timestamp": timestamp,
        "processed": processed,
        "heatmap": heatmap,
        "clusters": clusters,
        "sample": sample_head,
        "regions": feature_count,
        "micro": micro_summary,
        "meta": {
            "tracked_regions": feature_count,
            "engine_regions": engine_regions,
            "plotted_regions": plotted_regions,
            "max_regions": settings.max_heatmap_features,
        },
    }

    with _data_lock:
        latest_data["gt_risk"] = payload
    _mark_fresh("gt_risk")
    return payload
|
||||
|
||||
|
||||
def refresh_from_latest_data(
    data_snapshot: dict[str, Any],
    *,
    persist: bool = True,
) -> dict[str, Any]:
    """
    Batch-ingest recent intel layers from the shared data store.

    Intended to run after telegram/news/gdelt fetch cycles (near-real-time).

    Args:
        data_snapshot: Copy of the shared store; the ``telegram_osint``,
            ``news`` and ``gdelt`` entries are read (missing keys are passed
            to the adapters as ``None``).
        persist: When true, the resulting snapshot is also written to the
            shared store and its timestamp / cluster count are added to the
            returned summary.

    Returns:
        ``{"enabled": False, "processed": 0}`` when no engine is available,
        otherwise a summary with the processed count, up to five sample
        results and the current heatmap feature count.
    """
    engine = get_gt_engine()
    if engine is None:
        return {"enabled": False, "processed": 0}

    sample_limit = 5
    # (snapshot key, adapter, may contribute to the sample). GDELT results are
    # counted but never sampled, matching the established payload.
    sources = (
        ("telegram_osint", iter_telegram_posts, True),
        ("news", iter_news_items, True),
        ("gdelt", iter_gdelt_features, False),
    )

    processed = 0
    sample: list[dict[str, Any]] = []
    for key, adapter, sampled in sources:
        # Adapters are invoked one source at a time, in order.
        for item in adapter(data_snapshot.get(key)):
            result = engine.process_feed_item(item)
            if not result or result.get("skipped"):
                continue
            processed += 1
            # Only the first few results are ever reported, so stop
            # accumulating once the sample is full.
            if sampled and len(sample) < sample_limit:
                sample.append(result)

    logger.info("GT refresh processed %d items", processed)
    summary = {
        "enabled": True,
        "processed": processed,
        "sample": list(sample),
        "heatmap_features": len(engine.get_risk_heatmap().get("features") or []),
    }
    if persist:
        snapshot = _persist_gt_snapshot(engine, processed=processed, sample=sample)
        summary["timestamp"] = snapshot.get("timestamp")
        summary["clusters"] = len(snapshot.get("clusters") or [])
    return summary
|
||||
|
||||
|
||||
def recompute_gt_herding_clusters() -> dict[str, Any]:
    """Louvain community pass — run on a schedule independent of feed ingest.

    Returns:
        A disabled marker when Louvain is switched off or no engine is
        available; otherwise the cluster count and the UTC timestamp at which
        the stored ``gt_risk`` entry was updated.
    """
    if not gt_louvain_enabled():
        return {"enabled": False, "clusters": 0, "reason": "louvain_disabled_on_lean_profile"}

    engine = get_gt_engine()
    if engine is None:
        return {"enabled": False, "clusters": 0}

    clusters = engine.compute_herding_clusters()
    timestamp = datetime.now(timezone.utc).isoformat()

    with _data_lock:
        # Merge into a copy of the existing entry so other fields survive.
        latest_data["gt_risk"] = {
            **(latest_data.get("gt_risk") or {}),
            "clusters": clusters,
            "clusters_updated": timestamp,
            "enabled": True,
        }
    _mark_fresh("gt_risk")

    cluster_count = len(clusters)
    logger.info("GT Louvain recompute: %d clusters", cluster_count)
    return {"enabled": True, "clusters": cluster_count, "timestamp": timestamp}
|
||||
|
||||
|
||||
def maybe_refresh_gt_analytics() -> None:
    """Hook for data_fetcher — no-op when analytics are disabled or lean-gated.

    Takes a shallow copy of the shared store under ``_data_lock`` and feeds it
    to ``refresh_from_latest_data``; any failure is logged and swallowed.
    """
    if gt_scheduled_ingest_enabled():
        try:
            with _data_lock:
                store_copy = dict(latest_data)
            refresh_from_latest_data(store_copy, persist=True)
        except Exception:
            logger.exception("GT analytics refresh failed")
|
||||
|
||||
|
||||
def maybe_freeze_gt_weekly_snapshot() -> None:
    """Hook for weekly scheduler — freeze operational backtest snapshot.

    Does nothing when the engine is not operational. Failures are logged and
    swallowed; a log line is emitted only when the freeze reports ``created``.
    """
    if not gt_engine_operational():
        return
    try:
        from analytics.rolling_backtest import freeze_weekly_snapshot

        outcome = freeze_weekly_snapshot(frozen_by="scheduler")
        if not outcome.get("created"):
            return
        logger.info(
            "GT rolling freeze: week=%s regions=%s alerts=%s",
            outcome.get("week_id"),
            outcome.get("region_count"),
            outcome.get("alert_count"),
        )
    except Exception:
        logger.exception("GT rolling weekly freeze failed")
|
||||
@@ -0,0 +1,361 @@
|
||||
"""Micro rolling 3-day average — fast ignition signal alongside weekly macro."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from analytics.daily_store import (
|
||||
DailyRegionReading,
|
||||
DailySnapshot,
|
||||
date_id,
|
||||
list_daily_ids,
|
||||
load_daily,
|
||||
save_daily,
|
||||
utc_now_iso,
|
||||
utc_today,
|
||||
)
|
||||
from analytics.gt_early_warning import GT_EarlyWarning
|
||||
from analytics.rolling_backtest import rolling_alert_threshold
|
||||
|
||||
# Fallback rolling-window length in days, used by micro_window_days() when
# GT_MICRO_ROLLING_DAYS is unset or invalid.
DEFAULT_WINDOW_DAYS = 3
# Fallback minimum gap between spot risk and the rolling average for an
# ignition, used by ignition_delta() when GT_MICRO_IGNITION_DELTA is unset or
# invalid.
DEFAULT_IGNITION_DELTA = 0.10
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = str(os.environ.get(name, "")).strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
return max(1, int(raw))
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
raw = str(os.environ.get(name, "")).strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
return float(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def micro_window_days() -> int:
    """Return the rolling window length in days.

    Read from ``GT_MICRO_ROLLING_DAYS``; falls back to ``DEFAULT_WINDOW_DAYS``.
    """
    days = _env_int("GT_MICRO_ROLLING_DAYS", DEFAULT_WINDOW_DAYS)
    return days
|
||||
|
||||
|
||||
def ignition_delta() -> float:
    """Return the minimum spot-minus-average gap that counts as an ignition.

    Read from ``GT_MICRO_IGNITION_DELTA``; falls back to
    ``DEFAULT_IGNITION_DELTA``.
    """
    delta = _env_float("GT_MICRO_IGNITION_DELTA", DEFAULT_IGNITION_DELTA)
    return delta
|
||||
|
||||
|
||||
def _peak_score(
|
||||
*,
|
||||
composite: float,
|
||||
financial: float,
|
||||
unrest: float,
|
||||
conflict: float,
|
||||
) -> float:
|
||||
return max(composite, financial, unrest, conflict)
|
||||
|
||||
|
||||
def _region_reading_from_feature(
|
||||
feature: dict[str, Any],
|
||||
*,
|
||||
captured_at: str,
|
||||
) -> DailyRegionReading | None:
|
||||
props = feature.get("properties") or {}
|
||||
region = str(props.get("region") or "").strip().lower()
|
||||
if not region:
|
||||
return None
|
||||
composite = float(props.get("risk") or props.get("composite_risk") or 0.0)
|
||||
financial = float(props.get("financial") or 0.0)
|
||||
unrest = float(props.get("unrest") or 0.0)
|
||||
conflict = float(props.get("conflict") or 0.0)
|
||||
peak = _peak_score(
|
||||
composite=composite,
|
||||
financial=financial,
|
||||
unrest=unrest,
|
||||
conflict=conflict,
|
||||
)
|
||||
return DailyRegionReading(
|
||||
region=region,
|
||||
composite_risk=composite,
|
||||
financial=financial,
|
||||
unrest=unrest,
|
||||
conflict=conflict,
|
||||
peak_score=peak,
|
||||
readings=1,
|
||||
last_captured_at=captured_at,
|
||||
)
|
||||
|
||||
|
||||
def capture_daily_readings(
    engine: GT_EarlyWarning,
    *,
    when: date | None = None,
) -> dict[str, Any]:
    """
    Upsert the day's regional readings from the live heatmap.

    A region not yet stored for the day is inserted as-is. For a region
    already stored, the composite and per-domain scores are overwritten with
    the newest values, ``peak_score`` keeps the day's maximum, and the reading
    counter is incremented.

    Args:
        engine: Engine supplying the heatmap.
        when: Day to write to; defaults to ``utc_today()``.

    Returns:
        Summary with the day id, stored region count, number of regions
        touched in this call, and the capture timestamp.
    """
    day = when or utc_today()
    day_key = date_id(day)
    captured_at = utc_now_iso()
    heatmap = engine.get_risk_heatmap()
    snapshot = load_daily(day) or DailySnapshot(date=day_key, regions={})

    touched = 0
    for feature in heatmap.get("features") or []:
        reading = (
            _region_reading_from_feature(feature, captured_at=captured_at)
            if isinstance(feature, dict)
            else None
        )
        if reading is None:
            continue

        touched += 1
        stored = snapshot.regions.get(reading.region)
        if stored is None:
            snapshot.regions[reading.region] = reading
        else:
            stored.composite_risk = reading.composite_risk
            stored.financial = reading.financial
            stored.unrest = reading.unrest
            stored.conflict = reading.conflict
            # Peak is a daily high-water mark, unlike the fields above.
            stored.peak_score = max(stored.peak_score, reading.peak_score)
            stored.readings += 1
            stored.last_captured_at = captured_at

    snapshot.last_updated_at = captured_at
    save_daily(snapshot)
    return {
        "date": day_key,
        "regions": len(snapshot.regions),
        "updated": touched,
        "captured_at": captured_at,
    }
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MicroRegionView:
    """Immutable per-region result of the micro rolling computation."""

    region: str
    spot_risk: float
    risk_3d_avg: float
    risk_delta: float
    days_in_window: int
    day_scores: tuple[float, ...]
    alerted_spot: bool
    alerted_3d: bool
    ignition: bool
    financial: float
    unrest: float
    conflict: float

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form with every float rounded to 4 decimals."""

        def r4(value: float) -> float:
            return round(value, 4)

        return {
            "region": self.region,
            "spot_risk": r4(self.spot_risk),
            "risk_3d_avg": r4(self.risk_3d_avg),
            "risk_delta": r4(self.risk_delta),
            "days_in_window": self.days_in_window,
            "day_scores": list(map(r4, self.day_scores)),
            "alerted_spot": self.alerted_spot,
            "alerted_3d": self.alerted_3d,
            "ignition": self.ignition,
            "financial": r4(self.financial),
            "unrest": r4(self.unrest),
            "conflict": r4(self.conflict),
        }
|
||||
|
||||
|
||||
def _day_offsets(window_days: int) -> list[int]:
|
||||
# Today + prior (window_days - 1) days.
|
||||
return list(range(window_days - 1, -1, -1))
|
||||
|
||||
|
||||
def _historical_dates(as_of: date, window_days: int) -> list[date]:
|
||||
return [as_of - timedelta(days=offset) for offset in _day_offsets(window_days)]
|
||||
|
||||
|
||||
def compute_micro_view(
    region: str,
    *,
    as_of: date | None = None,
    window_days: int | None = None,
    alert_threshold: float | None = None,
    spot_reading: DailyRegionReading | None = None,
) -> MicroRegionView | None:
    """Compute rolling N-day average and ignition vs spot for one region.

    Args:
        region: Region name; normalized by stripping and lower-casing.
        as_of: Last day of the window; defaults to ``utc_today()``.
        window_days: Window length; a falsy value selects
            ``micro_window_days()``.
        alert_threshold: Alert cutoff; ``None`` selects
            ``rolling_alert_threshold()``.
        spot_reading: Optional reading used as the spot value when the
            ``as_of`` day has no stored row for the region. It is not added to
            the averaged day scores.

    Returns:
        ``None`` for a blank region, or when neither stored rows nor a
        ``spot_reading`` exist; otherwise the computed view.

    The spot reading is, in order of preference: the row stored for ``as_of``,
    the supplied ``spot_reading``, or the most recent row found in the window.
    """
    region_key = str(region or "").strip().lower()
    if not region_key:
        return None

    today = as_of or utc_today()
    window = window_days or micro_window_days()
    threshold = float(alert_threshold if alert_threshold is not None else rolling_alert_threshold())
    delta_min = ignition_delta()

    day_scores: list[float] = []
    latest: DailyRegionReading | None = spot_reading
    most_recent: DailyRegionReading | None = None

    # Dates are visited oldest to newest, so the last row seen is the most recent.
    for day in _historical_dates(today, window):
        snap = load_daily(day)
        if snap is None:
            continue
        row = snap.regions.get(region_key)
        if row is None:
            continue
        day_scores.append(row.peak_score)
        most_recent = row
        if day == today:
            latest = row

    if latest is None:
        # Today not captured yet and no explicit spot supplied: use the most
        # recent stored row so the per-domain scores match the spot value
        # instead of being reported as zero.
        latest = most_recent
    if latest is None:
        return None

    spot = float(latest.peak_score)
    avg = sum(day_scores) / len(day_scores) if day_scores else spot
    risk_delta = spot - avg
    # Ignition needs both a jump over the baseline and a spot value within
    # 75% of the alert threshold.
    ignition = risk_delta >= delta_min and spot >= threshold * 0.75

    return MicroRegionView(
        region=region_key,
        spot_risk=spot,
        risk_3d_avg=avg,
        risk_delta=risk_delta,
        days_in_window=len(day_scores),
        day_scores=tuple(day_scores),
        alerted_spot=spot >= threshold,
        alerted_3d=avg >= threshold,
        ignition=ignition,
        financial=float(latest.financial),
        unrest=float(latest.unrest),
        conflict=float(latest.conflict),
    )
|
||||
|
||||
|
||||
def compute_all_micro_views(
    *,
    as_of: date | None = None,
    window_days: int | None = None,
    alert_threshold: float | None = None,
) -> list[MicroRegionView]:
    """Build micro views for all regions seen in the rolling window.

    Args:
        as_of: Last day of the window; defaults to ``utc_today()``.
        window_days: Window length; a falsy value selects
            ``micro_window_days()``.
        alert_threshold: Passed through unchanged to ``compute_micro_view``.

    Returns:
        Views sorted descending by ``(ignition, risk_delta, spot_risk)``.
    """
    today = as_of or utc_today()
    window = window_days or micro_window_days()

    # Collect every region that has a stored row on any day of the window.
    regions: set[str] = set()
    for day in _historical_dates(today, window):
        snap = load_daily(day)
        if snap is not None:
            regions.update(snap.regions.keys())

    candidates = (
        compute_micro_view(
            region,
            as_of=today,
            window_days=window,
            alert_threshold=alert_threshold,
        )
        for region in regions
    )
    views = [view for view in candidates if view is not None]
    return sorted(
        views,
        key=lambda row: (row.ignition, row.risk_delta, row.spot_risk),
        reverse=True,
    )
|
||||
|
||||
|
||||
def enrich_heatmap_features(
    heatmap: dict[str, Any],
    *,
    as_of: date | None = None,
    window_days: int | None = None,
    alert_threshold: float | None = None,
) -> dict[str, Any]:
    """Attach micro rolling fields to heatmap GeoJSON features.

    Args:
        heatmap: Heatmap dict; its ``features`` list is read, not mutated.
        as_of: Forwarded to ``compute_micro_view`` for every feature.
        window_days: Window length; a falsy value selects
            ``micro_window_days()``.
        alert_threshold: Alert cutoff; ``None`` selects
            ``rolling_alert_threshold()``.

    Returns:
        A copy of ``heatmap`` with rebuilt ``features`` plus the
        ``micro_window_days`` and ``micro_alert_threshold`` used. Non-dict
        features are omitted; features without a region or without a micro
        view are kept with their properties copied unchanged.
    """
    if alert_threshold is None:
        threshold = float(rolling_alert_threshold())
    else:
        threshold = float(alert_threshold)
    window = window_days or micro_window_days()

    enriched: list[dict[str, Any]] = []
    for feature in heatmap.get("features") or []:
        if not isinstance(feature, dict):
            continue

        props = dict(feature.get("properties") or {})
        region = str(props.get("region") or "").strip().lower()

        view = None
        if region:
            view = compute_micro_view(
                region,
                as_of=as_of,
                window_days=window,
                alert_threshold=threshold,
            )

        if view is not None:
            props.update(
                {
                    "risk_spot": view.spot_risk,
                    "risk_3d_avg": view.risk_3d_avg,
                    "risk_delta": view.risk_delta,
                    "micro_days": view.days_in_window,
                    "micro_ignition": view.ignition,
                    "alerted_3d": view.alerted_3d,
                    "day_scores": list(view.day_scores),
                }
            )

        enriched.append({**feature, "properties": props})

    return {
        **heatmap,
        "features": enriched,
        "micro_window_days": window,
        "micro_alert_threshold": threshold,
    }
|
||||
|
||||
|
||||
def micro_rolling_report(
    *,
    as_of: date | None = None,
    window_days: int | None = None,
    limit: int = 15,
) -> dict[str, Any]:
    """API/OpenClaw payload for micro rolling 3-day context.

    Args:
        as_of: Last day of the window; defaults to ``utc_today()``.
        window_days: Window length; a falsy value selects
            ``micro_window_days()``.
        limit: Maximum number of rows in ``ignitions`` and ``top_regions``.
            Values below 1 are treated as 1.

    Returns:
        Report dict with the window parameters, stored day ids, counts, and
        the capped ``ignitions`` / ``top_regions`` lists.
    """
    today = as_of or utc_today()
    window = window_days or micro_window_days()
    threshold = rolling_alert_threshold()
    # One normalized cap for both lists: a raw negative limit would otherwise
    # slice from the end and silently drop trailing ignitions.
    cap = max(1, limit)

    views = compute_all_micro_views(
        as_of=today,
        window_days=window,
        alert_threshold=threshold,
    )
    ignitions = [row for row in views if row.ignition]
    alerted_3d = [row for row in views if row.alerted_3d]
    top = views[:cap]

    stored_days = list_daily_ids(newest_first=True, limit=window)
    return {
        "mode": "micro_rolling",
        "window_days": window,
        "alert_threshold": threshold,
        "ignition_delta": ignition_delta(),
        "as_of": date_id(today),
        "days_stored": len(stored_days),
        "stored_dates": stored_days,
        "regions_tracked": len(views),
        "ignition_count": len(ignitions),
        "alerted_3d_count": len(alerted_3d),
        "ignitions": [row.to_dict() for row in ignitions[:cap]],
        "top_regions": [row.to_dict() for row in top],
        "note": (
            f"Micro view: {window}-day rolling average vs spot risk. "
            "Ignition = spot jumped above the rolling baseline (events that flare fast). "
            "Macro week-over-week validation remains on /api/analytics/rolling."
        ),
    }
|
||||
@@ -0,0 +1,382 @@
|
||||
"""Rolling weekly operational validation for Strategic Risk Analytics.
|
||||
|
||||
Freezes live GT scores each ISO week, accepts delayed outcome labels, and
|
||||
scores prior-week predictions with accuracy + Wilson 95% CI. Unlike the
|
||||
static historical benchmark, this measures forward operational usefulness.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from analytics.backtest import DEFAULT_BACKTEST_ALERT_THRESHOLD, wilson_interval
|
||||
from analytics.gt_early_warning import GT_EarlyWarning
|
||||
from analytics.integration import get_gt_engine
|
||||
from analytics.weekly_store import (
|
||||
VALID_LABELS,
|
||||
LabelName,
|
||||
RegionSnapshot,
|
||||
WeeklySnapshot,
|
||||
list_week_ids,
|
||||
load_week,
|
||||
save_week,
|
||||
utc_now_iso,
|
||||
)
|
||||
|
||||
MIN_LABELED_FOR_TREND = 5
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as a float.

    Returns *default* when the variable is unset, blank, or cannot be
    parsed by ``float()``.
    """
    text = str(os.environ.get(name, "")).strip()
    try:
        return float(text) if text else default
    except ValueError:
        return default
|
||||
|
||||
|
||||
def rolling_alert_threshold() -> float:
    """Return the fixed operational alert cutoff (not retroactively tuned per week).

    Reads ``GT_ROLLING_ALERT_THRESHOLD`` from the environment and falls back
    to ``DEFAULT_BACKTEST_ALERT_THRESHOLD`` when it is unset or invalid.
    """
    fallback = DEFAULT_BACKTEST_ALERT_THRESHOLD
    return _env_float("GT_ROLLING_ALERT_THRESHOLD", fallback)
|
||||
|
||||
|
||||
def iso_week_id(when: datetime | date | None = None) -> str:
    """Format *when* as an ISO week id, e.g. ``2026-W24``.

    Args:
        when: A ``date`` or ``datetime``; the current UTC time is used when
            omitted. A ``datetime`` is reduced to its own calendar date.
    """
    moment = datetime.now(timezone.utc) if when is None else when
    day = moment.date() if isinstance(moment, datetime) else moment
    iso = day.isocalendar()
    # Index 0 is the ISO year (may differ from the calendar year), 1 the week.
    return "{}-W{:02d}".format(iso[0], iso[1])
|
||||
|
||||
|
||||
def _region_rows_from_engine(
    engine: GT_EarlyWarning,
    *,
    alert_threshold: float,
) -> list[RegionSnapshot]:
    """Convert the engine's risk heatmap into pending region snapshots.

    Every heatmap feature that is a dict with a non-empty ``region`` property
    becomes one ``RegionSnapshot`` labelled ``"pending"``. A region is marked
    ``alerted`` when the highest of its composite, financial, unrest and
    conflict scores reaches *alert_threshold*.

    Returns:
        Snapshots sorted by composite risk, highest first.
    """

    def _score(props: dict[str, Any], key: str) -> float:
        # Missing or falsy properties count as 0.0.
        return float(props.get(key) or 0.0)

    snapshots: list[RegionSnapshot] = []
    for feature in engine.get_risk_heatmap().get("features") or []:
        if not isinstance(feature, dict):
            continue
        props = feature.get("properties") or {}
        name = str(props.get("region") or "").strip().lower()
        if not name:
            continue
        composite = _score(props, "risk")
        financial = _score(props, "financial")
        unrest = _score(props, "unrest")
        conflict = _score(props, "conflict")
        peak = max(composite, financial, unrest, conflict)
        snapshots.append(
            RegionSnapshot(
                region=name,
                composite_risk=composite,
                financial=financial,
                unrest=unrest,
                conflict=conflict,
                alerted=peak >= alert_threshold,
                label="pending",
            )
        )
    return sorted(snapshots, key=lambda snap: snap.composite_risk, reverse=True)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WeekScore:
    """Immutable scoring summary for one frozen week."""

    week_id: str
    frozen_at: str
    alert_threshold: float
    total_regions: int
    labeled: int
    pending: int
    alerted: int
    correct: int
    accuracy: float
    confidence_rate: float
    wilson_lower_95: float
    wilson_upper_95: float
    true_positives: int
    true_negatives: int
    false_positives: int
    false_negatives: int
    sensitivity: float
    specificity: float
    scorable: bool

    def to_dict(self) -> dict[str, Any]:
        """Return the score as a plain dict in field order.

        Rate-like float fields are rounded to 4 decimal places; counts,
        strings and the ``scorable`` flag are passed through unchanged.
        """
        rounded = {
            "alert_threshold",
            "accuracy",
            "confidence_rate",
            "wilson_lower_95",
            "wilson_upper_95",
            "sensitivity",
            "specificity",
        }
        payload: dict[str, Any] = {}
        # __dataclass_fields__ preserves declaration order, which is the key
        # order of the serialized payload.
        for name in self.__dataclass_fields__:
            value = getattr(self, name)
            payload[name] = round(value, 4) if name in rounded else value
        return payload
|
||||
|
||||
|
||||
def _predicted_positive(row: RegionSnapshot) -> bool:
    """Return the prediction recorded on *row*, i.e. its ``alerted`` flag."""
    flagged = row.alerted
    return flagged
|
||||
|
||||
|
||||
def _actual_positive(label: LabelName) -> bool:
    """Return True when *label* is ``"true_escalation"``."""
    is_escalation = label == "true_escalation"
    return is_escalation
|
||||
|
||||
|
||||
def _is_correct(row: RegionSnapshot) -> bool:
    """Return whether the row's alert prediction matches its outcome label.

    A ``true_escalation`` is correct when the row alerted; ``false_alarm``
    and ``benign`` are correct when it did not. ``pending`` (or any other
    label) is never counted as correct.
    """
    outcome = row.label
    flagged = row.alerted
    if outcome == "true_escalation":
        return flagged
    if outcome in ("false_alarm", "benign"):
        return not flagged
    return False
|
||||
|
||||
|
||||
def score_week(snapshot: WeeklySnapshot) -> WeekScore:
    """Score a frozen week against delayed labels (pending rows excluded).

    Builds the confusion matrix over labelled rows only: ``true_escalation``
    is the positive class, ``benign`` and ``false_alarm`` the negative class,
    and a row's ``alerted`` flag is the prediction.

    Returns:
        A ``WeekScore`` with accuracy, its Wilson 95% interval (the lower
        bound doubles as ``confidence_rate``), sensitivity, specificity, and
        ``scorable`` set when at least ``MIN_LABELED_FOR_TREND`` rows are
        labelled. Ratios are 0.0 when their denominator is zero.
    """
    labeled_rows = [row for row in snapshot.regions if row.label != "pending"]
    total = len(labeled_rows)
    pending = len(snapshot.regions) - total

    tp = tn = fp = fn = 0
    for row in labeled_rows:
        if row.label == "true_escalation":
            if row.alerted:
                tp += 1
            else:
                fn += 1
        elif row.label in ("benign", "false_alarm"):
            if row.alerted:
                fp += 1
            else:
                tn += 1

    correct = tp + tn
    accuracy = correct / total if total else 0.0
    lower, upper = wilson_interval(correct, total)

    pos_total = tp + fn
    neg_total = total - pos_total

    sensitivity = tp / pos_total if pos_total else 0.0
    # Specificity is TN over *actual* negatives. Dividing by predicted
    # negatives (TN + FN), as before, yields negative predictive value.
    specificity = tn / neg_total if neg_total else 0.0

    return WeekScore(
        week_id=snapshot.week_id,
        frozen_at=snapshot.frozen_at,
        alert_threshold=snapshot.alert_threshold,
        total_regions=len(snapshot.regions),
        labeled=total,
        pending=pending,
        alerted=sum(1 for row in snapshot.regions if row.alerted),
        correct=correct,
        accuracy=accuracy,
        confidence_rate=lower,
        wilson_lower_95=lower,
        wilson_upper_95=upper,
        true_positives=tp,
        true_negatives=tn,
        false_positives=fp,
        false_negatives=fn,
        sensitivity=sensitivity,
        specificity=specificity,
        scorable=total >= MIN_LABELED_FOR_TREND,
    )
|
||||
|
||||
|
||||
def freeze_weekly_snapshot(
    *,
    week_id: str | None = None,
    alert_threshold: float | None = None,
    force: bool = False,
    frozen_by: str = "system",
    engine: GT_EarlyWarning | None = None,
) -> dict[str, Any]:
    """Capture the current GT heatmap as a weekly operational snapshot.

    Idempotent per week unless ``force=True``: when a snapshot with regions
    is already stored for the week, it is returned with ``created=False``
    and nothing is written.

    Args:
        week_id: Target week; defaults to the current ISO week.
        alert_threshold: Alert cutoff; defaults to ``rolling_alert_threshold()``.
        force: Overwrite an existing snapshot for the week.
        frozen_by: Recorded on the snapshot as its origin.
        engine: Engine to read from; defaults to ``get_gt_engine()``.

    Returns:
        ``{"ok": False, "detail": ...}`` when no engine is available,
        otherwise a dict with the snapshot, its score, and (for newly created
        snapshots) ``alert_count`` and ``region_count``.
    """
    active_engine = engine or get_gt_engine()
    if active_engine is None:
        return {"ok": False, "detail": "GT analytics engine unavailable"}

    target_week = week_id or iso_week_id()
    if alert_threshold is None:
        cutoff = float(rolling_alert_threshold())
    else:
        cutoff = float(alert_threshold)

    stored = load_week(target_week)
    if not force and stored and stored.regions:
        stored_score = score_week(stored)
        return {
            "ok": True,
            "created": False,
            "week_id": target_week,
            "snapshot": stored.to_dict(),
            "score": stored_score.to_dict(),
        }

    rows = _region_rows_from_engine(active_engine, alert_threshold=cutoff)
    fresh = WeeklySnapshot(
        week_id=target_week,
        frozen_at=utc_now_iso(),
        alert_threshold=cutoff,
        regions=rows,
        frozen_by=frozen_by,
    )
    save_week(fresh)
    fresh_score = score_week(fresh)
    return {
        "ok": True,
        "created": True,
        "week_id": target_week,
        "snapshot": fresh.to_dict(),
        "score": fresh_score.to_dict(),
        "alert_count": sum(1 for row in rows if row.alerted),
        "region_count": len(rows),
    }
|
||||
|
||||
|
||||
def label_regions(
    week_id: str,
    labels: list[dict[str, Any]],
    *,
    labeled_by: str = "operator",
) -> dict[str, Any]:
    """Apply delayed outcome labels to a frozen week and persist it.

    Args:
        week_id: Week whose snapshot should be updated.
        labels: Entries with ``region``, ``label`` and optional ``notes``.
            Region and label are trimmed and lower-cased before matching.
        labeled_by: Echoed back in the response.

    Returns:
        ``{"ok": False, "detail": ...}`` when the week is not stored;
        otherwise the update count, the regions that were skipped (invalid
        or ``pending`` label, or region not in the snapshot) and the
        re-computed week score. Non-dict entries and entries without a
        region are ignored silently.
    """
    snapshot = load_week(week_id)
    if snapshot is None:
        return {"ok": False, "detail": f"Week {week_id} not found"}

    rows_by_name = {row.region: row for row in snapshot.regions}
    applied = 0
    rejected: list[str] = []
    stamp = utc_now_iso()

    for item in labels:
        if not isinstance(item, dict):
            continue
        name = str(item.get("region") or "").strip().lower()
        outcome = str(item.get("label") or "").strip().lower()
        if not name:
            continue

        usable = outcome in VALID_LABELS and outcome != "pending"
        target = rows_by_name.get(name) if usable else None
        if target is None:
            rejected.append(name)
            continue

        target.label = outcome  # type: ignore[assignment]
        target.labeled_at = stamp
        remark = item.get("notes")
        if remark is not None:
            target.notes = str(remark)
        applied += 1

    save_week(snapshot)
    rescored = score_week(snapshot)
    return {
        "ok": True,
        "week_id": week_id,
        "updated": applied,
        "skipped": rejected,
        "labeled_by": labeled_by,
        "score": rescored.to_dict(),
    }
|
||||
|
||||
|
||||
def label_region(
    week_id: str,
    region: str,
    label: LabelName,
    *,
    notes: str = "",
    labeled_by: str = "operator",
) -> dict[str, Any]:
    """Label a single region of a frozen week via ``label_regions``.

    Note that *notes* is always forwarded, so the default ``""`` replaces
    any notes already stored on the row.
    """
    entry = {"region": region, "label": label, "notes": notes}
    return label_regions(week_id, [entry], labeled_by=labeled_by)
|
||||
|
||||
|
||||
def rolling_trend(*, weeks: int = 8) -> list[WeekScore]:
    """Return scores for the most recent stored weeks, newest first.

    At most ``max(1, weeks)`` week ids are considered; ids whose snapshot
    cannot be loaded are left out.
    """
    wanted = max(1, weeks)
    recent_ids = list_week_ids(newest_first=True)[:wanted]
    # Lazy generator keeps the original load-then-score order per week.
    loaded = (load_week(week_id) for week_id in recent_ids)
    return [score_week(snapshot) for snapshot in loaded if snapshot is not None]
|
||||
|
||||
|
||||
def rolling_report(*, weeks: int = 8, target_confidence: float = 0.80) -> dict[str, Any]:
    """Aggregate the operational validation trend for API / OpenClaw.

    Args:
        weeks: Number of most recent stored weeks to include.
        target_confidence: Minimum ``confidence_rate`` the latest scorable
            week must reach for ``meets_target`` to be true.

    Returns:
        A dict with the full trend (newest first), an oldest-first accuracy
        series over scorable weeks, the latest week (scorable if any,
        otherwise the newest stored one), and the derived
        ``improving_vs_prior`` / ``meets_target`` flags.
    """
    threshold = rolling_alert_threshold()
    trend = rolling_trend(weeks=weeks)
    scorable = [score for score in trend if score.scorable]

    if scorable:
        latest = scorable[0]
    elif trend:
        latest = trend[0]
    else:
        latest = None

    accuracy_series = [
        {"week_id": score.week_id, "accuracy": round(score.accuracy, 4), "labeled": score.labeled}
        for score in scorable[::-1]
    ]

    # Newest scorable week compared with the one before it; a tie counts
    # as improving.
    improving = len(scorable) >= 2 and scorable[0].accuracy >= scorable[1].accuracy

    meets_target = bool(
        latest and latest.scorable and latest.confidence_rate >= target_confidence
    )

    return {
        "mode": "rolling_operational",
        "alert_threshold": threshold,
        "target_confidence": target_confidence,
        "weeks_requested": weeks,
        "weeks_stored": len(trend),
        "weeks_scorable": len(scorable),
        "min_labeled_per_week": MIN_LABELED_FOR_TREND,
        "latest": latest.to_dict() if latest else None,
        "trend": [score.to_dict() for score in trend],
        "accuracy_series": accuracy_series,
        "improving_vs_prior": improving,
        "meets_target": meets_target,
        "note": (
            "Operational metric: scores frozen weekly predictions against delayed "
            "labels. Unlike the static benchmark, this measures live forward utility."
        ),
    }
|
||||
@@ -0,0 +1,158 @@
|
||||
"""Configuration for Strategic Risk Analytics (feature-flagged)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _env_bool(name: str, default: bool = False) -> bool:
    """Read environment variable *name* as a boolean flag.

    Unset or blank yields *default*. Otherwise every value is true except
    ``0``, ``false``, ``no`` and ``off`` (case-insensitive).
    """
    text = str(os.environ.get(name, "")).strip().lower()
    if text:
        return text not in ("0", "false", "no", "off")
    return default
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as a float.

    Returns *default* when the variable is unset, blank, or cannot be
    parsed by ``float()``.
    """
    text = str(os.environ.get(name, "")).strip()
    try:
        return float(text) if text else default
    except ValueError:
        return default
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset, blank, or cannot be
    parsed by ``int()`` (for example ``"3.5"``).
    """
    text = str(os.environ.get(name, "")).strip()
    try:
        return int(text) if text else default
    except ValueError:
        return default
|
||||
|
||||
|
||||
def _parse_signal_weights(raw: str) -> dict[str, float]:
    """Parse signal-weight overrides from a JSON object or ``k=v`` list.

    A JSON object whose values all convert with ``float()`` is used as-is.
    Anything else is treated as a comma-separated ``key=value`` list, where
    pieces without ``=`` or with a non-numeric value are dropped.

    Returns:
        Mapping of signal name to weight; empty for blank input.
    """
    if not raw.strip():
        return {}

    try:
        decoded = json.loads(raw)
    except (json.JSONDecodeError, TypeError, ValueError):
        decoded = None
    if isinstance(decoded, dict):
        try:
            return {str(key): float(val) for key, val in decoded.items()}
        except (TypeError, ValueError):
            # Fall through to the key=value parser, as for non-JSON input.
            pass

    result: dict[str, float] = {}
    for chunk in raw.split(","):
        key, sep, value = chunk.strip().partition("=")
        if not sep:
            continue
        try:
            result[key.strip()] = float(value.strip())
        except ValueError:
            continue
    return result
|
||||
|
||||
|
||||
def resolve_gt_profile() -> str:
    """Return the runtime profile name reported by ``services.runtime_profile``.

    The import is performed at call time rather than at module import.
    """
    from services.runtime_profile import resolve_profile_name as _resolve_name

    return _resolve_name()
|
||||
|
||||
|
||||
def gt_analytics_ack_low_cpu() -> bool:
    """Return the ``GT_ANALYTICS_ACK_LOW_CPU`` flag (false when unset)."""
    acknowledged = _env_bool("GT_ANALYTICS_ACK_LOW_CPU", default=False)
    return acknowledged
|
||||
|
||||
|
||||
def gt_engine_operational() -> bool:
    """Full GT engine (scheduled ingest, heatmap, Louvain) — not watchdog-only.

    True when analytics is enabled and, on the ``lean`` profile, the
    low-CPU acknowledgement flag is also set.
    """
    if not get_gt_settings().enabled:
        return False
    lean_without_ack = resolve_gt_profile() == "lean" and not gt_analytics_ack_low_cpu()
    return not lean_without_ack
|
||||
|
||||
|
||||
def gt_scheduled_ingest_enabled() -> bool:
    """Return whether scheduled ingest may run (same gate as the full engine)."""
    operational = gt_engine_operational()
    return operational
|
||||
|
||||
|
||||
def gt_louvain_enabled() -> bool:
    """Return whether Louvain may run (same gate as the full engine)."""
    operational = gt_engine_operational()
    return operational
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GTAnalyticsSettings:
    """Immutable configuration values for Strategic Risk Analytics.

    The defaults below mirror the fallbacks ``get_gt_settings`` uses when
    the corresponding ``GT_ANALYTICS_*`` environment variable is unset.
    """

    # Feature flag and runtime profile.
    enabled: bool = False
    profile: str = "standard"

    # Probability / evidence tuning.
    base_prior: float = 0.15
    evidence_cap: float = 3.0
    evidence_scale: float = 5.0
    min_prob: float = 0.01
    max_prob: float = 0.99
    high_risk_threshold: float = 0.6

    # Size limits.
    max_history_per_region: int = 200
    max_heatmap_features: int = 500

    # Louvain settings.
    louvain_min_weight: float = 0.5
    louvain_interval_minutes: int = 30

    # Per-signal weight overrides and watched channel names.
    signal_weight_overrides: dict[str, float] = field(default_factory=dict)
    watched_channels: tuple[str, ...] = ()
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_gt_settings() -> GTAnalyticsSettings:
    """Build the GT analytics settings from the environment (cached).

    The result is memoised by ``lru_cache``, so later environment changes
    are not seen until ``get_gt_settings.cache_clear()`` is called. On the
    ``lean`` profile the default heatmap feature cap is 50 instead of 500,
    and the Louvain interval is never below 5 minutes.
    """
    raw_channels = str(os.environ.get("GT_ANALYTICS_WATCHED_CHANNELS", "")).strip()
    trimmed = (piece.strip() for piece in raw_channels.split(","))
    # Drop blank entries, then remove any leading "@" from channel names.
    watched = tuple(piece.lstrip("@") for piece in trimmed if piece)

    profile = resolve_gt_profile()
    heatmap_default = 50 if profile == "lean" else 500
    louvain_minutes = _env_int("GT_ANALYTICS_LOUVAIN_INTERVAL_MINUTES", 30)
    weights_raw = str(os.environ.get("GT_ANALYTICS_SIGNAL_WEIGHTS", ""))

    return GTAnalyticsSettings(
        enabled=_env_bool("GT_ANALYTICS_ENABLED", default=False),
        profile=profile,
        base_prior=_env_float("GT_ANALYTICS_BASE_PRIOR", 0.15),
        evidence_cap=_env_float("GT_ANALYTICS_EVIDENCE_CAP", 3.0),
        evidence_scale=_env_float("GT_ANALYTICS_EVIDENCE_SCALE", 5.0),
        min_prob=_env_float("GT_ANALYTICS_MIN_PROB", 0.01),
        max_prob=_env_float("GT_ANALYTICS_MAX_PROB", 0.99),
        high_risk_threshold=_env_float("GT_ANALYTICS_HIGH_RISK_THRESHOLD", 0.6),
        max_history_per_region=_env_int("GT_ANALYTICS_MAX_HISTORY", 200),
        max_heatmap_features=_env_int("GT_ANALYTICS_MAX_HEATMAP_FEATURES", heatmap_default),
        louvain_min_weight=_env_float("GT_ANALYTICS_LOUVAIN_MIN_WEIGHT", 0.5),
        louvain_interval_minutes=max(5, louvain_minutes),
        signal_weight_overrides=_parse_signal_weights(weights_raw),
        watched_channels=watched,
    )
|
||||
|
||||
|
||||
def gt_analytics_enabled() -> bool:
    """Return the ``enabled`` flag from the cached GT settings."""
    settings = get_gt_settings()
    return settings.enabled
|
||||
|
||||
|
||||
def gt_analytics_status() -> dict[str, Any]:
    """Summarise GT analytics state for status reporting.

    Combines the cached settings, the operational gate, the low-CPU
    acknowledgement flag, and the ``gt_analytics`` section of the runtime
    profile (``recommended``, ``lean_node``, ``warning``).
    """
    settings = get_gt_settings()
    from services.runtime_profile import get_runtime_profile

    runtime = get_runtime_profile()
    operational = gt_engine_operational()
    acknowledged = gt_analytics_ack_low_cpu()
    hints = runtime.get("gt_analytics", {})
    return {
        "enabled": settings.enabled,
        "operational": operational,
        "profile": settings.profile,
        "ack_low_cpu": acknowledged,
        "recommended": bool(hints.get("recommended")),
        "lean_node": bool(hints.get("lean_node")),
        "warning": hints.get("warning"),
        "experimental": True,
    }
|
||||
@@ -0,0 +1,154 @@
|
||||
"""Persistent JSON store for rolling GT operational backtest weeks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LabelName = Literal["pending", "true_escalation", "false_alarm", "benign"]
|
||||
VALID_LABELS: frozenset[str] = frozenset(
|
||||
{"pending", "true_escalation", "false_alarm", "benign"}
|
||||
)
|
||||
|
||||
_STORE_DIR = Path(__file__).parent.parent / "data" / "gt_rolling"
|
||||
_store_lock = threading.Lock()
|
||||
|
||||
|
||||
def rolling_store_dir() -> Path:
    """Return the rolling-backtest data directory.

    ``GT_ROLLING_STORE_DIR`` overrides the module default when it is set to
    a non-blank value (override via env in tests).
    """
    custom = str(os.environ.get("GT_ROLLING_STORE_DIR", "")).strip()
    return Path(custom) if custom else _STORE_DIR
|
||||
|
||||
|
||||
@dataclass
class RegionSnapshot:
    """One region's frozen scores plus its (possibly pending) outcome label."""

    region: str
    composite_risk: float
    financial: float
    unrest: float
    conflict: float
    alerted: bool
    label: LabelName = "pending"
    labeled_at: str | None = None
    notes: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as a plain dict keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> RegionSnapshot:
        """Build a snapshot from stored data, tolerating missing keys.

        Missing or falsy scores become 0.0, the region is trimmed and
        lower-cased, and a label outside ``VALID_LABELS`` is reset to
        ``"pending"``.
        """

        def _num(key: str) -> float:
            return float(raw.get(key) or 0.0)

        stored_label = str(raw.get("label") or "pending")
        return cls(
            region=str(raw.get("region") or "").strip().lower(),
            composite_risk=_num("composite_risk"),
            financial=_num("financial"),
            unrest=_num("unrest"),
            conflict=_num("conflict"),
            alerted=bool(raw.get("alerted")),
            label=stored_label if stored_label in VALID_LABELS else "pending",  # type: ignore[arg-type]
            labeled_at=raw.get("labeled_at"),
            notes=str(raw.get("notes") or ""),
        )
|
||||
|
||||
|
||||
@dataclass
class WeeklySnapshot:
    """All region snapshots frozen for one week, with freeze metadata."""

    week_id: str
    frozen_at: str
    alert_threshold: float
    regions: list[RegionSnapshot] = field(default_factory=list)
    frozen_by: str = "system"

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as a JSON-serialisable dict."""
        serialized_rows = [row.to_dict() for row in self.regions]
        return {
            "week_id": self.week_id,
            "frozen_at": self.frozen_at,
            "alert_threshold": self.alert_threshold,
            "frozen_by": self.frozen_by,
            "regions": serialized_rows,
        }

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> WeeklySnapshot:
        """Build a snapshot from stored data, tolerating missing keys.

        Region entries that are not dicts are dropped; missing text fields
        become ``""`` (``frozen_by`` becomes ``"system"``) and a missing
        threshold becomes 0.0.
        """
        rows: list[RegionSnapshot] = []
        for item in raw.get("regions") or []:
            if isinstance(item, dict):
                rows.append(RegionSnapshot.from_dict(item))
        return cls(
            week_id=str(raw.get("week_id") or ""),
            frozen_at=str(raw.get("frozen_at") or ""),
            alert_threshold=float(raw.get("alert_threshold") or 0.0),
            regions=rows,
            frozen_by=str(raw.get("frozen_by") or "system"),
        )
|
||||
|
||||
|
||||
def _week_path(week_id: str) -> Path:
    """Return the JSON file path for *week_id* inside the store directory.

    The id is sanitised so it cannot leave the store directory: ``/`` and
    ``\\`` are replaced with ``-`` and ``..`` sequences are removed. The
    backslash matters on Windows, where it is a path separator and a
    leading one can re-anchor the joined path.
    """
    safe = week_id.replace("/", "-").replace("\\", "-").replace("..", "")
    return rolling_store_dir() / f"{safe}.json"
|
||||
|
||||
|
||||
def _ensure_dir() -> None:
    """Create the store directory (and any missing parents) if needed."""
    target = rolling_store_dir()
    target.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def list_week_ids(*, newest_first: bool = True) -> list[str]:
    """Return stored ISO week ids, sorted lexicographically.

    Ids are the stems of the ``*.json`` files in the store directory,
    excluding ``index``. The directory is created when missing.
    """
    _ensure_dir()
    stems = (entry.stem for entry in rolling_store_dir().glob("*.json"))
    return sorted(
        (stem for stem in stems if stem and stem != "index"),
        reverse=newest_first,
    )
|
||||
|
||||
|
||||
def load_week(week_id: str) -> WeeklySnapshot | None:
    """Load the stored snapshot for *week_id*.

    Returns:
        The snapshot, or ``None`` when the file is missing, is not a JSON
        object, or cannot be read/parsed (the failure is logged).
    """
    source = _week_path(week_id)
    if not source.is_file():
        return None
    try:
        decoded = json.loads(source.read_text(encoding="utf-8"))
        return WeeklySnapshot.from_dict(decoded) if isinstance(decoded, dict) else None
    except (OSError, json.JSONDecodeError, TypeError, ValueError):
        logger.exception("Failed to load GT rolling week %s", week_id)
        return None
|
||||
|
||||
|
||||
def save_week(snapshot: WeeklySnapshot) -> None:
    """Persist *snapshot* as pretty-printed, key-sorted JSON.

    The payload is written to a sibling ``.json.tmp`` file and then moved
    over the final path, both while holding the store lock.
    """
    _ensure_dir()
    final_path = _week_path(snapshot.week_id)
    scratch = final_path.with_suffix(".json.tmp")
    serialized = json.dumps(snapshot.to_dict(), indent=2, sort_keys=True)
    with _store_lock:
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(final_path)
|
||||
|
||||
|
||||
def delete_week(week_id: str) -> bool:
    """Delete the stored snapshot for *week_id*.

    Returns:
        True when a file existed and was removed, False when there was
        nothing to delete.
    """
    target = _week_path(week_id)
    if target.is_file():
        with _store_lock:
            target.unlink()
        return True
    return False
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
|
||||
+143
-39
@@ -45,6 +45,7 @@ from services.mesh.mesh_compatibility import (
|
||||
from services.mesh.mesh_crypto import (
|
||||
_derive_peer_key,
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
verify_signature,
|
||||
verify_node_binding,
|
||||
parse_public_key_algo,
|
||||
@@ -112,8 +113,14 @@ def _scoped_admin_tokens() -> dict[str, list[str]]:
|
||||
return normalized
|
||||
|
||||
|
||||
def _request_scope_path(request: Request) -> str:
    """Return the ASGI request-line path, not the Host-derived URL path.

    Reads ``path`` from ``request.scope``; yields ``""`` when the request
    has no scope or the scope has no path.
    """
    asgi_scope = getattr(request, "scope", None)
    if not asgi_scope:
        return ""
    return str(asgi_scope.get("path") or "")
|
||||
|
||||
|
||||
def _required_scope_for_request(request: Request) -> str:
|
||||
path = str(request.url.path or "")
|
||||
path = _request_scope_path(request)
|
||||
if path.startswith("/api/wormhole/gate/"):
|
||||
return "gate"
|
||||
if path.startswith("/api/wormhole/dm/"):
|
||||
@@ -245,15 +252,90 @@ def _docker_bridge_local_operator_enabled() -> bool:
|
||||
}
|
||||
|
||||
|
||||
# Issue #250 (tg12): the previous implementation returned True for any IP
|
||||
# in the entire 172.16.0.0/12 range. Anyone with `docker run` access on
|
||||
# the same daemon could spin up a container that automatically passed
|
||||
# local-operator auth. The fix narrows trust to ONLY connections whose
|
||||
# source IP matches the configured frontend container's hostname.
|
||||
#
|
||||
# Docker DNS resolves both the compose service name (``frontend``) and
|
||||
# the explicit ``container_name`` (``shadowbroker-frontend``) to the
|
||||
# frontend container's bridge IP. We forward-resolve both, cache the
|
||||
# result for 30s, and only trust connections from those exact IPs.
|
||||
#
|
||||
# Operators on shared Docker hosts get the benefit of the narrower
|
||||
# surface. Operators on single-user installs see no behavior change —
|
||||
# their frontend container still resolves and is still trusted.
|
||||
_DOCKER_BRIDGE_TRUST_CACHE: dict = {"ips": frozenset(), "expires": 0.0}
|
||||
_DOCKER_BRIDGE_TRUST_TTL = 30.0
|
||||
|
||||
|
||||
def _trusted_bridge_frontend_hostnames() -> list[str]:
    """Container hostnames whose IPs we treat as local-operator on the bridge.

    The default covers both the Docker Compose service name (``frontend``)
    and the explicit ``container_name`` from the shipped docker-compose.yml
    (``shadowbroker-frontend``). Operators with non-default names can
    override via the ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS`` env var, a
    comma-separated list; surrounding whitespace is stripped and blank
    entries are dropped.
    """
    configured = str(
        os.environ.get(
            "SHADOWBROKER_TRUSTED_FRONTEND_HOSTS",
            "frontend,shadowbroker-frontend",
        )
    ).strip()
    candidates = (entry.strip() for entry in configured.split(","))
    return [entry for entry in candidates if entry]
|
||||
|
||||
|
||||
def _resolve_trusted_bridge_ips() -> frozenset[str]:
    """Resolve trusted frontend hostnames to a set of IPs, with caching.

    Cached for 30s so we don't hit DNS on every request. The cache is
    process-local — frontend container IP rotations during a backend's
    lifetime will be picked up within 30s.

    Returns frozenset() if Docker DNS can't resolve any of the configured
    hostnames (fail-closed — when in doubt, refuse to trust the bridge).
    """
    import socket
    import time as _time

    now = _time.time()
    cache = _DOCKER_BRIDGE_TRUST_CACHE
    if cache["expires"] > now:
        return cache["ips"]

    ips: set[str] = set()
    for hostname in _trusted_bridge_frontend_hostnames():
        try:
            _, _, addrs = socket.gethostbyname_ex(hostname)
        except (OSError, UnicodeError):
            # socket.gaierror/herror are OSError subclasses. UnicodeError is
            # raised when the hostname cannot be IDNA-encoded (e.g. an empty
            # or over-long label in a misconfigured env value); treat it as
            # unresolvable so the lookup stays fail-closed.
            continue
        ips.update(addrs)

    resolved = frozenset(ips)
    cache["ips"] = resolved
    cache["expires"] = now + _DOCKER_BRIDGE_TRUST_TTL
    return resolved
|
||||
|
||||
|
||||
def _is_docker_bridge_host(host: str) -> bool:
    """Return True only when the source IP matches our trusted frontend
    container hostname(s).

    Previously trusted any 172.16.0.0/12 IP unconditionally. See the
    block comment above for the security rationale.

    Args:
        host: Source address of the connection, as a string.

    Returns:
        False for anything that is not a valid private IP address; otherwise
        whether *host* is one of the resolved trusted frontend IPs.
    """
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        return False
    # The former blanket ``ip in 172.16.0.0/12`` return is removed: it made
    # the narrowed check below unreachable.
    # Public IPs are never our frontend container — skip DNS work for them.
    if not ip.is_private:
        return False
    return host in _resolve_trusted_bridge_ips()
|
||||
|
||||
|
||||
def _is_trusted_local_runtime_host(host: str) -> bool:
|
||||
@@ -367,7 +449,7 @@ async def _verify_openclaw_hmac(request: Request) -> bool:
|
||||
|
||||
# Compute expected signature: HMAC-SHA256(secret, METHOD|path|ts|nonce|body_digest)
|
||||
method = str(request.method or "").upper()
|
||||
path = str(request.url.path or "")
|
||||
path = _request_scope_path(request)
|
||||
message = f"{method}|{path}|{ts_str}|{nonce}|{body_digest}"
|
||||
expected = hmac.new(
|
||||
secret.encode("utf-8"),
|
||||
@@ -439,33 +521,32 @@ _KNOWN_COMPROMISED_PEER_PUSH_SECRET_SHA256 = (
|
||||
def _validate_admin_startup() -> None:
|
||||
admin_key = _current_admin_key()
|
||||
|
||||
if not admin_key or len(admin_key) < 32:
|
||||
import secrets
|
||||
if not admin_key:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is not set. Local-operator/admin endpoints will reject "
|
||||
"remote callers until ADMIN_KEY is configured."
|
||||
)
|
||||
return
|
||||
|
||||
reason = "not set" if not admin_key else f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
new_key = secrets.token_hex(32) # 64-char hex string
|
||||
if len(admin_key) < 32:
|
||||
reason = f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
try:
|
||||
from routers.ai_intel import _write_env_value
|
||||
|
||||
_write_env_value("ADMIN_KEY", new_key)
|
||||
os.environ["ADMIN_KEY"] = new_key
|
||||
logger.info(
|
||||
"ADMIN_KEY was %s — auto-generated a strong 64-character key and "
|
||||
"saved it to .env. Admin/mesh endpoints are now secured.",
|
||||
reason,
|
||||
)
|
||||
# Clear settings cache so the rest of startup picks up the new key
|
||||
try:
|
||||
get_settings.cache_clear()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
debug_mode = bool(getattr(get_settings(), "MESH_DEBUG_MODE", False))
|
||||
except Exception:
|
||||
debug_mode = False
|
||||
if debug_mode:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is %s and could not auto-generate: %s. "
|
||||
"Admin/mesh endpoints may be unavailable.",
|
||||
"ADMIN_KEY is %s. Debug mode is enabled, so startup will continue, "
|
||||
"but production deployments must use a 32+ character key.",
|
||||
reason,
|
||||
exc,
|
||||
)
|
||||
return
|
||||
logger.error(
|
||||
"ADMIN_KEY is %s. Refusing to start because auto-generating a backend-only "
|
||||
"replacement would desynchronize the frontend and backend containers.",
|
||||
reason,
|
||||
)
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
def _validate_insecure_admin_startup() -> None:
|
||||
@@ -668,8 +749,7 @@ def _is_debug_test_request(request: Request) -> bool:
|
||||
if not _debug_mode_enabled():
|
||||
return False
|
||||
client_host = (request.client.host or "").lower() if request.client else ""
|
||||
url_host = (request.url.hostname or "").lower() if request.url else ""
|
||||
return client_host == "test" or url_host == "test"
|
||||
return client_host == "test"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -782,7 +862,9 @@ _ROUTE_TRANSPORT_POLICY: dict[tuple[str, str], RouteTransportPolicy] = {
|
||||
("POST", "/api/wormhole/gate/messages/decrypt"): _local_only_route_policy("private_control_only"),
|
||||
# ── Wormhole DM (strong) ──────────────────────────────────────────
|
||||
("POST", "/api/wormhole/dm/compose"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/connect-contact"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/decrypt"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/mls-key-package"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/register-key"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/prekey/register"): _local_only_route_policy("private_control_only"),
|
||||
("POST", "/api/wormhole/dm/bootstrap-encrypt"): _local_only_route_policy("private_control_only"),
|
||||
@@ -1321,18 +1403,40 @@ def _peer_hmac_url_from_request(request: Request) -> str:
|
||||
header_url = normalize_peer_url(str(request.headers.get("x-peer-url", "") or ""))
|
||||
if header_url:
|
||||
return header_url
|
||||
if not request.url:
|
||||
return ""
|
||||
base_url = f"{request.url.scheme}://{request.url.netloc}".rstrip("/")
|
||||
return normalize_peer_url(base_url)
|
||||
return ""
|
||||
|
||||
|
||||
def _verify_peer_transport_hmac(request: Request, body_bytes: bytes) -> bool:
    """Verify HMAC-SHA256 peer authentication without an allowlist check.

    The ``x-peer-hmac`` header must equal the hex HMAC-SHA256 of
    ``body_bytes`` keyed with whatever ``resolve_peer_key_for_url`` returns
    for the peer URL derived by ``_peer_hmac_url_from_request``.

    Args:
        request: Incoming request carrying the ``x-peer-hmac`` header.
        body_bytes: Raw request body the digest is computed over.

    Returns:
        ``True`` only when a digest was supplied, a peer URL and a peer key
        could be resolved, and the digests match; ``False`` otherwise.
    """
    provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
    if not provided:
        return False

    peer_url = _peer_hmac_url_from_request(request)
    if not peer_url:
        return False
    peer_key = resolve_peer_key_for_url(peer_url)
    if not peer_key:
        return False

    expected = _hmac_mod.new(peer_key, body_bytes, _hashlib_mod.sha256).hexdigest()
    # compare_digest() only accepts ASCII-only ``str`` arguments and raises
    # TypeError otherwise. The header is caller-controlled, so compare the
    # encoded bytes: a non-ASCII value then simply fails to match instead of
    # surfacing as an unhandled exception.
    return _hmac_mod.compare_digest(
        provided.lower().encode("utf-8"),
        expected.lower().encode("utf-8"),
    )
|
||||
|
||||
|
||||
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests."""
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
if not secret:
|
||||
return False
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests.
|
||||
|
||||
Issue #256: ``resolve_peer_key_for_url`` looks up a per-peer secret
|
||||
in ``MESH_PEER_SECRETS`` first, then falls back to the global
|
||||
``MESH_PEER_PUSH_SECRET``. When a peer URL is listed in the per-peer
|
||||
map, only the listed secret is accepted for it — the global secret
|
||||
is ignored, so any peer that knows only the global secret cannot
|
||||
forge a request claiming to be that peer.
|
||||
"""
|
||||
provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
|
||||
if not provided:
|
||||
return False
|
||||
@@ -1341,7 +1445,7 @@ def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
allowed_peers = set(authenticated_push_peer_urls())
|
||||
if not peer_url or peer_url not in allowed_peers:
|
||||
return False
|
||||
peer_key = _derive_peer_key(secret, peer_url)
|
||||
peer_key = resolve_peer_key_for_url(peer_url)
|
||||
if not peer_key:
|
||||
return False
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
},
|
||||
{
|
||||
"name": "BBC",
|
||||
"url": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"url": "https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"weight": 3
|
||||
},
|
||||
{
|
||||
@@ -47,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"name": "Xinhua",
|
||||
"url": "http://www.news.cn/english/rss/worldrss.xml",
|
||||
"url": "https://www.news.cn/english/rss/worldrss.xml",
|
||||
"weight": 2
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
{
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026",
|
||||
"note": "One-shot bootstrap for first-run carrier positions. Once carrier_cache.json exists in the runtime data volume, this seed file is never read again. All subsequent updates come from GDELT (and any future sources) and are written to carrier_cache.json. A year from now, your runtime cache reflects whatever your install has observed since first launch — not these snapshot positions."
|
||||
},
|
||||
"carriers": {
|
||||
"CVN-68": {
|
||||
"lat": 47.5535,
|
||||
"lng": -122.6400,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-76": {
|
||||
"lat": 47.5580,
|
||||
"lng": -122.6360,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Decommissioning)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-69": {
|
||||
"lat": 36.9465,
|
||||
"lng": -76.3265,
|
||||
"heading": 0,
|
||||
"desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-78": {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-74": {
|
||||
"lat": 36.98,
|
||||
"lng": -76.43,
|
||||
"heading": 0,
|
||||
"desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-75": {
|
||||
"lat": 36.0,
|
||||
"lng": 15.0,
|
||||
"heading": 0,
|
||||
"desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-77": {
|
||||
"lat": 36.5,
|
||||
"lng": -74.0,
|
||||
"heading": 0,
|
||||
"desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-70": {
|
||||
"lat": 32.6840,
|
||||
"lng": -117.1290,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Homeport)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-71": {
|
||||
"lat": 32.6885,
|
||||
"lng": -117.1280,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-72": {
|
||||
"lat": 20.0,
|
||||
"lng": 64.0,
|
||||
"heading": 0,
|
||||
"desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-73": {
|
||||
"lat": 35.2830,
|
||||
"lng": 139.6700,
|
||||
"heading": 180,
|
||||
"desc": "Yokosuka, Japan (Forward deployed)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:72b69418aa860a0d92ccae398a08722bc85e64a992b5515dd7bf9ae9f79f2fd1
|
||||
size 107194128
|
||||
@@ -0,0 +1,60 @@
|
||||
{
|
||||
"_comment": [
|
||||
"Baked-in SHA-256 digests for known Shadowbroker release archives.",
|
||||
"",
|
||||
"Issue #231: the self-updater previously skipped integrity verification",
|
||||
"entirely whenever the MESH_UPDATE_SHA256 env var was unset (which is the",
|
||||
"default — nothing in the install docs tells operators to set it). That",
|
||||
"made the auto-update a supply-chain RCE on any compromise of the GitHub",
|
||||
"release pipeline.",
|
||||
"",
|
||||
"The fix uses a multi-source verification chain mirroring the Tor bundle",
|
||||
"digest approach in #201:",
|
||||
"",
|
||||
" 1. MESH_UPDATE_SHA256 env var (operator override, preserved)",
|
||||
" 2. SHA256SUMS.txt asset published alongside each release (primary —",
|
||||
" the maintainer's release process already publishes this)",
|
||||
" 3. This baked-in digest list (second line of defense for releases",
|
||||
" missing a SHA256SUMS asset, or when the asset can't be fetched)",
|
||||
" 4. HTTPS-only fallback with a loud warning (preserves auto-update",
|
||||
" flow during transient outages so users don't get stuck)",
|
||||
"",
|
||||
"Mismatch from a source that DID respond is fatal — the update is",
|
||||
"refused and the existing install keeps running. Only the 'no source",
|
||||
"reachable at all' case falls back to HTTPS-only.",
|
||||
"",
|
||||
"Format: each entry is keyed by release tag and maps asset filenames",
|
||||
"to their canonical SHA-256 digest (hex, lowercase). The updater",
|
||||
"compares the locally-computed digest of the downloaded asset against",
|
||||
"the value here.",
|
||||
"",
|
||||
"When the maintainer ships a new release, add its digests here BEFORE",
|
||||
"removing the old ones so operators on the old code still validate",
|
||||
"against the previous entries during the transition."
|
||||
],
|
||||
"v0.9.79": {
|
||||
"ShadowBroker_v0.9.79.zip": "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47",
|
||||
"ShadowBroker_0.9.79_x64-setup.exe": "f7b676ada45cac7da05868b0a353678c9ee700e3abcf456a7c0c038c36da446f",
|
||||
"ShadowBroker_0.9.79_x64_en-US.msi": "e0713c3cdda184cfbea750bfac0d62a35678fec00847e6476f2cac8e7e42046e"
|
||||
},
|
||||
"v0.9.8": {
|
||||
"ShadowBroker_v0.9.8.zip": "183bb5cd62b9b9349d95df5ef7696cb6ca810ab4b991fa9dab6f898af4c7a175",
|
||||
"ShadowBroker_0.9.8_x64-setup.exe": "94a0309862e9c81c92cdcbfea8eec9dbb97eef19ded82b26217b397defbc810c",
|
||||
"ShadowBroker_0.9.8_x64_en-US.msi": "fe22f9d51e4360d74c18a7250c2fbb9ed4fa4c7a884b3ac0d04a21115466386b"
|
||||
},
|
||||
"v0.9.81": {
|
||||
"ShadowBroker_v0.9.81.zip": "f81f454bdc88e9a32c351df38212b8cfa624704d65764b971bb091eef62259c6",
|
||||
"ShadowBroker_0.9.81_x64-setup.exe": "25e9a95d0d8ce959a7d08fe8e7406772ae24b596652793e81d1de5d02510a5a6",
|
||||
"ShadowBroker_0.9.81_x64_en-US.msi": "34e655fc0c0f195ee4ac978f228a4b2b9d5565253b8771aca9ef4693409e9e70"
|
||||
},
|
||||
"v0.9.82": {
|
||||
"ShadowBroker_v0.9.82.zip": "202ab043465741dcc06de57c19ec8314904332f8e818b891d7174655719d084c",
|
||||
"ShadowBroker_0.9.82_x64-setup.exe": "0eb9f2bda02ab691b39687641abc97e6bfb507b42f48de21970ad7dfb4ea15fc",
|
||||
"ShadowBroker_0.9.82_x64_en-US.msi": "ced08f930171c0c08009a958cc30b0171a09f982230fc217c6808c2ed7ab2e30"
|
||||
},
|
||||
"v0.9.83": {
|
||||
"ShadowBroker_v0.9.83.zip": "53f56631731ad3cdc7be68df09bedd6570ed91ecda6fa57c39651098e15666c7",
|
||||
"ShadowBroker_0.9.83_x64-setup.exe": "d62170af4b9df0b190832b7bb3ad6bfe8a7ac01472f2c7b39cf2a1b61edc7492",
|
||||
"ShadowBroker_0.9.83_x64_en-US.msi": "b664cc0003a29f7ce88b04c2b425643dbe7ed897342fc6e9a2378bc1910c6850"
|
||||
}
|
||||
}
|
||||
+105
-1
@@ -1,4 +1,108 @@
|
||||
"""Rate-limit key function for slowapi.
|
||||
|
||||
Issue #287 (tg12): the previous implementation used
|
||||
``slowapi.util.get_remote_address`` which only ever returns
|
||||
``request.client.host``. Behind the bundled Next.js proxy (or any other
|
||||
reverse proxy), every connected operator's ``client.host`` is the
|
||||
frontend container's bridge IP. ``@limiter.limit("120/minute")`` then
|
||||
collapses into one shared bucket for everybody on the same backend —
|
||||
one heavy tab can starve every other operator on the node.
|
||||
|
||||
This module replaces that key function with one that:
|
||||
|
||||
* Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted
|
||||
frontend container (same allowlist used by the Docker bridge
|
||||
local-operator trust path — see ``backend/auth.py`` ``#250``).
|
||||
* Picks the FIRST entry in the XFF chain. That's the client end of
|
||||
the proxy chain, which is the operator we want to bucket on.
|
||||
* Falls back to ``request.client.host`` for any peer that isn't on
|
||||
the trusted-frontend allowlist. Direct hits, unrelated containers,
|
||||
and unknown hosts are bucketed exactly like before — there is no
|
||||
way for an untrusted caller to spoof XFF and steal another
|
||||
operator's rate-limit bucket.
|
||||
|
||||
Single-operator nodes are unaffected: the frontend resolves to one IP,
|
||||
that IP is on the trust list, the XFF header is read, and you get one
|
||||
bucket per operator (i.e. you).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
def _client_host(request: Any) -> str:
    """Lower-cased host of the request's immediate peer, or ``""`` if unknown.

    ``request.client`` may be missing or ``None``, and its ``host`` may be
    empty or ``None``; all of those collapse to the empty string.
    """
    peer = getattr(request, "client", None)
    if peer is not None:
        return (getattr(peer, "host", "") or "").lower()
    return ""
|
||||
|
||||
|
||||
def _first_forwarded_for(value: str) -> str:
    """Return the first non-blank, lower-cased entry of an ``X-Forwarded-For`` value.

    The de-facto header layout is ``client, proxy1, proxy2, …``, so the
    first entry is the client end of the chain. Blank entries (seen in
    malformed headers) are skipped; ``""`` is returned when nothing usable
    is present.
    """
    entries = (part.strip() for part in value.split(","))
    return next((entry.lower() for entry in entries if entry), "")
|
||||
|
||||
|
||||
def _is_trusted_frontend_peer(host: str) -> bool:
    """Report whether ``host`` is in the resolved trusted-frontend IP set.

    ``auth._resolve_trusted_bridge_ips`` is imported lazily so this module
    can be used in unit tests without loading the auth module. Any failure
    to import or to resolve the set is treated as "not trusted".
    """
    if host:
        try:
            from auth import _resolve_trusted_bridge_ips

            allowlist = _resolve_trusted_bridge_ips()
        except Exception:  # pragma: no cover - defensive
            return False
        return host in allowlist
    return False
|
||||
|
||||
|
||||
def shadowbroker_rate_limit_key(request: Any) -> str:
    """slowapi ``key_func`` that honours ``X-Forwarded-For`` for trusted peers only.

    * Peer not on the trusted-frontend allowlist → ``get_remote_address``
      (slowapi's default keying), so the header is never consulted.
    * Trusted peer with a usable ``X-Forwarded-For`` → its first entry.
    * Trusted peer without a usable header → ``get_remote_address`` again,
      i.e. one shared bucket keyed on the frontend's own address.

    NOTE(review): the first XFF entry is whatever the frontend forwarded; if
    the frontend appends to a client-supplied header instead of replacing it,
    that entry is client-chosen — confirm the proxy's behaviour.
    """
    if not _is_trusted_frontend_peer(_client_host(request)):
        return get_remote_address(request)

    headers = getattr(request, "headers", None)
    if headers is None:
        return get_remote_address(request)

    # Second lookup covers plain mappings that are not case-insensitive.
    forwarded = headers.get("x-forwarded-for") or headers.get("X-Forwarded-For")
    operator = _first_forwarded_for(forwarded) if forwarded else ""
    return operator or get_remote_address(request)
|
||||
|
||||
|
||||
limiter = Limiter(key_func=shadowbroker_rate_limit_key)
|
||||
|
||||
+1046
-590
File diff suppressed because it is too large
Load Diff
+20
-7
@@ -7,16 +7,15 @@ py-modules = []
|
||||
|
||||
[project]
|
||||
name = "backend"
|
||||
version = "0.9.79"
|
||||
version = "0.9.83"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"apscheduler==3.10.3",
|
||||
"beautifulsoup4>=4.9.0",
|
||||
"cachetools==5.5.2",
|
||||
"cloudscraper==1.2.71",
|
||||
"cryptography>=41.0.0",
|
||||
"cryptography>=46.0.7",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.115.12",
|
||||
"fastapi==0.136.3",
|
||||
"feedparser==6.0.10",
|
||||
"httpx==0.28.1",
|
||||
"playwright==1.59.0",
|
||||
@@ -25,26 +24,40 @@ dependencies = [
|
||||
"pydantic-settings==2.8.1",
|
||||
"pystac-client==0.8.6",
|
||||
"python-dotenv==1.2.2",
|
||||
"requests==2.31.0",
|
||||
"requests==2.33.0",
|
||||
"PySocks==1.7.1",
|
||||
"reverse-geocoder==1.5.1",
|
||||
"sgp4==2.25",
|
||||
"meshtastic>=2.5.0",
|
||||
"networkx>=3.4.0",
|
||||
"numpy>=2.2.0",
|
||||
"orjson>=3.10.0",
|
||||
"paho-mqtt>=1.6.0,<2.0.0",
|
||||
"PyNaCl>=1.5.0",
|
||||
"slowapi==0.1.9",
|
||||
"starlette==1.0.1",
|
||||
"vaderSentiment>=3.3.0",
|
||||
"uvicorn==0.34.0",
|
||||
"yfinance==1.3.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
road-corridor = [
|
||||
"geopandas>=1.0.0",
|
||||
"imageio>=2.34.0",
|
||||
"osmnx>=2.0.0",
|
||||
"rasterio>=1.4.0",
|
||||
"scikit-learn>=1.5.0",
|
||||
"sentinelhub>=3.10.0",
|
||||
"shapely>=2.0.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["pytest>=8.3.4", "pytest-asyncio==0.25.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
dev = ["pytest>=9.0.3", "pytest-asyncio>=1.4.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
# The current backend carries historical style debt in large legacy modules.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.79 release.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.82 release.
|
||||
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
||||
|
||||
[tool.black]
|
||||
|
||||
@@ -82,9 +82,40 @@ async def api_get_keys_meta(request: Request):
|
||||
return get_env_path_info()
|
||||
|
||||
|
||||
@router.get("/api/settings/news-feeds")
|
||||
@router.get("/api/settings/operator-handle", dependencies=[Depends(require_local_operator)])
@limiter.limit("60/minute")
async def api_get_operator_handle(request: Request):
    """Round 7a: expose the per-install operator handle to the frontend.

    The frontend includes the handle in browser-direct third-party API calls
    (Wikipedia / Wikidata via lib/wikimediaClient). It is auto-generated on
    first use and can be overridden through the OPERATOR_HANDLE setting or
    the environment variable of the same name.

    The route is gated on local-operator: browser traffic arrives through
    the Next.js proxy, which attaches the admin key, while remote scanners
    get 403. The handle is not a secret, but gating it keeps this endpoint
    in line with the other settings routes (least privilege).

    Returns:
        ``{"handle": <operator handle>}``.
    """
    from services.network_utils import get_operator_handle

    handle = get_operator_handle()
    return {"handle": handle}
|
||||
|
||||
|
||||
@router.get("/api/settings/news-feeds", dependencies=[Depends(require_local_operator)])
@limiter.limit("30/minute")
async def api_get_news_feeds(request: Request):
    """Return the curated news-feed inventory (Issue #252, tg12).

    The feed list is configuration state rather than a public data feed, so
    the route is gated on local-operator: the Tauri shell, the Docker bridge
    frontend and callers holding an admin key get the full list, while
    anonymous LAN/internet callers can no longer enumerate the operator's
    source URLs.
    """
    from services.news_feed_config import get_feeds

    feeds = get_feeds()
    return feeds
|
||||
|
||||
@@ -118,9 +149,18 @@ async def api_reset_news_feeds(request: Request):
|
||||
@router.get("/api/settings/node")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_node_settings(request: Request):
|
||||
"""Issue #243 (tg12): node_mode and node_enabled are operational
|
||||
posture. Anonymous callers receive an empty stub; authenticated
|
||||
callers (local-operator or admin/scoped token) see the full
|
||||
state. See the canonical handler in backend/main.py for the full
|
||||
rationale.
|
||||
"""
|
||||
import asyncio
|
||||
from auth import _scoped_view_authenticated
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
if not _scoped_view_authenticated(request, "node"):
|
||||
return {}
|
||||
return {
|
||||
**data,
|
||||
"node_mode": _current_node_mode(),
|
||||
@@ -210,9 +250,19 @@ async def api_set_meshtastic_mqtt_settings(request: Request, body: MeshtasticMqt
|
||||
return _meshtastic_runtime_snapshot()
|
||||
|
||||
|
||||
@router.get("/api/settings/timemachine")
|
||||
@router.get(
|
||||
"/api/settings/timemachine",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_timemachine_settings(request: Request):
|
||||
"""Issue #253 (tg12): archival-capture posture is operationally
|
||||
sensitive — it tells a remote caller whether this deployment is
|
||||
retaining replayable historical surveillance data. Gated on
|
||||
local-operator so the Tauri shell and Docker bridge frontend
|
||||
still see the toggle state, but anonymous LAN/internet callers
|
||||
can no longer fingerprint Time Machine state.
|
||||
"""
|
||||
import asyncio
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
|
||||
@@ -0,0 +1,230 @@
|
||||
"""Local-operator PTY WebSocket for the Mesh Chat agent shell."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import fcntl
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pty
|
||||
import select
|
||||
import signal
|
||||
import struct
|
||||
import sys
|
||||
import termios
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, WebSocket, WebSocketDisconnect
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import (
|
||||
_current_admin_key,
|
||||
_debug_mode_enabled,
|
||||
_is_trusted_local_runtime_host,
|
||||
require_local_operator,
|
||||
)
|
||||
from services.agent_shell_settings import (
|
||||
get_agent_shell_settings,
|
||||
set_agent_shell_working_directory,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["agent-shell"])
|
||||
|
||||
|
||||
class AgentShellSettingsUpdate(BaseModel):
    """Request body accepted by ``PUT /api/agent-shell/settings``."""

    # Required, non-empty path to use as the shell's working directory.
    working_directory: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
def _set_winsize(fd: int, rows: int, cols: int) -> None:
    """Set the terminal window size of the PTY referred to by ``fd``.

    Args:
        fd: File descriptor of the PTY.
        rows: Requested number of rows.
        cols: Requested number of columns.

    Raises:
        OSError: If the ``TIOCSWINSZ`` ioctl fails (e.g. ``fd`` is not a TTY).
    """
    # The size is packed as four unsigned shorts ("HHHH"); struct.pack raises
    # struct.error for anything outside 0..65535, so clamp caller-supplied
    # values (they originate from query parameters / client messages).
    max_dim = 0xFFFF
    safe_rows = min(max(int(rows), 0), max_dim)
    safe_cols = min(max(int(cols), 0), max_dim)
    winsize = struct.pack("HHHH", safe_rows, safe_cols, 0, 0)
    fcntl.ioctl(fd, termios.TIOCSWINSZ, winsize)
|
||||
|
||||
|
||||
def _published_local_dashboard_ws(ws: WebSocket) -> bool:
    """Report whether the upgrade request's Host/Origin names the local dashboard.

    Browser → published Docker port appears as a bridge IP, not loopback.
    For the operator shell only, the request is also accepted when its
    ``Host`` header, or the host of its ``Origin`` header (http/https), is
    ``127.0.0.1``, ``localhost`` or ``::1``.

    NOTE(review): ``Host`` and ``Origin`` are supplied by the client, and a
    non-browser client can set them to any value. This check therefore does
    not authenticate the caller — confirm that the backend port is not
    reachable by untrusted hosts wherever this path grants shell access.

    Args:
        ws: The WebSocket whose handshake headers are inspected.

    Returns:
        ``True`` when either header points at a loopback name.
    """
    from urllib.parse import urlsplit

    loopback_names = {"127.0.0.1", "localhost", "::1"}

    host_header = str(ws.headers.get("host") or "").strip().lower()
    if host_header.startswith("["):
        # IPv6 literals are bracketed in Host ("[::1]:8000"); splitting on
        # the first ":" would yield "[" and never match "::1".
        closing = host_header.find("]")
        host_name = host_header[1:closing] if closing != -1 else ""
    else:
        host_name = host_header.split(":", 1)[0]
    if host_name in loopback_names:
        return True

    origin = str(ws.headers.get("origin") or "").strip().lower()
    try:
        # Parse instead of prefix-matching so "http://localhost:@evil.example"
        # is not mistaken for localhost, and port-less or IPv6 loopback
        # origins are recognised.
        parsed = urlsplit(origin)
        origin_host = parsed.hostname or ""
    except ValueError:
        return False
    return parsed.scheme in {"http", "https"} and origin_host in loopback_names
|
||||
|
||||
|
||||
async def _authorize_agent_shell_ws(ws: WebSocket, admin_key_query: str = "") -> None:
    """Allow the shell WebSocket for local operators or admin-key holders.

    The connection is accepted when the peer host is a trusted local runtime
    host, when the request targets the local dashboard, when debug mode is
    on and the peer host is ``"test"``, or when the presented admin key
    (query parameter first, then ``x-admin-key`` header) matches the
    configured one.

    Raises:
        WebSocketDisconnect: After closing the socket with code 4403 when
            none of the above applies.
    """
    peer_host = (ws.client.host or "").lower() if ws.client else ""
    if _is_trusted_local_runtime_host(peer_host):
        return
    if _published_local_dashboard_ws(ws):
        return
    if _debug_mode_enabled() and peer_host == "test":
        return

    expected_key = _current_admin_key()
    supplied_key = str(admin_key_query or ws.headers.get("x-admin-key", "") or "").strip()
    if expected_key and supplied_key:
        # Constant-time comparison of the two keys.
        if hmac.compare_digest(supplied_key.encode(), expected_key.encode()):
            return

    await ws.close(code=4403, reason="local operator access only")
    raise WebSocketDisconnect()
|
||||
|
||||
|
||||
def _resolve_shell_cwd(requested: str) -> str:
    """Pick the directory the shell should start in.

    A non-blank ``requested`` path is user-expanded and made absolute; it is
    used when it names an existing directory. Otherwise the configured
    ``working_directory`` from the agent-shell settings is returned.
    """
    candidate = str(requested or "").strip()
    if not candidate:
        return get_agent_shell_settings()["working_directory"]

    absolute = os.path.abspath(os.path.expanduser(candidate))
    if not os.path.isdir(absolute):
        return get_agent_shell_settings()["working_directory"]
    return absolute
|
||||
|
||||
|
||||
def _default_shell() -> str:
    """Return the shell executable to spawn for the current platform.

    On Windows this is ``%COMSPEC%`` (default ``cmd.exe``); elsewhere it is
    ``$SHELL`` (default ``/bin/bash``).
    """
    on_windows = sys.platform == "win32"
    variable, fallback = ("COMSPEC", "cmd.exe") if on_windows else ("SHELL", "/bin/bash")
    return os.environ.get(variable, fallback)
|
||||
|
||||
|
||||
async def _relay_pty(master_fd: int, proc: asyncio.subprocess.Process, ws: WebSocket) -> None:
    """Shuttle data between the PTY master and the WebSocket until either ends.

    Each iteration polls the PTY for output (forwarded as a binary frame)
    and then waits briefly for one client message:

    * binary frames are written to the PTY as-is;
    * text frames that decode to a JSON object are control messages — only
      ``{"type": "resize", "rows": …, "cols": …}`` is acted on;
    * any other text frame is treated as keyboard input and written to the
      PTY UTF-8 encoded.

    The loop ends when the process has exited, the PTY reaches EOF or errors,
    or the client disconnects.

    Args:
        master_fd: PTY master file descriptor.
        proc: The spawned shell process; only ``returncode`` is read.
        ws: The connected WebSocket.
    """
    loop = asyncio.get_running_loop()
    while True:
        if proc.returncode is not None:
            break
        try:
            # select() blocks, so run it off the event loop thread.
            readable, _, _ = await loop.run_in_executor(
                None, lambda: select.select([master_fd], [], [], 0.05)
            )
        except Exception:
            break
        if master_fd in readable:
            try:
                chunk = os.read(master_fd, 4096)
            except OSError:
                break
            if not chunk:
                break
            await ws.send_bytes(chunk)
        try:
            message = await asyncio.wait_for(ws.receive(), timeout=0.05)
        except asyncio.TimeoutError:
            continue
        if message.get("type") == "websocket.disconnect":
            break
        if message.get("type") != "websocket.receive":
            continue
        if message.get("bytes"):
            os.write(master_fd, message["bytes"])
            continue
        text = message.get("text")
        if not text:
            continue
        try:
            payload = json.loads(text)
        except (json.JSONDecodeError, RecursionError):
            payload = None
        if not isinstance(payload, dict):
            # Keystrokes such as "1", "true", '"x"' or "[]" are valid JSON but
            # not control objects; they are input for the shell, not messages.
            os.write(master_fd, text.encode("utf-8", errors="replace"))
            continue
        if payload.get("type") == "resize":
            try:
                rows = int(payload.get("rows") or 24)
                cols = int(payload.get("cols") or 80)
                _set_winsize(master_fd, max(rows, 2), max(cols, 2))
            except (TypeError, ValueError, OverflowError, OSError, struct.error):
                # A malformed resize request must not tear down the session.
                logger.debug("Ignoring malformed agent-shell resize request", exc_info=True)
|
||||
|
||||
|
||||
@router.get("/api/agent-shell/settings", dependencies=[Depends(require_local_operator)])
async def read_agent_shell_settings() -> dict[str, Any]:
    """Return the current agent-shell settings (local operators only)."""
    current = get_agent_shell_settings()
    return current
|
||||
|
||||
|
||||
@router.put("/api/agent-shell/settings", dependencies=[Depends(require_local_operator)])
async def write_agent_shell_settings(body: AgentShellSettingsUpdate) -> dict[str, Any]:
    """Persist a new shell working directory and return the updated settings.

    Raises:
        HTTPException: 400 when ``set_agent_shell_working_directory`` rejects
            the value with ``ValueError``; the detail distinguishes a
            non-existent directory from any other rejection.
    """
    try:
        updated = set_agent_shell_working_directory(body.working_directory)
    except ValueError as exc:
        message = (
            "Working directory does not exist"
            if str(exc) == "working_directory_not_found"
            else "Working directory is required"
        )
        raise HTTPException(status_code=400, detail=message) from exc
    return updated
|
||||
|
||||
|
||||
@router.websocket("/api/agent-shell/ws")
async def agent_shell_websocket(
    ws: WebSocket,
    cwd: str = Query(default=""),
    cols: int = Query(default=80),
    rows: int = Query(default=24),
    admin_key: str = Query(default=""),
) -> None:
    """Serve an interactive shell over a WebSocket backed by a host PTY.

    After authorization, a shell is spawned in its own session with a PTY as
    stdin/stdout/stderr and traffic is relayed until either side ends. On
    exit the PTY is closed and a still-running shell gets SIGHUP (sent to
    its process group), followed by a kill if it has not exited within two
    seconds.

    Args:
        ws: The incoming WebSocket.
        cwd: Optional requested working directory; falls back to the
            configured one when blank or not a directory.
        cols: Initial terminal width (minimum 2 is enforced).
        rows: Initial terminal height (minimum 2 is enforced).
        admin_key: Optional admin key for callers that are not local.

    NOTE(review): this module imports ``fcntl``, ``pty`` and ``termios`` at
    import time; those are POSIX-only, so the Windows branch below is
    presumably unreachable on a real Windows backend — confirm.
    """
    await ws.accept()
    try:
        await _authorize_agent_shell_ws(ws, admin_key)
    except WebSocketDisconnect:
        return

    if sys.platform == "win32":
        await ws.send_text(
            json.dumps(
                {
                    "type": "error",
                    "message": "Host PTY is not available on Windows backend builds yet. Use the ShadowBroker desktop app or run the backend in Docker/Linux for an embedded shell.",
                }
            )
        )
        await ws.close(code=1011)
        return

    shell_cwd = _resolve_shell_cwd(cwd)
    shell = _default_shell()

    # Build the child environment before any descriptor is opened.
    env = os.environ.copy()
    env.setdefault("TERM", "xterm-256color")
    env.setdefault("COLORTERM", "truecolor")
    home = shell_cwd if os.path.isdir(shell_cwd) else "/app"
    env["HOME"] = home
    env["USER"] = env.get("USER") or "operator"
    path_prefixes = [
        os.path.join(home, ".local", "bin"),
        os.path.join(home, ".hermes", "bin"),
    ]
    path = env.get("PATH", "")
    for prefix in path_prefixes:
        if os.path.isdir(prefix):
            path = f"{prefix}:{path}" if path else prefix
    env["PATH"] = path

    master_fd, slave_fd = pty.openpty()
    proc = None
    try:
        try:
            _set_winsize(master_fd, max(rows, 2), max(cols, 2))
            proc = await asyncio.create_subprocess_exec(
                shell,
                stdin=slave_fd,
                stdout=slave_fd,
                stderr=slave_fd,
                cwd=shell_cwd,
                env=env,
                # Same effect as preexec_fn=os.setsid (new session / process
                # group) without running Python code between fork and exec.
                start_new_session=True,
            )
        finally:
            # The parent never uses the slave end; close it whether or not
            # the spawn succeeded so a failure cannot leak the descriptor.
            os.close(slave_fd)

        await _relay_pty(master_fd, proc, ws)
    finally:
        try:
            os.close(master_fd)
        except OSError:
            pass
        if proc is not None and proc.returncode is None:
            try:
                os.killpg(proc.pid, signal.SIGHUP)
            except ProcessLookupError:
                pass
            try:
                await asyncio.wait_for(proc.wait(), timeout=2.0)
            except asyncio.TimeoutError:
                proc.kill()
                await proc.wait()
|
||||
+298
-51
@@ -18,6 +18,12 @@ from auth import require_local_operator, require_openclaw_or_local
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import latest_data as _latest_data
|
||||
|
||||
|
||||
|
||||
def _ai_intel_user_agent() -> str:
    """Return the outbound User-Agent string for the ``"ai-intel"`` component.

    The helper is imported at call time and invoked with ``"ai-intel"``.
    """
    from services.network_utils import outbound_user_agent as build_user_agent

    return build_user_agent("ai-intel")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
@@ -447,7 +453,7 @@ async def ai_satellite_images(
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=10,
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (ai-intel)"},
|
||||
headers={"User-Agent": _ai_intel_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
features = resp.json().get("features", [])
|
||||
@@ -1584,7 +1590,7 @@ async def agent_tool_manifest(request: Request):
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.79",
|
||||
"version": "0.9.82",
|
||||
"access_tier": access_tier,
|
||||
"available_commands": available_commands,
|
||||
"transport": {
|
||||
@@ -1699,11 +1705,12 @@ async def agent_tool_manifest(request: Request):
|
||||
{
|
||||
"name": "search_news",
|
||||
"type": "read",
|
||||
"description": "Search news and event layers server-side by keyword. Includes news, GDELT, CrowdThreat, and major incident/event feeds without pulling the full slow telemetry feed.",
|
||||
"description": "Search news and event layers server-side by keyword. Includes news, GDELT, CrowdThreat, Telegram OSINT, and major incident/event feeds without pulling the full slow telemetry feed.",
|
||||
"parameters": {
|
||||
"query": {"type": "string", "required": True, "description": "Keyword or phrase to search for"},
|
||||
"limit": {"type": "integer", "required": False, "description": "Max results (default 10, max 50)"},
|
||||
"include_gdelt": {"type": "boolean", "required": False, "description": "Include GDELT matches (default true)"},
|
||||
"include_telegram": {"type": "boolean", "required": False, "description": "Include Telegram OSINT channel posts (default true)"},
|
||||
"compact": {"type": "boolean", "required": False, "description": "If true, strips empty/None fields from each result and rounds lat/lng to 3 decimals. Response includes format: 'compressed_v1'."},
|
||||
},
|
||||
"returns": "{results: [{source_layer, title, summary, source, link, lat, lng, risk_score}], version: int, truncated: bool}",
|
||||
@@ -1737,6 +1744,55 @@ async def agent_tool_manifest(request: Request):
|
||||
},
|
||||
"returns": "{center, radius_km, nearby, topic_news, context_layers}",
|
||||
},
|
||||
{
|
||||
"name": "osint_lookup",
|
||||
"type": "read",
|
||||
"description": "Run a passive OSINT recon lookup server-side (same backends as the Recon panel). SSRF-guarded outbound proxies for IP geolocation, DNS, WHOIS, certs, BGP/ASN, sanctions, CVE, MAC vendor, GitHub profile, breach checks, and threat feeds.",
|
||||
"parameters": {
|
||||
"tool": {"type": "string", "required": True, "description": "Lookup type: ip, dns, whois, certs, threats, bgp, sanctions, cve, mac, github, leaks, sweep_init"},
|
||||
"ip": {"type": "string", "required": False, "description": "IPv4/IPv6 for ip or sweep_init"},
|
||||
"domain": {"type": "string", "required": False, "description": "Domain for dns, whois, certs"},
|
||||
"query": {"type": "string", "required": False, "description": "Generic query (BGP ASN, sanctions name, optional threats filter)"},
|
||||
"cve": {"type": "string", "required": False, "description": "CVE id for cve lookup"},
|
||||
"mac": {"type": "string", "required": False, "description": "MAC address for mac lookup"},
|
||||
"username": {"type": "string", "required": False, "description": "GitHub username"},
|
||||
"email": {"type": "string", "required": False, "description": "Email for breach/leak lookup"},
|
||||
"schema": {"type": "string", "required": False, "description": "Sanctions schema filter: Person, Organization, Company, Vessel, Airplane, LegalEntity"},
|
||||
"limit": {"type": "integer", "required": False, "description": "Sanctions result cap (default 25, max 100)"},
|
||||
"cidr": {"type": "integer", "required": False, "description": "CIDR mask for sweep_init (24-32, default 24)"},
|
||||
},
|
||||
"returns": "Tool-specific JSON (geo, DNS records, WHOIS, sanctions hits, CVE details, etc.)",
|
||||
},
|
||||
{
|
||||
"name": "osint_tools",
|
||||
"type": "read",
|
||||
"description": "List available OSINT recon tools, entity-expand types, and sanctions schemas.",
|
||||
"parameters": {},
|
||||
"returns": "{tools: [...], entity_types: [...], sanctions_schemas: [...], notes: {...}}",
|
||||
},
|
||||
{
|
||||
"name": "entity_expand",
|
||||
"type": "read",
|
||||
"description": "Expand an entity relationship graph around an aircraft, vessel, IP, company, person, or country. Same backend as /api/entity/expand.",
|
||||
"parameters": {
|
||||
"type": {"type": "string", "required": True, "description": "Entity type: aircraft, vessel, company, person, ip, country"},
|
||||
"id": {"type": "string", "required": True, "description": "Entity identifier (tail number, MMSI, IP, company name, etc.)"},
|
||||
"registration": {"type": "string", "required": False, "description": "Aircraft registration hint"},
|
||||
"model": {"type": "string", "required": False, "description": "Aircraft model hint"},
|
||||
"icao24": {"type": "string", "required": False, "description": "ICAO24 hex for aircraft"},
|
||||
},
|
||||
"returns": "{nodes: [...], links: [...]}",
|
||||
},
|
||||
{
|
||||
"name": "osint_sweep",
|
||||
"type": "write",
|
||||
"description": "Active subnet device discovery via Shodan InternetDB (ports, vulns, hostnames). Requires full OpenClaw access tier. Private/reserved IPs blocked.",
|
||||
"parameters": {
|
||||
"ip": {"type": "string", "required": True, "description": "Public IPv4 anchor for the sweep"},
|
||||
"cidr": {"type": "integer", "required": False, "description": "Subnet size /24-/32 (default 24)"},
|
||||
},
|
||||
"returns": "{center, target_ip, cidr, subnet, devices, summary, sweep_time_ms}",
|
||||
},
|
||||
{
|
||||
"name": "what_changed",
|
||||
"type": "read",
|
||||
@@ -1995,7 +2051,7 @@ async def agent_tool_manifest(request: Request):
|
||||
"description": "Set up a watchdog alert. When triggered, alerts push instantly via SSE stream. Debounced: same watch won't re-fire within 60 seconds.",
|
||||
"parameters": {
|
||||
"type": {"type": "string", "required": True, "description": "Watch type",
|
||||
"enum": ["track_aircraft", "track_callsign", "track_registration", "track_ship", "track_entity", "geofence", "keyword", "prediction_market"]},
|
||||
"enum": ["track_aircraft", "track_callsign", "track_registration", "track_ship", "track_entity", "geofence", "keyword", "telegram_rhetoric", "prediction_market"]},
|
||||
"params": {"type": "object", "required": True, "description": "Type-specific parameters (see subtypes)"},
|
||||
},
|
||||
"subtypes": {
|
||||
@@ -2005,7 +2061,8 @@ async def agent_tool_manifest(request: Request):
|
||||
"track_ship": {"params": {"mmsi": "string (optional)", "imo": "string (optional)", "name": "string (optional)", "owner": "string (optional)", "callsign": "string (optional)"}, "description": "Alert when ship appears by MMSI, IMO, name, owner, or callsign"},
|
||||
"track_entity": {"params": {"query": "string", "entity_type": "string (optional)", "layers": "list (optional)"}, "description": "Generic exact-first entity tracker when aircraft/ship fields are not known yet"},
|
||||
"geofence": {"params": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list (default ['flights','ships'])"}, "description": "Alert when any entity enters a geographic zone"},
|
||||
"keyword": {"params": {"keyword": "string"}, "description": "Alert when keyword appears in news/GDELT headlines"},
|
||||
"keyword": {"params": {"keyword": "string", "include_telegram": "boolean (default true)"}, "description": "Alert when keyword appears in news, GDELT, or Telegram OSINT (searches translated + original text)"},
|
||||
"telegram_rhetoric": {"params": {"min_risk_score": "int 1-10 (default 7)", "keywords": "list or comma-separated string (optional)", "channels": "list or comma-separated string (optional)"}, "description": "Alert on new high-risk Telegram OSINT posts — rhetoric/escalation monitor"},
|
||||
"prediction_market": {"params": {"query": "string", "threshold": "float 0-1 (optional)"}, "description": "Alert on prediction market movements matching query"},
|
||||
},
|
||||
"example": {"cmd": "add_watch", "args": {"type": "track_registration", "params": {"registration": "N3880"}}},
|
||||
@@ -2188,6 +2245,11 @@ async def agent_tool_manifest(request: Request):
|
||||
"Prefer compact lookups first: search_telemetry, find_flights, find_ships, search_news, entities_near, get_layer_slice. Use get_telemetry/get_slow_telemetry/get_report only when focused commands are insufficient.",
|
||||
"ShadowBroker does expose UAP sightings, wastewater, and tracked_flights/VIP aircraft when those layers are populated. Verify with get_summary or get_layer_slice before claiming a layer is absent.",
|
||||
"ShadowBroker also exposes fishing_activity, which is the fishing-vessel activity layer backed by Global Fishing Watch data when GFW_API_TOKEN is configured. Do not confuse it with the AIS ships layer.",
|
||||
"telegram_osint, malware_threats, cyber_threats, and scm_suppliers are live map layers. Use get_summary or get_layer_slice(['telegram_osint']) before claiming they are absent. Aliases: telegram, malware/botnet, cyber/cisa/kev, scm/suppliers.",
|
||||
"search_telemetry and search_news both index Telegram OSINT posts. For malware C2, botnet IPs, CISA KEV CVEs, or semiconductor suppliers, use search_telemetry or get_layer_slice on the matching layer.",
|
||||
"The Recon toolkit is available via osint_lookup: IP geolocation, DNS, WHOIS, certs, BGP, sanctions, CVE, MAC vendor, GitHub, breach checks, threat feeds. Call osint_tools first to list supported tools.",
|
||||
"entity_expand builds relationship graphs for aircraft, vessels, IPs, companies, people, and countries — use after resolving an entity from telemetry or osint_lookup.",
|
||||
"osint_sweep runs active subnet discovery (Shodan InternetDB) and requires full OpenClaw access tier. Use osint_lookup tool=sweep_init for passive geolocation context only.",
|
||||
"Use search_telemetry as the Google-style entry point whenever the user gives you a person, place, company, topic, owner, nickname, or natural-language phrase and you do not already know the source layer.",
|
||||
"Example: for 'Where is Jerry Jones yacht?' search 'Jerry Jones' across all telemetry first, identify the ship match, then refine with find_ships or raw layer context only if needed.",
|
||||
"For fuzzy natural-language lookups like 'Patriots jet' or 'Jerry Jones yacht', use search_telemetry first and inspect the ranked candidate list before making a hard claim.",
|
||||
@@ -2215,12 +2277,14 @@ async def agent_tool_manifest(request: Request):
|
||||
async def api_capabilities(request: Request):
|
||||
"""Return full API manifest so the agent knows every available endpoint."""
|
||||
from services.openclaw_channel import READ_COMMANDS, WRITE_COMMANDS, detect_tier
|
||||
from services.openclaw_routing import routing_manifest
|
||||
from services.config import get_settings
|
||||
tier = detect_tier()
|
||||
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.79",
|
||||
"version": "0.9.82",
|
||||
"routing": routing_manifest(),
|
||||
"auth": {
|
||||
"method": "HMAC-SHA256",
|
||||
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
||||
@@ -2336,8 +2400,16 @@ async def api_capabilities(request: Request):
|
||||
"description": "Compact server-side ship search by MMSI/IMO/name/query, including yacht-owner enrichment.",
|
||||
},
|
||||
"find_entity": {
|
||||
"args": {"query": "str (optional)", "entity_type": "aircraft|ship|person|event|infrastructure (optional)", "callsign": "str (optional)", "registration": "str (optional)", "icao24": "str (optional)", "mmsi": "str (optional)", "imo": "str (optional)", "name": "str (optional)", "owner": "str (optional)", "layers": "list[str] (optional)", "limit": "int (default 10)"},
|
||||
"description": "Exact-first resolver for planes, ships, operators, callsigns, registrations, MMSI/IMO, and named entities. Use before tracking to avoid fuzzy prompt matching.",
|
||||
"args": {"query": "str (optional)", "entity_type": "aircraft|ship|person|event|infrastructure (optional)", "callsign": "str (optional)", "registration": "str (optional)", "icao24": "str (optional)", "mmsi": "str (optional)", "imo": "str (optional)", "name": "str (optional)", "owner": "str (optional)", "layers": "list[str] (optional)", "limit": "int (default 10)", "fallback_search": "bool (default false)", "confirm_fuzzy": "bool (alias for fallback_search)"},
|
||||
"description": "Exact-first resolver for planes, ships, operators, callsigns, registrations, MMSI/IMO, and named entities. Skips fuzzy search unless fallback_search=true or no exact match.",
|
||||
},
|
||||
"route_query": {
|
||||
"args": {"text": "str", "lat": "float (optional)", "lng": "float (optional)", "radius_km": "float (default 50)", "compact": "bool (default true)"},
|
||||
"description": "Deterministic intent router — returns recommended fast command, alternates, and latency estimate. Preferred entry for natural-language reads.",
|
||||
},
|
||||
"run_playbook": {
|
||||
"args": {"name": "str", "query": "str (optional)", "lat": "float (optional)", "lng": "float (optional)"},
|
||||
"description": "Execute a named batch plan (hot_snapshot, morning_brief, monitor_heartbeat, track_snapshot, area_brief, entity_recon).",
|
||||
},
|
||||
"correlate_entity": {
|
||||
"args": {"query": "str (optional)", "entity_type": "str (optional)", "callsign": "str (optional)", "registration": "str (optional)", "icao24": "str (optional)", "mmsi": "str (optional)", "imo": "str (optional)", "name": "str (optional)", "owner": "str (optional)", "radius_km": "float (default 100)", "limit": "int (default 10)"},
|
||||
@@ -2348,13 +2420,29 @@ async def api_capabilities(request: Request):
|
||||
"description": "Universal compact search across telemetry when the entity type or source layer is not obvious.",
|
||||
},
|
||||
"search_news": {
|
||||
"args": {"query": "str", "limit": "int (default 10)", "include_gdelt": "bool (default true)"},
|
||||
"description": "Search news and event layers by keyword without pulling the whole slow feed.",
|
||||
"args": {"query": "str", "limit": "int (default 10)", "include_gdelt": "bool (default true)", "include_telegram": "bool (default true)"},
|
||||
"description": "Search news and event layers by keyword without pulling the whole slow feed. Includes Telegram OSINT when include_telegram is true.",
|
||||
},
|
||||
"entities_near": {
|
||||
"args": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list[str] (optional)", "limit": "int (default 25)"},
|
||||
"description": "Compact proximity search around a point across selected layers.",
|
||||
},
|
||||
"osint_lookup": {
|
||||
"args": {"tool": "str (ip|dns|whois|certs|threats|bgp|sanctions|cve|mac|github|leaks|sweep_init)", "...": "tool-specific params"},
|
||||
"description": "Passive OSINT recon lookup — same backends as the Recon panel.",
|
||||
},
|
||||
"osint_tools": {
|
||||
"args": {},
|
||||
"description": "List available recon tools and entity-expand types.",
|
||||
},
|
||||
"entity_expand": {
|
||||
"args": {"type": "str", "id": "str", "registration": "str (optional)", "icao24": "str (optional)"},
|
||||
"description": "Entity relationship graph expansion.",
|
||||
},
|
||||
"osint_sweep": {
|
||||
"args": {"ip": "str", "cidr": "int (default 24)"},
|
||||
"description": "Active subnet scan — requires full access tier.",
|
||||
},
|
||||
"brief_area": {
|
||||
"args": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list[str] (optional)", "query": "str (optional)", "limit": "int (default 25)", "context_limit": "int (default 10)"},
|
||||
"description": "One compact area brief: nearby aircraft/ships/entities, optional topic news, and selected context layers.",
|
||||
@@ -2477,7 +2565,8 @@ async def api_capabilities(request: Request):
|
||||
"track_ship": {"params": {"mmsi": "str (optional)", "imo": "str (optional)", "name": "str (optional)", "owner": "str (optional)", "callsign": "str (optional)"}, "description": "Alert when ship appears by MMSI, IMO, name, owner, or callsign"},
|
||||
"track_entity": {"params": {"query": "str", "entity_type": "str (optional)", "layers": "list[str] (optional)"}, "description": "Generic exact-first entity watch"},
|
||||
"geofence": {"params": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list (default ['flights','ships'])"}, "description": "Alert when any entity enters a geographic zone"},
|
||||
"keyword": {"params": {"keyword": "str"}, "description": "Alert when keyword appears in news/GDELT"},
|
||||
"keyword": {"params": {"keyword": "str", "include_telegram": "bool (default true)"}, "description": "Alert when keyword appears in news, GDELT, or Telegram OSINT"},
|
||||
"telegram_rhetoric": {"params": {"min_risk_score": "int 1-10 (default 7)", "keywords": "list[str] or comma string (optional)", "channels": "list[str] or comma string (optional)"}, "description": "Alert on new high-risk Telegram OSINT posts"},
|
||||
"prediction_market": {"params": {"query": "str", "threshold": "float 0-1 (optional)"}, "description": "Alert on prediction market movements"},
|
||||
},
|
||||
},
|
||||
@@ -2501,7 +2590,8 @@ async def api_capabilities(request: Request):
|
||||
"layers are serialized, unchanged layers transfer zero bytes. The client tracks versions "
|
||||
"automatically from SSE events and previous responses. "
|
||||
"3) Pass compact=true on every read command for compressed_v1 responses (~60-90% smaller). "
|
||||
"4) Use targeted commands first (find_flights, search_telemetry, entities_near). "
|
||||
"4) Use route_query / find_entity / run_playbook before search_telemetry. "
|
||||
"Expensive commands require confirm_expensive=true. "
|
||||
"Reserve get_telemetry/get_slow_telemetry for rare full-context pulls.",
|
||||
"pins": "Pins are server-side, NOT localStorage. Use place_pin command or POST /api/ai/pins. The agent can place and delete pins.",
|
||||
"tracking": "To track a specific aircraft without polling: use add_watch with track_callsign or track_registration. Over SSE, you'll get instant push alerts.",
|
||||
@@ -2515,45 +2605,85 @@ async def api_capabilities(request: Request):
|
||||
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
||||
# These endpoints manage the HMAC secret itself, so they MUST require
|
||||
# local operator access to prevent privilege escalation.
|
||||
#
|
||||
# Issue #302 (tg12): pre-fix, GET /api/ai/connect-info had two problems:
|
||||
#
|
||||
# 1. ``?reveal=true`` made the full secret travel through every operator
|
||||
# page-load that opened the Connect modal. Even gated to
|
||||
# ``require_local_operator``, that put the secret into browser
|
||||
# history, dev-tools network panels, browser disk caches, HAR
|
||||
# exports, and screen captures. Every time the modal opened.
|
||||
#
|
||||
# 2. The same GET endpoint auto-bootstrapped (generated + persisted)
|
||||
# the secret on first read. Side effects on a GET are a footgun:
|
||||
# browser prefetchers, mirror tools, and casual curl-from-history
|
||||
# would all silently mint+persist a fresh secret. (Gated, but
|
||||
# still surprising — and noisy in the audit log.)
|
||||
#
|
||||
# Resolution:
|
||||
#
|
||||
# GET /api/ai/connect-info — always returns the MASKED
|
||||
# secret. No ?reveal param.
|
||||
# No auto-bootstrap; if the
|
||||
# secret is missing,
|
||||
# ``hmac_secret_set: false``
|
||||
# tells the frontend to call
|
||||
# /bootstrap.
|
||||
#
|
||||
# POST /api/ai/connect-info/bootstrap — NEW. Generates + persists the
|
||||
# secret if missing. Idempotent.
|
||||
# Returns metadata only, never
|
||||
# the full secret.
|
||||
#
|
||||
# POST /api/ai/connect-info/reveal — NEW. Returns the full secret in
|
||||
# the body with strict
|
||||
# ``Cache-Control: no-store,
|
||||
# no-cache, must-revalidate``
|
||||
# + ``Pragma: no-cache`` so
|
||||
# it does not land in browser
|
||||
# caches. POST means it does
|
||||
# not land in URL history.
|
||||
#
|
||||
# POST /api/ai/connect-info/regenerate — keeps existing one-time-reveal
|
||||
# behavior (regenerate IS a
|
||||
# deliberate destructive action
|
||||
# the operator triggered, so
|
||||
# displaying the new secret
|
||||
# once is the only path that
|
||||
# makes the operation useful).
|
||||
# Same no-store headers added.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
# Cache-Control headers that should accompany every response carrying the
|
||||
# full HMAC secret. Reused across the reveal + regenerate endpoints so a
|
||||
# future refactor that splits or renames them can't forget the headers.
|
||||
_NO_STORE_HEADERS = {
|
||||
"Cache-Control": "no-store, no-cache, must-revalidate, private",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0",
|
||||
}
|
||||
|
||||
The HMAC secret is masked by default. Pass ?reveal=true to see the full key.
|
||||
Private keys are NEVER returned.
|
||||
|
||||
def _mask_hmac_secret(secret: str) -> str:
|
||||
"""Return a fingerprint-style mask (first6 + bullets + last4) suitable
|
||||
for display in the UI before the operator clicks Reveal."""
|
||||
if not secret:
|
||||
return ""
|
||||
if len(secret) > 10:
|
||||
return secret[:6] + "••••••••" + secret[-4:]
|
||||
return "••••••••"
|
||||
|
||||
|
||||
def _connect_info_metadata(settings) -> dict:
|
||||
"""Return everything the Connect modal needs EXCEPT the secret itself.
|
||||
|
||||
Shared between GET /api/ai/connect-info (where the full secret is
|
||||
masked) and POST /api/ai/connect-info/bootstrap (where the operator
|
||||
just generated a secret but we don't return it inline — they have to
|
||||
call /reveal to see it).
|
||||
"""
|
||||
import os
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
|
||||
# Auto-generate if not set
|
||||
if not hmac_secret:
|
||||
hmac_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", hmac_secret)
|
||||
# Clear settings cache so next read picks up the new value
|
||||
get_settings.cache_clear()
|
||||
|
||||
masked = hmac_secret[:6] + "••••••••" + hmac_secret[-4:] if len(hmac_secret) > 10 else "••••••••"
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret if reveal else masked,
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": True,
|
||||
"auto_generated_this_call": not bool(settings.OPENCLAW_HMAC_SECRET or ""),
|
||||
"notes": [
|
||||
"If no HMAC secret exists yet, this endpoint bootstraps one and persists it to .env.",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
"access_tier": access_tier,
|
||||
"trust_model": {
|
||||
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
||||
@@ -2591,6 +2721,7 @@ async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"get_telemetry", "get_pins", "satellite_images",
|
||||
"news_near", "ai_summary", "ai_report",
|
||||
"timemachine_list", "timemachine_view",
|
||||
"infonet_status", "list_gates", "read_gate_messages", "poll_dms",
|
||||
],
|
||||
},
|
||||
"full": {
|
||||
@@ -2601,30 +2732,146 @@ async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"satellite_images", "news_near", "data_injection",
|
||||
"ai_summary", "ai_report", "timemachine_snapshot",
|
||||
"timemachine_list", "timemachine_view", "timemachine_diff",
|
||||
"ensure_infonet_ready", "join_infonet_swarm",
|
||||
"post_gate_message", "cast_vote", "send_dm",
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def regenerate_hmac_secret(request: Request):
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working."""
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
@limiter.limit("30/minute")
async def get_connect_info(request: Request):
    """Describe the OpenClaw connection for the Connect modal.

    Read-only: the configured HMAC secret is only ever reported in the
    masked form produced by ``_mask_hmac_secret`` and nothing is generated
    or persisted here. ``hmac_secret_set`` tells the frontend whether it
    must call ``POST /api/ai/connect-info/bootstrap`` first; the full value
    is served exclusively by ``POST /api/ai/connect-info/reveal`` (#302).

    Private keys are NEVER returned.
    """
    from services.config import get_settings

    current = get_settings()
    secret_value = str(current.OPENCLAW_HMAC_SECRET or "").strip()

    bootstrap_notes = [
        "Call POST /api/ai/connect-info/bootstrap to mint a secret on first use.",
        "Call POST /api/ai/connect-info/reveal to see the full secret (no-store).",
        "Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
    ]
    info = {
        "ok": True,
        "masked_hmac_secret": _mask_hmac_secret(secret_value),
        "hmac_secret_set": bool(secret_value),
        "bootstrap_behavior": {
            "auto_generates_when_missing": False,
            "notes": bootstrap_notes,
        },
    }
    # Shared modal metadata is merged last, so on a key clash it wins.
    info.update(_connect_info_metadata(current))
    return info
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/bootstrap", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def bootstrap_hmac_secret(request: Request):
    """Mint and persist the OpenClaw HMAC secret if it isn't already set.

    Idempotent: if a secret already exists, returns ``generated: false``
    and leaves the existing secret untouched. Never returns the secret
    value in the response body — only its mask — so the operator calls
    ``POST /api/ai/connect-info/reveal`` to see it.

    Returns:
        A dict with ``ok``, ``generated``, ``hmac_secret_set``,
        ``masked_hmac_secret`` and a human-readable ``detail``.
    """
    import secrets
    from services.config import get_settings

    settings = get_settings()
    existing = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
    if existing:
        return {
            "ok": True,
            "generated": False,
            "hmac_secret_set": True,
            "masked_hmac_secret": _mask_hmac_secret(existing),
            "detail": "HMAC secret already configured. Use /reveal to see it.",
        }

    new_secret = secrets.token_hex(24)  # 48 hex chars
    _write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
    # Clear the settings cache so the next read picks up the new value.
    get_settings.cache_clear()

    # Metadata only: the full secret is deliberately left out of this
    # response (it previously leaked via an "hmac_secret" key alongside a
    # stale "regenerated" detail message).
    return {
        "ok": True,
        "generated": True,
        "hmac_secret_set": True,
        "masked_hmac_secret": _mask_hmac_secret(new_secret),
        "detail": "HMAC secret generated. Call /reveal to copy it into your OpenClaw config.",
    }
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/reveal", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def reveal_hmac_secret(request: Request):
    """Serve the full HMAC secret to the local operator.

    Exposed as POST rather than GET so the secret is not tied to a URL
    that could end up in history or access logs, and answered with the
    shared ``_NO_STORE_HEADERS`` so the response is marked non-cacheable.

    Raises:
        HTTPException: 404 when no secret is configured; the frontend
            should call ``POST /api/ai/connect-info/bootstrap`` first.
    """
    from services.config import get_settings

    secret_value = str(get_settings().OPENCLAW_HMAC_SECRET or "").strip()
    if secret_value:
        body = {
            "ok": True,
            "hmac_secret": secret_value,
            "masked_hmac_secret": _mask_hmac_secret(secret_value),
        }
        return JSONResponse(content=body, headers=_NO_STORE_HEADERS)

    raise HTTPException(
        404,
        "No HMAC secret configured. Call POST /api/ai/connect-info/bootstrap first.",
    )
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
@limiter.limit("5/minute")
async def regenerate_hmac_secret(request: Request):
    """Replace the HMAC secret; the previous one stops working at once.

    The freshly generated secret is included in the response body because
    the operator explicitly requested the rotation and has to copy the new
    value into their OpenClaw configuration. The response carries the
    shared ``_NO_STORE_HEADERS`` so it is marked non-cacheable.
    """
    import secrets
    from services.config import get_settings

    rotated = secrets.token_hex(24)  # 48 chars
    _write_env_value("OPENCLAW_HMAC_SECRET", rotated)
    # Invalidate cached settings so later reads observe the rotated value.
    get_settings.cache_clear()

    body = {
        "ok": True,
        "hmac_secret": rotated,
        "masked_hmac_secret": _mask_hmac_secret(rotated),
        "detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
    }
    return JSONResponse(content=body, headers=_NO_STORE_HEADERS)
|
||||
|
||||
|
||||
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def set_access_tier(request: Request, body: dict):
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
"""Strategic Risk Analytics API — game-theoretic early warning overlays."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from analytics.backtest import (
|
||||
DEFAULT_BACKTEST_ALERT_THRESHOLD,
|
||||
run_historical_backtest,
|
||||
tune_alert_threshold,
|
||||
)
|
||||
from analytics.feed_adapter import normalize_feed_item
|
||||
from analytics.integration import get_gt_engine, refresh_from_latest_data
|
||||
from analytics.gt_alerts import top_gt_alerts
|
||||
from analytics.micro_rolling import micro_rolling_report
|
||||
from analytics.rolling_backtest import (
|
||||
freeze_weekly_snapshot,
|
||||
label_region,
|
||||
label_regions,
|
||||
rolling_alert_threshold,
|
||||
rolling_report,
|
||||
score_week,
|
||||
)
|
||||
from analytics.weekly_store import load_week
|
||||
from analytics.settings import gt_analytics_enabled
|
||||
from services.fetchers._store import _data_lock, get_latest_data_subset_refs, latest_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class RiskHeatmapRequest(BaseModel):
    """Body of POST /api/analytics/risk_heatmap: optional batch ingest plus a refresh switch."""

    # When true (the default) beliefs are refreshed from the latest data
    # after any supplied items have been ingested.
    refresh: bool = True
    # Raw feed items to ingest; each may carry a "source_type" key.
    items: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RollingFreezeRequest(BaseModel):
    """Request body for freezing a weekly rolling-backtest snapshot.

    NOTE(review): presumably consumed by the endpoint that calls
    ``freeze_weekly_snapshot``; that handler is not visible here.
    """

    # Target week identifier; None leaves the choice to the handler.
    week_id: str | None = None
    # Opt-in override flag; exact semantics are defined by the handler.
    force: bool = False
|
||||
|
||||
|
||||
class RollingLabelEntry(BaseModel):
    """One region/label pair submitted as part of a ``RollingLabelRequest``."""

    # Region being labelled.
    region: str
    # Label assigned to that region.
    label: str
    # Optional free-form notes; empty by default.
    notes: str = ""
|
||||
|
||||
|
||||
class RollingLabelRequest(BaseModel):
    """Request body carrying a batch of region labels for one week."""

    # Week the labels apply to (required).
    week_id: str
    # Label entries for that week; defaults to an empty list.
    labels: list[RollingLabelEntry] = Field(default_factory=list)
|
||||
|
||||
|
||||
def _empty_heatmap() -> dict[str, Any]:
|
||||
return {
|
||||
"enabled": False,
|
||||
"type": "FeatureCollection",
|
||||
"features": [],
|
||||
"clusters": [],
|
||||
"processed": 0,
|
||||
"timestamp": None,
|
||||
}
|
||||
|
||||
|
||||
def _gt_risk_payload() -> dict[str, Any]:
    """Flatten the cached ``gt_risk`` entry into the heatmap response shape.

    Falls back to ``_empty_heatmap()`` when the cached entry is missing or
    is not a dict. Missing or falsy fields are replaced by neutral defaults
    (empty lists, ``0``, an empty FeatureCollection).
    """
    cached = get_latest_data_subset_refs("gt_risk").get("gt_risk")
    if not isinstance(cached, dict):
        return _empty_heatmap()

    overlay = cached.get("heatmap")
    if not overlay:
        overlay = {"type": "FeatureCollection", "features": []}

    response: dict[str, Any] = {}
    response["enabled"] = bool(cached.get("enabled"))
    response["type"] = overlay.get("type", "FeatureCollection")
    # Copy the lists so callers can extend the response without touching
    # the cached containers.
    response["features"] = list(overlay.get("features") or [])
    response["clusters"] = list(cached.get("clusters") or [])
    response["processed"] = int(cached.get("processed") or 0)
    response["timestamp"] = cached.get("timestamp")
    return response
|
||||
|
||||
|
||||
@router.get("/api/analytics/risk_heatmap")
@limiter.limit("60/minute")
async def risk_heatmap_get(request: Request) -> dict[str, Any]:
    """Serve the cached GeoJSON risk overlay, or an empty one when analytics are disabled."""
    return _gt_risk_payload() if gt_analytics_enabled() else _empty_heatmap()
|
||||
|
||||
|
||||
@router.post("/api/analytics/risk_heatmap")
@limiter.limit("12/minute")
async def risk_heatmap_post(
    request: Request,
    body: RiskHeatmapRequest,
    _: None = Depends(require_local_operator),
) -> dict[str, Any]:
    """
    Ingest optional feed items and/or refresh beliefs from latest intel layers.

    Requires local operator auth — intended for OpenClaw agents and admin tooling.

    Returns:
        The current risk payload extended with ``ingested`` (number of
        items the engine accepted) and ``refresh`` (whether a refresh was
        requested).

    Raises:
        HTTPException: 503 when analytics are disabled or the engine is
            unavailable.
    """
    if not gt_analytics_enabled():
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics is disabled")

    engine = get_gt_engine()
    if engine is None:
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics engine unavailable")

    ingested = 0
    for raw in body.items:
        if not isinstance(raw, dict):
            continue
        source_type = str(raw.get("source_type") or "manual")
        item = normalize_feed_item(raw, source_type=source_type)
        result = engine.process_feed_item(item)
        # Only count items the engine actually processed.
        if result and not result.get("skipped"):
            ingested += 1

    if body.refresh:
        # Copy the shared store under the lock, then refresh from the copy.
        # NOTE(review): indentation was lost in the reviewed text; the
        # refresh is placed outside the lock because it only needs the
        # snapshot — confirm against upstream.
        with _data_lock:
            snapshot = dict(latest_data)
        refresh_summary = refresh_from_latest_data(snapshot, persist=True)
        # The summary is not part of the response; log it instead of
        # silently discarding it.
        logger.debug(
            "gt_risk refresh after ingest: ingested=%d summary=%s",
            ingested,
            refresh_summary,
        )

    payload = _gt_risk_payload()
    payload["ingested"] = ingested
    payload["refresh"] = bool(body.refresh)
    return payload
|
||||
|
||||
|
||||
@router.get("/api/analytics/dossier/{region}")
@limiter.limit("30/minute")
async def analytics_dossier(request: Request, region: str) -> dict[str, Any]:
    """Return the engine's dossier for one region.

    The region is stripped and lower-cased before use.

    Raises:
        HTTPException: 400 when the normalized region is empty or longer
            than 120 characters; 503 when the engine is unavailable.

    Returns:
        The engine dossier with ``enabled`` set to True, or a zero-risk
        placeholder with ``enabled`` False when analytics is disabled.
    """
    region_key = str(region or "").strip().lower()
    if len(region_key) > 120 or not region_key:
        raise HTTPException(status_code=400, detail="Invalid region identifier")

    if not gt_analytics_enabled():
        placeholder: dict[str, Any] = {
            "enabled": False,
            "region": region_key,
            "current_risk": 0.0,
            "interpretation": "Strategic Risk Analytics is disabled.",
            "recent_signals": [],
            "scenarios": [],
        }
        return placeholder

    engine = get_gt_engine()
    if engine is None:
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics engine unavailable")

    result = engine.get_dossier(region_key)
    result["enabled"] = True
    return result
|
||||
|
||||
|
||||
@router.get("/api/analytics/backtest")
@limiter.limit("6/minute")
async def analytics_backtest(
    request: Request,
    expanded: bool = True,
    tune: bool = False,
    target_confidence: float = 0.95,
) -> dict[str, Any]:
    """Run the labeled historical backtest and return its report.

    With ``tune`` set, the alert threshold and report both come from
    ``tune_alert_threshold``; otherwise the default threshold is used with
    ``run_historical_backtest``.

    Returns:
        The report dict extended with ``enabled``, ``expanded_suite``,
        ``tuned`` and ``recommended_alert_threshold``; or a short disabled
        message when analytics is off.
    """
    if not gt_analytics_enabled():
        return {
            "enabled": False,
            "message": "Strategic Risk Analytics is disabled.",
        }

    if not tune:
        threshold = DEFAULT_BACKTEST_ALERT_THRESHOLD
        report = run_historical_backtest(
            use_expanded_suite=expanded,
            alert_threshold=threshold,
            target_confidence=target_confidence,
        )
    else:
        threshold, report = tune_alert_threshold(target_confidence=target_confidence)

    result = report.to_dict()
    result["enabled"] = True
    result["expanded_suite"] = expanded
    result["tuned"] = tune
    result["recommended_alert_threshold"] = threshold
    return result
|
||||
|
||||
|
||||
@router.get("/api/analytics/rolling")
@limiter.limit("12/minute")
async def analytics_rolling(
    request: Request,
    weeks: int = 8,
    target_confidence: float = 0.80,
) -> dict[str, Any]:
    """Return the rolling weekly validation report.

    ``weeks`` is clamped to the range 1..52 before being passed on.
    """
    if not gt_analytics_enabled():
        return {
            "enabled": False,
            "message": "Strategic Risk Analytics is disabled.",
        }

    bounded_weeks = max(1, min(weeks, 52))
    result = rolling_report(weeks=bounded_weeks, target_confidence=target_confidence)
    result["enabled"] = True
    return result
|
||||
|
||||
|
||||
@router.get("/api/analytics/alerts")
@limiter.limit("30/minute")
async def analytics_top_alerts(
    request: Request,
    limit: int = 8,
) -> dict[str, Any]:
    """Return the top risk alerts report.

    ``limit`` is clamped to the range 1..25 before being passed on.
    """
    if not gt_analytics_enabled():
        return {
            "enabled": False,
            "message": "Strategic Risk Analytics is disabled.",
        }

    bounded_limit = max(1, min(limit, 25))
    result = top_gt_alerts(limit=bounded_limit)
    result["enabled"] = True
    return result
|
||||
|
||||
|
||||
@router.get("/api/analytics/rolling/micro")
@limiter.limit("30/minute")
async def analytics_rolling_micro(
    request: Request,
    window_days: int = 3,
    limit: int = 15,
) -> dict[str, Any]:
    """Return the short-window ("micro") rolling report.

    ``window_days`` is clamped to 2..7 and ``limit`` to 1..50 before the
    report is requested.
    """
    if not gt_analytics_enabled():
        return {
            "enabled": False,
            "message": "Strategic Risk Analytics is disabled.",
        }

    bounded_window = max(2, min(window_days, 7))
    bounded_limit = max(1, min(limit, 50))
    result = micro_rolling_report(window_days=bounded_window, limit=bounded_limit)
    result["enabled"] = True
    return result
|
||||
|
||||
|
||||
@router.get("/api/analytics/rolling/{week_id}")
@limiter.limit("12/minute")
async def analytics_rolling_week(request: Request, week_id: str) -> dict[str, Any]:
    """Return one frozen week's snapshot together with its score.

    Raises:
        HTTPException: 404 when no snapshot exists for the (stripped)
            ``week_id``.
    """
    if not gt_analytics_enabled():
        return {"enabled": False, "message": "Strategic Risk Analytics is disabled."}

    lookup_id = str(week_id).strip()
    frozen = load_week(lookup_id)
    if frozen is None:
        raise HTTPException(status_code=404, detail=f"Week {week_id} not found")

    week_score = score_week(frozen)
    return {
        "enabled": True,
        "week_id": frozen.week_id,
        "snapshot": frozen.to_dict(),
        "score": week_score.to_dict(),
        "alert_threshold": rolling_alert_threshold(),
    }
|
||||
|
||||
|
||||
@router.post("/api/analytics/rolling/freeze")
@limiter.limit("6/minute")
async def analytics_rolling_freeze(
    request: Request,
    body: RollingFreezeRequest,
    _: None = Depends(require_local_operator),
) -> dict[str, Any]:
    """Freeze the weekly snapshot via ``freeze_weekly_snapshot``.

    Guarded by the ``require_local_operator`` dependency.

    Raises:
        HTTPException: 503 when analytics is disabled or when the freeze
            result does not report ``ok``.
    """
    if not gt_analytics_enabled():
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics is disabled")

    outcome = freeze_weekly_snapshot(week_id=body.week_id, force=body.force, frozen_by="api")
    if outcome.get("ok"):
        outcome["enabled"] = True
        return outcome
    raise HTTPException(status_code=503, detail=outcome.get("detail", "Freeze failed"))
|
||||
|
||||
|
||||
@router.post("/api/analytics/rolling/label")
@limiter.limit("12/minute")
async def analytics_rolling_label(
    request: Request,
    body: RollingLabelRequest,
    _: None = Depends(require_local_operator),
) -> dict[str, Any]:
    """Attach outcome labels to a frozen week.

    A single label goes through ``label_region``; any other count
    (including zero) goes through the batch helper ``label_regions``.

    Raises:
        HTTPException: 503 when analytics is disabled, 400 when ``week_id``
            is blank, 404 when the labeling result does not report ``ok``.
    """
    if not gt_analytics_enabled():
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics is disabled")

    week_id = str(body.week_id or "").strip()
    if not week_id:
        raise HTTPException(status_code=400, detail="week_id required")

    if len(body.labels) != 1:
        rows = [item.model_dump() for item in body.labels]
        outcome = label_regions(week_id, rows, labeled_by="api")
    else:
        only = body.labels[0]
        outcome = label_region(
            week_id,
            only.region,
            only.label,  # type: ignore[arg-type]
            notes=only.notes,
            labeled_by="api",
        )

    if outcome.get("ok"):
        outcome["enabled"] = True
        return outcome
    raise HTTPException(status_code=404, detail=outcome.get("detail", "Label failed"))
|
||||
+43
-3
@@ -47,6 +47,8 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.tripcheck.com",
|
||||
"infocar.dgt.es",
|
||||
"informo.madrid.es",
|
||||
"webcams2.asfinag.at",
|
||||
"odo.asfinag.at",
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com",
|
||||
"www.lakecountypassage.com",
|
||||
@@ -55,6 +57,14 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.nps.gov",
|
||||
"home.lewiscounty.com",
|
||||
"www.seattle.gov",
|
||||
"511on.ca",
|
||||
"511.alberta.ca",
|
||||
"fl511.com",
|
||||
"www.fl511.com",
|
||||
"webcams.transport.nsw.gov.au",
|
||||
"www.livetraffic.com",
|
||||
"livetraffic.com",
|
||||
"opendata.ndw.nu",
|
||||
}
|
||||
|
||||
|
||||
@@ -120,7 +130,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
read_timeout = 18.0 if "/snapshots/" in path else 12.0
|
||||
return _CCTVProxyProfile(name="gdot-snapshot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, read_timeout), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host == "511ga.org":
|
||||
return _CCTVProxyProfile(name="gdot-511ga-image", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
@@ -128,7 +138,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
if host.startswith("vss") and host.endswith("dot.ga.gov"):
|
||||
return _CCTVProxyProfile(name="gdot-hls", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=10,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
return _CCTVProxyProfile(name="illinois-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
@@ -156,16 +166,46 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
return _CCTVProxyProfile(name="madrid-city", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://informo.madrid.es/"})
|
||||
if host in {"webcams2.asfinag.at", "odo.asfinag.at"}:
|
||||
return _CCTVProxyProfile(name="asfinag-austria", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.asfinag.at/"})
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(name="windy-webcams", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.windy.com/"})
|
||||
if host == "511on.ca":
|
||||
return _CCTVProxyProfile(name="ontario-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511on.ca/"})
|
||||
if host == "511.alberta.ca":
|
||||
return _CCTVProxyProfile(name="alberta-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511.alberta.ca/"})
|
||||
if host in {"fl511.com", "www.fl511.com"}:
|
||||
return _CCTVProxyProfile(name="florida-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://fl511.com/"})
|
||||
if host == "webcams.transport.nsw.gov.au":
|
||||
return _CCTVProxyProfile(name="nsw-live-traffic", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.livetraffic.com/"})
|
||||
if host in {"opendata.ndw.nu", "www.ndw.nu"}:
|
||||
return _CCTVProxyProfile(name="ndw-netherlands", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=120,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.ndw.nu/"})
|
||||
return _CCTVProxyProfile(name="generic-cctv", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=30,
|
||||
headers={"Accept": "*/*"})
|
||||
|
||||
|
||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
||||
headers = {"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)", **profile.headers}
|
||||
# Round 7a: per-install operator handle. Mozilla/5.0 prefix retained
|
||||
# because many CCTV endpoints sniff for a browser-like prefix.
|
||||
from services.network_utils import outbound_user_agent
|
||||
headers = {
|
||||
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||
**profile.headers,
|
||||
}
|
||||
range_header = request.headers.get("range")
|
||||
if range_header:
|
||||
headers["Range"] = range_header
|
||||
|
||||
+278
-17
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
from fastapi import APIRouter, Request, Response, Query, Depends
|
||||
@@ -8,7 +9,7 @@ from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from limiter import limiter
|
||||
from auth import require_admin, require_local_operator
|
||||
from services.data_fetcher import get_latest_data, update_all_data
|
||||
from services.data_fetcher import update_all_data
|
||||
import orjson
|
||||
import json as json_mod
|
||||
|
||||
@@ -30,6 +31,14 @@ class LayerUpdate(BaseModel):
|
||||
layers: dict[str, bool]
|
||||
|
||||
|
||||
class LiveUamapOptInUpdate(BaseModel):
    """Request body for POST /api/liveuamap/scraper-opt-in."""

    # Operator's consent flag for the LiveUAMap scraper.
    opted_in: bool
|
||||
|
||||
|
||||
class PredictionMarketsOptInUpdate(BaseModel):
    """Request body for POST /api/prediction-markets/opt-in."""

    # True enables prediction-market fetches, False disables them.
    opted_in: bool
|
||||
|
||||
|
||||
_LAST_VIEWPORT_UPDATE: tuple | None = None
|
||||
_LAST_VIEWPORT_UPDATE_TS = 0.0
|
||||
_VIEWPORT_UPDATE_LOCK = threading.Lock()
|
||||
@@ -98,6 +107,88 @@ def _current_etag(prefix: str = "") -> str:
|
||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||
|
||||
|
||||
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||
# filtering. Light reference layers (datacenters, military_bases,
|
||||
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||
# in these sets — they ship world-scale even when bounds are supplied so
|
||||
# panning never reveals an "empty world" of static infrastructure.
|
||||
#
|
||||
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||
# is byte-for-byte identical to the pre-#288 behavior.
|
||||
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"commercial_flights",
|
||||
"military_flights",
|
||||
"private_flights",
|
||||
"private_jets",
|
||||
"tracked_flights",
|
||||
"ships",
|
||||
"cctv",
|
||||
"uavs",
|
||||
"liveuamap",
|
||||
"gps_jamming",
|
||||
"sigint",
|
||||
"trains",
|
||||
)
|
||||
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"gdelt",
|
||||
"firms_fires",
|
||||
"kiwisdr",
|
||||
"scanners",
|
||||
"psk_reporter",
|
||||
)
|
||||
|
||||
|
||||
def _has_full_bbox(s, w, n, e) -> bool:
|
||||
return None not in (s, w, n, e)
|
||||
|
||||
|
||||
def _bbox_etag_suffix(s, w, n, e) -> str:
|
||||
"""Quantize bbox to 1° before mixing into the ETag.
|
||||
|
||||
The 20% padding inside _bbox_filter already absorbs sub-degree pans;
|
||||
quantizing here means small mouse drags don't blow the ETag cache
|
||||
on the client. Full-world bounds collapse to a single suffix.
|
||||
"""
|
||||
if not _has_full_bbox(s, w, n, e):
|
||||
return ""
|
||||
try:
|
||||
ss = math.floor(float(s))
|
||||
ww = math.floor(float(w))
|
||||
nn = math.ceil(float(n))
|
||||
ee = math.ceil(float(e))
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
# If the requested window covers basically the whole world, treat it as
|
||||
# "no bbox" for caching purposes so world-zoomed clients all hit the
|
||||
# same ETag and benefit from the existing 304 path.
|
||||
lat_span, lng_span = _bbox_spans(s, w, n, e)
|
||||
if lng_span >= 300 or lat_span >= 120:
|
||||
return ""
|
||||
return f"|bbox={ss},{ww},{nn},{ee}"
|
||||
|
||||
|
||||
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
                           s: float, w: float, n: float, e: float) -> dict:
    """Filter the listed collections of *payload* to a viewport, in place.

    Each key in *heavy_keys* whose value is a non-empty list is replaced by
    the result of ``_bbox_filter`` for the given bounds. Keys that are
    missing, empty, or not lists are left untouched.

    A world-scale window (longitude span >= 300 or latitude span >= 120 per
    ``_bbox_spans``) skips filtering entirely.

    Returns:
        The same *payload* object that was passed in.
    """
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    if lat_span >= 120 or lng_span >= 300:
        return payload

    for name in heavy_keys:
        collection = payload.get(name)
        if isinstance(collection, list) and collection:
            payload[name] = _bbox_filter(collection, s, w, n, e)
    return payload
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
@@ -120,6 +211,15 @@ def _sanitize_payload(value):
|
||||
return value
|
||||
|
||||
|
||||
def _live_data_json_bytes(payload: dict) -> bytes:
    """Encode a dashboard payload as JSON bytes with shared orjson settings.

    The payload is passed through ``_sanitize_payload`` first. Values orjson
    cannot serialize natively fall back to ``str``, and non-string dict
    keys are permitted.
    """
    cleaned = _sanitize_payload(payload)
    return orjson.dumps(cleaned, default=str, option=orjson.OPT_NON_STR_KEYS)
|
||||
|
||||
|
||||
def _bbox_filter(items: list, s: float, w: float, n: float, e: float,
|
||||
lat_key: str = "lat", lng_key: str = "lng") -> list:
|
||||
pad_lat = (n - s) * 0.2
|
||||
@@ -304,6 +404,95 @@ async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.get("/api/liveuamap/scraper-status", dependencies=[Depends(require_local_operator)])
async def api_liveuamap_scraper_status():
    """Report the LiveUAMap scraper status (local operator only)."""
    # Imported lazily, at call time.
    from services.liveuamap_settings import liveuamap_scraper_status

    status = liveuamap_scraper_status()
    return status
|
||||
|
||||
|
||||
@router.post("/api/liveuamap/scraper-opt-in", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_liveuamap_scraper_opt_in(body: LiveUamapOptInUpdate, request: Request):
    """Store the operator's LiveUAMap scraper consent (#348).

    On opt-in, if the ``global_incidents`` layer is active, a background
    refresh is started on a daemon thread so the request does not wait for
    it.

    Returns:
        The scraper status after the change.
    """
    from services.liveuamap_settings import liveuamap_scraper_status, set_liveuamap_ui_opt_in

    set_liveuamap_ui_opt_in(body.opted_in)
    if body.opted_in:
        from services.fetchers._store import is_any_active

        layer_on = is_any_active("global_incidents")
        if layer_on:
            refresher = threading.Thread(target=_run_liveuamap_refresh, daemon=True)
            refresher.start()
    return liveuamap_scraper_status()
|
||||
|
||||
|
||||
def _run_liveuamap_refresh() -> None:
    """Thread target: run one LiveUAMap update, logging any failure.

    Exceptions are caught and logged as warnings so a failed refresh never
    propagates out of the background thread.
    """
    try:
        from services.fetchers import geo as _geo_fetchers

        _geo_fetchers.update_liveuamap()
    except Exception as exc:
        logger.warning("LiveUAMap refresh after opt-in failed: %s", exc)
|
||||
|
||||
|
||||
@router.get("/api/prediction-markets/status", dependencies=[Depends(require_local_operator)])
async def api_prediction_markets_status():
    """Report the prediction-markets feature status (local operator only)."""
    # Imported lazily, at call time.
    from services.prediction_markets_settings import prediction_markets_status

    status = prediction_markets_status()
    return status
|
||||
|
||||
|
||||
@router.post("/api/prediction-markets/opt-in", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_prediction_markets_opt_in(body: PredictionMarketsOptInUpdate, request: Request):
    """Turn prediction-market fetching on or off.

    The choice is stored through the settings helper, written to the env
    file, mirrored into ``os.environ``, and the cached settings are cleared
    so the new value is picked up. A daemon thread then either refreshes
    the market data (opt-in) or clears it (opt-out).

    Returns:
        The prediction-markets status after the change.
    """
    from services.config import get_settings
    from services.prediction_markets_settings import (
        prediction_markets_status,
        set_prediction_markets_ui_opt_in,
    )
    from routers.ai_intel import _write_env_value

    flag = "true" if body.opted_in else "false"

    set_prediction_markets_ui_opt_in(body.opted_in)
    _write_env_value("PREDICTION_MARKETS_ENABLED", flag)
    os.environ["PREDICTION_MARKETS_ENABLED"] = flag
    get_settings.cache_clear()

    follow_up = (
        _run_prediction_markets_refresh if body.opted_in else _run_prediction_markets_disable
    )
    threading.Thread(target=follow_up, daemon=True).start()

    return prediction_markets_status()
|
||||
|
||||
|
||||
def _run_prediction_markets_refresh() -> None:
    """Thread target: fetch prediction markets, then news, after opt-in.

    Any exception is logged as a warning instead of escaping the thread.
    """
    try:
        from services.fetchers.prediction_markets import fetch_prediction_markets
        from services.fetchers.news import fetch_news

        for step in (fetch_prediction_markets, fetch_news):
            step()
    except Exception as exc:
        logger.warning("Prediction markets refresh after opt-in failed: %s", exc)
|
||||
|
||||
|
||||
def _run_prediction_markets_disable() -> None:
    """Thread target: clear cached market data after opt-out, then refetch news.

    Empties ``prediction_markets`` and ``trending_markets`` in the shared
    store, marks ``prediction_markets`` fresh, and reruns the news fetch.
    Any exception is logged as a warning instead of escaping the thread.
    """
    try:
        from services.fetchers._store import _data_lock, _mark_fresh, latest_data
        from services.fetchers.news import fetch_news

        # NOTE(review): the source's indentation was lost; only the two
        # store writes are assumed to run under the lock - confirm.
        with _data_lock:
            for cleared_key in ("prediction_markets", "trending_markets"):
                latest_data[cleared_key] = []
        _mark_fresh("prediction_markets")
        fetch_news()
    except Exception as exc:
        logger.warning("Prediction markets disable cleanup failed: %s", exc)
|
||||
|
||||
|
||||
@router.post("/api/layers", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def update_layers(update: LayerUpdate, request: Request):
|
||||
@@ -313,6 +502,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
old_mesh = is_any_active("sigint_meshtastic")
|
||||
old_aprs = is_any_active("sigint_aprs")
|
||||
old_viirs = is_any_active("viirs_nightlights")
|
||||
old_datacenters = is_any_active("datacenters")
|
||||
old_fishing = is_any_active("fishing_activity")
|
||||
changed = False
|
||||
for key, value in update.layers.items():
|
||||
if key in active_layers:
|
||||
@@ -325,6 +516,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
new_mesh = is_any_active("sigint_meshtastic")
|
||||
new_aprs = is_any_active("sigint_aprs")
|
||||
new_viirs = is_any_active("viirs_nightlights")
|
||||
new_datacenters = is_any_active("datacenters")
|
||||
new_fishing = is_any_active("fishing_activity")
|
||||
if old_ships and not new_ships:
|
||||
from services.ais_stream import stop_ais_stream
|
||||
stop_ais_stream()
|
||||
@@ -368,13 +561,33 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
if not old_viirs and new_viirs:
|
||||
_queue_viirs_change_refresh()
|
||||
logger.info("VIIRS change refresh queued (layer enabled)")
|
||||
if not old_datacenters and new_datacenters:
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
fetch_datacenters()
|
||||
logger.info("Datacenters loaded (layer enabled)")
|
||||
if not old_fishing and new_fishing:
|
||||
from services.fetchers.geo import fetch_fishing_activity
|
||||
|
||||
fetch_fishing_activity()
|
||||
logger.info("Fishing activity refresh queued (layer enabled)")
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.get("/api/live-data")
@limiter.limit("120/minute")
async def live_data(request: Request):
    """Return the full live-data payload as JSON, with ETag revalidation.

    The ETag comes from ``_current_etag`` with the ``live|full|`` prefix.
    When the client's ``If-None-Match`` header equals it, an empty 304 is
    returned; otherwise a deep-copied snapshot of the latest data is
    serialized and sent with the ETag.

    A stale ``return get_latest_data()`` statement used to precede this
    logic and made all of it unreachable; it has been removed.
    """
    etag = _current_etag(prefix="live|full|")
    cache_headers = {"ETag": etag, "Cache-Control": "no-cache"}
    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304, headers=cache_headers)

    # Imported lazily, at call time.
    from services.fetchers._store import get_latest_data_deepcopy_snapshot

    payload = get_latest_data_deepcopy_snapshot()
    return Response(
        content=_live_data_json_bytes(payload),
        media_type="application/json",
        headers=cache_headers,
    )
|
||||
|
||||
|
||||
@router.get("/api/bootstrap/critical")
|
||||
@@ -469,7 +682,7 @@ async def bootstrap_critical(request: Request):
|
||||
"bootstrap_payload": True,
|
||||
}
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
@@ -479,13 +692,14 @@ async def bootstrap_critical(request: Request):
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data_fast(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||
):
|
||||
etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -525,20 +739,29 @@ async def live_data_fast(
|
||||
payload = _cap_fast_startup_payload(payload)
|
||||
else:
|
||||
payload = _cap_fast_dashboard_payload(payload)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/api/live-data/slow")
|
||||
@limiter.limit("60/minute")
|
||||
async def live_data_slow(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
):
|
||||
etag = _current_etag(prefix="slow|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -549,7 +772,8 @@ async def live_data_slow(
|
||||
"firms_fires", "datacenters", "military_bases", "power_plants", "viirs_change_nodes",
|
||||
"scanners", "weather_alerts", "ukraine_alerts", "air_quality", "volcanoes",
|
||||
"fishing_activity", "psk_reporter", "correlations", "uap_sightings", "wastewater",
|
||||
"crowdthreat", "threat_level", "trending_markets",
|
||||
"crowdthreat", "threat_level", "trending_markets", "road_corridor_trends",
|
||||
"malware_threats", "cyber_threats", "scm_suppliers", "telegram_osint", "gt_risk",
|
||||
)
|
||||
freshness = get_source_timestamps_snapshot()
|
||||
payload = {
|
||||
@@ -590,10 +814,47 @@ async def live_data_slow(
|
||||
"uap_sightings": (d.get("uap_sightings") or []) if active_layers.get("uap_sightings", True) else [],
|
||||
"wastewater": (d.get("wastewater") or []) if active_layers.get("wastewater", True) else [],
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"road_corridor_trends": (
|
||||
d.get("road_corridor_trends") or {"updated_at": None, "corridors": []}
|
||||
)
|
||||
if active_layers.get("road_corridor_trends", False)
|
||||
else {"updated_at": None, "corridors": []},
|
||||
"malware_threats": (
|
||||
d.get("malware_threats") or {"threats": [], "total": 0}
|
||||
)
|
||||
if active_layers.get("malware_c2", False)
|
||||
else {"threats": [], "total": 0},
|
||||
"cyber_threats": (
|
||||
d.get("cyber_threats") or {"threats": [], "stats": {}}
|
||||
)
|
||||
if active_layers.get("cyber_threats", False)
|
||||
else {"threats": [], "stats": {}},
|
||||
"scm_suppliers": (
|
||||
d.get("scm_suppliers") or {"suppliers": [], "total": 0, "critical_count": 0}
|
||||
)
|
||||
if active_layers.get("scm_suppliers", False)
|
||||
else {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": (
|
||||
d.get("telegram_osint") or {"posts": [], "total": 0, "geolocated": 0}
|
||||
)
|
||||
if active_layers.get("telegram_osint", True)
|
||||
else {"posts": [], "total": 0, "geolocated": 0},
|
||||
"gt_risk": (
|
||||
d.get("gt_risk")
|
||||
or {"enabled": False, "heatmap": {"type": "FeatureCollection", "features": []}, "clusters": []}
|
||||
)
|
||||
if active_layers.get("gt_risk", False)
|
||||
else {"enabled": False, "heatmap": {"type": "FeatureCollection", "features": []}, "clusters": []},
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Static reference layers (datacenters, military bases,
|
||||
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||
# hides the infrastructure overlay the operator already has on screen.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""Entity graph expansion (intel layer)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint_intel.resolve import resolve_entity
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/entity/expand")
@limiter.limit("30/minute")
async def entity_expand(
    request: Request,
    _: None = Depends(require_local_operator),
    type: str = Query(..., min_length=3, max_length=32),
    id: str = Query(..., min_length=2, max_length=200),
    registration: str | None = Query(default=None, max_length=32),
    model: str | None = Query(default=None, max_length=64),
    icao24: str | None = Query(default=None, max_length=16),
) -> dict:
    """Expand one entity through ``resolve_entity``.

    The optional query parameters are forwarded as a property dict whose
    ``label`` is the entity ``id``.

    Raises:
        HTTPException: 400 when ``resolve_entity`` raises ``ValueError``;
            502 for any other exception it raises.
    """
    hints = dict(label=id, registration=registration, model=model, icao24=icao24)
    try:
        expanded = resolve_entity(type, id, hints)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err)) from err
    except Exception as err:
        raise HTTPException(status_code=502, detail="Intelligence layer unavailable") from err
    return expanded
|
||||
@@ -8,7 +8,7 @@ from services.data_fetcher import get_latest_data
|
||||
from services.schemas import HealthResponse
|
||||
import os
|
||||
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.79")
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.82")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -59,6 +59,12 @@ async def health_check(request: Request):
|
||||
# when the SPKI-pinned fallback is in effect. The data plane keeps
|
||||
# flowing (this is by design — see ais_proxy.js comments) but observers
|
||||
# who care about MITM-protection posture deserve a visible signal.
|
||||
#
|
||||
# Plus connectivity health (added 2026-05-23 when stream.aisstream.io
|
||||
# went fully offline): ``connected`` tells the frontend whether ship
|
||||
# data is actually flowing. When false, a banner explains that ships
|
||||
# are unavailable due to an upstream outage — better than the user
|
||||
# silently seeing an empty ocean and assuming we broke something.
|
||||
ais_status: dict = {}
|
||||
try:
|
||||
from services.ais_stream import ais_proxy_status
|
||||
@@ -69,6 +75,27 @@ async def health_check(request: Request):
|
||||
# Don't override a worse top-level status if SLOs already failed,
|
||||
# but escalate ok -> degraded so the field surfaces in dashboards.
|
||||
top_status = "degraded"
|
||||
# AIS_API_KEY not configured is "feature off", not "system broken" —
|
||||
# so we only escalate when the operator opted into AIS (key set) AND
|
||||
# the stream is currently offline.
|
||||
if (
|
||||
os.environ.get("AIS_API_KEY")
|
||||
and ais_status.get("connected") is False
|
||||
and top_status == "ok"
|
||||
):
|
||||
top_status = "degraded"
|
||||
|
||||
runtime: dict = {}
|
||||
try:
|
||||
from services.runtime_profile import get_runtime_profile
|
||||
from analytics.settings import gt_analytics_status
|
||||
|
||||
runtime = {
|
||||
**get_runtime_profile(),
|
||||
"gt_analytics": gt_analytics_status(),
|
||||
}
|
||||
except Exception:
|
||||
runtime = {}
|
||||
|
||||
return {
|
||||
"status": top_status,
|
||||
@@ -93,6 +120,7 @@ async def health_check(request: Request):
|
||||
"slo": slo_statuses,
|
||||
"slo_summary": slo_summary,
|
||||
"ais_proxy": ais_status,
|
||||
"runtime": runtime or None,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
"""Malware, cyber threats, and country risk feeds."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from starlette.background import BackgroundTask
|
||||
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import get_latest_data_subset_refs
|
||||
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
||||
from services.intel_feeds.country_risk import build_country_risk_payload
|
||||
from services.network_utils import outbound_user_agent
|
||||
from services.telegram_translate import apply_posts_translations, normalize_translate_target
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/malware")
@limiter.limit("60/minute")
async def malware_feed(request: Request) -> dict:
    """Return the stored ``malware_threats`` payload, or an empty default.

    The stored value is returned as-is only when it is a dict whose
    ``threats`` entry is not ``None``.
    """
    feed = get_latest_data_subset_refs("malware_threats").get("malware_threats")
    if not isinstance(feed, dict) or feed.get("threats") is None:
        return {"threats": [], "total": 0, "timestamp": None, "source": "abuse.ch"}
    return feed
|
||||
|
||||
|
||||
@router.get("/api/cyber-threats")
@limiter.limit("60/minute")
async def cyber_threats(request: Request) -> dict:
    """Return the stored ``cyber_threats`` payload, or an empty default when it is falsy."""
    snapshot = get_latest_data_subset_refs("cyber_threats")
    feed = snapshot.get("cyber_threats")
    if feed:
        return feed
    return {"threats": [], "stats": {}}
|
||||
|
||||
|
||||
@router.get("/api/country-risk")
@limiter.limit("30/minute")
async def country_risk(request: Request) -> dict:
    """Return whatever ``build_country_risk_payload`` produces."""
    risk_payload = build_country_risk_payload()
    return risk_payload
|
||||
|
||||
|
||||
@router.get("/api/telegram-feed")
@limiter.limit("30/minute")
async def telegram_feed(request: Request, lang: str | None = Query(default=None)) -> dict:
    """Return the stored ``telegram_osint`` payload, optionally translated.

    Without ``lang`` the stored payload is returned untouched. With ``lang``
    a shallow copy is returned whose ``posts`` have been passed through
    ``apply_posts_translations`` and which carries the normalized target in
    ``translate_locale``. An empty default is returned when nothing usable
    is stored.
    """
    feed = get_latest_data_subset_refs("telegram_osint").get("telegram_osint")
    has_posts = isinstance(feed, dict) and feed.get("posts") is not None
    if not has_posts:
        return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}

    if not lang:
        return feed

    locale = normalize_translate_target(lang)
    # Copy the top-level dict so the stored snapshot is not modified here.
    translated = {**feed}
    translated["posts"] = apply_posts_translations(list(feed.get("posts") or []), locale)
    translated["translate_locale"] = locale
    return translated
|
||||
|
||||
|
||||
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
|
||||
clean_type = str(content_type or "").split(";", 1)[0].strip().lower()
|
||||
if clean_type and clean_type not in {"application/octet-stream", "binary/octet-stream"}:
|
||||
return content_type
|
||||
path = str(urlparse(target_url).path or "").lower()
|
||||
if path.endswith((".jpg", ".jpeg")):
|
||||
return "image/jpeg"
|
||||
if path.endswith(".png"):
|
||||
return "image/png"
|
||||
if path.endswith(".webp"):
|
||||
return "image/webp"
|
||||
if path.endswith(".gif"):
|
||||
return "image/gif"
|
||||
if path.endswith(".mp4"):
|
||||
return "video/mp4"
|
||||
if path.endswith(".webm"):
|
||||
return "video/webm"
|
||||
return content_type or "application/octet-stream"
|
||||
|
||||
|
||||
@router.get("/api/telegram/media")
@limiter.limit("60/minute")
async def telegram_media_proxy(request: Request, url: str = Query(...)) -> StreamingResponse:
    """Stream Telegram CDN media for in-app playback (host allowlist only).

    The caller-supplied ``url`` must be http(s) and its host must pass
    ``telegram_media_host_allowed``. Redirects are followed manually (at most
    ``max_redirects`` hops) and every hop is re-validated against the same
    allowlist, so an allowlisted host cannot bounce the proxy to an arbitrary
    destination. The blocking ``requests.get`` call runs in a worker thread so
    the event loop is not stalled while connecting upstream.

    Raises:
        HTTPException: 400 for a non-http(s) ``url``; 403 when its host is not
            allowlisted; 502 when the upstream fetch fails, redirects to a
            disallowed target, or redirects too many times; the upstream
            status code when upstream answers with >= 400.
    """
    import asyncio
    from urllib.parse import urljoin

    max_redirects = 3

    headers = {
        "User-Agent": (
            f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-media')}) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "*/*",
    }
    # Forward the client's Range header so partial requests reach upstream.
    if range_header := request.headers.get("range"):
        headers["Range"] = range_header

    target = url
    resp = None
    for hop in range(max_redirects + 1):
        parsed = urlparse(target)
        scheme_ok = parsed.scheme in ("http", "https")
        host_ok = scheme_ok and telegram_media_host_allowed(parsed.hostname)
        if hop == 0:
            # Caller-supplied URL: keep the original 400 / 403 responses.
            if not scheme_ok:
                raise HTTPException(status_code=400, detail="Invalid scheme")
            if not host_ok:
                raise HTTPException(status_code=403, detail="Host not allowed")
        elif not host_ok:
            # Redirect target chosen by upstream, not by the caller.
            raise HTTPException(status_code=502, detail="Upstream redirected to a disallowed host")

        try:
            resp = await asyncio.to_thread(
                requests.get,
                target,
                stream=True,
                timeout=(3, 45),
                headers=headers,
                allow_redirects=False,
            )
        except requests.RequestException as exc:
            logger.warning("Telegram media upstream failure %s: %s", target, exc)
            raise HTTPException(status_code=502, detail="Upstream fetch failed") from exc

        if not resp.is_redirect:
            break
        next_location = resp.headers.get("Location", "")
        resp.close()
        target = urljoin(target, next_location)
    else:
        # Every hop was a redirect; the last response is already closed.
        raise HTTPException(status_code=502, detail="Too many upstream redirects")

    if resp.status_code >= 400:
        resp.close()
        raise HTTPException(status_code=int(resp.status_code), detail=f"Upstream returned {resp.status_code}")

    media_type = _infer_telegram_media_type(url, resp.headers.get("Content-Type", "application/octet-stream"))
    response_headers = {
        "Cache-Control": "private, max-age=300",
        "Accept-Ranges": resp.headers.get("Accept-Ranges", "bytes"),
    }
    if content_length := resp.headers.get("Content-Length"):
        response_headers["Content-Length"] = content_length
    if content_range := resp.headers.get("Content-Range"):
        response_headers["Content-Range"] = content_range

    return StreamingResponse(
        resp.iter_content(chunk_size=65536),
        status_code=resp.status_code,
        media_type=media_type,
        headers=response_headers,
        # Close the upstream connection once the response has been sent.
        background=BackgroundTask(resp.close),
    )
|
||||
@@ -223,11 +223,21 @@ async def oracle_markets_more(request: Request, category: str = "NEWS", offset:
|
||||
"has_more": offset + limit < len(cat_markets), "total": len(cat_markets)}
|
||||
|
||||
|
||||
@router.post("/api/mesh/oracle/resolve")
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve(request: Request):
|
||||
"""Resolve a prediction market."""
|
||||
"""Resolve a prediction market.
|
||||
|
||||
Issue #240 (tg12): requires admin authentication. The
|
||||
``mesh_write_exempt`` decorator below is **metadata only** — it tags
|
||||
the route as not requiring a mesh signed-write envelope, it does
|
||||
NOT itself enforce caller authorization. The ``Depends(require_admin)``
|
||||
on the route decorator is what actually gates access.
|
||||
"""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
body = await request.json()
|
||||
market_title = body.get("market_title", "")
|
||||
@@ -327,11 +337,18 @@ async def oracle_predictions(request: Request, node_id: str = ""):
|
||||
active_predictions, authenticated=_scoped_view_authenticated(request, "mesh.audit"))
|
||||
|
||||
|
||||
@router.post("/api/mesh/oracle/resolve-stakes")
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve-stakes",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve_stakes(request: Request):
|
||||
"""Resolve all expired stake contests."""
|
||||
"""Resolve all expired stake contests.
|
||||
|
||||
Issue #241 (tg12): requires admin authentication. See the note on
|
||||
``oracle_resolve`` above — ``mesh_write_exempt`` is metadata only.
|
||||
"""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
resolutions = oracle_ledger.resolve_expired_stakes()
|
||||
return {"ok": True, "resolutions": resolutions, "count": len(resolutions)}
|
||||
|
||||
@@ -55,10 +55,20 @@ def _hydrate_gate_store_from_chain(events: list) -> int:
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list) -> int:
    """Forward ``events`` to ``main._hydrate_dm_relay_from_chain`` and return its result as ``int``.

    ``main`` is imported at call time rather than at module import
    (presumably to avoid a circular import; confirm against ``main``).
    """
    import main as main_module

    hydrated = main_module._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def infonet_peer_push(request: Request):
|
||||
"""Accept pushed Infonet events from relay peers (HMAC-authenticated)."""
|
||||
from services.mesh.mesh_fleet_defaults import infonet_fleet_join_enabled
|
||||
|
||||
if not infonet_fleet_join_enabled():
|
||||
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": [], "skipped": "fleet_join_disabled"}
|
||||
content_length = request.headers.get("content-length")
|
||||
if content_length:
|
||||
try:
|
||||
@@ -82,13 +92,76 @@ async def infonet_peer_push(request: Request):
|
||||
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": []}
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@router.post("/api/mesh/dm/replicate-envelope")
@limiter.limit("60/minute")
async def dm_replicate_envelope(request: Request):
    """Accept a DM envelope replicated from a peer relay (cross-node mailbox).

    Companion endpoint to ``DMRelay.replicate_to_peers`` (outbound, in
    ``mesh_dm_relay.py``). The sender's relay POSTs an encrypted DM
    envelope here after a successful local ``deposit``; this endpoint
    re-enforces the per-(sender, recipient) anti-spam cap and stores
    the envelope in the local mailbox if accepted.

    The cap is the network rule: a hostile sender's relay can spool
    extras locally, but every honest peer enforces the cap on inbound
    replication. Recipient polling from any honest peer therefore
    never sees more than ``MESH_DM_PENDING_PER_SENDER_LIMIT`` pending
    from any one sender, no matter how many spam attempts were tried.

    Same HMAC auth pattern as ``infonet_peer_push`` and ``gate_peer_push``.

    Responses:
        413 when the declared or actual body exceeds 64 KB; 403 when the
        peer HMAC check fails; 400 for a body that is not valid JSON;
        ``{"ok": False, ...}`` when the body has no ``envelope`` object;
        otherwise the result of ``dm_relay.accept_replica``.
    """
    # DM envelopes are bounded by MESH_DM_MAX_MSG_BYTES + envelope
    # overhead; 64 KB is a generous ceiling.
    max_body_bytes = 65_536
    too_large_json = '{"ok":false,"detail":"Request body too large (max 64KB)"}'

    content_length = request.headers.get("content-length")
    if content_length:
        try:
            declared_size = int(content_length)
        except (ValueError, TypeError):
            # Unparseable header: fall through to the actual-size check below.
            declared_size = 0
        if declared_size > max_body_bytes:
            return Response(content=too_large_json, status_code=413, media_type="application/json")

    body_bytes = await request.body()
    # Content-Length may be absent or non-numeric, so enforce the cap on the
    # bytes actually received as well.
    if len(body_bytes) > max_body_bytes:
        return Response(content=too_large_json, status_code=413, media_type="application/json")

    if not _verify_peer_push_hmac(request, body_bytes):
        return Response(
            content='{"ok":false,"detail":"Invalid or missing peer HMAC"}',
            status_code=403, media_type="application/json",
        )
    try:
        body = json_mod.loads(body_bytes or b"{}")
    except (ValueError, TypeError):
        return Response(
            content='{"ok":false,"detail":"Invalid JSON body"}',
            status_code=400, media_type="application/json",
        )

    # Valid JSON need not be an object (e.g. ``[]``); treat that the same as a
    # missing envelope instead of failing on ``.get``.
    envelope = body.get("envelope") if isinstance(body, dict) else None
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}

    originating_peer = _peer_hmac_url_from_request(request) or ""

    from services.mesh.mesh_dm_relay import dm_relay
    result = dm_relay.accept_replica(
        envelope=envelope,
        originating_peer_url=originating_peer,
    )
    return result
|
||||
|
||||
|
||||
@router.post("/api/mesh/gate/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def gate_peer_push(request: Request):
|
||||
"""Accept pushed gate events from relay peers (private plane)."""
|
||||
from services.mesh.mesh_fleet_defaults import infonet_fleet_join_enabled
|
||||
|
||||
if not infonet_fleet_join_enabled():
|
||||
return {"ok": True, "accepted": 0, "duplicates": 0, "skipped": "fleet_join_disabled"}
|
||||
content_length = request.headers.get("content-length")
|
||||
if content_length:
|
||||
try:
|
||||
|
||||
@@ -65,6 +65,7 @@ from services.mesh.mesh_signed_events import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
_INFONET_SYNC_RATE_LIMIT = "600/minute"
|
||||
|
||||
|
||||
def _signed_body(request: Request) -> dict[str, Any]:
|
||||
@@ -263,6 +264,19 @@ def _redact_public_event(event: dict) -> dict:
|
||||
return _redact_vote_gate(_redact_key_rotate_payload(_redact_gate_metadata(event)))
|
||||
|
||||
|
||||
def _infonet_private_transport_required() -> bool:
    """Return ``main._infonet_private_transport_required()`` coerced to ``bool``.

    ``main`` is imported at call time rather than at module import.
    """
    import main as main_module

    required = main_module._infonet_private_transport_required()
    return bool(required)
|
||||
|
||||
|
||||
def _infonet_sync_response_events(events: list[dict], request=None) -> list[dict]:
    """Build the sync event surface for the current transport policy.

    Thin forwarder: the work is done by ``main._infonet_sync_response_events``,
    with ``main`` imported at call time.
    """
    import main as main_module

    surfaced = main_module._infonet_sync_response_events(events, request=request)
    return surfaced
|
||||
|
||||
|
||||
def _trusted_gate_reply_to(event: dict) -> str:
|
||||
if not isinstance(event, dict):
|
||||
return ""
|
||||
@@ -574,6 +588,12 @@ def _hydrate_gate_store_from_chain(events: list[dict]) -> int:
|
||||
pass
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list[dict]) -> int:
    """Forward ``events`` to ``main._hydrate_dm_relay_from_chain`` and return its result as ``int``.

    ``main`` is imported at call time rather than at module import.
    """
    import main as main_module

    hydrated = main_module._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||
|
||||
# --- Safe type helpers ---
|
||||
|
||||
def _safe_int(val, default=0):
|
||||
@@ -1531,7 +1551,7 @@ async def infonet_locator(request: Request, limit: int = Query(32, ge=4, le=128)
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/sync")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
@mesh_write_exempt(MeshWriteExemption.PEER_GOSSIP)
|
||||
async def infonet_sync_post(
|
||||
request: Request,
|
||||
@@ -1584,8 +1604,7 @@ async def infonet_sync_post(
|
||||
elif matched_hash == GENESIS_HASH and len(locator) > 1:
|
||||
forked = True
|
||||
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
|
||||
response = {
|
||||
"events": events,
|
||||
@@ -1646,7 +1665,7 @@ async def mesh_rns_status(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/mesh/infonet/sync")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
async def infonet_sync(
|
||||
request: Request,
|
||||
after_hash: str = "",
|
||||
@@ -1684,8 +1703,7 @@ async def infonet_sync(
|
||||
)
|
||||
base = after_hash or GENESIS_HASH
|
||||
events = infonet.get_events_after(base, limit=limit)
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
return {
|
||||
"events": events,
|
||||
"after_hash": base,
|
||||
@@ -1724,6 +1742,7 @@ async def infonet_ingest(request: Request):
|
||||
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@@ -2279,6 +2298,12 @@ async def infonet_event(request: Request, event_id: str):
|
||||
)
|
||||
return _strip_gate_for_access(evt, access)
|
||||
return {"ok": False, "detail": "Event not found"}
|
||||
if evt.get("event_type") == "dm_message":
|
||||
return await _private_plane_refusal_response(
|
||||
request,
|
||||
status_code=403,
|
||||
payload=_private_plane_access_denied_payload(),
|
||||
)
|
||||
if evt.get("event_type") == "gate_message":
|
||||
gate_id = str(evt.get("payload", {}).get("gate", "") or evt.get("gate", "") or "").strip()
|
||||
access = _verify_gate_access(request, gate_id) if gate_id else ""
|
||||
@@ -2303,7 +2328,7 @@ async def infonet_node_events(
|
||||
from services.mesh.mesh_hashchain import infonet
|
||||
|
||||
events = infonet.get_events_by_node(node_id, limit=limit)
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
events = _redact_public_node_history(
|
||||
events,
|
||||
@@ -2328,7 +2353,7 @@ async def infonet_events_by_type(
|
||||
else:
|
||||
events = list(reversed(infonet.events))
|
||||
events = events[offset : offset + limit]
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
return {
|
||||
"events": events,
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Operator OSINT recon routes (server-side proxies, SSRF guarded)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint import lookups
|
||||
|
||||
router = APIRouter(dependencies=[Depends(require_local_operator)])
|
||||
|
||||
_ALLOWED_SCHEMAS = {
|
||||
"Person",
|
||||
"Organization",
|
||||
"Company",
|
||||
"Vessel",
|
||||
"Airplane",
|
||||
"LegalEntity",
|
||||
}
|
||||
|
||||
|
||||
class SweepScanRequest(BaseModel):
    """Request body for ``POST /api/osint/sweep/scan``."""

    # Required address string, 7 to 45 characters long.
    ip: str = Field(..., min_length=7, max_length=45)
    # Prefix length; only 24 through 32 are accepted, defaulting to 24.
    cidr: int = Field(24, ge=24, le=32)
|
||||
|
||||
|
||||
def _bad_request(exc: ValueError) -> HTTPException:
    """Build (not raise) an HTTP 400 error whose detail is the text of ``exc``."""
    message = str(exc)
    return HTTPException(status_code=400, detail=message)
|
||||
|
||||
|
||||
@router.get("/api/osint/ip")
@limiter.limit("20/minute")
async def osint_ip(
    request: Request,
    ip: str = Query(..., min_length=7, max_length=45),
) -> dict:
    """Delegate to ``lookups.lookup_ip``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_ip(ip)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/dns")
@limiter.limit("20/minute")
async def osint_dns(
    request: Request,
    domain: str = Query(..., min_length=4, max_length=253),
) -> dict:
    """Delegate to ``lookups.lookup_dns``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_dns(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/whois")
@limiter.limit("20/minute")
async def osint_whois(
    request: Request,
    domain: str = Query(..., min_length=4, max_length=253),
) -> dict:
    """Delegate to ``lookups.lookup_whois``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_whois(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/certs")
@limiter.limit("20/minute")
async def osint_certs(
    request: Request,
    domain: str = Query(..., min_length=4, max_length=253),
) -> dict:
    """Delegate to ``lookups.lookup_certs``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_certs(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/threats")
@limiter.limit("20/minute")
async def osint_threats(
    request: Request,
    query: str | None = Query(default=None, max_length=253),
) -> dict:
    """Delegate to ``lookups.lookup_threats``; ``query`` is optional and may be ``None``."""
    found = lookups.lookup_threats(query)
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/bgp")
@limiter.limit("20/minute")
async def osint_bgp(
    request: Request,
    query: str = Query(..., min_length=2, max_length=64),
) -> dict:
    """Delegate to ``lookups.lookup_bgp``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_bgp(query)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/sanctions")
@limiter.limit("20/minute")
async def osint_sanctions(
    request: Request,
    query: str = Query(..., min_length=4, max_length=200),
    schema: str | None = Query(default=None),
    limit: int = Query(default=25, ge=1, le=100),
) -> dict:
    """Delegate to ``lookups.lookup_sanctions`` after validating ``schema``.

    Raises:
        HTTPException: 400 when a non-empty ``schema`` is not one of
            ``_ALLOWED_SCHEMAS``.
    """
    schema_rejected = bool(schema) and schema not in _ALLOWED_SCHEMAS
    if schema_rejected:
        allowed = ", ".join(sorted(_ALLOWED_SCHEMAS))
        raise HTTPException(status_code=400, detail=f"Invalid schema. Allowed: {allowed}")
    matches = lookups.lookup_sanctions(query, schema=schema, limit=limit)
    return matches
|
||||
|
||||
|
||||
@router.get("/api/osint/cve")
@limiter.limit("30/minute")
async def osint_cve(
    request: Request,
    cve: str = Query(..., min_length=10, max_length=32),
) -> dict:
    """Delegate to ``lookups.lookup_cve``.

    Raises:
        HTTPException: for a ``ValueError`` from the lookup, 404 when its
            message contains "not found" (case-insensitive), otherwise 400.
    """
    try:
        record = lookups.lookup_cve(cve)
    except ValueError as err:
        message = str(err)
        status = 404 if "not found" in message.lower() else 400
        raise HTTPException(status_code=status, detail=message) from err
    return record
|
||||
|
||||
|
||||
@router.get("/api/osint/mac")
@limiter.limit("20/minute")
async def osint_mac(
    request: Request,
    mac: str = Query(..., min_length=5, max_length=32),
) -> dict:
    """Delegate to ``lookups.lookup_mac``; exceptions are not translated here."""
    found = lookups.lookup_mac(mac)
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/github")
@limiter.limit("20/minute")
async def osint_github(
    request: Request,
    username: str = Query(..., min_length=1, max_length=64),
) -> dict:
    """Delegate to ``lookups.lookup_github``; a ``ValueError`` becomes HTTP 404."""
    try:
        profile = lookups.lookup_github(username)
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err)) from err
    return profile
|
||||
|
||||
|
||||
@router.get("/api/osint/leaks")
@limiter.limit("10/minute")
async def osint_leaks(
    request: Request,
    email: str = Query(..., min_length=5, max_length=254),
) -> dict:
    """Delegate to ``lookups.lookup_leaks``; a ``ValueError`` becomes HTTP 400."""
    try:
        found = lookups.lookup_leaks(email)
    except ValueError as err:
        raise _bad_request(err) from err
    return found
|
||||
|
||||
|
||||
@router.get("/api/osint/sweep")
@limiter.limit("5/minute")
async def osint_sweep_init(
    request: Request,
    ip: str = Query(..., min_length=7, max_length=45),
    cidr: int = Query(default=24, ge=24, le=32),
) -> dict:
    """Delegate to ``lookups.sweep_init``; a ``ValueError`` becomes HTTP 400."""
    try:
        sweep = lookups.sweep_init(ip, cidr)
    except ValueError as err:
        raise _bad_request(err) from err
    return sweep
|
||||
|
||||
|
||||
@router.post("/api/osint/sweep/scan")
@limiter.limit("3/minute")
async def osint_sweep_scan(request: Request, payload: SweepScanRequest) -> dict:
    """Run a subnet scan and merge it with the sweep-init data.

    The response is the ``sweep_init`` dict overlaid with the ``sweep_scan``
    dict, plus a ``subnet`` string of the form ``<start>/<cidr>``. A
    ``ValueError`` from any of the lookups becomes HTTP 400.
    """
    prefix = payload.cidr
    try:
        subnet_start = lookups.subnet_start_for(payload.ip, prefix)
        scan_result = lookups.sweep_scan(subnet_start, prefix)
        init_result = lookups.sweep_init(payload.ip, prefix)
    except ValueError as err:
        raise _bad_request(err) from err
    # Later keys win: scan values override init values, ``subnet`` overrides both.
    merged = dict(init_result)
    merged.update(scan_result)
    merged["subnet"] = f"{subnet_start}/{prefix}"
    return merged
|
||||
@@ -0,0 +1,105 @@
|
||||
"""Road corridor Sentinel-2 freight trend endpoints (opt-in slow layer)."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from limiter import limiter
|
||||
from services.road_corridor_sat.config import optional_deps_available, road_corridor_sat_enabled
|
||||
from services.road_corridor_sat.credentials import sentinel_credentials_configured
|
||||
from services.road_corridor_sat.jobs import enqueue_analyze, get_job, get_latest_job, job_to_dict
|
||||
from services.road_corridor_sat.presets import CORRIDOR_PRESETS, get_preset
|
||||
from services.road_corridor_sat.storage import build_trends_payload, preset_metadata
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _status_payload() -> dict:
    """Collect the feature status dict shared by the road-corridor endpoints.

    ``active_job`` is the latest job rendered via ``job_to_dict`` when its
    status is ``queued`` or ``running``, otherwise ``None``.
    """
    latest = get_latest_job()
    status = {
        "enabled": road_corridor_sat_enabled(),
        "deps_installed": optional_deps_available(),
        "credentials_configured": sentinel_credentials_configured(),
        "preset_count": len(CORRIDOR_PRESETS),
        "attribution": "backend/third_party/drishx/NOTICE.md",
    }
    in_flight = latest and latest.status in {"queued", "running"}
    status["active_job"] = job_to_dict(latest) if in_flight else None
    return status
|
||||
|
||||
|
||||
def _require_analyze_ready() -> None:
    """Raise HTTP 503 unless on-demand analysis can run.

    Checks, in order, that the optional dependencies are available and that
    Sentinel credentials are configured; the first failing check decides the
    error detail.
    """
    prerequisites = (
        (
            optional_deps_available,
            "Install optional road-corridor dependencies (uv sync --extra road-corridor)",
        ),
        (
            sentinel_credentials_configured,
            "Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in Imagery settings",
        ),
    )
    for is_satisfied, remedy in prerequisites:
        if not is_satisfied():
            raise HTTPException(status_code=503, detail=remedy)
|
||||
|
||||
|
||||
class AnalyzeRequest(BaseModel):
    """Request body for ``POST /api/road-corridors/analyze``."""

    # Latitude, constrained to [-90, 90].
    lat: float = Field(..., ge=-90, le=90)
    # Longitude, constrained to [-180, 180].
    lon: float = Field(..., ge=-180, le=180)
    # Optional label, at most 120 characters.
    label: str | None = Field(None, max_length=120)
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/status")
@limiter.limit("60/minute")
async def road_corridors_status(request: Request) -> dict:
    """Return the ``_status_payload`` fields flattened next to ``ok: True``."""
    response = {"ok": True}
    response.update(_status_payload())
    return response
|
||||
|
||||
|
||||
@router.get("/api/road-corridors")
@limiter.limit("60/minute")
async def list_road_corridors(request: Request) -> dict:
    """Return the status, the corridor presets, and the trends payload."""
    status = _status_payload()
    trends = build_trends_payload()
    return dict(ok=True, status=status, presets=CORRIDOR_PRESETS, trends=trends)
|
||||
|
||||
|
||||
@router.post("/api/road-corridors/analyze")
@limiter.limit("6/minute")
async def analyze_road_corridor_here(request: Request, payload: AnalyzeRequest) -> dict:
    """Start an on-demand Sentinel-2 corridor analysis at map center.

    Raises:
        HTTPException: 503 from ``_require_analyze_ready``; 409 (with an
            ``X-Job-Id`` header) when ``enqueue_analyze`` reports
            ``analysis_already_running``.
        RuntimeError: any other ``RuntimeError`` from ``enqueue_analyze`` is
            re-raised unchanged.
    """
    _require_analyze_ready()
    try:
        job = enqueue_analyze(payload.lat, payload.lon, payload.label)
    except RuntimeError as err:
        if str(err) != "analysis_already_running":
            raise
        running = get_latest_job()
        running_id = running.job_id if running else ""
        raise HTTPException(
            status_code=409,
            detail="Analysis already in progress",
            headers={"X-Job-Id": running_id},
        ) from err
    return {"ok": True, **job_to_dict(job)}
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/analyze/status")
@limiter.limit("120/minute")
async def analyze_road_corridor_status(
    request: Request,
    job_id: str | None = Query(default=None),
) -> dict:
    """Report one analysis job: the one named by ``job_id``, else the latest.

    ``job`` is ``None`` in the response when no such job exists.
    """
    if job_id:
        job = get_job(job_id)
    else:
        job = get_latest_job()
    rendered = None if job is None else job_to_dict(job)
    return {"ok": True, "job": rendered}
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/{preset_id}")
@limiter.limit("60/minute")
async def get_road_corridor(preset_id: str, request: Request) -> dict:
    """Return the stored result for ``preset_id`` together with its preset.

    Raises:
        HTTPException: 404 when ``preset_metadata`` has nothing for the id.
    """
    meta = preset_metadata(preset_id)
    if meta is None:
        raise HTTPException(status_code=404, detail="Unknown corridor preset")
    # Ad-hoc viewport runs are stored on disk but not in CORRIDOR_PRESETS,
    # so ``preset`` may legitimately be None here.
    preset = get_preset(preset_id)
    return {"ok": True, "preset": preset, "result": meta, "status": _status_payload()}
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Supply-chain risk overlay."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.scm.suppliers import build_scm_payload
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/scm-suppliers")
@limiter.limit("30/minute")
async def scm_suppliers(
    request: Request,
    _: None = Depends(require_local_operator),
) -> dict:
    """Return whatever ``build_scm_payload`` produces (gated by ``require_local_operator``)."""
    supplier_payload = build_scm_payload()
    return supplier_payload
|
||||
+120
-10
@@ -85,7 +85,63 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
@router.get("/api/sentinel2/search")
|
||||
# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ───
|
||||
@router.get("/api/wikipedia/summary")
@limiter.limit("60/minute")
def api_wikipedia_summary(
    request: Request,
    title: str = Query(..., min_length=1, max_length=256),
):
    """Proxy Wikipedia REST summaries through the self-hosted backend.

    Returns the value of ``fetch_wikipedia_page_summary(title)``, or a 404
    JSON response with ``{"detail": "not_found"}`` when that value is ``None``.
    """
    from services.region_dossier import fetch_wikipedia_page_summary

    page_summary = fetch_wikipedia_page_summary(title)
    if page_summary is not None:
        return page_summary
    return JSONResponse(status_code=404, content={"detail": "not_found"})
|
||||
|
||||
|
||||
class WikidataSparqlRequest(BaseModel):
    """Request body for ``POST /api/wikidata/sparql``."""

    # Required SPARQL query text.
    query: str
|
||||
|
||||
|
||||
@router.post("/api/wikidata/sparql")
@limiter.limit("30/minute")
def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest):
    """Proxy Wikidata SPARQL so the browser never contacts query.wikidata.org.

    The query is stripped of surrounding whitespace and validated before
    anything is sent upstream.

    Returns:
        ``{"bindings": ...}`` with the value of
        ``fetch_wikidata_sparql_bindings`` for the stripped query.

    Raises:
        HTTPException: 400 when the query is empty after stripping or is
            longer than 12,000 characters.
    """
    from services.region_dossier import fetch_wikidata_sparql_bindings

    q = (body.query or "").strip()
    # An empty query cannot yield bindings; reject it instead of relaying it.
    if not q:
        raise HTTPException(400, "SPARQL query is empty")
    if len(q) > 12_000:
        raise HTTPException(400, "SPARQL query too large")
    bindings = fetch_wikidata_sparql_bindings(q)
    return {"bindings": bindings}
|
||||
|
||||
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
# operator-only helpers — they MUST NOT be callable by anonymous remote
|
||||
# users, because:
|
||||
#
|
||||
# * /api/sentinel/token — caller supplies their own Sentinel client_id +
|
||||
# client_secret. Without operator gating, the backend becomes a free
|
||||
# anonymous OAuth-mint relay for any Copernicus account.
|
||||
# * /api/sentinel/tile — same shape as the token route but for tile
|
||||
# imagery. Without gating, the backend acts as an anonymous quota and
|
||||
# bandwidth relay for Sentinel Hub Process API calls.
|
||||
# * /api/sentinel2/search — hits the Planetary Computer STAC search API
|
||||
# and falls back to Esri imagery. No caller credentials are involved,
|
||||
# but the route is still an anonymous external-search relay. We gate
|
||||
# it the same way for consistency with the rest of the operator-only
|
||||
# helper surface.
|
||||
#
|
||||
# Gating is via require_local_operator (loopback / bridge / admin key),
|
||||
# matching the same allowlist already used by /api/region-dossier and
|
||||
# the other operator helpers further up this file. Single-operator nodes
|
||||
# see no behavior change — their dashboard already lives on loopback or
|
||||
# the trusted Docker bridge, so it still resolves.
|
||||
@router.get("/api/sentinel2/search", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
def api_sentinel2_search(
|
||||
request: Request,
|
||||
@@ -97,18 +153,60 @@ def api_sentinel2_search(
|
||||
return search_sentinel2_scene(lat, lng)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token")
|
||||
# Issue #298 (tg12): Sentinel credentials moved server-side
|
||||
# ---------------------------------------------------------------------------
|
||||
# Previously the frontend kept Copernicus CDSE client_id + client_secret in
|
||||
# browser localStorage / sessionStorage and forwarded them on every tile
|
||||
# request through this proxy. That exposed real third-party credentials to
|
||||
# any same-origin script (XSS, malicious browser extension, dev-tools HAR
|
||||
# export).
|
||||
#
|
||||
# Resolution order (first match wins):
|
||||
# 1. Request body — kept for back-compat. A small number of legacy
|
||||
# operator setups may still post credentials; we don't break them.
|
||||
# 2. Backend .env — SENTINEL_CLIENT_ID / SENTINEL_CLIENT_SECRET, managed
|
||||
# through the existing /api/settings/api-keys flow (admin-gated).
|
||||
#
|
||||
# The frontend in ``sentinelHub.ts`` no longer reads browser storage and no
|
||||
# longer forwards credentials — every dashboard request now lands in (2).
|
||||
# The require_local_operator gate (added in #303/PR #303) stays — both layers
|
||||
# are independent: the gate blocks anonymous callers, the env fallback lets
|
||||
# legitimate (gated) callers omit credentials from the body.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _resolve_sentinel_credentials(body_id: str, body_secret: str) -> tuple[str, str]:
|
||||
"""Return (client_id, client_secret) using body values when present,
|
||||
otherwise falling back to backend .env. Empty strings if neither is set."""
|
||||
import os as _os
|
||||
cid = (body_id or "").strip() or (_os.environ.get("SENTINEL_CLIENT_ID", "") or "").strip()
|
||||
csec = (body_secret or "").strip() or (_os.environ.get("SENTINEL_CLIENT_SECRET", "") or "").strip()
|
||||
return cid, csec
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def api_sentinel_token(request: Request):
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block)."""
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block).
|
||||
|
||||
Credentials are resolved by ``_resolve_sentinel_credentials`` — body
|
||||
fields are honored for back-compat, otherwise the backend .env values
|
||||
populated through ``/api/settings/api-keys`` are used.
|
||||
"""
|
||||
import requests as req
|
||||
body = await request.body()
|
||||
from urllib.parse import parse_qs
|
||||
params = parse_qs(body.decode("utf-8"))
|
||||
client_id = params.get("client_id", [""])[0]
|
||||
client_secret = params.get("client_secret", [""])[0]
|
||||
body_id = params.get("client_id", [""])[0]
|
||||
body_secret = params.get("client_secret", [""])[0]
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(400, "client_id and client_secret required")
|
||||
# Friendly, non-hostile error — points the operator at the place
|
||||
# they configure other API keys instead of just saying "required".
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
||||
try:
|
||||
resp = await asyncio.to_thread(req.post, token_url,
|
||||
@@ -152,7 +250,7 @@ import os as _os
|
||||
_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/tile")
|
||||
@router.post("/api/sentinel/tile", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("300/minute")
|
||||
async def api_sentinel_tile(request: Request):
|
||||
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
||||
@@ -163,8 +261,11 @@ async def api_sentinel_tile(request: Request):
|
||||
except Exception:
|
||||
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
||||
|
||||
client_id = body.get("client_id", "")
|
||||
client_secret = body.get("client_secret", "")
|
||||
# Issue #298: same resolution order as /api/sentinel/token — body
|
||||
# values for back-compat, otherwise backend .env.
|
||||
body_id = body.get("client_id", "")
|
||||
body_secret = body.get("client_secret", "")
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
preset = body.get("preset", "TRUE-COLOR")
|
||||
date_str = body.get("date", "")
|
||||
z = body.get("z", 0)
|
||||
@@ -172,7 +273,16 @@ async def api_sentinel_tile(request: Request):
|
||||
y = body.get("y", 0)
|
||||
|
||||
if not client_id or not client_secret or not date_str:
|
||||
raise HTTPException(400, "client_id, client_secret, and date required")
|
||||
# Distinguish "no creds" from "no date" so the operator knows
|
||||
# what to fix. Same friendly pointer as the /token route.
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
raise HTTPException(400, "date required")
|
||||
|
||||
now = _time.time()
|
||||
credential_fp = _credential_fingerprint(client_id, client_secret)
|
||||
|
||||
@@ -160,8 +160,13 @@ router = APIRouter()
|
||||
|
||||
# --- Constants ---
|
||||
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
||||
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||
# "is this on?" boolean. Transport choice, anonymous-mode state, and
|
||||
# the named privacy profile were all leaking actionable recon to
|
||||
# unauthenticated callers and are now gated behind authenticated reads.
|
||||
# See the matching block in backend/main.py for the full rationale.
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||
@@ -303,6 +308,10 @@ class WormholeDmDecryptRequest(BaseModel):
|
||||
session_welcome: str | None = None
|
||||
|
||||
|
||||
class WormholeDmMlsKeyPackageRequest(BaseModel):
    """Request body for ``POST /api/wormhole/dm/mls-key-package``."""

    # Alias whose DM key package is exported; the route strips surrounding
    # whitespace before use.
    alias: str
|
||||
|
||||
|
||||
class WormholeDmResetRequest(BaseModel):
|
||||
peer_id: str | None = None
|
||||
|
||||
@@ -321,6 +330,14 @@ class WormholeDmBootstrapDecryptRequest(BaseModel):
|
||||
ciphertext: str
|
||||
|
||||
|
||||
class WormholeDmConnectContactRequest(BaseModel):
    """Request body for ``POST /api/wormhole/dm/connect-contact``.

    Every field is optional; the route forwards them as keyword arguments
    to ``send_contact_request``.
    """

    lookup_token: str = ""
    peer_id: str = ""
    note: str = ""
    lookup_peer_url: str = ""
    # Forwarded as a plain dict when non-empty, otherwise as None.
    cached_prekey_bundle: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WormholeDmInviteImportRequest(BaseModel):
|
||||
invite: dict[str, Any]
|
||||
alias: str = ""
|
||||
@@ -1080,7 +1097,21 @@ async def api_wormhole_dm_bootstrap_decrypt(request: Request, body: WormholeDmBo
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/sender-token", dependencies=[Depends(require_admin)])
|
||||
@router.post("/api/wormhole/dm/connect-contact", dependencies=[Depends(require_local_operator)])
@limiter.limit("30/minute")
async def api_wormhole_dm_connect_contact(request: Request, body: WormholeDmConnectContactRequest):
    """Send a DM contact request built from the posted fields.

    String fields are coerced to ``str`` with falsy values collapsing to
    ``""``. The cached prekey bundle is copied into a fresh dict when it is
    non-empty and passed as ``None`` otherwise. The result of
    ``send_contact_request`` is returned unchanged.
    """
    from services.openclaw_infonet import send_contact_request

    bundle = body.cached_prekey_bundle
    return send_contact_request(
        lookup_token=str(body.lookup_token or ""),
        peer_id=str(body.peer_id or ""),
        note=str(body.note or ""),
        lookup_peer_url=str(body.lookup_peer_url or ""),
        cached_prekey_bundle=dict(bundle) if bundle else None,
    )
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/sender-token", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def api_wormhole_dm_sender_token(request: Request, body: WormholeDmSenderTokenRequest):
|
||||
if _safe_int(body.count or 1, 1) > 1:
|
||||
@@ -1223,6 +1254,23 @@ async def api_wormhole_dm_decrypt(request: Request, body: WormholeDmDecryptReque
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/mls-key-package", dependencies=[Depends(require_admin)])
@limiter.limit("60/minute")
async def api_wormhole_dm_mls_key_package(request: Request, body: WormholeDmMlsKeyPackageRequest):
    """Export the DM key package for the alias named in the request body.

    The alias is coerced to ``str`` and stripped of surrounding whitespace
    before being handed to ``export_dm_key_package_for_alias``, whose result
    is returned unchanged.
    """
    from services.mesh.mesh_dm_mls import export_dm_key_package_for_alias

    alias = str(body.alias or "").strip()
    return export_dm_key_package_for_alias(alias)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/mls-reset", dependencies=[Depends(require_admin)])
@limiter.limit("30/minute")
async def api_wormhole_dm_mls_reset(request: Request):
    """Reset DM MLS state, clearing both privacy-core and persisted data.

    Always answers ``{"ok": True}`` once the reset helper returns.
    """
    from services.mesh.mesh_dm_mls import reset_dm_mls_state

    reset_dm_mls_state(
        clear_privacy_core=True,
        clear_persistence=True,
    )
    acknowledgement = {"ok": True}
    return acknowledgement
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/reset", dependencies=[Depends(require_admin)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_dm_reset(request: Request, body: WormholeDmResetRequest):
|
||||
@@ -1282,7 +1330,25 @@ async def api_wormhole_dm_contact_delete(request: Request, peer_id: str):
|
||||
return {"ok": True, "peer_id": peer_id, "deleted": deleted}
|
||||
|
||||
|
||||
_WORMHOLE_PUBLIC_FIELDS = {"installed", "configured", "running", "ready"}
|
||||
@router.post("/api/wormhole/dm/contact/{peer_id}/sever", dependencies=[Depends(require_admin)])
@limiter.limit("60/minute")
async def api_wormhole_dm_contact_sever(request: Request, peer_id: str):
    """Sever the DM contact identified by ``peer_id``.

    The JSON body is optional: a missing, malformed or non-object body is
    treated as ``{}``. Its ``block`` entry (default ``False``) is coerced
    with ``bool`` and forwarded to the helper. A ``ValueError`` from the
    helper is reported as ``{"ok": False, "detail": <message>}`` rather than
    as an HTTP error.
    """
    from services.mesh.mesh_wormhole_contacts import sever_wormhole_dm_contact

    try:
        payload = await request.json()
    except Exception:
        # Best-effort parse: any failure means "no options supplied".
        payload = {}
    options = payload if isinstance(payload, dict) else {}
    should_block = bool(options.get("block", False))

    try:
        return sever_wormhole_dm_contact(peer_id, block=should_block)
    except ValueError as exc:
        return {"ok": False, "detail": str(exc)}
|
||||
|
||||
|
||||
_WORMHOLE_PUBLIC_FIELDS = {"installed", "configured", "running", "ready", "arti_ready"}
|
||||
|
||||
|
||||
def _redact_wormhole_status(state: dict[str, Any], authenticated: bool) -> dict[str, Any]:
|
||||
@@ -1303,6 +1369,25 @@ async def api_wormhole_status(request: Request):
|
||||
return await _m.api_wormhole_status(request)
|
||||
|
||||
|
||||
@router.get(
    "/api/wormhole/private-delivery/{item_id}",
    dependencies=[Depends(require_local_operator)],
)
@limiter.limit("120/minute")
async def api_wormhole_private_delivery_item(request: Request, item_id: str):
    """Look up one private-delivery outbox item by id.

    The metadata exposure is computed for this request with
    ``authenticated=True`` and passed to the outbox lookup. A missing item
    yields HTTP 404 (``private_delivery_item_not_found``); otherwise the
    item is returned as ``{"ok": True, "item": item}``.
    """
    from services.mesh.mesh_metadata_exposure import metadata_exposure_for_request
    from services.mesh.mesh_private_outbox import private_delivery_outbox

    exposure = metadata_exposure_for_request(request, authenticated=True)
    item = private_delivery_outbox.get_item(item_id, exposure=exposure)
    if item is not None:
        return {"ok": True, "item": item}
    raise HTTPException(status_code=404, detail="private_delivery_item_not_found")
|
||||
|
||||
|
||||
@router.post("/api/wormhole/private-delivery/{item_id}/action", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_private_delivery_action(
|
||||
|
||||
@@ -20,7 +20,17 @@ OUT_PATH = Path(__file__).parent.parent / "data" / "power_plants.json"
|
||||
|
||||
def main() -> None:
|
||||
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
# Round 7a: release-time data refresher. Uses the per-operator UA if
|
||||
# available, otherwise a release-script-specific identifier. This
|
||||
# script is run by the maintainer at release time, NOT at runtime,
|
||||
# so an aggregate UA is acceptable; we still use the helper so the
|
||||
# behavior matches the rest of the project.
|
||||
try:
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua = outbound_user_agent("release-script-power-plants")
|
||||
except Exception:
|
||||
ua = "operator-release-script (purpose: power-plants)"
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
|
||||
|
||||
@@ -167,6 +167,11 @@ def cmd_hash(args: argparse.Namespace) -> int:
|
||||
print("")
|
||||
print("Updater pin:")
|
||||
print(f"MESH_UPDATE_SHA256={digest}")
|
||||
print("")
|
||||
print("Release checklist:")
|
||||
print(" - add this digest to SHA256SUMS.txt for the GitHub release")
|
||||
print(" - add/update backend/data/release_digests.json for bundled updater verification")
|
||||
print(" - keep MESH_UPDATE_SHA256 available as the operator override path")
|
||||
return 0 if asset_matches else 2
|
||||
|
||||
|
||||
|
||||
@@ -92,18 +92,37 @@ SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token
|
||||
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
||||
|
||||
if [[ -n "$TEXT_FILES" ]]; then
|
||||
# Known-public exclusions: lines matching `<host-or-ip> ssh-<algo> <key>`
|
||||
# are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
|
||||
# by definition safe to commit (the whole point of pinning known_hosts
|
||||
# is to publish the fingerprint widely so MITM is detectable). Filter
|
||||
# these out before flagging the file.
|
||||
KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
|
||||
|
||||
# Use grep with file list, skip missing/binary, limit output
|
||||
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
||||
if [[ -n "$CONTENT_HITS" ]]; then
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo "$CONTENT_HITS" | while read -r f; do
|
||||
echo -e " ${RED}$f${NC}"
|
||||
# Show first matching line for context
|
||||
grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | head -2 | while read -r line; do
|
||||
echo -e " ${YELLOW}$line${NC}"
|
||||
done
|
||||
done
|
||||
FOUND=1
|
||||
REAL_HITS=""
|
||||
REAL_REPORT=""
|
||||
while IFS= read -r f; do
|
||||
[[ -z "$f" ]] && continue
|
||||
# Re-grep this file, but filter out known_hosts-style lines.
|
||||
FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "$KNOWN_HOSTS_LINE" || true)
|
||||
if [[ -n "$FILE_HITS" ]]; then
|
||||
REAL_HITS+="$f"$'\n'
|
||||
REAL_REPORT+=" ${RED}$f${NC}"$'\n'
|
||||
# Show first 2 matching lines for context
|
||||
while IFS= read -r line; do
|
||||
[[ -z "$line" ]] && continue
|
||||
REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
|
||||
done < <(echo "$FILE_HITS" | head -2)
|
||||
fi
|
||||
done <<< "$CONTENT_HITS"
|
||||
if [[ -n "$REAL_HITS" ]]; then
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo -en "$REAL_REPORT"
|
||||
FOUND=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
"""Operator settings for the embedded agent shell (working directory)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SETTINGS_FILE = Path(__file__).resolve().parent.parent / "data" / "agent_shell_settings.json"
|
||||
_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _default_working_directory() -> str:
|
||||
explicit = str(os.environ.get("AGENT_SHELL_DEFAULT_CWD") or "").strip()
|
||||
if explicit and os.path.isdir(explicit):
|
||||
return explicit
|
||||
home = str(os.environ.get("HOME") or "").strip()
|
||||
if home and home != "/nonexistent" and os.path.isdir(home):
|
||||
return home
|
||||
return "/app"
|
||||
|
||||
|
||||
def get_agent_shell_settings() -> dict[str, Any]:
    """Return the agent shell settings as ``{"working_directory": <path>}``.

    The stored value is read from ``_SETTINGS_FILE`` under ``_LOCK``. The
    default working directory is used when the file is missing, cannot be
    read or parsed, does not contain a JSON object, or holds an empty or
    missing ``working_directory`` entry.
    """
    with _LOCK:
        if not _SETTINGS_FILE.exists():
            return {"working_directory": _default_working_directory()}
        try:
            payload = json.loads(_SETTINGS_FILE.read_text(encoding="utf-8"))
        except Exception:
            logger.warning("agent_shell_settings_unreadable")
            return {"working_directory": _default_working_directory()}
        # json.loads accepts any JSON value; a list, string, number or null
        # has no .get(), so treat it the same as an unreadable file instead
        # of letting AttributeError escape.
        if not isinstance(payload, dict):
            logger.warning("agent_shell_settings_unreadable")
            return {"working_directory": _default_working_directory()}
        cwd = str(payload.get("working_directory") or "").strip() or _default_working_directory()
        return {"working_directory": cwd}
|
||||
|
||||
|
||||
def set_agent_shell_working_directory(path: str) -> dict[str, Any]:
    """Persist a new working directory for the agent shell.

    ``path`` is stripped, ``~``-expanded and made absolute. The settings
    file (and its parent directory, if needed) is written under ``_LOCK``.

    Returns:
        ``{"working_directory": <absolute path>}``.

    Raises:
        ValueError: ``working_directory_required`` when ``path`` is empty or
            blank; ``working_directory_not_found`` when the resolved path is
            not an existing directory.
    """
    requested = str(path or "").strip()
    if not requested:
        raise ValueError("working_directory_required")

    target = os.path.abspath(os.path.expanduser(requested))
    if not os.path.isdir(target):
        raise ValueError("working_directory_not_found")

    settings = {"working_directory": target}
    serialized = json.dumps(settings, indent=2) + "\n"
    with _LOCK:
        _SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
        _SETTINGS_FILE.write_text(serialized, encoding="utf-8")
    return settings
|
||||
@@ -350,19 +350,58 @@ _proxy_process = None
|
||||
# path during an upstream cert outage. Surfaced via ais_proxy_status() for
|
||||
# /api/health.
|
||||
_proxy_status: dict = {}
|
||||
# Upstream-connectivity telemetry (added when stream.aisstream.io went fully
|
||||
# offline on 2026-05-23). ``_last_msg_at`` is the unix timestamp of the most
|
||||
# recent vessel message received from the proxy. ``_proxy_spawn_count`` is
|
||||
# how many times we've started the node proxy; combined with no recent
|
||||
# messages it tells us the proxy is respawning in a tight loop because the
|
||||
# upstream is unreachable. Surfaced via ais_proxy_status() so the operator
|
||||
# can see "AIS is dead" instead of guessing whether it's their map filter,
|
||||
# their api key, or upstream.
|
||||
_last_msg_at: float = 0.0
|
||||
_proxy_spawn_count: int = 0
|
||||
_VESSEL_TRAIL_INTERVAL_S = 120
|
||||
_VESSEL_TRAIL_MAX_POINTS = 240
|
||||
|
||||
|
||||
def ais_proxy_status() -> dict:
|
||||
"""Return a copy of the latest ais_proxy.js status (issue #258).
|
||||
# How stale "last vessel message" can be before we consider the stream
|
||||
# disconnected. AISStream typically pushes multiple messages/sec, so a 60s
|
||||
# gap means something's wrong upstream or in transit.
|
||||
_AIS_CONNECTED_FRESHNESS_S = 60
|
||||
|
||||
Currently surfaces ``degraded_tls`` (bool) which is true when the
|
||||
proxy is using SPKI-pinned fallback because AISStream's cert expired.
|
||||
Returns an empty dict when no status has been received yet.
|
||||
|
||||
def ais_proxy_status() -> dict:
|
||||
"""Return a copy of the latest ais_proxy.js status + connectivity health.
|
||||
|
||||
Fields:
|
||||
* ``degraded_tls`` (bool, issue #258) — true when the proxy is using
|
||||
SPKI-pinned fallback because AISStream's cert expired.
|
||||
* ``connected`` (bool) — true when we received a vessel message in
|
||||
the last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
|
||||
* ``last_msg_age_seconds`` (int | None) — seconds since the last
|
||||
vessel message; None if we've never received one.
|
||||
* ``proxy_spawn_count`` (int) — how many times we've spawned the
|
||||
node proxy. Sustained increases here without ``connected`` means
|
||||
we're respawning in a tight loop because upstream is dead.
|
||||
|
||||
Returns an empty dict when called before the AIS subsystem starts
|
||||
(e.g. during tests or when no API key is set).
|
||||
"""
|
||||
with _vessels_lock:
|
||||
return dict(_proxy_status)
|
||||
status = dict(_proxy_status)
|
||||
last = _last_msg_at
|
||||
spawns = _proxy_spawn_count
|
||||
|
||||
now = time.time()
|
||||
if last > 0:
|
||||
last_age = int(now - last)
|
||||
status["last_msg_age_seconds"] = last_age
|
||||
status["connected"] = last_age <= _AIS_CONNECTED_FRESHNESS_S
|
||||
else:
|
||||
status["last_msg_age_seconds"] = None
|
||||
status["connected"] = False
|
||||
status["proxy_spawn_count"] = spawns
|
||||
return status
|
||||
|
||||
import os
|
||||
|
||||
@@ -588,8 +627,10 @@ def _ais_stream_loop():
|
||||
env=proxy_env,
|
||||
**popen_kwargs,
|
||||
)
|
||||
global _proxy_spawn_count
|
||||
with _vessels_lock:
|
||||
_proxy_process = process
|
||||
_proxy_spawn_count += 1
|
||||
|
||||
# Drain stderr in a background thread to prevent deadlock
|
||||
import threading
|
||||
@@ -645,9 +686,15 @@ def _ais_stream_loop():
|
||||
if not mmsi:
|
||||
continue
|
||||
|
||||
# Telemetry: stamp the timestamp of the most recent real
|
||||
# vessel message. ais_proxy_status() reads this to decide
|
||||
# whether the stream is currently "connected" — i.e. has
|
||||
# any data flowed in the last 60s.
|
||||
global _last_msg_at
|
||||
with _vessels_lock:
|
||||
_last_msg_at = time.time()
|
||||
if mmsi not in _vessels:
|
||||
_vessels[mmsi] = {"_updated": time.time()}
|
||||
_vessels[mmsi] = {"_updated": _last_msg_at}
|
||||
vessel = _vessels[mmsi]
|
||||
|
||||
# Update position from PositionReport or StandardClassBPositionReport
|
||||
|
||||
@@ -51,6 +51,15 @@ API_REGISTRY = [
|
||||
"url": "https://aisstream.io/",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"id": "gfw_api_token",
|
||||
"env_key": "GFW_API_TOKEN",
|
||||
"name": "Global Fishing Watch",
|
||||
"description": "Bearer token for Global Fishing Watch fishing-vessel activity events (Fishing Activity map layer). Free registration at globalfishingwatch.org.",
|
||||
"category": "Maritime",
|
||||
"url": "https://globalfishingwatch.org/our-apis/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "adsb_lol",
|
||||
"env_key": None,
|
||||
@@ -150,6 +159,31 @@ API_REGISTRY = [
|
||||
"url": "https://finnhub.io/register",
|
||||
"required": False,
|
||||
},
|
||||
# Issue #298 (tg12): Sentinel Hub / Copernicus Data Space Ecosystem
|
||||
# credentials were previously held in browser localStorage / sessionStorage
|
||||
# by the Settings panel. Moved server-side to the same .env-backed
|
||||
# store every other third-party API key lives in. The Sentinel proxy
|
||||
# routes (POST /api/sentinel/token, /tile) now fall back to these
|
||||
# env values when the request body omits credentials — see
|
||||
# backend/routers/tools.py for the resolution order.
|
||||
{
|
||||
"id": "sentinel_client_id",
|
||||
"env_key": "SENTINEL_CLIENT_ID",
|
||||
"name": "Sentinel Hub / Copernicus — Client ID",
|
||||
"description": "OAuth2 client ID for Copernicus Data Space Ecosystem (CDSE). Required for the Sentinel-2 imagery overlay and the right-click Sentinel-2 Intel Card. Sign in at dataspace.copernicus.eu and create OAuth credentials.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "sentinel_client_secret",
|
||||
"env_key": "SENTINEL_CLIENT_SECRET",
|
||||
"name": "Sentinel Hub / Copernicus — Client Secret",
|
||||
"description": "OAuth2 client secret paired with the Client ID above. Used by the backend to mint short-lived access tokens against the CDSE identity provider. Stored in the backend .env; never sent to the browser.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
|
||||
ALLOWED_ENV_KEYS = {
|
||||
|
||||
+407
-173
@@ -1,46 +1,90 @@
|
||||
"""
|
||||
Carrier Strike Group OSINT Tracker
|
||||
===================================
|
||||
Scrapes multiple OSINT sources to maintain current estimated positions
|
||||
for US Navy Carrier Strike Groups. Updates on startup + 00:00 & 12:00 UTC.
|
||||
Maintains estimated positions for US Navy Carrier Strike Groups with
|
||||
honest provenance and freshness signals.
|
||||
|
||||
Sources:
|
||||
1. GDELT News API — recent carrier movement headlines
|
||||
2. WikiVoyage / public port-call databases
|
||||
3. Fallback — last-known or static OSINT estimates
|
||||
Issues #244 / #245 / #246 (tg12 external audit):
|
||||
|
||||
The previous implementation baked a snapshot of USNI News Fleet &
|
||||
Marine Tracker positions (March 9, 2026) into the registry as
|
||||
``fallback_lat``/``fallback_lng`` and stamped ``updated = now()``
|
||||
every time the dossier was rendered. That presented stale editorial
|
||||
data as live state. It also persisted GDELT-derived positions to the
|
||||
on-disk cache with no freshness signal, so a single news mention from
|
||||
months ago could keep overriding the (already-stale) registry default
|
||||
indefinitely.
|
||||
|
||||
Architecture after this PR:
|
||||
|
||||
::
|
||||
|
||||
backend/data/carrier_seed.json read-only, shipped with image,
|
||||
used ONCE on first-ever startup
|
||||
to bootstrap carrier_cache.json.
|
||||
|
||||
backend/data/carrier_cache.json mutable, lives in the runtime data
|
||||
volume, written by every GDELT
|
||||
refresh + any future source.
|
||||
|
||||
Startup flow:
|
||||
|
||||
1. ``carrier_cache.json`` exists? → load it.
|
||||
2. Otherwise, copy ``carrier_seed.json`` → ``carrier_cache.json``,
|
||||
then load it. (This happens once, ever, per install.)
|
||||
3. Background: GDELT fetch runs. Any carrier mentioned in fresh news
|
||||
gets its entry replaced with the news-derived position.
|
||||
``position_source_at`` is set to the news article timestamp.
|
||||
|
||||
Freshness is a *labelling* decision, not an eviction decision:
|
||||
|
||||
- ``position_source_at`` within the configurable freshness window
|
||||
(default 14 days) → ``position_confidence = "recent"``.
|
||||
- Older than that → ``position_confidence = "stale"``.
|
||||
- Bootstrapped from the seed file (never updated) → ``"seed"``.
|
||||
- No cache entry at all (e.g. a carrier added to the registry after
|
||||
first install) → carrier renders at its homeport with
|
||||
``"homeport_default"``.
|
||||
|
||||
Carriers are never hidden, never teleported, never disappeared. The
|
||||
position the user sees is always the last position the system actually
|
||||
observed, with an honest "as-of" timestamp the UI can render however
|
||||
it likes. A year from now, the runtime cache reflects whatever this
|
||||
install has observed via GDELT — not the seed snapshot.
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import random
|
||||
from datetime import datetime, timezone
|
||||
import shutil
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Carrier registry: hull number → metadata + fallback position
|
||||
# Carrier registry: hull number → identity only.
|
||||
#
|
||||
# Issue #244 (tg12): the previous registry carried hard-coded
|
||||
# ``fallback_lat``/``fallback_lng`` that were dated editorial
|
||||
# snapshots from a 2026-03-09 article. Those fields are DELETED. The
|
||||
# registry is now identity + homeport only; positions are sourced
|
||||
# exclusively from carrier_cache.json (and via that, from the
|
||||
# bootstrap seed or live OSINT).
|
||||
# -----------------------------------------------------------------
|
||||
CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
|
||||
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
|
||||
# --- Bremerton, WA (Naval Base Kitsap) ---
|
||||
# Distinct pier positions along Sinclair Inlet so carriers don't stack
|
||||
"CVN-68": {
|
||||
"name": "USS Nimitz (CVN-68)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5535,
|
||||
"homeport_lng": -122.6400,
|
||||
"fallback_lat": 47.5535,
|
||||
"fallback_lng": -122.6400,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Maintenance)",
|
||||
},
|
||||
"CVN-76": {
|
||||
"name": "USS Ronald Reagan (CVN-76)",
|
||||
@@ -48,23 +92,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5580,
|
||||
"homeport_lng": -122.6360,
|
||||
"fallback_lat": 47.5580,
|
||||
"fallback_lng": -122.6360,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Decommissioning)",
|
||||
},
|
||||
# --- Norfolk, VA (Naval Station Norfolk) ---
|
||||
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
|
||||
"CVN-69": {
|
||||
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9465,
|
||||
"homeport_lng": -76.3265,
|
||||
"fallback_lat": 36.9465,
|
||||
"fallback_lng": -76.3265,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
},
|
||||
"CVN-78": {
|
||||
"name": "USS Gerald R. Ford (CVN-78)",
|
||||
@@ -72,10 +107,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9505,
|
||||
"homeport_lng": -76.3250,
|
||||
"fallback_lat": 18.0,
|
||||
"fallback_lng": 39.5,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
"CVN-74": {
|
||||
"name": "USS John C. Stennis (CVN-74)",
|
||||
@@ -83,10 +114,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9540,
|
||||
"homeport_lng": -76.3235,
|
||||
"fallback_lat": 36.98,
|
||||
"fallback_lng": -76.43,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
},
|
||||
"CVN-75": {
|
||||
"name": "USS Harry S. Truman (CVN-75)",
|
||||
@@ -94,10 +121,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9580,
|
||||
"homeport_lng": -76.3220,
|
||||
"fallback_lat": 36.0,
|
||||
"fallback_lng": 15.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
},
|
||||
"CVN-77": {
|
||||
"name": "USS George H.W. Bush (CVN-77)",
|
||||
@@ -105,23 +128,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9620,
|
||||
"homeport_lng": -76.3210,
|
||||
"fallback_lat": 36.5,
|
||||
"fallback_lng": -74.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
},
|
||||
# --- San Diego, CA (Naval Base San Diego) ---
|
||||
# Carrier piers along the east shore of San Diego Bay, spread N-S
|
||||
"CVN-70": {
|
||||
"name": "USS Carl Vinson (CVN-70)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6840,
|
||||
"homeport_lng": -117.1290,
|
||||
"fallback_lat": 32.6840,
|
||||
"fallback_lng": -117.1290,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Homeport)",
|
||||
},
|
||||
"CVN-71": {
|
||||
"name": "USS Theodore Roosevelt (CVN-71)",
|
||||
@@ -129,10 +143,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6885,
|
||||
"homeport_lng": -117.1280,
|
||||
"fallback_lat": 32.6885,
|
||||
"fallback_lng": -117.1280,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Maintenance)",
|
||||
},
|
||||
"CVN-72": {
|
||||
"name": "USS Abraham Lincoln (CVN-72)",
|
||||
@@ -140,10 +150,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6925,
|
||||
"homeport_lng": -117.1275,
|
||||
"fallback_lat": 20.0,
|
||||
"fallback_lng": 64.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
# --- Yokosuka, Japan (CFAY) ---
|
||||
"CVN-73": {
|
||||
@@ -152,16 +158,18 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Yokosuka, Japan",
|
||||
"homeport_lat": 35.2830,
|
||||
"homeport_lng": 139.6700,
|
||||
"fallback_lat": 35.2830,
|
||||
"fallback_lng": 139.6700,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "Yokosuka, Japan (Forward deployed)",
|
||||
},
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Region → approximate center coordinates
|
||||
# Used to map textual geographic descriptions to lat/lng
|
||||
# Region → approximate center coordinates.
|
||||
#
|
||||
# Issue #245 (tg12): converting a region name straight into precise
|
||||
# map coordinates is false precision. We still use this table to
|
||||
# infer a coarse position from a headline mention, but the resulting
|
||||
# carrier object is now stamped ``position_confidence = "approximate"``
|
||||
# so the UI can render an uncertainty radius / dimmed icon. The
|
||||
# centroid is a best-effort midpoint of the named body of water.
|
||||
# -----------------------------------------------------------------
|
||||
REGION_COORDS: Dict[str, tuple] = {
|
||||
# Oceans & Seas
|
||||
@@ -220,9 +228,39 @@ REGION_COORDS: Dict[str, tuple] = {
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Cache file for persisting positions between restarts
|
||||
# Files
|
||||
# -----------------------------------------------------------------
|
||||
CACHE_FILE = Path(__file__).parent.parent / "carrier_cache.json"
|
||||
#
|
||||
# The seed lives in the read-only image data dir (it ships with each
|
||||
# release). The cache lives in the same data dir but is written at
|
||||
# runtime; under Docker compose this dir is volume-mounted so the
|
||||
# cache persists across container restarts, which is the whole point
|
||||
# of the seed-then-observe model — the user's runtime observations
|
||||
# survive image upgrades.
|
||||
SEED_FILE = Path(__file__).parent.parent / "data" / "carrier_seed.json"
|
||||
CACHE_FILE = Path(__file__).parent.parent / "data" / "carrier_cache.json"
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Freshness window for position_confidence labeling. Issue #246 (tg12):
|
||||
# previously persisted cache entries had no freshness signal at all.
|
||||
# After this change, the position itself is preserved (we never lose
|
||||
# what was last observed) but the confidence label flips from
|
||||
# "recent" to "stale" once the underlying source is older than this
|
||||
# window. Operator-overridable via env var.
|
||||
# -----------------------------------------------------------------
|
||||
_DEFAULT_FRESHNESS_WINDOW_DAYS = 14
|
||||
|
||||
|
||||
def _freshness_window_days() -> int:
    """Return the freshness window, in days, used for confidence labels.

    The value comes from the ``SHADOWBROKER_CARRIER_FRESHNESS_DAYS``
    environment variable. When that variable is unset, blank, not an
    integer, or not strictly positive, the module default is returned.
    """
    configured = os.environ.get("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "")
    text = str(configured or "").strip()
    if text:
        try:
            days = int(text)
        except (TypeError, ValueError):
            # Unparseable override: treat it like "no override".
            days = 0
        if days > 0:
            return days
    return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
|
||||
|
||||
_carrier_positions: Dict[str, dict] = {}
|
||||
_positions_lock = threading.Lock()
|
||||
@@ -234,25 +272,159 @@ _GDELT_REQUEST_DELAY_SECONDS = 1.25
|
||||
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _parse_iso(ts: str) -> Optional[datetime]:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    Accepts text such as ``2026-03-09T12:00:00+00:00`` or
    ``2026-03-09T12:00:00Z``; surrounding whitespace is ignored and the
    UTC suffix may be upper or lower case. A timestamp without an offset
    is assumed to be UTC.

    Returns ``None`` for empty, non-string or unparseable input, so the
    caller never has to guard against an exception.
    """
    if not isinstance(ts, str):
        # Hand-edited cache files can carry numbers or nulls here.
        return None
    text = ts.strip()
    if not text:
        return None
    # datetime.fromisoformat only understands a trailing "Z" from Python
    # 3.11 on, so rewrite it as an explicit UTC offset first.
    if text[-1] in "Zz":
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
||||
|
||||
|
||||
def _compute_position_confidence(entry: dict, *, now: Optional[datetime] = None) -> str:
    """Return the public confidence label for a carrier cache entry.

    Rules, in order:
      - a stored "seed" or "homeport_default" label is returned as-is;
        these never age.
      - a stored "approximate" label stays "approximate" unless the entry
        has a parseable ``position_source_at`` older than the freshness
        window, in which case it becomes "stale_approximate".
      - any other entry is "recent" when its ``position_source_at`` is
        within the freshness window, otherwise "stale" (including when
        the timestamp is missing or unparseable).

    ``now`` overrides the reference time; it defaults to the current UTC
    time.
    """
    label = str(entry.get("position_confidence", "") or "").strip()

    # Provenance labels that describe HOW the position was obtained and
    # carry no real observation time.
    if label in ("seed", "homeport_default"):
        return label

    is_approximate = label == "approximate"
    observed_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
    if observed_at is None:
        # Undated: an approximate entry keeps its label, anything else
        # cannot be shown as fresh.
        return "approximate" if is_approximate else "stale"

    age = (now or datetime.now(timezone.utc)) - observed_at
    expired = age > timedelta(days=_freshness_window_days())
    if is_approximate:
        return "stale_approximate" if expired else "approximate"
    return "stale" if expired else "recent"
|
||||
|
||||
|
||||
def _load_seed() -> Dict[str, dict]:
    """Read the seed file and return its hull→entry mapping.

    The mapping is taken from the top-level ``"carriers"`` key (the
    wrapper itself is not returned). A missing file, unreadable or
    malformed JSON, or a ``"carriers"`` value that is not a dict all
    produce an empty dict.
    """
    try:
        if not SEED_FILE.exists():
            logger.info(
                "Carrier seed file not present at %s; first-run will fall back to homeport defaults",
                SEED_FILE,
            )
            return {}
        payload = json.loads(SEED_FILE.read_text(encoding="utf-8"))
        hulls = payload.get("carriers", {}) if isinstance(payload, dict) else {}
        if isinstance(hulls, dict):
            logger.info("Carrier seed loaded: %d entries from %s", len(hulls), SEED_FILE)
            return hulls
        return {}
    except (OSError, ValueError) as exc:
        # OSError covers IOError; ValueError covers json.JSONDecodeError.
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, exc)
        return {}
|
||||
|
||||
|
||||
def _load_cache() -> Dict[str, dict]:
|
||||
"""Load cached carrier positions from disk."""
|
||||
"""Load the mutable cache (last-known positions persisted between restarts)."""
|
||||
try:
|
||||
if CACHE_FILE.exists():
|
||||
data = json.loads(CACHE_FILE.read_text())
|
||||
logger.info(f"Carrier cache loaded: {len(data)} carriers from {CACHE_FILE}")
|
||||
return data
|
||||
data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict):
|
||||
logger.info("Carrier cache loaded: %d carriers from %s", len(data), CACHE_FILE)
|
||||
return data
|
||||
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
||||
logger.warning(f"Failed to load carrier cache: {e}")
|
||||
logger.warning("Failed to load carrier cache: %s", e)
|
||||
return {}
|
||||
|
||||
|
||||
def _save_cache(positions: Dict[str, dict]):
|
||||
"""Persist carrier positions to disk."""
|
||||
def _save_cache(positions: Dict[str, dict]) -> None:
|
||||
"""Persist the mutable cache. Atomic write (temp + rename) so a crash
|
||||
mid-write can't leave the file truncated."""
|
||||
try:
|
||||
CACHE_FILE.write_text(json.dumps(positions, indent=2))
|
||||
logger.info(f"Carrier cache saved: {len(positions)} carriers")
|
||||
CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = CACHE_FILE.with_suffix(CACHE_FILE.suffix + ".tmp")
|
||||
tmp.write_text(json.dumps(positions, indent=2), encoding="utf-8")
|
||||
# On Windows os.replace is atomic and overwrites existing files.
|
||||
os.replace(tmp, CACHE_FILE)
|
||||
logger.info("Carrier cache saved: %d carriers", len(positions))
|
||||
except (IOError, OSError) as e:
|
||||
logger.warning(f"Failed to save carrier cache: {e}")
|
||||
logger.warning("Failed to save carrier cache: %s", e)
|
||||
|
||||
|
||||
def _homeport_entry_for(hull: str) -> Optional[dict]:
    """Build a cache entry that places ``hull`` at its registry homeport.

    Returns ``None`` when the hull has no (non-empty) registry record.
    The entry is labelled ``"homeport_default"`` and stamped with the
    current time.
    """
    record = CARRIER_REGISTRY.get(hull)
    if record:
        lat = record["homeport_lat"]
        lng = record["homeport_lng"]
        port = record["homeport"]
        return {
            "lat": lat,
            "lng": lng,
            "heading": 0,
            "desc": f"{port} (no observations yet)",
            "source": f"Homeport default ({port})",
            "source_url": record.get("wiki", ""),
            "position_source_at": _now_iso(),
            "position_confidence": "homeport_default",
        }
    return None
|
||||
|
||||
|
||||
def _bootstrap_cache_if_missing() -> Dict[str, dict]:
    """Return the cache contents, creating the cache file if it is absent.

    When the cache file already exists it is simply loaded. Otherwise
    the seed is loaded and written out as the first cache. If the seed
    is empty too, a cache of homeport-default entries is built from the
    registry and written out (nothing is written when that is empty).
    """
    if CACHE_FILE.exists():
        return _load_cache()

    seeded = _load_seed()
    if seeded:
        # Persist the seed as the first cache so later runs take the
        # "cache exists" path above.
        _save_cache(seeded)
        logger.info("Carrier cache bootstrapped from seed (first-ever startup)")
        return dict(seeded)

    # No usable seed: fall back to one homeport entry per registered hull.
    candidates = ((hull, _homeport_entry_for(hull)) for hull in CARRIER_REGISTRY)
    defaults: Dict[str, dict] = {
        hull: entry for hull, entry in candidates if entry is not None
    }
    if defaults:
        _save_cache(defaults)
    return defaults
|
||||
|
||||
|
||||
def _match_region(text: str) -> Optional[tuple]:
|
||||
@@ -270,10 +442,8 @@ def _match_carrier(text: str) -> Optional[str]:
|
||||
for hull, info in CARRIER_REGISTRY.items():
|
||||
hull_check = hull.lower().replace("-", "")
|
||||
name_parts = info["name"].lower()
|
||||
# Match hull number (e.g., "CVN-78", "CVN78")
|
||||
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
||||
return hull
|
||||
# Match ship name (e.g., "Ford", "Eisenhower", "Vinson")
|
||||
ship_name = name_parts.split("(")[0].strip()
|
||||
last_name = ship_name.split()[-1] if ship_name else ""
|
||||
if last_name and len(last_name) > 3 and last_name in text_lower:
|
||||
@@ -323,8 +493,9 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
articles = data.get("articles", [])
|
||||
for art in articles:
|
||||
title = art.get("title", "")
|
||||
url = art.get("url", "")
|
||||
results.append({"title": title, "url": url})
|
||||
article_url = art.get("url", "")
|
||||
article_at = art.get("seendate") or art.get("date") or ""
|
||||
results.append({"title": title, "url": article_url, "seendate": article_at})
|
||||
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
||||
logger.debug(f"GDELT search failed for '{term}': {e}")
|
||||
continue
|
||||
@@ -340,108 +511,175 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
return results
|
||||
|
||||
|
||||
def _gdelt_seendate_to_iso(seendate: str) -> Optional[str]:
    """Convert a GDELT ``seendate`` value to an ISO 8601 UTC timestamp.

    Two layouts are accepted:
      - ``YYYYMMDDTHHMMSSZ`` — the form the GDELT DOC API emits in its
        JSON article lists (e.g. ``20260309T121500Z``);
      - a compact digit run ``YYYYMMDDhhmmss``; when fewer than 14 digits
        are present only the date part is used and the time is midnight.

    Returns ``None`` for empty or unparseable input so the caller can
    choose its own fallback.
    """
    compact = str(seendate or "").strip().upper()
    # Reduce the "T…Z" layout to the plain digit run. Without this the
    # digit check below rejects every real DOC API value and the article
    # date is silently lost.
    if compact.endswith("Z"):
        compact = compact[:-1]
    if len(compact) >= 9 and compact[8] == "T":
        compact = compact[:8] + compact[9:]

    if len(compact) < 8 or not (compact.isascii() and compact.isdigit()):
        return None

    stamp = compact[:14] if len(compact) >= 14 else compact[:8] + "000000"
    try:
        parsed = datetime.strptime(stamp, "%Y%m%d%H%M%S")
    except (TypeError, ValueError):
        return None
    return parsed.replace(tzinfo=timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
|
||||
"""Parse carrier positions from news article titles and descriptions."""
|
||||
"""Parse carrier positions from news article titles.
|
||||
|
||||
Issue #245 (tg12): the position is a region centroid, which is
|
||||
coarse — we now stamp ``position_confidence = "approximate"`` so
|
||||
the UI can render that uncertainty. Issue #244: the
|
||||
``position_source_at`` field is the news article's actual seen
|
||||
date, NOT now(), so the freshness check correctly flips entries
|
||||
to "stale" once they age past the configured window.
|
||||
"""
|
||||
updates: Dict[str, dict] = {}
|
||||
|
||||
for article in articles:
|
||||
title = article.get("title", "")
|
||||
|
||||
# Try to match a carrier from the title
|
||||
hull = _match_carrier(title)
|
||||
if not hull:
|
||||
continue
|
||||
|
||||
# Try to match a region from the title
|
||||
coords = _match_region(title)
|
||||
if not coords:
|
||||
continue
|
||||
|
||||
# Only update if we haven't seen this carrier yet (first match wins — most recent)
|
||||
# First match wins (most recent article, GDELT returns newest first
|
||||
# per term).
|
||||
if hull not in updates:
|
||||
iso_at = _gdelt_seendate_to_iso(str(article.get("seendate", ""))) or _now_iso()
|
||||
updates[hull] = {
|
||||
"lat": coords[0],
|
||||
"lng": coords[1],
|
||||
"heading": 0,
|
||||
"desc": title[:100],
|
||||
"source": "GDELT News API",
|
||||
"source": "GDELT News API (headline region match — approximate)",
|
||||
"source_url": article.get("url", "https://api.gdeltproject.org"),
|
||||
"updated": datetime.now(timezone.utc).isoformat(),
|
||||
"position_source_at": iso_at,
|
||||
# Headline-to-centroid match is explicitly approximate.
|
||||
"position_confidence": "approximate",
|
||||
}
|
||||
logger.info(
|
||||
f"Carrier update: {CARRIER_REGISTRY[hull]['name']} → {coords} (from: {title[:80]})"
|
||||
"Carrier update: %s → %s (from: %s)",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
coords,
|
||||
title[:80],
|
||||
)
|
||||
|
||||
return updates
|
||||
|
||||
|
||||
def _load_carrier_fallbacks() -> Dict[str, dict]:
    """Build carrier positions from static fallbacks + disk cache (instant, no network).

    Every registered hull starts from its registry ``fallback_*`` values.
    A cached entry then replaces the position fields, but only when its
    ``source`` starts with "GDELT" or "News".
    """
    baseline: Dict[str, dict] = {
        hull: {
            "name": info["name"],
            "lat": info["fallback_lat"],
            "lng": info["fallback_lng"],
            "heading": info["fallback_heading"],
            "desc": info["fallback_desc"],
            "wiki": info["wiki"],
            "source": "USNI News Fleet & Marine Tracker",
            "source_url": "https://news.usni.org/category/fleet-tracker",
            "updated": datetime.now(timezone.utc).isoformat(),
        }
        for hull, info in CARRIER_REGISTRY.items()
    }

    # Overlay positions persisted by earlier runs.
    for hull, saved in _load_cache().items():
        if hull not in baseline:
            continue
        if not saved.get("source", "").startswith(("GDELT", "News")):
            continue
        current = baseline[hull]
        current.update(
            {
                "lat": saved["lat"],
                "lng": saved["lng"],
                "desc": saved.get("desc", current["desc"]),
                "source": saved.get("source", "Cached OSINT"),
                "updated": saved.get("updated", ""),
            }
        )
    return baseline
|
||||
def _enrich_for_rendering(hull: str, entry: dict, *, now: Optional[datetime] = None) -> dict:
    """Build the public-facing object for one persisted cache entry.

    The entry is only read, never modified. Name and wiki fall back to
    the registry record for ``hull``. The confidence label is computed
    at call time (``now`` is passed through to that computation).
    Raises ``KeyError`` if the entry has no ``lat`` or ``lng``.
    """
    record = CARRIER_REGISTRY.get(hull, {})
    label = _compute_position_confidence(entry, now=now)
    observed_at = entry.get("position_source_at", "")

    rendered = {
        "name": entry.get("name", record.get("name", hull)),
        "lat": entry["lat"],
        "lng": entry["lng"],
        "heading": entry.get("heading", 0),
        "desc": entry.get("desc", ""),
        "wiki": entry.get("wiki", record.get("wiki", "")),
        "source": entry.get("source", "OSINT estimated position"),
        "source_url": entry.get("source_url", ""),
        "position_source_at": observed_at,
        "position_confidence": label,
        # Kept for the existing frontend popup; carries the source's
        # observed time rather than the time of this call.
        "last_osint_update": observed_at,
        # True for every label that does not denote a live observation.
        "is_fallback": label in ("seed", "stale", "stale_approximate", "homeport_default"),
    }
    return rendered
|
||||
|
||||
|
||||
def update_carrier_positions():
|
||||
"""Main update function — called on startup and every 12h.
|
||||
def update_carrier_positions() -> None:
|
||||
"""Refresh carrier positions.
|
||||
|
||||
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
|
||||
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
|
||||
Phase 1 (instant): publish whatever's in carrier_cache.json (or
|
||||
bootstrap from seed on first-ever run), so the map has carriers
|
||||
immediately.
|
||||
|
||||
Phase 2 (slow): query GDELT and replace position entries for any
|
||||
carrier mentioned in fresh news. Persist back to cache.
|
||||
"""
|
||||
global _last_update
|
||||
|
||||
# --- Phase 1: instant fallback + cache ---
|
||||
positions = _load_carrier_fallbacks()
|
||||
# --- Phase 1: instant cache (bootstrap from seed on first-ever run) ---
|
||||
positions = _bootstrap_cache_if_missing()
|
||||
|
||||
# Ensure every registered hull has SOMETHING in the cache. A hull
|
||||
# the seed didn't cover (e.g. added after install) renders at its
|
||||
# homeport with "homeport_default" confidence.
|
||||
for hull in CARRIER_REGISTRY:
|
||||
if hull not in positions:
|
||||
entry = _homeport_entry_for(hull)
|
||||
if entry is not None:
|
||||
positions[hull] = entry
|
||||
|
||||
with _positions_lock:
|
||||
# Only overwrite if positions are currently empty (first startup).
|
||||
# If we already have data from a previous cycle, keep it while GDELT runs.
|
||||
if not _carrier_positions:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)"
|
||||
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
)
|
||||
|
||||
# --- Phase 2: slow GDELT enrichment ---
|
||||
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||
#
|
||||
# USNI publishes a weekly editorial tracker with each carrier's
|
||||
# actual operating area, parsed from explicit prose like
|
||||
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
# These positions are tagged ``position_confidence: "recent"`` because
|
||||
# they reflect actual reporting, not headline-keyword centroids.
|
||||
# USNI updates are preferred over GDELT — they're authoritative on
|
||||
# US Navy positions where GDELT is just article-title text mining.
|
||||
try:
|
||||
from services.fetchers.usni_fleet_tracker import (
|
||||
fetch_latest_fleet_tracker_positions,
|
||||
)
|
||||
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||
for hull, pos in usni_positions.items():
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier USNI update: %s → %s",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
pos.get("desc", ""),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||
|
||||
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||
#
|
||||
# Used only to backfill carriers USNI didn't mention this week. The
|
||||
# position is stamped ``approximate`` so the UI knows it's a
|
||||
# headline-centroid match (Issue #245).
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
if hull in positions:
|
||||
positions[hull].update(pos)
|
||||
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
|
||||
# Only overwrite if the existing entry is NOT a recent USNI
|
||||
# observation. A "recent" USNI position is higher-confidence
|
||||
# than a GDELT headline-centroid match — don't let GDELT
|
||||
# demote a real position to an approximate one.
|
||||
existing = positions.get(hull, {})
|
||||
existing_conf = _compute_position_confidence(existing)
|
||||
if existing_conf == "recent":
|
||||
continue
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier OSINT: updated %s from GDELT news",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
)
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"GDELT carrier fetch failed: {e}")
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
|
||||
# Save and update the global state with enriched positions
|
||||
with _positions_lock:
|
||||
_carrier_positions.clear()
|
||||
_carrier_positions.update(positions)
|
||||
@@ -449,21 +687,15 @@ def update_carrier_positions():
|
||||
|
||||
_save_cache(positions)
|
||||
|
||||
sources = {}
|
||||
for p in positions.values():
|
||||
src = p.get("source", "unknown")
|
||||
sources[src] = sources.get(src, 0) + 1
|
||||
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
|
||||
confidences: Dict[str, int] = {}
|
||||
for entry in positions.values():
|
||||
label = _compute_position_confidence(entry)
|
||||
confidences[label] = confidences.get(label, 0) + 1
|
||||
logger.info("Carrier tracker: %d carriers updated. Confidence: %s", len(positions), confidences)
|
||||
|
||||
|
||||
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
"""Offset carriers that share identical coordinates so they don't stack.
|
||||
|
||||
At port: offset along the pier axis (~500m / 0.004° apart).
|
||||
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
|
||||
so they're visibly separate but clearly operating together.
|
||||
"""
|
||||
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
|
||||
"""Offset carriers that share identical coordinates so they don't stack."""
|
||||
from collections import defaultdict
|
||||
|
||||
groups: dict[str, list[int]] = defaultdict(list)
|
||||
@@ -475,7 +707,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
if len(indices) < 2:
|
||||
continue
|
||||
n = len(indices)
|
||||
# Determine if this is a port (near a homeport) or at sea
|
||||
sample = result[indices[0]]
|
||||
at_port = any(
|
||||
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
||||
@@ -484,7 +715,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
)
|
||||
|
||||
if at_port:
|
||||
# Use each carrier's distinct homeport pier coordinates
|
||||
for idx in indices:
|
||||
carrier = result[idx]
|
||||
hull = None
|
||||
@@ -497,8 +727,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
carrier["lat"] = info["homeport_lat"]
|
||||
carrier["lng"] = info["homeport_lng"]
|
||||
else:
|
||||
# At sea: spread in a line perpendicular to travel (~0.08° apart)
|
||||
spacing = 0.08 # ~9km — close enough to see they're together
|
||||
spacing = 0.08
|
||||
start_offset = -(n - 1) * spacing / 2
|
||||
for j, idx in enumerate(indices):
|
||||
result[idx]["lng"] += start_offset + j * spacing
|
||||
@@ -507,36 +736,44 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
|
||||
|
||||
def get_carrier_positions() -> List[dict]:
|
||||
"""Return current carrier positions for the data pipeline."""
|
||||
"""Return current carrier positions for the data pipeline.
|
||||
|
||||
Each entry has the full provenance + freshness fields; the UI can
|
||||
decide how to render them. Carriers are never hidden — only
|
||||
labeled.
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
with _positions_lock:
|
||||
result = []
|
||||
for hull, pos in _carrier_positions.items():
|
||||
info = CARRIER_REGISTRY.get(hull, {})
|
||||
result: List[dict] = []
|
||||
for hull, entry in _carrier_positions.items():
|
||||
enriched = _enrich_for_rendering(hull, entry, now=now)
|
||||
result.append(
|
||||
{
|
||||
"name": pos.get("name", info.get("name", hull)),
|
||||
"name": enriched["name"],
|
||||
"type": "carrier",
|
||||
"lat": pos["lat"],
|
||||
"lng": pos["lng"],
|
||||
"heading": None, # Heading unknown for carriers — OSINT cannot determine true heading
|
||||
"lat": enriched["lat"],
|
||||
"lng": enriched["lng"],
|
||||
"heading": None, # OSINT cannot determine true heading.
|
||||
"sog": 0,
|
||||
"cog": 0,
|
||||
"country": "United States",
|
||||
"desc": pos.get("desc", ""),
|
||||
"wiki": pos.get("wiki", info.get("wiki", "")),
|
||||
"desc": enriched["desc"],
|
||||
"wiki": enriched["wiki"],
|
||||
"estimated": True,
|
||||
"source": pos.get("source", "OSINT estimated position"),
|
||||
"source_url": pos.get(
|
||||
"source_url", "https://news.usni.org/category/fleet-tracker"
|
||||
),
|
||||
"last_osint_update": pos.get("updated", ""),
|
||||
"source": enriched["source"],
|
||||
"source_url": enriched["source_url"],
|
||||
"last_osint_update": enriched["last_osint_update"],
|
||||
# New fields (additive — existing UI continues to work):
|
||||
"position_source_at": enriched["position_source_at"],
|
||||
"position_confidence": enriched["position_confidence"],
|
||||
"is_fallback": enriched["is_fallback"],
|
||||
}
|
||||
)
|
||||
return _deconflict_positions(result)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily.
|
||||
# -----------------------------------------------------------------
|
||||
_scheduler_thread: Optional[threading.Thread] = None
|
||||
_scheduler_stop = threading.Event()
|
||||
@@ -544,7 +781,6 @@ _scheduler_stop = threading.Event()
|
||||
|
||||
def _scheduler_loop():
|
||||
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
||||
# Initial update on startup
|
||||
try:
|
||||
update_carrier_positions()
|
||||
except Exception as e:
|
||||
@@ -552,7 +788,6 @@ def _scheduler_loop():
|
||||
|
||||
while not _scheduler_stop.is_set():
|
||||
now = datetime.now(timezone.utc)
|
||||
# Next target: 00:00 or 12:00 UTC, whichever is sooner
|
||||
hour = now.hour
|
||||
if hour < 12:
|
||||
next_hour = 12
|
||||
@@ -561,18 +796,17 @@ def _scheduler_loop():
|
||||
|
||||
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
||||
if next_hour == 24:
|
||||
from datetime import timedelta
|
||||
|
||||
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
wait_seconds = (next_run - now).total_seconds()
|
||||
logger.info(
|
||||
f"Carrier tracker: next update at {next_run.isoformat()} ({wait_seconds/3600:.1f}h)"
|
||||
"Carrier tracker: next update at %s (%.1fh)",
|
||||
next_run.isoformat(),
|
||||
wait_seconds / 3600,
|
||||
)
|
||||
|
||||
# Wait until next scheduled time, or until stop event
|
||||
if _scheduler_stop.wait(timeout=wait_seconds):
|
||||
break # Stop event was set
|
||||
break
|
||||
|
||||
try:
|
||||
update_carrier_positions()
|
||||
|
||||
@@ -17,6 +17,9 @@ _KNOWN_CCTV_MEDIA_HOST_ALIASES = {
|
||||
# Trusted upstream occasionally publishes a typo for this Georgia camera
|
||||
# host. Normalize it at ingest so the proxy and client stay consistent.
|
||||
"navigatos-c2c.dot.ga.gov": "navigator-c2c.dot.ga.gov",
|
||||
# TravelIQ staging hosts occasionally appear in 511 catalog metadata.
|
||||
"on.stage.traveliq.co": "511on.ca",
|
||||
"ab.stage.traveliq.co": "511.alberta.ca",
|
||||
}
|
||||
|
||||
_POINT_WKT_RE = re.compile(
|
||||
@@ -40,6 +43,17 @@ def _normalize_cctv_media_url(raw_url: str) -> str:
|
||||
return urlunparse(parsed._replace(netloc=netloc))
|
||||
|
||||
|
||||
def _ensure_https_url(raw_url: str) -> str:
    """Return ``raw_url`` normalized, with an ``http`` scheme upgraded to ``https``.

    The input is stringified, stripped and passed through
    ``_normalize_cctv_media_url`` first. An empty result yields ``""``;
    URLs with any scheme other than ``http`` are returned as normalized.
    """
    normalized = _normalize_cctv_media_url(str(raw_url or "").strip())
    if not normalized:
        return ""
    parts = urlparse(normalized)
    if parts.scheme.lower() != "http":
        return normalized
    return urlunparse(parts._replace(scheme="https"))
|
||||
|
||||
|
||||
def _looks_like_direct_cctv_media_url(url: str) -> bool:
|
||||
candidate = str(url or "").strip().lower()
|
||||
if not candidate.startswith(("http://", "https://")):
|
||||
@@ -93,6 +107,165 @@ def _parse_wkt_point(raw_point: str) -> tuple[float | None, float | None]:
|
||||
return lat, lon
|
||||
|
||||
|
||||
def _fetch_traveliq_v2_cameras(
    *,
    api_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
) -> List[Dict[str, Any]]:
    """Parse TravelIQ-style GET /api/v2/get/cameras feeds (Ontario, Alberta).

    Args:
        api_url: Endpoint expected to return a JSON list of camera sites.
        base_url: Base against which each view's ``Url`` is resolved.
        id_prefix: Prefix for generated ids (``<prefix>-<site>-<view>``).
        source_agency: Agency name stored on each record and used in logs.

    Returns:
        One record per view that is enabled/active (a missing ``Status``
        counts as enabled), has parseable site coordinates, and carries a
        non-empty URL. Empty list when the request fails or the payload is
        not a list.
    """
    resp = fetch_with_curl(
        api_url,
        timeout=30,
        headers={"Accept": "application/json"},
    )
    if not resp or resp.status_code != 200:
        logger.error(
            "%s CCTV fetch failed: HTTP %s",
            source_agency,
            resp.status_code if resp else "no response",
        )
        return []

    data = resp.json()
    if not isinstance(data, list):
        return []

    cameras: List[Dict[str, Any]] = []
    for cam in data:
        if not isinstance(cam, dict):
            continue

        # Every generated id embeds the site id, so a site without one can
        # never yield a record; skip it before doing any other work.
        site_id = cam.get("Id")
        if site_id is None:
            continue

        try:
            lat = float(cam.get("Latitude"))
            lon = float(cam.get("Longitude"))
        except (TypeError, ValueError):
            continue

        location = str(cam.get("Location") or cam.get("Roadway") or "Camera")[:120]
        views = cam.get("Views")
        if not views or not isinstance(views, list):
            continue

        for view in views:
            if not isinstance(view, dict):
                continue
            status = str(view.get("Status") or "enabled").strip().lower()
            if status and status not in {"enabled", "active"}:
                continue

            # Check the raw value *before* joining: urljoin(base_url, "")
            # returns base_url itself, which would otherwise be stored as
            # the camera image URL for views that have no URL at all.
            view_url = str(view.get("Url") or "").strip()
            if not view_url:
                continue
            media_url = _ensure_https_url(urljoin(base_url, view_url))
            if not media_url:
                continue

            view_id = view.get("Id") or site_id
            label = str(view.get("Description") or location or "Camera")[:120]
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}-{view_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": label,
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )
    return cameras
|
||||
|
||||
|
||||
def _fetch_511_datatables_cameras(
    *,
    list_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
    referer: str,
    page_size: int = 500,
) -> List[Dict[str, Any]]:
    """Parse 511 DataTables POST /List/GetData/Cameras feeds (Georgia, Florida).

    Pages through the feed with ``draw``/``start``/``length`` until the
    reported ``recordsTotal`` is reached, or a short/empty page arrives
    when no total is reported.

    Args:
        list_url: DataTables endpoint that receives the POST.
        base_url: Base for resolving relative image URLs; also sent
            (without a trailing slash) as the ``Origin`` header.
        id_prefix: Prefix for generated ids (``<prefix>-<site>``).
        source_agency: Agency name stored on each record, used in logs and
            as the fallback location label.
        referer: Value for the ``Referer`` request header.
        page_size: Rows requested per page.

    Returns:
        Records for rows that have an id, coordinates, and at least one
        unblocked image. Pages parsed before a failed or malformed page
        are still returned.
    """
    cameras: List[Dict[str, Any]] = []
    start = 0
    draw = 1
    while True:
        resp = fetch_with_curl(
            list_url,
            method="POST",
            json_data={"draw": draw, "start": start, "length": page_size},
            timeout=30,
            headers={
                "Accept": "application/json",
                "Referer": referer,
                "Origin": base_url.rstrip("/"),
            },
        )
        if not resp or resp.status_code != 200:
            logger.error(
                "%s CCTV fetch failed: HTTP %s",
                source_agency,
                resp.status_code if resp else "no response",
            )
            break

        data = resp.json()
        if not isinstance(data, dict):
            # A malformed page must not discard the pages already parsed.
            logger.error(
                "%s CCTV fetch returned unexpected payload type %s",
                source_agency,
                type(data).__name__,
            )
            break
        rows = data.get("data")
        if not rows or not isinstance(rows, list):
            break

        for row in rows:
            if not isinstance(row, dict):
                continue
            site_id = row.get("id") or row.get("DT_RowId")
            location = row.get("location") or row.get("roadway") or source_agency
            lat_lng = row.get("latLng") or {}
            geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
            lat, lon = _parse_wkt_point(
                geography.get("wellKnownText") if isinstance(geography, dict) else ""
            )
            images = row.get("images")
            if not isinstance(images, list):
                images = []
            # First image entry that has a URL and is not flagged blocked.
            image = next(
                (
                    candidate
                    for candidate in images
                    if isinstance(candidate, dict)
                    and str(candidate.get("imageUrl") or "").strip()
                    and not bool(candidate.get("blocked"))
                ),
                None,
            )
            if not (site_id and image and lat is not None and lon is not None):
                continue
            media_url = _ensure_https_url(
                urljoin(base_url, str(image.get("imageUrl") or "").strip())
            )
            if not media_url:
                continue
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": str(location)[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )

        start += len(rows)
        draw += 1
        try:
            total = int(data.get("recordsTotal") or 0)
        except (TypeError, ValueError):
            # Treat an unparseable total as "not reported" and fall back
            # to the short-page stop condition below.
            total = 0
        if total and start >= total:
            break
        if not total and len(rows) < page_size:
            break
    return cameras
|
||||
|
||||
|
||||
def init_db():
|
||||
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
@@ -169,7 +342,7 @@ class BaseCCTVIngestor(ABC):
|
||||
cam.get("lat"),
|
||||
cam.get("lon"),
|
||||
cam.get("direction_facing", "Unknown"),
|
||||
cam.get("media_url"),
|
||||
_ensure_https_url(cam.get("media_url", "")),
|
||||
cam.get("media_type", _detect_media_type(cam.get("media_url", ""))),
|
||||
cam.get("refresh_rate_seconds", 60),
|
||||
),
|
||||
@@ -454,77 +627,14 @@ class WSDOTIngestor(BaseCCTVIngestor):
|
||||
class GeorgiaDOTIngestor(BaseCCTVIngestor):
|
||||
"""Georgia cameras via the public 511GA list feed."""
|
||||
|
||||
URL = "https://511ga.org/List/GetData/Cameras"
|
||||
BASE_URL = "https://511ga.org"
|
||||
PAGE_SIZE = 500
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
cameras = []
|
||||
start = 0
|
||||
draw = 1
|
||||
while True:
|
||||
resp = fetch_with_curl(
|
||||
self.URL,
|
||||
method="POST",
|
||||
json_data={"draw": draw, "start": start, "length": self.PAGE_SIZE},
|
||||
timeout=30,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Referer": "https://511ga.org/cctv",
|
||||
"Origin": "https://511ga.org",
|
||||
},
|
||||
)
|
||||
if not resp or resp.status_code != 200:
|
||||
logger.error(
|
||||
"Georgia CCTV fetch failed: HTTP %s",
|
||||
resp.status_code if resp else "no response",
|
||||
)
|
||||
break
|
||||
data = resp.json()
|
||||
rows = data.get("data") or []
|
||||
if not rows:
|
||||
break
|
||||
for row in rows:
|
||||
site_id = row.get("id") or row.get("DT_RowId")
|
||||
location = row.get("location") or row.get("roadway") or "GA Camera"
|
||||
lat_lng = row.get("latLng") or {}
|
||||
geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
|
||||
lat, lon = _parse_wkt_point(geography.get("wellKnownText") if isinstance(geography, dict) else "")
|
||||
images = row.get("images") or []
|
||||
image = next(
|
||||
(
|
||||
candidate
|
||||
for candidate in images
|
||||
if str(candidate.get("imageUrl") or "").strip()
|
||||
and not bool(candidate.get("blocked"))
|
||||
),
|
||||
None,
|
||||
)
|
||||
if not (site_id and image and lat is not None and lon is not None):
|
||||
continue
|
||||
media_url = _normalize_cctv_media_url(
|
||||
urljoin(self.BASE_URL, str(image.get("imageUrl") or "").strip())
|
||||
)
|
||||
cameras.append(
|
||||
{
|
||||
"id": f"GDOT-{site_id}",
|
||||
"source_agency": "Georgia DOT",
|
||||
"lat": lat,
|
||||
"lon": lon,
|
||||
"direction_facing": str(location)[:120],
|
||||
"media_url": media_url,
|
||||
"media_type": "image",
|
||||
"refresh_rate_seconds": 60,
|
||||
}
|
||||
)
|
||||
start += len(rows)
|
||||
draw += 1
|
||||
total = int(data.get("recordsTotal") or 0)
|
||||
if total and start >= total:
|
||||
break
|
||||
if not total and len(rows) < self.PAGE_SIZE:
|
||||
break
|
||||
return cameras
|
||||
return _fetch_511_datatables_cameras(
|
||||
list_url="https://511ga.org/List/GetData/Cameras",
|
||||
base_url="https://511ga.org",
|
||||
id_prefix="GDOT",
|
||||
source_agency="Georgia DOT",
|
||||
referer="https://511ga.org/cctv",
|
||||
)
|
||||
|
||||
|
||||
class IllinoisDOTIngestor(BaseCCTVIngestor):
|
||||
@@ -1009,17 +1119,72 @@ def _extract_img_src(html_fragment: str):
|
||||
return None
|
||||
|
||||
|
||||
class AsfinagIngestor(BaseCCTVIngestor):
    """Austria ASFINAG motorway webcams (Osiris port)."""

    API_URL = "https://odo.asfinag.at/odo/rest/sec/resource/001/json/webcams?language=atDE"
    # NOTE(review): the Basic credential below is hard-coded in source;
    # confirm it is the public map-widget credential and not a secret.
    HEADERS = {
        "User-Agent": "Shadowbroker-CCTV/1.0",
        "Accept": "application/json",
        "Referer": "https://www.asfinag.at/",
        "Authorization": "Basic bWFwX3dpZGdldDp0ZWdkaXc=",
    }

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the webcam list and convert it to camera records.

        Returns:
            One record per entry that has an id, parseable coordinates and
            an image URL, excluding ids starting with ``Utinform``. Empty
            list when the request or JSON decoding fails, or the payload
            is not a list.
        """
        try:
            response = fetch_with_curl(self.API_URL, timeout=15, headers=self.HEADERS)
            response.raise_for_status()
            payload = response.json()
        except Exception as exc:
            logger.error("AsfinagIngestor: fetch failed: %s", exc)
            return []
        if not isinstance(payload, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for cam in payload:
            # One malformed entry must not abort the whole feed.
            if not isinstance(cam, dict):
                continue
            cam_id = cam.get("wcs_id")
            lat = cam.get("wgs84_lat")
            lon = cam.get("wgs84_lon")
            image_url = cam.get("url_campic")
            if not cam_id or lat is None or lon is None or not image_url:
                continue
            if str(cam_id).startswith("Utinform"):
                continue
            try:
                lat_f, lon_f = float(lat), float(lon)
            except (TypeError, ValueError):
                continue
            label = cam.get("position_txt") or cam.get("direction_txt") or "ASFINAG Webcam"
            secure_url = _ensure_https_url(image_url)
            if not secure_url:
                continue
            cameras.append(
                {
                    "id": f"ASFINAG-{cam_id}",
                    "source_agency": "ASFINAG Austria",
                    "lat": lat_f,
                    "lon": lon_f,
                    "direction_facing": label,
                    "media_url": secure_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 300,
                }
            )
        logger.info("AsfinagIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
class MadridCityIngestor(BaseCCTVIngestor):
|
||||
"""Madrid City Hall traffic cameras from datos.madrid.es KML feed."""
|
||||
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def _fetch_kml(self):
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
response = self._fetch_kml()
|
||||
except Exception as e:
|
||||
logger.error(f"MadridCityIngestor: failed to fetch KML: {e}")
|
||||
return []
|
||||
@@ -1055,6 +1220,9 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
if desc_el is not None and desc_el.text:
|
||||
image_url = _extract_img_src(desc_el.text)
|
||||
|
||||
if not image_url:
|
||||
continue
|
||||
image_url = _ensure_https_url(image_url)
|
||||
if not image_url:
|
||||
continue
|
||||
|
||||
@@ -1076,6 +1244,153 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
return cameras
|
||||
|
||||
|
||||
class Ontario511Ingestor(BaseCCTVIngestor):
    """Ingestor for 511on.ca cameras, served by a TravelIQ v2 endpoint."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared TravelIQ v2 parser with Ontario settings."""
        feed = {
            "api_url": "https://511on.ca/api/v2/get/cameras",
            "base_url": "https://511on.ca",
            "id_prefix": "ON511",
            "source_agency": "511 Ontario",
        }
        return _fetch_traveliq_v2_cameras(**feed)
|
||||
|
||||
|
||||
class Alberta511Ingestor(BaseCCTVIngestor):
    """Ingestor for 511.alberta.ca cameras, served by a TravelIQ v2 endpoint."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared TravelIQ v2 parser with Alberta settings."""
        feed = {
            "api_url": "https://511.alberta.ca/api/v2/get/cameras",
            "base_url": "https://511.alberta.ca",
            "id_prefix": "AB511",
            "source_agency": "511 Alberta",
        }
        return _fetch_traveliq_v2_cameras(**feed)
|
||||
|
||||
|
||||
class Florida511Ingestor(BaseCCTVIngestor):
    """Ingestor for the FL511 DataTables camera feed (~4,800 sites)."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared 511 DataTables parser with Florida settings."""
        feed = {
            "list_url": "https://fl511.com/List/GetData/Cameras",
            "base_url": "https://fl511.com",
            "id_prefix": "FL511",
            "source_agency": "Florida 511",
            "referer": "https://fl511.com/",
        }
        return _fetch_511_datatables_cameras(**feed)
|
||||
|
||||
|
||||
class AustraliaLiveTrafficIngestor(BaseCCTVIngestor):
    """Cameras from the livetraffic.com all-feeds JSON (``liveCams`` entries)."""

    URL = "https://www.livetraffic.com/datajson/all-feeds-web.json"

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Download the feed and build one record per usable ``liveCams`` item.

        Items are skipped when they lack two numeric coordinates or an
        ``href``. Returns an empty list on a failed request or when the
        payload is not a list.
        """
        resp = fetch_with_curl(self.URL, timeout=35, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            logger.error(
                "Australia Live Traffic CCTV fetch failed: HTTP %s",
                resp.status_code if resp else "no response",
            )
            return []

        feed = resp.json()
        if not isinstance(feed, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for entry in feed:
            if not isinstance(entry, dict):
                continue
            if entry.get("eventType") != "liveCams":
                continue

            geometry = entry.get("geometry")
            if not isinstance(geometry, dict):
                geometry = {}
            coords = geometry.get("coordinates")
            if not isinstance(coords, list):
                coords = []
            if len(coords) < 2:
                continue
            try:
                # The feed lists longitude first, then latitude.
                lon = float(coords[0])
                lat = float(coords[1])
            except (TypeError, ValueError):
                continue

            props = entry.get("properties")
            if not isinstance(props, dict):
                props = {}
            href = str(props.get("href") or "").strip()
            media_url = _ensure_https_url(href)
            if not media_url:
                continue

            # Falls back to the running count when neither path nor id exists.
            raw_id = entry.get("path") or props.get("id") or len(cameras)
            cam_id = str(raw_id).strip("/")
            title = props.get("title") or props.get("headline") or "Australia Camera"
            cameras.append(
                {
                    "id": f"AUS-{cam_id}",
                    "source_agency": "NSW Live Traffic",
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": str(title)[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("AustraliaLiveTrafficIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
class NetherlandsRWSIngestor(BaseCCTVIngestor):
    """Rijkswaterstaat cameras read from the legacy NDW ``cameras.json`` feed.

    The opendata.ndw.nu endpoint (the one Osiris used) is frequently
    unreachable; in that case a warning is logged and nothing is ingested.
    """

    URL = "https://opendata.ndw.nu/cameras.json"
    MAX_CAMERAS = 1200

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Return records for at most ``MAX_CAMERAS`` leading feed entries.

        Entries lacking coordinates or a media URL, or whose coordinates
        do not parse as floats, are dropped.
        """
        resp = fetch_with_curl(self.URL, timeout=25, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            logger.warning(
                "Netherlands RWS cameras.json unavailable (HTTP %s) — "
                "NDW retired this open-data endpoint; no cameras ingested",
                resp.status_code if resp else "no response",
            )
            return []

        feed = resp.json()
        if not isinstance(feed, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for index, entry in enumerate(feed[: self.MAX_CAMERAS]):
            if not isinstance(entry, dict):
                continue

            # Short key names take precedence over the long spellings.
            lat = entry.get("lat")
            if lat is None:
                lat = entry.get("latitude")
            lon = entry.get("lng")
            if lon is None:
                lon = entry.get("longitude")

            raw_url = entry.get("imageUrl") or entry.get("feed_url") or entry.get("url") or ""
            media_url = _ensure_https_url(str(raw_url).strip())
            if lat is None or lon is None or not media_url:
                continue
            try:
                lat_f, lon_f = float(lat), float(lon)
            except (TypeError, ValueError):
                continue

            name = entry.get("name") or "Netherlands Camera"
            cameras.append(
                {
                    # The position in the feed stands in for a missing id.
                    "id": f"NLRWS-{entry.get('id') or index}",
                    "source_agency": "Rijkswaterstaat",
                    "lat": lat_f,
                    "lon": lon_f,
                    "direction_facing": str(name)[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("NetherlandsRWSIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
def _detect_media_type(url: str) -> str:
|
||||
"""Detect the media type from a camera URL for proper frontend rendering."""
|
||||
if not url:
|
||||
@@ -1094,29 +1409,40 @@ def _detect_media_type(url: str) -> str:
|
||||
return "image"
|
||||
|
||||
|
||||
def scheduled_cctv_ingestors() -> List[tuple["BaseCCTVIngestor", str]]:
    """Build the canonical ``(ingestor, job name)`` list.

    Each call creates fresh ingestor instances, in the order listed here.
    Intended as the single source for startup, the scheduler, and DB
    seeding.
    """
    registry = (
        (TFLJamCamIngestor, "cctv_tfl"),
        (LTASingaporeIngestor, "cctv_lta"),
        (AustinTXIngestor, "cctv_atx"),
        (NYCDOTIngestor, "cctv_nyc"),
        (CaltransIngestor, "cctv_caltrans"),
        (ColoradoDOTIngestor, "cctv_codot"),
        (WSDOTIngestor, "cctv_wsdot"),
        (GeorgiaDOTIngestor, "cctv_gdot"),
        (IllinoisDOTIngestor, "cctv_idot"),
        (MichiganDOTIngestor, "cctv_mdot"),
        (WindyWebcamsIngestor, "cctv_windy"),
        (DGTNationalIngestor, "cctv_dgt"),
        (MadridCityIngestor, "cctv_madrid"),
        (OSMTrafficCameraIngestor, "cctv_osm"),
        (AsfinagIngestor, "cctv_asfinag"),
        (OSMALPRCameraIngestor, "cctv_osm_alpr"),
        (Ontario511Ingestor, "cctv_on511"),
        (Alberta511Ingestor, "cctv_ab511"),
        (Florida511Ingestor, "cctv_fl511"),
        (AustraliaLiveTrafficIngestor, "cctv_australia"),
        (NetherlandsRWSIngestor, "cctv_nl_rws"),
    )
    return [(ingestor_cls(), job_name) for ingestor_cls, job_name in registry]
|
||||
|
||||
|
||||
def run_all_ingestors():
|
||||
"""Run all CCTV ingestors synchronously. Used for first-run DB seeding."""
|
||||
ingestors = [
|
||||
TFLJamCamIngestor(),
|
||||
LTASingaporeIngestor(),
|
||||
AustinTXIngestor(),
|
||||
NYCDOTIngestor(),
|
||||
CaltransIngestor(),
|
||||
ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(),
|
||||
GeorgiaDOTIngestor(),
|
||||
IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(),
|
||||
WindyWebcamsIngestor(),
|
||||
OSMTrafficCameraIngestor(),
|
||||
DGTNationalIngestor(),
|
||||
MadridCityIngestor(),
|
||||
]
|
||||
for ing in ingestors:
|
||||
for ingestor, _name in scheduled_cctv_ingestors():
|
||||
try:
|
||||
ing.ingest()
|
||||
ingestor.ingest()
|
||||
except Exception as e:
|
||||
logger.warning(f"Ingestor {ing.__class__.__name__} failed during seed: {e}")
|
||||
logger.warning(f"Ingestor {ingestor.__class__.__name__} failed during seed: {e}")
|
||||
|
||||
|
||||
def get_all_cameras() -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -30,8 +30,13 @@ class Settings(BaseSettings):
|
||||
MESH_MQTT_INCLUDE_DEFAULT_ROOTS: bool = True
|
||||
MESH_RNS_ENABLED: bool = False
|
||||
MESH_ARTI_ENABLED: bool = False
|
||||
# When true, trust wormhole_status.json ready bit if the child process is
|
||||
# alive — avoids transport-tier flapping when /api/health probes time out
|
||||
# under Tor load (common during live DM E2E).
|
||||
MESH_WORMHOLE_TRUST_FILE_READY: bool = False
|
||||
MESH_ARTI_SOCKS_PORT: int = 9050
|
||||
MESH_RELAY_PEERS: str = ""
|
||||
MESH_PUBLIC_PEER_URL: str = ""
|
||||
# Bootstrap seeds are discovery hints, not authoritative network roots.
|
||||
# Nodes promote healthy discovered peers from the store/manifest over time.
|
||||
MESH_BOOTSTRAP_SEED_PEERS: str = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||
@@ -42,7 +47,24 @@ class Settings(BaseSettings):
|
||||
MESH_INFONET_ALLOW_CLEARNET_SYNC: bool = False
|
||||
MESH_BOOTSTRAP_DISABLED: bool = False
|
||||
MESH_BOOTSTRAP_MANIFEST_PATH: str = "data/bootstrap_peers.json"
|
||||
MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY: str = ""
|
||||
# Public sb-testnet-0 fleet signer (participants). Seed operator holds the private key.
|
||||
MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY: str = (
|
||||
"ul1d0kj/ODPIp0OhHzX8eLAVXzJ3CVvzW1vn2IC6q3I="
|
||||
)
|
||||
MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY: str = ""
|
||||
# When true, empty MESH_PEER_PUSH_SECRET uses the public fleet HMAC for seed join/announce.
|
||||
MESH_INFONET_FLEET_JOIN: bool = True
|
||||
MESH_INFONET_FLEET_JOIN_DISABLED: bool = False
|
||||
# Headless relay/seed compose: auto-enable Tor wormhole on startup so
|
||||
# docker compose redeploys keep the fleet onion reachable.
|
||||
MESH_INFONET_RELAY_AUTO_WORMHOLE: bool = False
|
||||
MESH_INFONET_RELAY_AUTO_WORMHOLE_DISABLED: bool = False
|
||||
MESH_BOOTSTRAP_SIGNER_ID: str = ""
|
||||
MESH_PEER_REGISTRY_ENABLED: bool = False
|
||||
MESH_PEER_REGISTRY_DISABLED: bool = False
|
||||
MESH_PEER_REGISTRY_STALE_S: int = 604800
|
||||
MESH_SWARM_MANIFEST_TTL_S: int = 14400
|
||||
MESH_SWARM_MANIFEST_PULL_INTERVAL_S: int = 300
|
||||
MESH_NODE_MODE: str = "participant"
|
||||
MESH_SYNC_INTERVAL_S: int = 300
|
||||
MESH_SYNC_FAILURE_BACKOFF_S: int = 60
|
||||
@@ -53,6 +75,12 @@ class Settings(BaseSettings):
|
||||
MESH_RELAY_FAILURE_COOLDOWN_S: int = 120
|
||||
MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S: int = 15
|
||||
MESH_PEER_PUSH_SECRET: str = ""
|
||||
# Issue #256 (tg12): optional per-peer HMAC secret map. Comma-separated
|
||||
# `url=secret` pairs. When a peer URL appears here, only that per-peer
|
||||
# secret is accepted for it — the global MESH_PEER_PUSH_SECRET above is
|
||||
# ignored for that specific URL. Single-peer installs and unmigrated
|
||||
# multi-peer installs leave this empty and behavior is unchanged.
|
||||
MESH_PEER_SECRETS: str = ""
|
||||
MESH_RNS_APP_NAME: str = "shadowbroker"
|
||||
MESH_RNS_ASPECT: str = "infonet"
|
||||
MESH_RNS_IDENTITY_PATH: str = ""
|
||||
@@ -110,6 +138,21 @@ class Settings(BaseSettings):
|
||||
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
||||
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
||||
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
||||
# Anti-spam: cap on distinct UNACKED messages a single sender can have
|
||||
# parked in a single recipient's mailbox at any one time. Once the
|
||||
# recipient pulls (acks) a message, the sender's quota for that pair
|
||||
# frees up. Default 2 — a sender who wants to deliver more must wait
|
||||
# for the recipient to actually read the prior messages.
|
||||
#
|
||||
# This cap is enforced TWICE: once on the local deposit path (the
|
||||
# sender's own node refuses to spool the 3rd message) AND once on
|
||||
# the replication-acceptance path (honest peer relays refuse to
|
||||
# accept inbound replicas that would put them over the cap). The
|
||||
# double enforcement makes the rule a NETWORK rule — patching out
|
||||
# the local check on a hostile sender's relay doesn't let extras
|
||||
# propagate, because every honest peer enforces the same cap on
|
||||
# inbound replication.
|
||||
MESH_DM_PENDING_PER_SENDER_LIMIT: int = 2
|
||||
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
||||
@@ -289,6 +332,19 @@ class Settings(BaseSettings):
|
||||
# service operator can identify per-install traffic instead of a generic
|
||||
# "ShadowBroker" aggregate.
|
||||
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
||||
# Per-install operator handle used in the User-Agent for EVERY third-party
|
||||
# API the backend calls (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz,
|
||||
# Broadcastify, weather.gov, NUFORC, etc.). The default is empty, in which
|
||||
# case backend/services/network_utils.py auto-generates a stable
|
||||
# pseudonymous handle like "operator-7f3a92" on first use and caches it.
|
||||
# Operators who want to identify themselves with a real handle can set
|
||||
# this; operators who want to stay pseudonymous can leave it empty.
|
||||
#
|
||||
# The handle is sent ONLY to public third-party APIs. It is NEVER mixed
|
||||
# into mesh / Wormhole / Infonet identity (those have their own crypto
|
||||
# identity layer; conflating the two would leak public attribution into
|
||||
# private mesh state).
|
||||
OPERATOR_HANDLE: str = ""
|
||||
|
||||
# SAR (Synthetic Aperture Radar) data layer
|
||||
# Mode A — free catalog metadata, no account, default-on
|
||||
|
||||
@@ -11,8 +11,13 @@ DEFAULT_TRAIL_TTL_S = 300 # 5 min - trail TTL for non-tracked flights
|
||||
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
|
||||
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
|
||||
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
|
||||
GPS_JAMMING_MIN_RATIO = 0.30 # 30% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 5 # Min aircraft in grid cell for statistical significance
|
||||
# Tuned 2026-05: previously 0.30 / 5 aircraft which — combined with the
|
||||
# -1 noise cushion in the detector AND the pre-fix nac_p==0 filter that
|
||||
# discarded jamming victims — meant the layer almost never lit up.
|
||||
# Lowering the bar so genuine jamming zones with sparser ADS-B coverage
|
||||
# clear (eastern Med, Russia/Ukraine border, Iran/Iraq).
|
||||
GPS_JAMMING_MIN_RATIO = 0.20 # 20% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 3 # Min aircraft in grid cell for statistical significance
|
||||
|
||||
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
|
||||
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
|
||||
|
||||
@@ -19,6 +19,7 @@ import concurrent.futures
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
@@ -75,6 +76,7 @@ from services.fetchers.infrastructure import ( # noqa: F401
|
||||
fetch_tinygs,
|
||||
fetch_psk_reporter,
|
||||
)
|
||||
from services.fetchers.road_corridor_sat import fetch_road_corridor_trends # noqa: F401
|
||||
from services.fetchers.geo import ( # noqa: F401
|
||||
fetch_ships,
|
||||
fetch_airports,
|
||||
@@ -99,6 +101,10 @@ from services.fetchers.crowdthreat import fetch_crowdthreat # noqa: F401
|
||||
from services.fetchers.wastewater import fetch_wastewater # noqa: F401
|
||||
from services.fetchers.sar_catalog import fetch_sar_catalog # noqa: F401
|
||||
from services.fetchers.sar_products import fetch_sar_products # noqa: F401
|
||||
from services.fetchers.malware import fetch_malware_threats # noqa: F401
|
||||
from services.fetchers.telegram_osint import fetch_telegram_osint # noqa: F401
|
||||
from services.fetchers.cyber_status import fetch_cyber_threats # noqa: F401
|
||||
from services.scm.suppliers import fetch_scm_suppliers # noqa: F401
|
||||
from services.ais_stream import prune_stale_vessels # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -144,13 +150,18 @@ _STARTUP_HEAVY_REFRESH_DELAY_S = float(os.environ.get("SHADOWBROKER_STARTUP_HEAV
|
||||
_STARTUP_HEAVY_REFRESH_STARTED = False
|
||||
_STARTUP_HEAVY_REFRESH_LOCK = threading.Lock()
|
||||
_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_FETCH_WORKERS", "8"))
|
||||
_HEAVY_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_HEAVY_FETCH_WORKERS", "2"))
|
||||
_SLOW_FETCH_CONCURRENCY = int(os.environ.get("SHADOWBROKER_SLOW_FETCH_CONCURRENCY", "4"))
|
||||
_STARTUP_HEAVY_CONCURRENCY = int(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY", "2"))
|
||||
|
||||
# Shared thread pool — reused across all fetch cycles instead of creating/destroying per tick
|
||||
# Fast-tier pool (flights, ships, sigint, …). Slow / heavy work uses a separate pool
|
||||
# so Playwright, GDELT, CCTV ingest, etc. cannot starve the 60s refresh path (#375).
|
||||
_SHARED_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(2, _FETCH_WORKERS), thread_name_prefix="fetch"
|
||||
)
|
||||
_SLOW_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(1, _HEAVY_FETCH_WORKERS), thread_name_prefix="fetch-slow"
|
||||
)
|
||||
|
||||
|
||||
def _cache_json_safe(value):
|
||||
@@ -319,10 +330,49 @@ def seed_startup_caches() -> None:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler & Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
def _executor_for_task_label(label: str) -> concurrent.futures.ThreadPoolExecutor:
    """Choose the thread pool for a task-group label.

    Labels starting with ``slow-tier`` or ``startup-heavy`` get the slow
    pool; every other label gets the shared pool.
    """
    slow_prefixes = ("slow-tier", "startup-heavy")
    return _SLOW_EXECUTOR if label.startswith(slow_prefixes) else _SHARED_EXECUTOR
|
||||
|
||||
|
||||
def _run_task_with_health_on_executor(
    executor: concurrent.futures.ThreadPoolExecutor,
    func,
    name: str | None = None,
) -> None:
    """Run *func* on *executor*, wait for it, and record the outcome.

    Blocks for up to ``_TASK_HARD_TIMEOUT_S`` seconds. Success, timeout
    and failure are each reported to ``services.fetch_health`` and
    logged; exceptions raised by *func* are not re-raised.

    Args:
        executor: Pool the callable is submitted to.
        func: Zero-argument callable to run.
        name: Label for health records and logs. Defaults to the
            callable's ``__name__``, or ``"task"`` if it has none.
    """
    label = name if name else getattr(func, "__name__", "task")
    pending = executor.submit(func)
    began = time.perf_counter()

    def _elapsed() -> float:
        return time.perf_counter() - began

    try:
        pending.result(timeout=_TASK_HARD_TIMEOUT_S)
        took = _elapsed()
        from services.fetch_health import record_success

        record_success(label, duration_s=took)
        if took > _SLOW_FETCH_S:
            logger.warning("task slow: %s took %.2fs", label, took)
    except concurrent.futures.TimeoutError:
        # cancel() only stops a call that has not started; one that is
        # already executing keeps running on its worker thread.
        pending.cancel()
        took = _elapsed()
        from services.fetch_health import record_failure

        record_failure(label, error=TimeoutError(f"{label} timed out"), duration_s=took)
        logger.error("task timed out: %s (%.2fs)", label, took)
    except Exception as exc:
        took = _elapsed()
        from services.fetch_health import record_failure

        record_failure(label, error=exc, duration_s=took)
        logger.exception("task failed: %s", label)
|
||||
|
||||
|
||||
def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
"""Run tasks concurrently and log any exceptions (do not fail silently)."""
|
||||
if not funcs:
|
||||
return
|
||||
executor = _executor_for_task_label(label)
|
||||
if max_concurrency is None:
|
||||
if label.startswith("slow-tier"):
|
||||
max_concurrency = _SLOW_FETCH_CONCURRENCY
|
||||
@@ -330,12 +380,13 @@ def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
max_concurrency = _STARTUP_HEAVY_CONCURRENCY
|
||||
else:
|
||||
max_concurrency = len(funcs)
|
||||
max_concurrency = max(1, min(max_concurrency, len(funcs)))
|
||||
pool_workers = getattr(executor, "_max_workers", len(funcs))
|
||||
max_concurrency = max(1, min(max_concurrency, len(funcs), pool_workers))
|
||||
|
||||
remaining_funcs = list(funcs)
|
||||
while remaining_funcs:
|
||||
batch, remaining_funcs = remaining_funcs[:max_concurrency], remaining_funcs[max_concurrency:]
|
||||
futures = {_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
futures = {executor.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
_drain_task_futures(label, futures)
|
||||
|
||||
|
||||
@@ -352,6 +403,13 @@ def _drain_task_futures(label: str, futures: dict):
|
||||
record_success(name, duration_s=duration)
|
||||
if duration > _SLOW_FETCH_S:
|
||||
logger.warning(f"{label} task slow: {name} took {duration:.2f}s")
|
||||
except concurrent.futures.TimeoutError:
|
||||
future.cancel()
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_failure
|
||||
|
||||
record_failure(name, error=TimeoutError(f"{name} timed out"), duration_s=duration)
|
||||
logger.error("%s task timed out: %s (%.2fs)", label, name, duration)
|
||||
except Exception as e:
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_failure
|
||||
@@ -405,7 +463,6 @@ def update_slow_data():
|
||||
logger.info("Slow-tier data update starting...")
|
||||
slow_funcs = [
|
||||
fetch_news,
|
||||
fetch_prediction_markets,
|
||||
fetch_earthquakes,
|
||||
fetch_firms_fires,
|
||||
fetch_firms_country_fires,
|
||||
@@ -427,6 +484,9 @@ def update_slow_data():
|
||||
fetch_fishing_activity,
|
||||
fetch_power_plants,
|
||||
fetch_ukraine_air_raid_alerts,
|
||||
fetch_malware_threats,
|
||||
fetch_cyber_threats,
|
||||
fetch_scm_suppliers,
|
||||
]
|
||||
_run_tasks("slow-tier", slow_funcs)
|
||||
# Run correlation engine after all data is fresh
|
||||
@@ -439,6 +499,12 @@ def update_slow_data():
|
||||
latest_data["correlations"] = correlations
|
||||
except Exception as e:
|
||||
logger.error("Correlation engine failed: %s", e)
|
||||
try:
|
||||
from analytics.integration import maybe_refresh_gt_analytics
|
||||
|
||||
maybe_refresh_gt_analytics()
|
||||
except Exception as e:
|
||||
logger.error("GT analytics refresh failed: %s", e)
|
||||
from services.fetchers._store import bump_data_version
|
||||
bump_data_version()
|
||||
_save_intel_startup_cache()
|
||||
@@ -470,6 +536,15 @@ def _load_cctv_cache_for_startup() -> None:
|
||||
logger.warning("Startup CCTV cache load failed (non-fatal): %s", e)
|
||||
|
||||
|
||||
def _load_static_infrastructure_for_startup() -> None:
    """Load the disk-backed reference layers during startup.

    Runs ``fetch_datacenters``, ``fetch_military_bases`` and
    ``fetch_power_plants`` in that order (noted by the original author as
    instant, no network).  A loader that raises is logged as a warning and
    the remaining loaders still run.
    """
    loaders = (fetch_datacenters, fetch_military_bases, fetch_power_plants)
    for loader in loaders:
        try:
            loader()
        except Exception as exc:
            # Best-effort: a missing reference layer must not abort startup.
            logger.warning("Startup static infrastructure load failed for %s: %s", loader.__name__, exc)
|
||||
|
||||
|
||||
def _run_delayed_startup_heavy_refresh() -> None:
|
||||
if _STARTUP_HEAVY_REFRESH_DELAY_S > 0:
|
||||
logger.info(
|
||||
@@ -482,6 +557,7 @@ def _run_delayed_startup_heavy_refresh() -> None:
|
||||
"startup-heavy",
|
||||
[
|
||||
update_slow_data,
|
||||
fetch_telegram_osint,
|
||||
fetch_volcanoes,
|
||||
fetch_viirs_change_nodes,
|
||||
fetch_unusual_whales,
|
||||
@@ -520,6 +596,7 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
logger.info("Full data update starting (parallel)...")
|
||||
# Preload Meshtastic map cache immediately (instant, from disk)
|
||||
seed_startup_caches()
|
||||
_load_static_infrastructure_for_startup()
|
||||
with _data_lock:
|
||||
meshtastic_seeded = bool(latest_data.get("meshtastic_map_nodes"))
|
||||
if startup_mode:
|
||||
@@ -596,22 +673,9 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
# (the scheduled job also runs every 10 min for ongoing refresh).
|
||||
if startup_mode:
|
||||
try:
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor, LTASingaporeIngestor, AustinTXIngestor,
|
||||
NYCDOTIngestor, CaltransIngestor, ColoradoDOTIngestor,
|
||||
WSDOTIngestor, GeorgiaDOTIngestor, IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor, WindyWebcamsIngestor, DGTNationalIngestor,
|
||||
MadridCityIngestor, OSMTrafficCameraIngestor, get_all_cameras,
|
||||
)
|
||||
from services.cctv_pipeline import OSMALPRCameraIngestor
|
||||
_startup_ingestors = [
|
||||
TFLJamCamIngestor(), LTASingaporeIngestor(), AustinTXIngestor(),
|
||||
NYCDOTIngestor(), CaltransIngestor(), ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(), GeorgiaDOTIngestor(), IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(), WindyWebcamsIngestor(), DGTNationalIngestor(),
|
||||
MadridCityIngestor(), OSMTrafficCameraIngestor(),
|
||||
OSMALPRCameraIngestor(),
|
||||
]
|
||||
from services.cctv_pipeline import get_all_cameras, scheduled_cctv_ingestors
|
||||
|
||||
_startup_ingestors = [ing for ing, _name in scheduled_cctv_ingestors()]
|
||||
logger.info("Running CCTV ingest at startup (%d ingestors)...", len(_startup_ingestors))
|
||||
ingest_futures = {
|
||||
_SHARED_EXECUTOR.submit(ing.ingest): ing.__class__.__name__
|
||||
@@ -747,6 +811,49 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Telegram OSINT — hourly t.me/s channel scrape (kept off the 5-minute slow tier).
|
||||
_telegram_interval_m = max(15, int(os.environ.get("TELEGRAM_OSINT_INTERVAL_MINUTES", "60")))
|
||||
|
||||
def _fetch_telegram_osint_with_gt():
|
||||
fetch_telegram_osint()
|
||||
try:
|
||||
from analytics.integration import maybe_refresh_gt_analytics
|
||||
|
||||
maybe_refresh_gt_analytics()
|
||||
except Exception as exc:
|
||||
logger.error("GT analytics refresh after telegram failed: %s", exc)
|
||||
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(_fetch_telegram_osint_with_gt, "fetch_telegram_osint"),
|
||||
"interval",
|
||||
minutes=_telegram_interval_m,
|
||||
next_run_time=datetime.utcnow() + timedelta(seconds=45),
|
||||
id="telegram_osint",
|
||||
max_instances=1,
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# Prediction markets — own jittered cadence (Polymarket/Kalshi clearnet egress).
|
||||
# Kept off the fixed 5-minute slow tier so poll timing is less fingerprintable.
|
||||
from services.fetchers.prediction_markets import fetch_prediction_markets
|
||||
|
||||
_pm_interval_m = max(5, int(os.environ.get("PREDICTION_MARKETS_INTERVAL_MINUTES", "7")))
|
||||
_pm_jitter_s = max(0, int(os.environ.get("PREDICTION_MARKETS_SCHEDULER_JITTER_S", "240")))
|
||||
_pm_initial_max_s = max(0, int(os.environ.get("PREDICTION_MARKETS_INITIAL_DELAY_MAX_S", "180")))
|
||||
_pm_first_run = datetime.utcnow() + timedelta(
|
||||
seconds=random.randint(30, max(30, _pm_initial_max_s))
|
||||
)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_prediction_markets, "fetch_prediction_markets"),
|
||||
"interval",
|
||||
minutes=_pm_interval_m,
|
||||
jitter=_pm_jitter_s,
|
||||
next_run_time=_pm_first_run,
|
||||
id="prediction_markets",
|
||||
max_instances=1,
|
||||
misfire_grace_time=300,
|
||||
)
|
||||
|
||||
# Weather alerts — every 5 minutes (time-critical, separate from slow tier)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_weather_alerts, "fetch_weather_alerts"),
|
||||
@@ -777,6 +884,39 @@ def start_scheduler():
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# Flight observation pruning — drops icao24 → first_seen_at entries we
|
||||
# haven't seen in an hour. Same cadence as AIS prune for symmetry; the
|
||||
# per-tick scan is O(in-flight aircraft) so it's cheap.
|
||||
from services.fetchers.flight_observations import prune as _prune_flight_observations
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(_prune_flight_observations, "prune_flight_observations"),
|
||||
"interval",
|
||||
minutes=5,
|
||||
id="flight_observation_prune",
|
||||
max_instances=1,
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# AISHub REST fallback — slow polling when the AISStream WebSocket
|
||||
# primary is offline. Configurable interval via
|
||||
# AISHUB_POLL_INTERVAL_MINUTES env (default 20 min). Operator must
|
||||
# set AISHUB_USERNAME to opt in. The fetcher is gated internally on
|
||||
# the primary being disconnected, so this job is cheap when the
|
||||
# WebSocket is healthy (early-returns after a status check).
|
||||
from services.fetchers.aishub_fallback import (
|
||||
aishub_poll_interval_minutes,
|
||||
fetch_aishub_vessels,
|
||||
)
|
||||
_aishub_interval = aishub_poll_interval_minutes()
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_aishub_vessels, "fetch_aishub_vessels"),
|
||||
"interval",
|
||||
minutes=_aishub_interval,
|
||||
id="aishub_fallback",
|
||||
max_instances=1,
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
|
||||
# days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
|
||||
# and broken upstream). Airline schedules change on a quarterly cycle,
|
||||
@@ -810,16 +950,71 @@ def start_scheduler():
|
||||
)
|
||||
|
||||
# GDELT — every 30 minutes (downloads 32 ZIP files per call, avoid rate limits)
|
||||
def _fetch_gdelt_with_gt():
|
||||
fetch_gdelt()
|
||||
try:
|
||||
from analytics.integration import maybe_refresh_gt_analytics
|
||||
|
||||
maybe_refresh_gt_analytics()
|
||||
except Exception as exc:
|
||||
logger.error("GT analytics refresh after gdelt failed: %s", exc)
|
||||
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_gdelt, "fetch_gdelt"),
|
||||
lambda: _run_task_with_health_on_executor(_SLOW_EXECUTOR, _fetch_gdelt_with_gt, "fetch_gdelt"),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="gdelt",
|
||||
max_instances=1,
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# GT analytics — Louvain herding/coordination clusters (feature-flagged).
|
||||
def _recompute_gt_clusters():
|
||||
try:
|
||||
from analytics.integration import recompute_gt_herding_clusters
|
||||
|
||||
recompute_gt_herding_clusters()
|
||||
except Exception as exc:
|
||||
logger.error("GT Louvain recompute failed: %s", exc)
|
||||
|
||||
def _freeze_gt_weekly_snapshot():
|
||||
try:
|
||||
from analytics.integration import maybe_freeze_gt_weekly_snapshot
|
||||
|
||||
maybe_freeze_gt_weekly_snapshot()
|
||||
except Exception as exc:
|
||||
logger.error("GT rolling weekly freeze failed: %s", exc)
|
||||
|
||||
try:
|
||||
from analytics.settings import get_gt_settings, gt_engine_operational
|
||||
|
||||
_gt_settings = get_gt_settings()
|
||||
if gt_engine_operational():
|
||||
_scheduler.add_job(
|
||||
_recompute_gt_clusters,
|
||||
"interval",
|
||||
minutes=_gt_settings.louvain_interval_minutes,
|
||||
id="gt_analytics_louvain",
|
||||
max_instances=1,
|
||||
misfire_grace_time=300,
|
||||
next_run_time=datetime.utcnow() + timedelta(minutes=3),
|
||||
)
|
||||
_scheduler.add_job(
|
||||
_freeze_gt_weekly_snapshot,
|
||||
"cron",
|
||||
day_of_week="mon",
|
||||
hour=0,
|
||||
minute=5,
|
||||
id="gt_rolling_weekly_freeze",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("GT Louvain scheduler not registered: %s", exc)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(update_liveuamap, "update_liveuamap"),
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, update_liveuamap, "update_liveuamap"
|
||||
),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="liveuamap",
|
||||
@@ -829,39 +1024,9 @@ def start_scheduler():
|
||||
|
||||
# CCTV pipeline refresh — runs all ingestors, then refreshes in-memory data.
|
||||
# Delay the first run slightly so startup serves cached/DB-backed data first.
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor,
|
||||
LTASingaporeIngestor,
|
||||
AustinTXIngestor,
|
||||
NYCDOTIngestor,
|
||||
CaltransIngestor,
|
||||
ColoradoDOTIngestor,
|
||||
WSDOTIngestor,
|
||||
GeorgiaDOTIngestor,
|
||||
IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor,
|
||||
WindyWebcamsIngestor,
|
||||
DGTNationalIngestor,
|
||||
MadridCityIngestor,
|
||||
OSMTrafficCameraIngestor,
|
||||
)
|
||||
from services.cctv_pipeline import scheduled_cctv_ingestors
|
||||
|
||||
_cctv_ingestors = [
|
||||
(TFLJamCamIngestor(), "cctv_tfl"),
|
||||
(LTASingaporeIngestor(), "cctv_lta"),
|
||||
(AustinTXIngestor(), "cctv_atx"),
|
||||
(NYCDOTIngestor(), "cctv_nyc"),
|
||||
(CaltransIngestor(), "cctv_caltrans"),
|
||||
(ColoradoDOTIngestor(), "cctv_codot"),
|
||||
(WSDOTIngestor(), "cctv_wsdot"),
|
||||
(GeorgiaDOTIngestor(), "cctv_gdot"),
|
||||
(IllinoisDOTIngestor(), "cctv_idot"),
|
||||
(MichiganDOTIngestor(), "cctv_mdot"),
|
||||
(WindyWebcamsIngestor(), "cctv_windy"),
|
||||
(DGTNationalIngestor(), "cctv_dgt"),
|
||||
(MadridCityIngestor(), "cctv_madrid"),
|
||||
(OSMTrafficCameraIngestor(), "cctv_osm"),
|
||||
]
|
||||
_cctv_ingestors = scheduled_cctv_ingestors()
|
||||
|
||||
def _run_cctv_ingest_cycle():
|
||||
from services.fetchers._store import is_any_active
|
||||
@@ -880,7 +1045,9 @@ def start_scheduler():
|
||||
logger.warning(f"CCTV post-ingest refresh failed: {e}")
|
||||
|
||||
_scheduler.add_job(
|
||||
_run_cctv_ingest_cycle,
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, _run_cctv_ingest_cycle, "cctv_ingest_cycle"
|
||||
),
|
||||
"interval",
|
||||
minutes=10,
|
||||
id="cctv_ingest",
|
||||
@@ -950,6 +1117,16 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# Sentinel-2 road corridor freight trends — daily (opt-in, heavy CDSE usage)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_road_corridor_trends, "fetch_road_corridor_trends"),
|
||||
"interval",
|
||||
hours=24,
|
||||
id="road_corridor_trends",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
|
||||
# FIMI disinformation index — every 12 hours (weekly editorial feed)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_fimi, "fetch_fimi"),
|
||||
@@ -960,16 +1137,19 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# UAP sightings (NUFORC) — daily at 12:00 UTC
|
||||
# UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window;
|
||||
# each self-hosted install pulls live nuforc.org so operators see current
|
||||
# reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d.
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(
|
||||
lambda: fetch_uap_sightings(force_refresh=True),
|
||||
"fetch_uap_sightings",
|
||||
),
|
||||
"cron",
|
||||
day_of_week="mon",
|
||||
hour=12,
|
||||
minute=0,
|
||||
id="uap_sightings_daily",
|
||||
id="uap_sightings_weekly",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
@@ -1094,7 +1274,10 @@ def start_scheduler():
|
||||
def stop_scheduler():
|
||||
if _scheduler:
|
||||
_scheduler.shutdown(wait=False)
|
||||
_SLOW_EXECUTOR.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
|
||||
def get_latest_data():
|
||||
return get_latest_data_subset(*latest_data.keys())
|
||||
from services.fetchers._store import get_latest_data_deepcopy_snapshot
|
||||
|
||||
return get_latest_data_deepcopy_snapshot()
|
||||
|
||||
@@ -46,6 +46,7 @@ _CRITICAL_WARN = {
|
||||
|
||||
_OPTIONAL = {
|
||||
"AIS_API_KEY": "AIS vessel streaming (ships layer will be empty without it)",
|
||||
"GFW_API_TOKEN": "Global Fishing Watch fishing-vessel activity (fishing_activity layer)",
|
||||
"LTA_ACCOUNT_KEY": "Singapore LTA traffic cameras (CCTV layer)",
|
||||
"PUBLIC_API_KEY": "Optional client auth for public endpoints (recommended for exposed deployments)",
|
||||
}
|
||||
|
||||
@@ -16,8 +16,15 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _feed_ingester_user_agent() -> str:
    """Return the outbound User-Agent string for the ``feed-ingester`` component."""
    # Round 7a: per-install attribution for operator-curated feed URLs.
    user_agent = outbound_user_agent("feed-ingester")
    return user_agent
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -157,7 +164,7 @@ def _fetch_layer_feed(layer: dict[str, Any]) -> None:
|
||||
resp = requests.get(
|
||||
feed_url,
|
||||
timeout=_FETCH_TIMEOUT,
|
||||
headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
|
||||
headers={"User-Agent": _feed_ingester_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -69,6 +69,12 @@ class DashboardData(TypedDict, total=False):
|
||||
sar_scenes: List[Dict[str, Any]]
|
||||
sar_anomalies: List[Dict[str, Any]]
|
||||
sar_aoi_coverage: List[Dict[str, Any]]
|
||||
road_corridor_trends: Dict[str, Any]
|
||||
malware_threats: Dict[str, Any]
|
||||
cyber_threats: Dict[str, Any]
|
||||
scm_suppliers: Dict[str, Any]
|
||||
telegram_osint: Dict[str, Any]
|
||||
gt_risk: Dict[str, Any]
|
||||
|
||||
|
||||
# In-memory store
|
||||
@@ -119,6 +125,18 @@ latest_data: DashboardData = {
|
||||
"sar_scenes": [],
|
||||
"sar_anomalies": [],
|
||||
"sar_aoi_coverage": [],
|
||||
"road_corridor_trends": {"updated_at": None, "corridors": []},
|
||||
"malware_threats": {"threats": [], "total": 0, "timestamp": None},
|
||||
"cyber_threats": {"threats": [], "stats": {}},
|
||||
"scm_suppliers": {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": {"posts": [], "total": 0, "geolocated": 0, "timestamp": None},
|
||||
"gt_risk": {
|
||||
"enabled": False,
|
||||
"heatmap": {"type": "FeatureCollection", "features": []},
|
||||
"clusters": [],
|
||||
"processed": 0,
|
||||
"timestamp": None,
|
||||
},
|
||||
}
|
||||
|
||||
# Per-source freshness timestamps
|
||||
@@ -230,27 +248,52 @@ _active_layers_version: int = 0
|
||||
def bump_active_layers_version() -> None:
|
||||
"""Increment the active-layer version when frontend toggles change response shape."""
|
||||
global _active_layers_version
|
||||
_active_layers_version += 1
|
||||
with _data_lock:
|
||||
_active_layers_version += 1
|
||||
|
||||
|
||||
def get_active_layers_version() -> int:
|
||||
"""Return the current active-layer version (for ETag generation)."""
|
||||
return _active_layers_version
|
||||
with _data_lock:
|
||||
return _active_layers_version
|
||||
|
||||
|
||||
def get_latest_data_subset(*keys: str) -> DashboardData:
|
||||
"""Return a deep snapshot of only the requested top-level keys.
|
||||
|
||||
This avoids cloning the entire dashboard store for endpoints that only need
|
||||
a small tier-specific subset. Deep copy ensures callers cannot mutate
|
||||
nested structures (e.g. individual flight dicts) and affect the live store.
|
||||
Grabs references under the lock, then deep-copies outside it so fetcher
|
||||
writers are not blocked for the duration of a large clone (#375).
|
||||
"""
|
||||
with _data_lock:
|
||||
snap: DashboardData = {}
|
||||
for key in keys:
|
||||
value = latest_data.get(key)
|
||||
snap[key] = copy.deepcopy(value)
|
||||
return snap
|
||||
items = [(key, latest_data.get(key)) for key in keys]
|
||||
snap: DashboardData = {}
|
||||
for key, value in items:
|
||||
snap[key] = copy.deepcopy(value)
|
||||
return snap
|
||||
|
||||
|
||||
def get_latest_data_deepcopy_snapshot() -> DashboardData:
    """Return a deep copy of the whole dashboard store.

    Used for /api/health and the legacy /api/live-data path.

    Only the ``(key, value)`` references are captured under ``_data_lock``;
    the deep copy itself happens outside the lock so a large clone cannot
    block fetcher writers (#375).

    The store contract is replace-don't-mutate.  A writer that nevertheless
    mutates a nested object in place can race the copy and raise
    ``RuntimeError: dictionary changed size during iteration``.  Because that
    window is tiny, the snapshot is retried (up to four attempts in total)
    and the error is re-raised only if the last attempt also fails.  This is
    defense-in-depth on top of fixing the offending writers, not a
    substitute for it.

    Raises:
        RuntimeError: if every attempt raced a concurrent in-place mutation.
    """
    max_attempts = 4
    for attempt_no in range(1, max_attempts + 1):
        # Hold the lock just long enough to grab references.
        with _data_lock:
            pairs = list(latest_data.items())
        try:
            snapshot = {}
            for key, value in pairs:
                snapshot[key] = copy.deepcopy(value)
            return snapshot
        except RuntimeError:
            # Out of retries: surface the error to the caller.
            if attempt_no == max_attempts:
                raise
|
||||
|
||||
|
||||
def get_latest_data_subset_refs(*keys: str) -> DashboardData:
|
||||
@@ -320,6 +363,13 @@ active_layers: dict[str, bool] = {
|
||||
"ai_intel": True,
|
||||
"crowdthreat": False,
|
||||
"sar": True,
|
||||
"road_corridor_trends": False,
|
||||
"malware_c2": False,
|
||||
"submarine_cables": False,
|
||||
"scm_suppliers": False,
|
||||
"cyber_threats": False,
|
||||
"telegram_osint": True,
|
||||
"gt_risk": False,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -21,6 +21,13 @@ from typing import Any
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _aircraft_db_user_agent() -> str:
    """Build the User-Agent header value for aircraft-database requests.

    Round 7a: ``outbound_user_agent`` is imported lazily (at call time) so the
    per-install operator handle is included.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("aircraft-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUCKET_LIST_URL = (
|
||||
@@ -31,8 +38,6 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_LIST_TIMEOUT_S = 30
|
||||
_DOWNLOAD_TIMEOUT_S = 600
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_aircraft_by_hex: dict[str, dict[str, str]] = {}
|
||||
_last_refresh = 0.0
|
||||
@@ -44,7 +49,7 @@ def _latest_snapshot_key() -> str:
|
||||
response = requests.get(
|
||||
_BUCKET_LIST_URL,
|
||||
timeout=_LIST_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.text)
|
||||
@@ -71,7 +76,7 @@ def _stream_csv_index(url: str) -> dict[str, dict[str, str]]:
|
||||
url,
|
||||
timeout=_DOWNLOAD_TIMEOUT_S,
|
||||
stream=True,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
line_iter = (
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
"""AISHub REST fallback for ship tracking when AISStream is unreachable.
|
||||
|
||||
Background
|
||||
----------
|
||||
On 2026-05-23 ``stream.aisstream.io`` (the primary live AIS WebSocket feed)
|
||||
went fully offline. Backend's only ship signal vanished. This module polls
|
||||
``data.aishub.net``'s free REST API on a slow cadence (default 20 min) when
|
||||
the WebSocket primary is disconnected, so the ships layer doesn't go fully
|
||||
dark during upstream outages.
|
||||
|
||||
Why 20 minutes
|
||||
--------------
|
||||
AISHub's free tier is rate-limited and explicitly asks consumers to be
|
||||
courteous. 20 minutes is well inside their limits, gives ships time to
|
||||
move enough to look "alive" on the map, and won't drain their service.
|
||||
Configurable via the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (clamped to
|
||||
[1, 360]).
|
||||
|
||||
Why slow vs primary
|
||||
-------------------
|
||||
This is degraded mode, not a replacement. A ship at 20 knots moves about
|
||||
6 nautical miles in 20 minutes — visible on the map but coarser than the
|
||||
real-time WebSocket signal. When AISStream comes back online, the
|
||||
WebSocket data will overwrite these records via the same ``_vessels``
|
||||
dict and ``source`` will flip from ``"aishub"`` back to upstream-live.
|
||||
|
||||
Opt-in
|
||||
------
|
||||
Operator must set ``AISHUB_USERNAME`` (free registration at
|
||||
https://www.aishub.net/api). If unset, this fetcher is a no-op.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
AISHUB_URL = "https://data.aishub.net/ws.php"
|
||||
|
||||
|
||||
def aishub_username() -> str:
    """Return the ``AISHUB_USERNAME`` environment value, stripped (``""`` if unset)."""
    raw_value = os.environ.get("AISHUB_USERNAME", "")
    return str(raw_value).strip()
|
||||
|
||||
|
||||
def aishub_fallback_enabled() -> bool:
    """Report whether the AISHub fallback has been opted into.

    The opt-in is simply a non-empty ``AISHUB_USERNAME`` (set by an operator
    who has registered with AISHub); nothing else is checked here.
    """
    return aishub_username() != ""
|
||||
|
||||
|
||||
def aishub_poll_interval_minutes() -> int:
    """Return the AISHub poll interval in minutes.

    Reads ``AISHUB_POLL_INTERVAL_MINUTES`` (default 20).  An unparseable
    value falls back to 20, and the result is clamped to [1, 360] so a
    hostile or misconfigured env var can neither hammer the upstream nor
    silence the fallback for a day.
    """
    default_minutes = 20
    configured = os.environ.get("AISHUB_POLL_INTERVAL_MINUTES", "20")
    try:
        minutes = int(str(configured).strip())
    except (TypeError, ValueError):
        minutes = default_minutes
    if minutes < 1:
        return 1
    if minutes > 360:
        return 360
    return minutes
|
||||
|
||||
|
||||
def _should_run_fallback() -> bool:
    """Decide whether the AISHub poll should run on this tick.

    The fallback only runs when the primary WebSocket is not connected, so
    it does not stomp over fresher live data while AISStream is healthy.

    Returns False if:
      * AISHub isn't configured (no username), or
      * ``ais_proxy_status()`` reports ``connected`` as ``True``.

    Returns True in every other case, including when ``services.ais_stream``
    cannot be imported or the status call raises.
    """
    if not aishub_fallback_enabled():
        return False
    try:
        from services.ais_stream import ais_proxy_status

        proxy_state = ais_proxy_status() or {}
    except Exception:
        # ais_stream not importable? still try AISHub.
        return True
    # A connected primary means fresher data is already flowing, so the
    # fallback stays out of the way.
    primary_connected = proxy_state.get("connected") is True
    return not primary_connected
|
||||
|
||||
|
||||
def _parse_aishub_response(payload: str) -> list[dict]:
    """Turn the raw AISHub JSON text into a list of vessel record dicts.

    A successful response is a two-element array: a header object followed
    by the list of records::

        [
          {"ERROR": false, "USERNAME": "...", "FORMAT": "1", "RECORDS": N},
          [{"MMSI": ..., "LATITUDE": ..., "LONGITUDE": ..., ...}, ...]
        ]

    An error response carries only the header::

        [{"ERROR": true, "ERROR_MESSAGE": "..."}]

    Returns ``[]`` for an empty/blank payload (e.g. silent rate-limit drop),
    non-JSON text, an upstream error, or any unexpected shape.  Non-dict
    entries inside the record list are dropped.
    """
    if not (payload and payload.strip()):
        return []
    try:
        decoded = json.loads(payload)
    except json.JSONDecodeError as exc:
        logger.warning("AISHub: response is not JSON: %s", exc)
        return []
    if not (isinstance(decoded, list) and decoded):
        return []

    first = decoded[0]
    meta = first if isinstance(first, dict) else {}
    if meta.get("ERROR") is True:
        logger.warning(
            "AISHub: upstream error: %s",
            meta.get("ERROR_MESSAGE", "<unspecified>"),
        )
        return []

    # The record list is the second element; anything else is unusable.
    records = decoded[1] if len(decoded) >= 2 else None
    if not isinstance(records, list):
        return []
    return [entry for entry in records if isinstance(entry, dict)]
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
    """Return ``int(value)``, or *default* when the conversion fails."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), which json.loads can yield.
        return default


def _coerce_float(value: Any, default: float) -> float:
    """Return ``float(value)`` when it is a finite number, else *default*."""
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # NaN fails every comparison and +/-inf fails the bounds, so only finite
    # values pass.
    return number if float("-inf") < number < float("inf") else default


def _normalize_record(row: dict) -> dict | None:
    """Map an AISHub vessel record to our internal vessel schema.

    Args:
        row: One record dict from the AISHub response (keys such as
            ``MMSI``, ``LATITUDE``, ``LONGITUDE``, ``SOG``, ``COG``,
            ``HEADING``, ``TYPE``, ``NAME``, ``CALLSIGN``, ``DEST``, ``IMO``).

    Returns:
        A dict with keys ``mmsi``, ``lat``, ``lng``, ``sog``, ``cog``,
        ``heading``, ``name``, ``callsign``, ``destination``, ``imo`` and
        ``ais_type_code``; or ``None`` when the record can't be used (no
        usable MMSI, missing/non-numeric/NaN position, or a position outside
        the valid range, which includes the 91/181 "not available"
        sentinels).

    This function never raises on malformed field values: optional fields
    fall back to neutral defaults instead.
    """
    mmsi = _coerce_int(row.get("MMSI") or 0, 0)
    if mmsi <= 0:
        return None

    # Unparseable coordinates fall back to the AIS "no position available"
    # sentinels (91/181), which the range check below rejects.
    lat = _coerce_float(row.get("LATITUDE"), 91.0)
    lng = _coerce_float(row.get("LONGITUDE"), 181.0)
    if not (-90.0 <= lat <= 90.0 and -180.0 <= lng <= 180.0):
        return None

    # SOG raw 102.3 is "speed not available"; sanitize to 0.
    sog_raw = _coerce_float(row.get("SOG") or 0, 0.0)
    sog = sog_raw if sog_raw < 102.2 else 0.0

    cog = _coerce_float(row.get("COG") or 0, 0.0)

    # AIS heading sentinel 511 = "not available" — fall back to COG.  Only a
    # missing/empty value maps to the sentinel: 0 is a valid heading (north)
    # and must not be discarded by a truthiness test.
    heading_value = row.get("HEADING")
    if heading_value is None or heading_value == "":
        heading_raw = 511
    else:
        heading_raw = _coerce_int(heading_value, 511)
    heading = heading_raw if heading_raw != 511 else cog

    ais_type = _coerce_int(row.get("TYPE") or 0, 0)

    return {
        "mmsi": mmsi,
        "lat": lat,
        "lng": lng,
        "sog": sog,
        "cog": cog,
        "heading": heading,
        "name": str(row.get("NAME") or "").strip() or "UNKNOWN",
        "callsign": str(row.get("CALLSIGN") or "").strip(),
        # Drop "@" padding first so whitespace next to it is stripped too.
        "destination": str(row.get("DEST") or "").replace("@", "").strip(),
        # A non-numeric IMO must not abort the whole record.
        "imo": _coerce_int(row.get("IMO") or 0, 0),
        "ais_type_code": ais_type,
    }
|
||||
|
||||
|
||||
def fetch_aishub_vessels() -> int:
    """Poll AISHub and merge vessels into the shared ``_vessels`` store.

    Intended to be called from the scheduler job that wraps this fetcher
    (registered in ``data_fetcher.py`` with the interval from
    ``aishub_poll_interval_minutes()``).

    Returns:
        The number of vessels updated; 0 on skip (fallback not needed or not
        configured), fetch error, non-200 response, or no usable data.
    """
    if not _should_run_fallback():
        logger.debug("AISHub fallback skipped: primary connected or not configured")
        return 0

    # Function-scope import keeps this block self-contained.  The username is
    # operator-supplied text, so it is URL-encoded rather than interpolated
    # raw into the query string.
    from urllib.parse import urlencode

    query = urlencode(
        {
            "username": aishub_username(),
            "format": 1,
            "output": "json",
            "compress": 0,
        }
    )
    url = f"{AISHUB_URL}?{query}"

    try:
        response = fetch_with_curl(url, timeout=30)
    except Exception as e:
        logger.warning("AISHub fetch failed: %s", e)
        return 0

    if not response or response.status_code != 200:
        logger.warning(
            "AISHub HTTP %s",
            getattr(response, "status_code", "None"),
        )
        return 0

    rows = _parse_aishub_response(getattr(response, "text", "") or "")
    if not rows:
        return 0

    # Normalize before taking the vessel lock: the parsing needs no shared
    # state, and one malformed row must not abort the merge of the others.
    normalized_rows = []
    for row in rows:
        try:
            normalized = _normalize_record(row)
        except Exception:
            logger.debug("AISHub: skipping malformed vessel record", exc_info=True)
            continue
        if normalized is not None:
            normalized_rows.append(normalized)
    if not normalized_rows:
        return 0

    # Inline imports to avoid a circular dependency at module load time
    # (ais_stream imports lots of things and is loaded by main.py).
    from services.ais_stream import (
        _vessels,
        _vessels_lock,
        _record_vessel_trail_locked,
        classify_vessel,
        get_country_from_mmsi,
    )

    now = time.time()
    count = 0
    with _vessels_lock:
        for normalized in normalized_rows:
            mmsi = normalized["mmsi"]
            vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
            # Don't overwrite fresher live data: if the WebSocket pushed an
            # update for this MMSI more recently than now-1s (race during
            # the brief reconnection window) keep the live one.
            last = float(vessel.get("_updated") or 0)
            if last > now - 1:
                continue
            vessel.update(
                {
                    "lat": normalized["lat"],
                    "lng": normalized["lng"],
                    "sog": normalized["sog"],
                    "cog": normalized["cog"],
                    "heading": normalized["heading"],
                    "_updated": now,
                    "source": "aishub",
                }
            )
            # Static fields only overwrite existing values when AISHub
            # actually supplied something.
            if normalized["name"] and normalized["name"] != "UNKNOWN":
                vessel["name"] = normalized["name"]
            if normalized["callsign"]:
                vessel["callsign"] = normalized["callsign"]
            if normalized["destination"]:
                vessel["destination"] = normalized["destination"]
            if normalized["imo"]:
                vessel["imo"] = normalized["imo"]
            if normalized["ais_type_code"]:
                vessel["ais_type_code"] = normalized["ais_type_code"]
                vessel["type"] = classify_vessel(normalized["ais_type_code"], mmsi)
            if not vessel.get("country"):
                vessel["country"] = get_country_from_mmsi(mmsi)
            _record_vessel_trail_locked(
                mmsi,
                normalized["lat"],
                normalized["lng"],
                normalized["sog"],
                now,
            )
            count += 1

    if count:
        logger.info(
            "AISHub fallback: merged %d vessels (poll interval %d min)",
            count,
            aishub_poll_interval_minutes(),
        )
    return count
|
||||
@@ -0,0 +1,62 @@
|
||||
"""CISA KEV + cyber threat stats (Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fetch_cyber_threats() -> dict[str, Any]:
    """Fetch the CISA KEV catalog and publish recently added entries.

    When the ``cyber_threats`` layer is not active, the previously stored
    payload (or an empty skeleton) is returned and no request is made.
    Otherwise the KEV feed is downloaded, entries added within the last
    30 days are selected (newest first, at most 10), and the result is
    written to ``latest_data["cyber_threats"]`` and marked fresh.

    Returns:
        A dict with ``threats`` (list of per-CVE summaries), ``stats``
        (``cisa_total`` when the feed loaded, plus ``active_cves`` and
        ``threat_level``) and an ISO-8601 UTC ``timestamp``.
    """
    if not is_any_active("cyber_threats"):
        return latest_data.get("cyber_threats") or {"threats": [], "stats": {}}

    results: dict[str, Any] = {
        "threats": [],
        "stats": {},
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    try:
        resp = fetch_with_curl(
            "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json",
            timeout=15,
        )
        if resp.status_code == 200:
            data = resp.json()
            vulns = data.get("vulnerabilities") or []
            results["stats"]["cisa_total"] = len(vulns)
            now = datetime.now(timezone.utc)
            dated: list[tuple[datetime, Any]] = []
            for v in vulns:
                try:
                    added = datetime.fromisoformat(
                        str(v.get("dateAdded") or "").replace("Z", "+00:00")
                    )
                except (AttributeError, TypeError, ValueError):
                    # Malformed row or unparseable date: skip just this entry.
                    continue
                # Date-only values parse as naive datetimes. Subtracting a
                # naive value from the aware ``now`` raises TypeError, which
                # previously caused every entry to be skipped. Treat naive
                # values as UTC instead.
                if added.tzinfo is None:
                    added = added.replace(tzinfo=timezone.utc)
                if (now - added).total_seconds() / 86400 <= 30:
                    dated.append((added, v))
            # Newest first so the 10-entry cap does not depend on feed order;
            # the sort is stable, so same-day entries keep their feed order.
            dated.sort(key=lambda pair: pair[0], reverse=True)
            results["threats"] = [
                {
                    "id": v.get("cveID"),
                    "name": v.get("vulnerabilityName"),
                    "vendor": v.get("vendorProject"),
                    "product": v.get("product"),
                    "severity": "CRITICAL",
                    "date": v.get("dateAdded"),
                    "due": v.get("dueDate"),
                    "source": "CISA KEV",
                }
                for _, v in dated[:10]
            ]
    except Exception as exc:
        # Best-effort fetcher: a failed download still publishes an empty payload.
        logger.warning("CISA KEV fetch failed: %s", exc)

    count = len(results["threats"])
    results["stats"]["active_cves"] = count
    results["stats"]["threat_level"] = "CRITICAL" if count >= 8 else "HIGH" if count >= 4 else "ELEVATED"

    with _data_lock:
        latest_data["cyber_threats"] = results
    _mark_fresh("cyber_threats")
    return results
|
||||
@@ -15,7 +15,11 @@ import time
|
||||
import heapq
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from services.network_utils import external_curl_fallback_enabled, fetch_with_curl
|
||||
from services.network_utils import (
|
||||
external_curl_fallback_enabled,
|
||||
fetch_with_curl,
|
||||
outbound_user_agent,
|
||||
)
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.nuforc_enrichment import enrich_sighting
|
||||
from services.fetchers.retry import with_retry
|
||||
@@ -279,13 +283,13 @@ def fetch_weather_alerts():
|
||||
return
|
||||
alerts = []
|
||||
try:
|
||||
# weather.gov requires a User-Agent per their API policy, but it
|
||||
# need not identify the operator. Use a project-generic string and
|
||||
# let the user override via SHADOWBROKER_USER_AGENT if needed.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
# weather.gov requires a User-Agent per their API policy. Round 7a:
|
||||
# send the per-install operator handle so they can rate-limit per
|
||||
# operator instead of treating "Shadowbroker" as one entity.
|
||||
from services.network_utils import outbound_user_agent
|
||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||
headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"User-Agent": outbound_user_agent("weather-gov"),
|
||||
"Accept": "application/geo+json",
|
||||
}
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
@@ -688,7 +692,8 @@ _NUFORC_TILESET = "nuforc.cmm18aqea06bu1mmselhpnano-0ce5v"
|
||||
_NUFORC_TOKEN = os.environ.get("NUFORC_MAPBOX_TOKEN", "").strip()
|
||||
_NUFORC_RADIUS_M = 200_000 # 200 km query radius
|
||||
_NUFORC_LIMIT = 50 # max features per tilequery call
|
||||
_NUFORC_RECENT_DAYS = int(os.environ.get("NUFORC_RECENT_DAYS", "60"))
|
||||
# Rolling window shown on the map (~2 calendar months). Override via NUFORC_RECENT_DAYS.
|
||||
_NUFORC_RECENT_DAYS = max(1, int(os.environ.get("NUFORC_RECENT_DAYS", "60")))
|
||||
_NUFORC_HF_FALLBACK_LIMIT = max(25, int(os.environ.get("NUFORC_HF_FALLBACK_LIMIT", "250")))
|
||||
_NUFORC_HF_GEOCODE_LIMIT = max(25, int(os.environ.get("NUFORC_HF_GEOCODE_LIMIT", "150")))
|
||||
_NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1")))
|
||||
@@ -696,6 +701,12 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1
|
||||
# practice, so a 0.3s spacing keeps us well under any soft throttle while
|
||||
# still rebuilding a full 12-month window in ~10 minutes.
|
||||
_NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3"))
|
||||
# Disk cache TTL — match the weekly scheduler so restarts between fetches still
|
||||
# serve the same rolling 60-day snapshot without hammering nuforc.org daily.
|
||||
_NUFORC_CACHE_TTL_S = max(
|
||||
3600,
|
||||
int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600,
|
||||
)
|
||||
_NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
||||
_NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json"
|
||||
_NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json"
|
||||
@@ -713,7 +724,12 @@ _NUFORC_LIVE_NONCE_RE = re.compile(
|
||||
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
||||
)
|
||||
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
||||
_NUFORC_LIVE_USER_AGENT = "Mozilla/5.0 (ShadowBroker-OSINT NUFORC-fetcher)"
|
||||
# Round 7a: NUFORC's site is sensitive to non-browser UAs but we send a
|
||||
# per-install operator handle prefixed by Mozilla/5.0 so we're identifiable
|
||||
# without being aggregately blocked. Operators who want stricter privacy
|
||||
# can override the entire UA via SHADOWBROKER_USER_AGENT.
|
||||
def _nuforc_live_user_agent() -> str:
    """Return the User-Agent string used for live NUFORC requests.

    The value from ``outbound_user_agent('nuforc-live')`` is wrapped in a
    ``Mozilla/5.0 (...)`` prefix.
    """
    handle = outbound_user_agent('nuforc-live')
    return f"Mozilla/5.0 ({handle})"
|
||||
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
||||
|
||||
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
||||
@@ -757,6 +773,35 @@ def _fetch_nuforc_tilequery(lng: float, lat: float) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def _uap_cutoff_date_str() -> str:
    """Return the oldest date (``YYYY-MM-DD``, UTC) still inside the window.

    The window is ``_NUFORC_RECENT_DAYS`` days counted back from the current
    UTC time.
    """
    # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware UTC
    # "now" formats to the same date string. Imported locally because the
    # module-level import only brings in ``datetime`` and ``timedelta``.
    from datetime import timezone

    window_start = datetime.now(timezone.utc) - timedelta(days=_NUFORC_RECENT_DAYS)
    return window_start.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _uap_sighting_date_str(sighting: dict) -> str | None:
    """Return the sighting's date as ``YYYY-MM-DD``, or ``None`` if unknown.

    Reads ``date_time`` (falling back to ``occurred``), tries the shared
    ``_parse_date`` helper first, and otherwise accepts a value that already
    starts with a ``YYYY-MM-DD``-shaped prefix.
    """
    from services.fetchers.nuforc_enrichment import _parse_date

    text = str(sighting.get("date_time") or sighting.get("occurred") or "").strip()
    if text:
        normalized = _parse_date(text)
        if normalized:
            return normalized
        # Fallback: dashes at positions 4 and 7 mean the text already has an ISO date prefix.
        has_iso_prefix = len(text) >= 10 and text[4] == "-" and text[7] == "-"
        if has_iso_prefix:
            return text[:10]
    return None
|
||||
|
||||
|
||||
def _filter_uap_sightings_recent(sightings: list[dict]) -> list[dict]:
    """Keep only sightings dated on or after the rolling-window cutoff.

    Rows whose date cannot be determined compare as the empty string and are
    therefore dropped.
    """
    threshold = _uap_cutoff_date_str()
    kept: list[dict] = []
    for row in sightings:
        row_date = _uap_sighting_date_str(row) or ""
        # Both sides are YYYY-MM-DD strings, so string order is date order.
        if row_date >= threshold:
            kept.append(row)
    return kept
|
||||
|
||||
|
||||
def _parse_nuforc_tile_date(value: str) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
@@ -793,19 +838,41 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] |
|
||||
built_dt = datetime.fromisoformat(built) if built else None
|
||||
if built_dt is None:
|
||||
return None
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > 86400:
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S:
|
||||
return None
|
||||
if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS:
|
||||
logger.info(
|
||||
"UAP sightings: cache cutoff_days mismatch (%s != %s); rebuilding",
|
||||
raw.get("cutoff_days"),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return None
|
||||
sightings = raw.get("sightings")
|
||||
if isinstance(sightings, list):
|
||||
if len(sightings) <= 0:
|
||||
logger.info("UAP sightings: cache is fresh but empty; rebuilding")
|
||||
return None
|
||||
filtered = _filter_uap_sightings_recent(sightings)
|
||||
if not filtered:
|
||||
logger.warning(
|
||||
"UAP sightings: cache had %d rows but none within last %d days; rebuilding",
|
||||
len(sightings),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return None
|
||||
if len(filtered) < len(sightings):
|
||||
logger.info(
|
||||
"UAP sightings: dropped %d stale cached rows outside %d-day window",
|
||||
len(sightings) - len(filtered),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
logger.info(
|
||||
"UAP sightings: loaded %d cached reports from %s",
|
||||
len(sightings),
|
||||
"UAP sightings: loaded %d cached reports from %s (within %d-day window)",
|
||||
len(filtered),
|
||||
built,
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return sightings
|
||||
return filtered
|
||||
except Exception as e:
|
||||
logger.warning("UAP sightings: cache load error: %s", e)
|
||||
return None
|
||||
@@ -819,6 +886,7 @@ def _save_nuforc_sightings_cache(sightings: list[dict]) -> None:
|
||||
_NUFORC_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
payload = {
|
||||
"built": datetime.utcnow().isoformat(),
|
||||
"cutoff_days": _NUFORC_RECENT_DAYS,
|
||||
"count": len(sightings),
|
||||
"sightings": sightings,
|
||||
}
|
||||
@@ -957,7 +1025,7 @@ def _photon_lookup(query: str) -> list[float] | None:
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (NUFORC-UAP-layer)",
|
||||
"User-Agent": outbound_user_agent("nuforc-uap-geocode"),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=10,
|
||||
@@ -1026,97 +1094,10 @@ def _nuforc_months_for_window(days: int) -> list[str]:
|
||||
return months
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
"""Pull one month of NUFORC sightings via the live wpDataTables AJAX.
|
||||
|
||||
Returns a list of raw row dicts with the fields we care about:
|
||||
id, occurred (YYYY-MM-DD), posted (YYYY-MM-DD), city, state, country,
|
||||
shape_raw, summary, explanation. Empty list on any failure — caller
|
||||
decides whether a failure is fatal.
|
||||
"""
|
||||
def _parse_nuforc_live_datatables_rows(raw_rows: list) -> list[dict]:
|
||||
"""Parse wpDataTables ``data`` array into normalized row dicts."""
|
||||
from services.fetchers.nuforc_enrichment import _parse_date
|
||||
|
||||
curl_bin = shutil.which("curl") or "curl"
|
||||
index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
|
||||
ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
|
||||
|
||||
if not external_curl_fallback_enabled():
|
||||
logger.warning(
|
||||
"NUFORC live: external curl disabled on Windows for %s; "
|
||||
"set SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=1 to opt in.",
|
||||
yyyymm,
|
||||
)
|
||||
return []
|
||||
|
||||
# Step 1: GET the month index to capture session cookies + fresh nonce.
|
||||
try:
|
||||
index_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
index_url,
|
||||
],
|
||||
capture_output=True, text=True, timeout=60,
|
||||
encoding="utf-8", errors="replace",
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("NUFORC live: index fetch failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
if index_res.returncode != 0 or not index_res.stdout:
|
||||
logger.warning(
|
||||
"NUFORC live: index fetch exit=%s for %s", index_res.returncode, yyyymm,
|
||||
)
|
||||
return []
|
||||
nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.stdout)
|
||||
if not nonce_match:
|
||||
logger.warning("NUFORC live: wdtNonce not found on index page for %s", yyyymm)
|
||||
return []
|
||||
nonce = nonce_match.group(1)
|
||||
|
||||
# Step 2: POST to admin-ajax.php with length=-1 to pull the whole month.
|
||||
post_data = (
|
||||
"draw=1"
|
||||
"&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
|
||||
"&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
|
||||
"&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
|
||||
"&start=0&length=-1"
|
||||
"&search%5Bvalue%5D=&search%5Bregex%5D=false"
|
||||
f"&wdtNonce={nonce}"
|
||||
)
|
||||
try:
|
||||
ajax_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
"-X", "POST",
|
||||
"-H", f"Referer: {index_url}",
|
||||
"-H", "X-Requested-With: XMLHttpRequest",
|
||||
"-H", "Content-Type: application/x-www-form-urlencoded",
|
||||
"--data", post_data,
|
||||
ajax_url,
|
||||
],
|
||||
capture_output=True, text=True, timeout=120,
|
||||
encoding="utf-8", errors="replace",
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("NUFORC live: ajax fetch failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
if ajax_res.returncode != 0 or not ajax_res.stdout:
|
||||
logger.warning(
|
||||
"NUFORC live: ajax fetch exit=%s for %s", ajax_res.returncode, yyyymm,
|
||||
)
|
||||
return []
|
||||
try:
|
||||
payload = json.loads(ajax_res.stdout)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("NUFORC live: ajax JSON decode failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
|
||||
raw_rows = payload.get("data") or []
|
||||
out: list[dict] = []
|
||||
for raw in raw_rows:
|
||||
if not isinstance(raw, list) or len(raw) < 8:
|
||||
@@ -1165,16 +1146,166 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_requests(yyyymm: str) -> list[dict]:
    """Live NUFORC month fetch via requests (Windows-safe when curl is disabled).

    Performs a GET of the month index page to obtain session cookies and the
    ``wdtNonce`` value, then a POST to the AJAX endpoint with ``length=-1``
    to pull the whole month.

    Args:
        yyyymm: Month identifier substituted into the index and AJAX URLs.

    Returns:
        Parsed row dicts from ``_parse_nuforc_live_datatables_rows``, or an
        empty list on any request, HTTP, nonce or JSON failure.
    """
    import requests

    index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
    ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
    headers = {"User-Agent": _nuforc_live_user_agent()}

    # Context manager so pooled connections are released on every exit path.
    with requests.Session() as session:
        session.headers.update(headers)
        try:
            index_res = session.get(index_url, timeout=60)
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): index fetch failed for %s: %s", yyyymm, e)
            return []
        if index_res.status_code != 200 or not index_res.text:
            logger.warning(
                "NUFORC live (requests): index HTTP %s for %s",
                index_res.status_code,
                yyyymm,
            )
            return []
        nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.text)
        if not nonce_match:
            logger.warning("NUFORC live (requests): wdtNonce not found for %s", yyyymm)
            return []
        nonce = nonce_match.group(1)
        post_data = (
            "draw=1"
            "&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
            "&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
            "&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
            "&start=0&length=-1"
            "&search%5Bvalue%5D=&search%5Bregex%5D=false"
            f"&wdtNonce={nonce}"
        )
        try:
            ajax_res = session.post(
                ajax_url,
                data=post_data,
                headers={
                    **headers,
                    "Referer": index_url,
                    "X-Requested-With": "XMLHttpRequest",
                    "Content-Type": "application/x-www-form-urlencoded",
                },
                timeout=120,
            )
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): ajax failed for %s: %s", yyyymm, e)
            return []
        if ajax_res.status_code != 200 or not ajax_res.text:
            logger.warning(
                "NUFORC live (requests): ajax HTTP %s for %s",
                ajax_res.status_code,
                yyyymm,
            )
            return []
        try:
            payload = ajax_res.json()
        except ValueError as e:
            # ValueError is the common base of json.JSONDecodeError and the
            # decode errors requests raises, including the simplejson-backed one.
            logger.warning("NUFORC live (requests): ajax JSON decode failed for %s: %s", yyyymm, e)
            return []

    if not isinstance(payload, dict):
        # Valid JSON that is not an object (e.g. a bare number) has no "data" key to read.
        logger.warning(
            "NUFORC live (requests): unexpected ajax payload type %s for %s",
            type(payload).__name__,
            yyyymm,
        )
        return []
    return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_curl(yyyymm: str, cookie_jar: Path) -> list[dict]:
    """Pull one month of NUFORC sightings via curl + wpDataTables AJAX.

    Args:
        yyyymm: Month identifier substituted into the index and AJAX URLs.
        cookie_jar: File curl reads cookies from and writes them back to, so
            the AJAX POST reuses the session set up by the index GET.

    Returns:
        Parsed row dicts from ``_parse_nuforc_live_datatables_rows``, or an
        empty list on any subprocess, exit-code, nonce or JSON failure.
    """
    curl_bin = shutil.which("curl") or "curl"
    index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
    ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
    # Options shared by both requests: silent, follow redirects, User-Agent,
    # and one cookie jar used for both writing (-c) and reading (-b).
    base_cmd = [
        curl_bin, "-sL",
        "-A", _nuforc_live_user_agent(),
        "-c", str(cookie_jar),
        "-b", str(cookie_jar),
    ]

    # Step 1: GET the month index to capture session cookies + fresh nonce.
    try:
        index_res = subprocess.run(
            [*base_cmd, index_url],
            capture_output=True, text=True, timeout=60,
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("NUFORC live: index fetch failed for %s: %s", yyyymm, e)
        return []
    if index_res.returncode != 0 or not index_res.stdout:
        logger.warning(
            "NUFORC live: index fetch exit=%s for %s", index_res.returncode, yyyymm,
        )
        return []
    nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.stdout)
    if not nonce_match:
        logger.warning("NUFORC live: wdtNonce not found on index page for %s", yyyymm)
        return []
    nonce = nonce_match.group(1)

    # Step 2: POST to admin-ajax.php with length=-1 to pull the whole month.
    post_data = (
        "draw=1"
        "&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
        "&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
        "&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
        "&start=0&length=-1"
        "&search%5Bvalue%5D=&search%5Bregex%5D=false"
        f"&wdtNonce={nonce}"
    )
    try:
        ajax_res = subprocess.run(
            [
                *base_cmd,
                "-X", "POST",
                "-H", f"Referer: {index_url}",
                "-H", "X-Requested-With: XMLHttpRequest",
                "-H", "Content-Type: application/x-www-form-urlencoded",
                "--data", post_data,
                ajax_url,
            ],
            capture_output=True, text=True, timeout=120,
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("NUFORC live: ajax fetch failed for %s: %s", yyyymm, e)
        return []
    if ajax_res.returncode != 0 or not ajax_res.stdout:
        logger.warning(
            "NUFORC live: ajax fetch exit=%s for %s", ajax_res.returncode, yyyymm,
        )
        return []
    try:
        payload = json.loads(ajax_res.stdout)
    except json.JSONDecodeError as e:
        logger.warning("NUFORC live: ajax JSON decode failed for %s: %s", yyyymm, e)
        return []
    if not isinstance(payload, dict):
        # curl is run without --fail, so an error body that is valid JSON but
        # not an object (e.g. a bare number) can reach this point; it has no
        # "data" key to read.
        logger.warning(
            "NUFORC live: unexpected ajax payload type %s for %s",
            type(payload).__name__,
            yyyymm,
        )
        return []

    return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
    """Fetch one month of live NUFORC rows, preferring curl when allowed.

    The curl path is tried only when ``external_curl_fallback_enabled()`` is
    true; if it is skipped or yields no rows, the requests path is used.
    """
    curl_rows = (
        _nuforc_fetch_month_live_curl(yyyymm, cookie_jar)
        if external_curl_fallback_enabled()
        else []
    )
    return curl_rows or _nuforc_fetch_month_live_requests(yyyymm)
|
||||
|
||||
|
||||
def _build_recent_uap_sightings() -> list[dict]:
|
||||
"""Build the rolling 1-year UAP sightings layer from live NUFORC data.
|
||||
"""Build the rolling UAP sightings layer from live NUFORC data.
|
||||
|
||||
Hits nuforc.org's public sub-index once per month in the window, drops
|
||||
anything outside the exact day-precision cutoff, dedupes by sighting id,
|
||||
geocodes city+state via the existing location cache, and returns rows
|
||||
keyed to the same schema the frontend already renders.
|
||||
"""
|
||||
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||
cutoff_str = _uap_cutoff_date_str()
|
||||
months = _nuforc_months_for_window(_NUFORC_RECENT_DAYS)
|
||||
|
||||
try:
|
||||
@@ -1374,10 +1505,21 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
This is a resilience fallback for local/Windows runs where nuforc.org is
|
||||
Cloudflare-gated and the Mapbox token is not configured. It is not as fresh
|
||||
as the live NUFORC AJAX feed, but it keeps the layer visible and cached.
|
||||
|
||||
Date-cutoff guard: the kcimc/NUFORC HF dataset is a static snapshot whose
|
||||
maintainer refreshes it sporadically. Without a cutoff, sorting by
|
||||
occurred-desc and taking the top N rows returns whatever the mirror's
|
||||
newest rows happen to be — which can be years old if the snapshot is
|
||||
stale. We apply the same ``_NUFORC_RECENT_DAYS`` window the live path
|
||||
uses (60 days). If the HF mirror has nothing inside the window we return
|
||||
``[]`` rather than silently serving 3-year-old "newest" rows.
|
||||
"""
|
||||
from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
|
||||
from services.geocode_validate import coord_in_country
|
||||
|
||||
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
|
||||
if not response or response.status_code != 200:
|
||||
@@ -1391,6 +1533,7 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
return []
|
||||
|
||||
candidates: list[dict] = []
|
||||
stale_rows_dropped = 0
|
||||
try:
|
||||
reader = csv.DictReader(io.StringIO(response.text))
|
||||
for row in reader:
|
||||
@@ -1401,6 +1544,9 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
)
|
||||
if not occurred:
|
||||
continue
|
||||
if occurred < cutoff_str:
|
||||
stale_rows_dropped += 1
|
||||
continue
|
||||
raw_location = _normalize_uap_location(
|
||||
row.get("Location", "")
|
||||
or row.get("City", "")
|
||||
@@ -1435,6 +1581,19 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
logger.warning("UAP sightings: HF fallback parse failed: %s", e)
|
||||
return []
|
||||
|
||||
if not candidates:
|
||||
# HF mirror returned rows, but none inside the rolling window. This is
|
||||
# the smoking gun for "the public HF dataset hasn't been refreshed in
|
||||
# years" — log loudly so the operator sees it instead of guessing.
|
||||
logger.error(
|
||||
"UAP sightings: HF fallback yielded 0 rows within last %d days "
|
||||
"(dropped %d stale rows). HF mirror is likely stale; the layer "
|
||||
"will be empty until the live NUFORC path recovers.",
|
||||
_NUFORC_RECENT_DAYS,
|
||||
stale_rows_dropped,
|
||||
)
|
||||
return []
|
||||
|
||||
candidates.sort(key=lambda row: (row["occurred"], row["posted"], row["id"]), reverse=True)
|
||||
candidates = candidates[:_NUFORC_HF_FALLBACK_LIMIT]
|
||||
|
||||
@@ -1493,11 +1652,12 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
"""Fetch last-year UAP sightings from NUFORC.
|
||||
"""Fetch rolling-window UAP sightings from live NUFORC.
|
||||
|
||||
Startup reads the cached daily snapshot when it is still fresh. The daily
|
||||
scheduler forces a rebuild so this layer updates once per day instead of
|
||||
churning continuously.
|
||||
Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS
|
||||
(default 168h / one week). The weekly scheduler forces a rebuild so every
|
||||
install refreshes the same ~60-day layer without daily load on nuforc.org.
|
||||
Operators can also POST /api/refresh (admin) to pull immediately.
|
||||
"""
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
@@ -1506,13 +1666,32 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
|
||||
sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
|
||||
if sightings is None:
|
||||
live_error: Exception | None = None
|
||||
try:
|
||||
sightings = _build_recent_uap_sightings()
|
||||
except Exception as e:
|
||||
live_error = e
|
||||
logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
|
||||
sightings = _build_uap_sightings_from_hf_mirror()
|
||||
if sightings:
|
||||
_save_nuforc_sightings_cache(sightings)
|
||||
elif live_error is not None:
|
||||
# Both paths failed: live raised AND HF fallback returned empty
|
||||
# (either the HF mirror is stale beyond the cutoff or the network
|
||||
# is gone entirely). The previous code silently set the layer to
|
||||
# ``[]`` and kept marking it fresh; that masked the failure for
|
||||
# days. Surface it via assert_canary so the health registry shows
|
||||
# the layer as broken instead of "fresh and empty".
|
||||
from services.slo import assert_canary
|
||||
assert_canary("uap_sightings", 0)
|
||||
logger.error(
|
||||
"UAP sightings: both live NUFORC and HF fallback produced 0 "
|
||||
"rows; layer is unavailable. Live error: %s",
|
||||
live_error,
|
||||
)
|
||||
|
||||
if sightings:
|
||||
sightings = _filter_uap_sightings_recent(sightings)
|
||||
|
||||
with _data_lock:
|
||||
latest_data["uap_sightings"] = sightings or []
|
||||
@@ -1520,6 +1699,7 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
_mark_fresh("uap_sightings")
|
||||
return
|
||||
|
||||
# Unreachable legacy Mapbox tilequery path (kept for reference).
|
||||
cutoff = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
|
||||
# Query the grid concurrently (up to 8 threads)
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
"""Per-aircraft observation tracking for cumulative fuel/CO2 estimates.
|
||||
|
||||
Background
|
||||
----------
|
||||
The pre-existing emissions enrichment attached a *rate* to each flight
|
||||
(GPH and kg/hr) based on aircraft model. Users — reasonably — wanted the
|
||||
running total: how much fuel HAS this plane burned since we started
|
||||
seeing it? Multiplying the rate by elapsed observation time gets us
|
||||
there, but it requires somewhere to remember "when did this icao24
|
||||
first appear on our radar?"
|
||||
|
||||
Why this lives outside ``flight_trails``
|
||||
----------------------------------------
|
||||
``flight_trails`` is sized and pruned aggressively for map rendering
|
||||
(5-minute TTL for untracked aircraft, 200 trail points max). That's
|
||||
wrong for cumulative burn: if a plane has been airborne 2 hours but
|
||||
its trail was pruned 30 min in, the "first trail point" timestamp is
|
||||
30 min ago, not 2h ago. Worse, when the trail expires and re-creates,
|
||||
the cumulative counter would reset mid-flight.
|
||||
|
||||
This module tracks observation lifecycle separately:
|
||||
|
||||
* When a hex is first observed: start a new flight session.
|
||||
* While observed regularly (gap < ``REOPEN_GAP_S``): keep accumulating.
|
||||
* When unseen for longer than ``REOPEN_GAP_S``: treat next sighting as
|
||||
a new session (the plane landed and took off again, or it's a
|
||||
different leg). Reset ``first_seen_at``.
|
||||
* Sessions unseen for ``PRUNE_AFTER_S`` are dropped by ``prune()`` so memory stays
|
||||
bounded.
|
||||
|
||||
The user explicitly asked for this counting semantic: "as soon as a
|
||||
plane appears there should be a counter that keeps a running count of
|
||||
the fuel being burned... If there is no estimate take off time then it
|
||||
can just be from the time the server starts to keep a log of whats in
|
||||
the air."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
# Gap between sightings that resets the session. ADS-B refreshes the
|
||||
# whole aircraft list every minute or two, so anything over a few
|
||||
# minutes means the plane left our coverage window (landed, transit
|
||||
# through dead zone, etc). 15 minutes is conservative.
|
||||
REOPEN_GAP_S = 15 * 60
|
||||
|
||||
# Don't accumulate runaway memory: drop entries unseen for an hour.
|
||||
PRUNE_AFTER_S = 60 * 60
|
||||
|
||||
# Cap on accumulated airtime per session so a single bug elsewhere
|
||||
# (e.g. ts clock skew) can't produce comically large numbers.
|
||||
MAX_SESSION_SECONDS = 24 * 3600 # 24h — longest realistic civilian leg
|
||||
|
||||
|
||||
_observations: dict[str, dict[str, float]] = {}
|
||||
_lock = threading.Lock()
|
||||
_last_prune_at = 0.0
|
||||
|
||||
|
||||
def record_observation(icao_hex: str, *, now: float | None = None) -> int:
    """Register a sighting of ``icao_hex`` and return the session airtime.

    Args:
        icao_hex: Aircraft identifier; stripped and lower-cased before use.
        now: Timestamp in seconds for this sighting; defaults to
            ``time.time()``.

    Returns:
        Whole seconds since the current session's first sighting, clamped to
        ``0..MAX_SESSION_SECONDS``. Returns 0 when ``icao_hex`` is falsy or
        blank, on the first sighting, and when a gap longer than
        ``REOPEN_GAP_S`` starts a new session.
    """
    hex_key = str(icao_hex).strip().lower() if icao_hex else ""
    if not hex_key:
        return 0
    stamp = float(time.time() if now is None else now)

    with _lock:
        session = _observations.get(hex_key)
        starts_new_session = session is None
        if not starts_new_session:
            # ``is None`` rather than truthiness: 0.0 is a valid timestamp
            # and must not be replaced by the fallback.
            previous_raw = session.get("last_seen_at")
            previous = stamp if previous_raw is None else float(previous_raw)
            # A long silence means the earlier sightings belong to a different flight.
            starts_new_session = (stamp - previous) > REOPEN_GAP_S
        if starts_new_session:
            _observations[hex_key] = {"first_seen_at": stamp, "last_seen_at": stamp}
            return 0

        origin_raw = session.get("first_seen_at")
        origin = stamp if origin_raw is None else float(origin_raw)
        # Clamp to guard against clock skew or bad input.
        airtime = int(stamp - origin)
        airtime = min(airtime, MAX_SESSION_SECONDS)
        airtime = max(0, airtime)
        session["last_seen_at"] = stamp
        return airtime
|
||||
|
||||
|
||||
def prune(*, now: float | None = None) -> int:
    """Remove observations not seen for more than ``PRUNE_AFTER_S`` seconds.

    Args:
        now: Reference timestamp in seconds; defaults to ``time.time()``.

    Returns:
        The number of entries removed.
    """
    reference = float(time.time() if now is None else now)

    def _last_seen(session: dict[str, float]) -> float:
        # A missing timestamp counts as 0.0 (explicit ``is None`` check).
        raw = session.get("last_seen_at")
        return 0.0 if raw is None else float(raw)

    with _lock:
        # Collect keys first; the dict must not change size while iterating.
        expired = [
            hex_key
            for hex_key, session in _observations.items()
            if reference - _last_seen(session) > PRUNE_AFTER_S
        ]
        for hex_key in expired:
            del _observations[hex_key]
    return len(expired)
|
||||
|
||||
|
||||
def get_session_seconds(icao_hex: str, *, now: float | None = None) -> int:
    """Return the session airtime for ``icao_hex`` without recording a sighting.

    Unlike ``record_observation`` this never writes ``last_seen_at``.

    Args:
        icao_hex: Aircraft identifier; stripped and lower-cased before lookup.
        now: Reference timestamp in seconds; defaults to ``time.time()``.

    Returns:
        Whole seconds since the stored ``first_seen_at``, clamped to
        ``0..MAX_SESSION_SECONDS``; 0 when the identifier is falsy or unknown.
    """
    if not icao_hex:
        return 0
    hex_key = str(icao_hex).strip().lower()
    with _lock:
        session = _observations.get(hex_key)
        if session is None:
            return 0
        reference = float(time.time() if now is None else now)
        origin_raw = session.get("first_seen_at")
        origin = reference if origin_raw is None else float(origin_raw)
        airtime = min(int(reference - origin), MAX_SESSION_SECONDS)
        return max(0, airtime)
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
    """Forget every recorded observation (test helper, not for production)."""
    with _lock:
        _observations.clear()
|
||||
@@ -17,6 +17,7 @@ from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.retry import with_retry
|
||||
from services.fetchers.route_database import lookup_route
|
||||
from services.fetchers.aircraft_database import lookup_aircraft_type
|
||||
@@ -29,6 +30,88 @@ _RE_AIRLINE_CODE_1 = re.compile(r"^([A-Z]{3})\d")
|
||||
_RE_AIRLINE_CODE_2 = re.compile(r"^([A-Z]{3})[A-Z\d]")
|
||||
|
||||
|
||||
def detect_gps_jamming_zones(
|
||||
raw_flights: list[dict],
|
||||
*,
|
||||
min_aircraft: int | None = None,
|
||||
min_ratio: float | None = None,
|
||||
nacp_threshold: int | None = None,
|
||||
) -> list[dict]:
|
||||
"""Detect GPS interference zones from a snapshot of raw ADS-B aircraft.
|
||||
|
||||
Methodology mirrors GPSJam.org / Flightradar24: bin aircraft into 1°x1°
|
||||
grid cells, flag cells where the fraction of aircraft reporting degraded
|
||||
NACp clears a threshold.
|
||||
|
||||
Inputs
|
||||
------
|
||||
raw_flights:
|
||||
Iterable of dicts. Each item is expected to carry ``lat``, ``lng``
|
||||
(or ``lon``), and ``nac_p``. Records missing position OR missing
|
||||
``nac_p`` entirely (typical for OpenSky-sourced flights) are
|
||||
skipped — absence-of-data isn't evidence of anything.
|
||||
|
||||
nac_p == 0 IS counted as degraded. Pre-fix code skipped it on the theory
|
||||
that "0 = old transponder, never computed accuracy." That's only half
|
||||
right: modern Mode-S Enhanced Surveillance transponders also fall back
|
||||
to nac_p=0 when they lose GPS lock entirely — which is exactly the
|
||||
jamming signature we're trying to detect. Filtering 0 out was discarding
|
||||
the strongest evidence.
|
||||
|
||||
Denoising:
|
||||
1. Require ``min_aircraft`` per grid cell for statistical validity.
|
||||
2. Subtract 1 from degraded count per cell (GPSJam's technique) so
|
||||
a single quirky transponder can't flag an entire zone.
|
||||
3. Require ratio ``adjusted_degraded / total > min_ratio``.
|
||||
|
||||
All thresholds default to the module-level constants but can be
|
||||
overridden for testing.
|
||||
"""
|
||||
min_aircraft = GPS_JAMMING_MIN_AIRCRAFT if min_aircraft is None else int(min_aircraft)
|
||||
min_ratio = GPS_JAMMING_MIN_RATIO if min_ratio is None else float(min_ratio)
|
||||
nacp_threshold = (
|
||||
GPS_JAMMING_NACP_THRESHOLD if nacp_threshold is None else int(nacp_threshold)
|
||||
)
|
||||
|
||||
jamming_grid: dict[str, dict[str, int]] = {}
|
||||
for rf in raw_flights or []:
|
||||
rlat = rf.get("lat")
|
||||
rlng = rf.get("lng") if rf.get("lng") is not None else rf.get("lon")
|
||||
if rlat is None or rlng is None:
|
||||
continue
|
||||
nacp = rf.get("nac_p")
|
||||
if nacp is None:
|
||||
continue
|
||||
grid_key = f"{int(rlat)},{int(rlng)}"
|
||||
cell = jamming_grid.setdefault(grid_key, {"degraded": 0, "total": 0})
|
||||
cell["total"] += 1
|
||||
if nacp < nacp_threshold:
|
||||
cell["degraded"] += 1
|
||||
|
||||
jamming_zones: list[dict] = []
|
||||
for gk, counts in jamming_grid.items():
|
||||
if counts["total"] < min_aircraft:
|
||||
continue
|
||||
adjusted_degraded = max(counts["degraded"] - 1, 0)
|
||||
if adjusted_degraded == 0:
|
||||
continue
|
||||
ratio = adjusted_degraded / counts["total"]
|
||||
if ratio > min_ratio:
|
||||
lat_i, lng_i = gk.split(",")
|
||||
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
|
||||
jamming_zones.append(
|
||||
{
|
||||
"lat": int(lat_i) + 0.5,
|
||||
"lng": int(lng_i) + 0.5,
|
||||
"severity": severity,
|
||||
"ratio": round(ratio, 2),
|
||||
"degraded": counts["degraded"],
|
||||
"total": counts["total"],
|
||||
}
|
||||
)
|
||||
return jamming_zones
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenSky Network API Client (OAuth2)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -459,6 +542,18 @@ def _classify_and_publish(all_adsb_flights):
|
||||
|
||||
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
||||
|
||||
# Source attribution: prefer the explicit ``source`` tag stamped
|
||||
# at fetch time (adsb.lol, OpenSky). If absent, fall back to the
|
||||
# legacy ``supplemental_source`` (airplanes.live, adsb.fi) so
|
||||
# supplementals are still attributed without changing their
|
||||
# tagger. Final fallback "adsb.lol" preserves prior behavior for
|
||||
# any caller that synthesizes records without going through one
|
||||
# of our fetchers (e.g. tests).
|
||||
source = (
|
||||
f.get("source")
|
||||
or f.get("supplemental_source")
|
||||
or "adsb.lol"
|
||||
)
|
||||
flights.append(
|
||||
{
|
||||
"callsign": flight_str,
|
||||
@@ -480,6 +575,7 @@ def _classify_and_publish(all_adsb_flights):
|
||||
"airline_code": airline_code,
|
||||
"aircraft_category": ac_category,
|
||||
"nac_p": f.get("nac_p"),
|
||||
"source": source,
|
||||
}
|
||||
)
|
||||
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
||||
@@ -506,6 +602,22 @@ def _classify_and_publish(all_adsb_flights):
|
||||
if model:
|
||||
emi = get_emissions_info(model)
|
||||
if emi:
|
||||
# Cumulative fuel/CO2: multiply the per-hour rate by how
|
||||
# long we've been observing this airframe. Users want to
|
||||
# see the *amount* burned, not just the rate. If we've
|
||||
# never seen this hex before, observed_seconds is 0 and
|
||||
# the cumulative values are 0 until the next refresh —
|
||||
# the rate is still useful info on its own.
|
||||
observed_seconds = _record_flight_observation(
|
||||
f.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emi = {
|
||||
**emi,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emi["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emi["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
f["emissions"] = emi
|
||||
|
||||
callsign = f.get("callsign", "").strip().upper()
|
||||
@@ -724,56 +836,8 @@ def _classify_and_publish(all_adsb_flights):
|
||||
latest_data["military_flights"] = military_snapshot
|
||||
|
||||
# --- GPS Jamming Detection ---
|
||||
# Uses NACp (Navigation Accuracy Category – Position) from ADS-B to infer
|
||||
# GPS interference zones, similar to GPSJam.org / Flightradar24.
|
||||
# NACp < 8 = position accuracy worse than the FAA-mandated 0.05 NM.
|
||||
#
|
||||
# Denoising (to suppress false positives from old GA transponders):
|
||||
# 1. Skip nac_p == 0 ("unknown accuracy") — old transponders that never
|
||||
# computed accuracy, NOT evidence of jamming. Real jamming shows 1-7.
|
||||
# 2. Require minimum aircraft per grid cell for statistical validity.
|
||||
# 3. Subtract 1 from degraded count per cell (GPSJam's technique) so a
|
||||
# single quirky transponder can't flag an entire zone.
|
||||
# 4. Require the adjusted ratio to exceed the threshold.
|
||||
try:
|
||||
jamming_grid = {}
|
||||
raw_flights = raw_flights_snapshot
|
||||
for rf in raw_flights:
|
||||
rlat = rf.get("lat")
|
||||
rlng = rf.get("lng") or rf.get("lon")
|
||||
if rlat is None or rlng is None:
|
||||
continue
|
||||
nacp = rf.get("nac_p")
|
||||
if nacp is None or nacp == 0:
|
||||
continue
|
||||
grid_key = f"{int(rlat)},{int(rlng)}"
|
||||
if grid_key not in jamming_grid:
|
||||
jamming_grid[grid_key] = {"degraded": 0, "total": 0}
|
||||
jamming_grid[grid_key]["total"] += 1
|
||||
if nacp < GPS_JAMMING_NACP_THRESHOLD:
|
||||
jamming_grid[grid_key]["degraded"] += 1
|
||||
|
||||
jamming_zones = []
|
||||
for gk, counts in jamming_grid.items():
|
||||
if counts["total"] < GPS_JAMMING_MIN_AIRCRAFT:
|
||||
continue
|
||||
adjusted_degraded = max(counts["degraded"] - 1, 0)
|
||||
if adjusted_degraded == 0:
|
||||
continue
|
||||
ratio = adjusted_degraded / counts["total"]
|
||||
if ratio > GPS_JAMMING_MIN_RATIO:
|
||||
lat_i, lng_i = gk.split(",")
|
||||
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
|
||||
jamming_zones.append(
|
||||
{
|
||||
"lat": int(lat_i) + 0.5,
|
||||
"lng": int(lng_i) + 0.5,
|
||||
"severity": severity,
|
||||
"ratio": round(ratio, 2),
|
||||
"degraded": counts["degraded"],
|
||||
"total": counts["total"],
|
||||
}
|
||||
)
|
||||
jamming_zones = detect_gps_jamming_zones(raw_flights_snapshot)
|
||||
with _data_lock:
|
||||
latest_data["gps_jamming"] = jamming_zones
|
||||
if jamming_zones:
|
||||
@@ -849,7 +913,15 @@ def _fetch_adsb_lol_regions():
|
||||
res = fetch_with_curl(url, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("ac", [])
|
||||
aircraft = data.get("ac", [])
|
||||
# Stamp the source at the fetch site so attribution survives
|
||||
# the OpenSky/supplemental dedupe-by-hex merge downstream.
|
||||
# Previously adsb.lol records carried no marker while OpenSky
|
||||
# records got ``is_opensky: True`` — which made flight tooltips
|
||||
# look like everything came from OpenSky.
|
||||
for a in aircraft:
|
||||
a["source"] = "adsb.lol"
|
||||
return aircraft
|
||||
except (
|
||||
requests.RequestException,
|
||||
ConnectionError,
|
||||
@@ -932,6 +1004,7 @@ def _enrich_with_opensky_and_supplemental(adsb_flights):
|
||||
"gs": (s[9] * 1.94384) if s[9] else 0,
|
||||
"t": "Unknown",
|
||||
"is_opensky": True,
|
||||
"source": "OpenSky",
|
||||
}
|
||||
)
|
||||
elif os_res.status_code == 429:
|
||||
|
||||
@@ -20,17 +20,9 @@ def _env_flag(name: str) -> str:
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
|
||||
"""Return whether the Playwright-based LiveUAMap scraper should run.
|
||||
from services.liveuamap_settings import liveuamap_scraper_enabled as _enabled
|
||||
|
||||
It is useful enrichment, but it starts a browser/Node driver and must not be
|
||||
allowed to destabilize Windows local startup.
|
||||
"""
|
||||
setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
|
||||
if setting in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if setting in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
return os.name != "nt"
|
||||
return _enabled()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -210,10 +202,17 @@ def update_liveuamap():
|
||||
if not is_any_active("global_incidents"):
|
||||
return
|
||||
if not liveuamap_scraper_enabled():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled for this runtime; set "
|
||||
"SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
from services.liveuamap_settings import liveuamap_requires_ui_opt_in
|
||||
|
||||
if liveuamap_requires_ui_opt_in():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled: enable Global Incidents in the UI to "
|
||||
"consent, or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1."
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled; set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
return
|
||||
logger.info("Running scheduled Liveuamap scraper...")
|
||||
try:
|
||||
@@ -279,6 +278,16 @@ _FISHING_FETCH_INTERVAL_S = 3600 # once per hour — GFW data has ~5 day lag
|
||||
_last_fishing_fetch_ts: float = 0.0
|
||||
|
||||
|
||||
def _gfw_int_env(name: str, default: int, *, minimum: int = 1, maximum: int | None = None) -> int:
|
||||
try:
|
||||
value = int(os.environ.get(name, str(default)) or default)
|
||||
except (TypeError, ValueError):
|
||||
value = default
|
||||
if maximum is not None:
|
||||
value = min(maximum, value)
|
||||
return max(minimum, value)
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_fishing_activity():
|
||||
"""Fetch recent fishing events from Global Fishing Watch (~5 day lag)."""
|
||||
@@ -301,10 +310,16 @@ def fetch_fishing_activity():
|
||||
try:
|
||||
import datetime as _dt
|
||||
|
||||
# GFW publishes with ~5 day lag; windows shorter than ~7 days often return 0 events.
|
||||
lookback_days = _gfw_int_env("GFW_EVENTS_LOOKBACK_DAYS", 7, minimum=1, maximum=14)
|
||||
max_pages = _gfw_int_env("GFW_EVENTS_MAX_PAGES", 10, minimum=1, maximum=100)
|
||||
timeout_s = _gfw_int_env("GFW_EVENTS_TIMEOUT_S", 90, minimum=30, maximum=180)
|
||||
_end = _dt.date.today().isoformat()
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=7)).isoformat()
|
||||
page_size = max(1, int(os.environ.get("GFW_EVENTS_PAGE_SIZE", "500") or "500"))
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=lookback_days)).isoformat()
|
||||
page_size = _gfw_int_env("GFW_EVENTS_PAGE_SIZE", 500, minimum=1, maximum=1000)
|
||||
offset = 0
|
||||
pages_fetched = 0
|
||||
total_available: int | None = None
|
||||
seen_offsets: set[int] = set()
|
||||
seen_ids: set[str] = set()
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
@@ -325,7 +340,7 @@ def fetch_fishing_activity():
|
||||
}
|
||||
)
|
||||
url = f"https://gateway.api.globalfishingwatch.org/v3/events?{query}"
|
||||
response = fetch_with_curl(url, timeout=30, headers=headers)
|
||||
response = fetch_with_curl(url, timeout=timeout_s, headers=headers)
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
"Fishing activity fetch failed at offset=%s: HTTP %s",
|
||||
@@ -335,10 +350,16 @@ def fetch_fishing_activity():
|
||||
break
|
||||
|
||||
payload = response.json() or {}
|
||||
if total_available is None:
|
||||
try:
|
||||
total_available = int(payload.get("total")) if payload.get("total") is not None else None
|
||||
except (TypeError, ValueError):
|
||||
total_available = None
|
||||
entries = payload.get("entries", [])
|
||||
if not entries:
|
||||
break
|
||||
|
||||
pages_fetched += 1
|
||||
added_this_page = 0
|
||||
for e in entries:
|
||||
pos = e.get("position", {})
|
||||
@@ -373,6 +394,15 @@ def fetch_fishing_activity():
|
||||
if len(entries) < page_size:
|
||||
break
|
||||
|
||||
if pages_fetched >= max_pages:
|
||||
logger.info(
|
||||
"Fishing activity: capped at %s pages (%s events fetched; GFW total=%s)",
|
||||
max_pages,
|
||||
len(events),
|
||||
total_available if total_available is not None else "unknown",
|
||||
)
|
||||
break
|
||||
|
||||
next_offset = payload.get("nextOffset")
|
||||
if next_offset is None:
|
||||
next_offset = (payload.get("pagination") or {}).get("nextOffset")
|
||||
|
||||
@@ -6,7 +6,7 @@ import heapq
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
|
||||
@@ -29,7 +29,7 @@ def _geocode_region(region_name: str, country_name: str) -> tuple:
|
||||
|
||||
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
||||
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": outbound_user_agent("infrastructure-data")})
|
||||
if response.status_code == 200:
|
||||
results = response.json()
|
||||
if results:
|
||||
@@ -235,11 +235,11 @@ _DC_GEOCODED_PATH = Path(__file__).parent.parent.parent / "data" / "datacenters_
|
||||
|
||||
|
||||
def fetch_datacenters():
|
||||
"""Load geocoded data centers (5K+ street-level precise locations)."""
|
||||
from services.fetchers._store import is_any_active
|
||||
"""Load geocoded data centers (5K+ street-level precise locations).
|
||||
|
||||
if not is_any_active("datacenters"):
|
||||
return
|
||||
Always loads from disk; /api/live-data/slow gates the payload on the
|
||||
datacenters layer toggle so enabling the layer can render immediately.
|
||||
"""
|
||||
dcs = []
|
||||
try:
|
||||
if not _DC_GEOCODED_PATH.exists():
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Malware C2 / URLhaus feed (abuse.ch, Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COUNTRY_CENTROIDS: dict[str, tuple[float, float]] = {
|
||||
"AF": (65, 33), "AL": (20, 41), "DZ": (3, 28), "AR": (-64, -34), "AU": (134, -25),
|
||||
"AT": (14, 47.5), "BE": (4, 50.8), "BR": (-51, -10), "CA": (-96, 62), "CN": (105, 35),
|
||||
"DE": (10, 51), "FR": (2, 46), "GB": (-2, 54), "IN": (79, 22), "IR": (53, 32),
|
||||
"IT": (12.5, 42.8), "JP": (138, 36), "KR": (128, 36), "MX": (-102, 23.5), "NL": (5.5, 52.5),
|
||||
"PL": (19.5, 52), "RU": (100, 60), "SG": (103.8, 1.35), "TW": (121, 23.7), "UA": (32, 49),
|
||||
"US": (-97, 38), "VN": (106, 16),
|
||||
}
|
||||
|
||||
|
||||
def fetch_malware_threats() -> list[dict[str, Any]]:
    """Fetch botnet C2 and malware-URL indicators from abuse.ch feeds.

    Pulls the Feodo Tracker IP blocklist (first 200 rows) and the URLhaus
    "recent URLs" list, turns each row with a known country into a map
    marker, stores the combined payload under
    ``latest_data["malware_threats"]`` and marks that key fresh.

    Markers are placed at the country centroid plus a deterministic
    pseudo-random offset derived from the running marker index, so rows
    sharing a country do not stack on one point.

    Rows whose country is missing or not in ``COUNTRY_CENTROIDS`` are
    skipped in BOTH feeds. URLhaus rows used to fall back to the first key
    of ``COUNTRY_CENTROIDS``, which labelled and plotted every unlocated URL
    as that country.

    Each feed is best-effort: a failure is logged and the other feed is
    still processed.

    Returns:
        The list of threat markers. When the ``malware_c2`` layer is
        inactive nothing is fetched and the previously cached marker list is
        returned (empty if there is none).
    """
    if not is_any_active("malware_c2"):
        cached = latest_data.get("malware_threats")
        # The store holds the full payload dict; callers of this function
        # are promised the marker list, so unwrap it.
        if isinstance(cached, dict):
            return cached.get("threats") or []
        return cached or []

    threats: list[dict[str, Any]] = []
    threat_id = 0

    try:
        resp = fetch_with_curl(
            "https://feodotracker.abuse.ch/downloads/ipblocklist.json",
            timeout=10,
            headers={"User-Agent": "Shadowbroker/1.0", "Accept": "application/json"},
        )
        if resp.status_code == 200:
            entries = resp.json()
            if not isinstance(entries, list):
                entries = []
            for entry in entries[:200]:
                if not isinstance(entry, dict):
                    continue  # malformed row must not abort the whole feed
                cc = entry.get("country")
                if not cc or cc not in COUNTRY_CENTROIDS:
                    continue
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic spread of up to +/-4 degrees around the centroid.
                j_lng = ((threat_id * 173.7) % 200 - 100) / 100 * 4
                j_lat = ((threat_id * 293.1) % 200 - 100) / 100 * 4
                threats.append(
                    {
                        "id": f"feodo-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": entry.get("ip_address") or "unknown",
                        "port": entry.get("dst_port") or 0,
                        "malware": entry.get("malware") or "unknown",
                        "status": entry.get("status") or "active",
                        "first_seen": entry.get("first_seen"),
                        "last_online": entry.get("last_online"),
                        "country": cc,
                        "threat_type": "botnet_c2",
                    }
                )
                threat_id += 1
    except Exception as exc:  # deliberate best-effort: feed errors are non-fatal
        logger.warning("Feodo fetch failed: %s", exc)

    try:
        resp = fetch_with_curl(
            "https://urlhaus-api.abuse.ch/v1/urls/recent/limit/100/",
            timeout=8,
        )
        if resp.status_code == 200:
            body = resp.json()
            urls = (body.get("urls") if isinstance(body, dict) else None) or []
            for u in urls:
                if not isinstance(u, dict):
                    continue
                cc = u.get("country")
                if not cc or cc not in COUNTRY_CENTROIDS:
                    # No trustworthy location: skip instead of attributing
                    # the URL to an arbitrary country.
                    continue
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic spread of up to +/-5 degrees around the centroid.
                j_lng = ((threat_id * 137.3) % 200 - 100) / 100 * 5
                j_lat = ((threat_id * 211.7) % 200 - 100) / 100 * 5
                tags = u.get("tags")
                tag_text = ", ".join(str(t) for t in tags) if isinstance(tags, list) else ""
                threats.append(
                    {
                        "id": f"urlhaus-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": u.get("host") or "unknown",
                        "port": 0,
                        "malware": tag_text or u.get("threat") or "malware",
                        "status": u.get("url_status") or "online",
                        "first_seen": u.get("dateadded"),
                        "country": cc,
                        "threat_type": "malware_url",
                    }
                )
                threat_id += 1
    except Exception as exc:  # deliberate best-effort: supplement only
        logger.debug("URLhaus supplement failed: %s", exc)

    payload = {
        "threats": threats,
        "total": len(threats),
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "source": "abuse.ch Feodo Tracker + URLhaus",
    }
    with _data_lock:
        latest_data["malware_threats"] = payload
    # NOTE(review): called after releasing _data_lock in case _mark_fresh
    # takes the same lock itself — confirm against _store.
    _mark_fresh("malware_threats")
    return threats
|
||||
@@ -188,11 +188,16 @@ def fetch_meshtastic_nodes():
|
||||
callsign = ""
|
||||
|
||||
send_callsign_header = str(
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")
|
||||
).strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
|
||||
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||
# The optional Meshtastic callsign is appended as additional context so
|
||||
# meshtastic.liamcottle.net's operator can identify both the install AND
|
||||
# the registered radio operator (when MESHTASTIC_OPERATOR_CALLSIGN is set
|
||||
# and MESHTASTIC_SEND_CALLSIGN_HEADER is true; see issue #203).
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua_base = f"{outbound_user_agent('meshtastic-map')}; 24h polling"
|
||||
if callsign and send_callsign_header:
|
||||
user_agent = f"{ua_base}; node={callsign}"
|
||||
else:
|
||||
|
||||
@@ -7,6 +7,7 @@ import requests
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert
|
||||
|
||||
logger = logging.getLogger("services.data_fetcher")
|
||||
@@ -171,6 +172,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "adsb.lol"
|
||||
all_mil_ac.append(a)
|
||||
except Exception as e:
|
||||
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
||||
@@ -182,6 +184,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "airplanes.live"
|
||||
all_mil_ac.append(a)
|
||||
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
||||
except Exception as e:
|
||||
@@ -234,6 +237,7 @@ def fetch_military_flights():
|
||||
"registration": f.get("r", "N/A"),
|
||||
"icao24": icao_hex,
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -258,7 +262,8 @@ def fetch_military_flights():
|
||||
"model": f.get("t", "Unknown"),
|
||||
"icao24": icao_hex,
|
||||
"speed_knots": speed_knots,
|
||||
"squawk": f.get("squawk", "")
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
except Exception as loop_e:
|
||||
logger.error(f"Mil flight interpolation error: {loop_e}")
|
||||
@@ -296,6 +301,18 @@ def fetch_military_flights():
|
||||
if model:
|
||||
emissions = get_emissions_info(model)
|
||||
if emissions:
|
||||
# Cumulative fuel/CO2 since first observation — mirrors
|
||||
# the civilian path in flights._classify_and_publish.
|
||||
observed_seconds = _record_flight_observation(
|
||||
mf.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emissions = {
|
||||
**emissions,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emissions["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emissions["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
mf["emissions"] = emissions
|
||||
if mf.get("alert_category"):
|
||||
mf["type"] = "tracked_flight"
|
||||
|
||||
@@ -158,21 +158,26 @@ _KEYWORD_COORDS = {
|
||||
_SORTED_KEYWORDS = sorted(_KEYWORD_COORDS.items(), key=lambda x: len(x[0]), reverse=True)
|
||||
|
||||
|
||||
def resolve_coords_match(text: str) -> tuple[tuple[float, float], str] | None:
    """Find the first keyword from ``_SORTED_KEYWORDS`` that occurs in ``text``.

    Keywords are tried in the order of ``_SORTED_KEYWORDS``. A keyword that
    begins or ends with a space is matched as a plain substring of the
    space-padded text; every other keyword must match on word boundaries.

    Returns:
        ``(coords, keyword)`` for the first hit, or ``None`` when nothing
        matches.
    """
    haystack = f" {text} "
    for keyword, location in _SORTED_KEYWORDS:
        if keyword.startswith(" ") or keyword.endswith(" "):
            found = keyword in haystack
        else:
            found = re.search(r"\b" + re.escape(keyword) + r"\b", text) is not None
        if found:
            return location, keyword
    return None
|
||||
|
||||
|
||||
def _resolve_coords(text: str) -> tuple[float, float] | None:
|
||||
"""Return (lat, lng) for the most specific keyword match, or None.
|
||||
|
||||
Longer keywords are tried first. Space-padded keywords (" us ", " uk ")
|
||||
use substring matching on padded text; all others use word-boundary regex.
|
||||
"""
|
||||
padded_text = f" {text} "
|
||||
for kw, coords in _SORTED_KEYWORDS:
|
||||
if kw.startswith(" ") or kw.endswith(" "):
|
||||
if kw in padded_text:
|
||||
return coords
|
||||
else:
|
||||
if re.search(r'\b' + re.escape(kw) + r'\b', text):
|
||||
return coords
|
||||
return None
|
||||
match = resolve_coords_match(text)
|
||||
return match[0] if match else None
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=2)
|
||||
|
||||
@@ -9,6 +9,7 @@ import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from urllib.parse import urlencode
|
||||
@@ -21,23 +22,34 @@ _prev_probabilities: dict[str, float] = {}
|
||||
_market_cache = TTLCache(maxsize=1, ttl=300)
|
||||
_POLYMARKET_PAGE_DELAY_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_S", "0.02"))
|
||||
_KALSHI_PAGE_DELAY_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_S", "0.08"))
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_JITTER_S", "0.08"))
|
||||
_KALSHI_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_JITTER_S", "0.2"))
|
||||
# Random delay before each full Polymarket+Kalshi cycle (decorrelates from other slow-tier jobs).
|
||||
_PRE_FETCH_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PRE_FETCH_JITTER_S", "90"))
|
||||
# Random pause between finishing Polymarket pagination and starting Kalshi.
|
||||
_PROVIDER_GAP_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S", "45"))
|
||||
_provider_pace_lock = threading.Lock()
|
||||
_provider_last_request_at: dict[str, float] = {}
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
|
||||
"""Return True only when the operator explicitly opts into Polymarket/Kalshi pulls."""
|
||||
return str(os.environ.get("PREDICTION_MARKETS_ENABLED", "")).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
"""Return True when UI opt-in or PREDICTION_MARKETS_ENABLED enables pulls."""
|
||||
from services.prediction_markets_settings import prediction_markets_fetch_enabled as _enabled
|
||||
|
||||
return _enabled()
|
||||
|
||||
|
||||
def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
if min_interval_s <= 0:
|
||||
return
|
||||
jitter_s = (
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S
|
||||
if provider == "polymarket"
|
||||
else _KALSHI_PAGE_DELAY_JITTER_S
|
||||
if provider == "kalshi"
|
||||
else 0.0
|
||||
)
|
||||
min_interval_s += random.uniform(0.0, jitter_s) if jitter_s > 0 else 0.0
|
||||
with _provider_pace_lock:
|
||||
now = time.monotonic()
|
||||
wait_s = min_interval_s - (now - _provider_last_request_at.get(provider, 0.0))
|
||||
@@ -47,6 +59,24 @@ def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
_provider_last_request_at[provider] = now
|
||||
|
||||
|
||||
def _apply_pre_fetch_jitter() -> None:
    """Sleep a random 0.._PRE_FETCH_JITTER_S seconds before a fetch cycle.

    Does nothing when the configured jitter is zero or negative. Pauses of
    one second or more are reported at debug level.
    """
    if _PRE_FETCH_JITTER_S <= 0:
        return
    pause_s = random.uniform(0.0, _PRE_FETCH_JITTER_S)
    noteworthy = pause_s >= 1.0
    if noteworthy:
        logger.debug("Prediction markets: pre-fetch jitter %.1fs", pause_s)
    time.sleep(pause_s)
|
||||
|
||||
|
||||
def _apply_provider_gap_jitter() -> None:
    """Sleep a random 0.._PROVIDER_GAP_JITTER_S seconds between providers.

    Does nothing when the configured jitter is zero or negative. Pauses of
    one second or more are reported at debug level.
    """
    if _PROVIDER_GAP_JITTER_S <= 0:
        return
    pause_s = random.uniform(0.0, _PROVIDER_GAP_JITTER_S)
    noteworthy = pause_s >= 1.0
    if noteworthy:
        logger.debug("Prediction markets: provider gap jitter %.1fs", pause_s)
    time.sleep(pause_s)
|
||||
|
||||
|
||||
def _finite_or_none(value):
|
||||
try:
|
||||
n = float(value)
|
||||
@@ -750,7 +780,9 @@ def _merge_markets(poly_events: list[dict], kalshi_events: list[dict]) -> list[d
|
||||
@cached(_market_cache)
|
||||
def fetch_prediction_markets_raw() -> list[dict]:
|
||||
"""Fetch and merge prediction markets from both sources. Cached 5 min."""
|
||||
_apply_pre_fetch_jitter()
|
||||
poly = _fetch_polymarket_events()
|
||||
_apply_provider_gap_jitter()
|
||||
kalshi = _fetch_kalshi_events()
|
||||
merged = _merge_markets(poly, kalshi)
|
||||
logger.info(
|
||||
|
||||
@@ -11,15 +11,20 @@ import random
|
||||
import logging
|
||||
import functools
|
||||
import requests
|
||||
from requests.exceptions import ChunkedEncodingError, ConnectionError as RequestsConnectionError
|
||||
from requests.exceptions import Timeout as RequestsTimeout
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Only retry on transient network/OS errors — not on parse errors, key errors, etc.
|
||||
# Only retry on transient network/OS errors — not parse/key errors or HTTP 4xx/5xx.
|
||||
# requests.HTTPError (from raise_for_status) is intentionally excluded.
|
||||
TRANSIENT_ERRORS = (
|
||||
TimeoutError,
|
||||
ConnectionError,
|
||||
OSError,
|
||||
requests.RequestException,
|
||||
RequestsConnectionError,
|
||||
RequestsTimeout,
|
||||
ChunkedEncodingError,
|
||||
)
|
||||
|
||||
|
||||
@@ -43,6 +48,8 @@ def with_retry(max_retries: int = 3, base_delay: float = 2.0, max_delay: float =
|
||||
for attempt in range(1 + max_retries):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except requests.HTTPError:
|
||||
raise
|
||||
except TRANSIENT_ERRORS as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Scheduled Sentinel-2 road corridor freight trend fetcher (opt-in, slow tier)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_REFRESH_HOURS = float(os.environ.get("ROAD_CORRIDOR_REFRESH_HOURS", "24"))
|
||||
|
||||
|
||||
def _hours_since(iso_ts: str) -> float | None:
|
||||
try:
|
||||
dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00"))
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return (datetime.now(timezone.utc) - dt).total_seconds() / 3600.0
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _feature_ready() -> bool:
    """Report whether the road-corridor satellite feature can run.

    Checks, in order: the feature flag, the optional dependencies, and the
    Sentinel credentials. The first failing check ends the evaluation; the
    latter two log the reason at debug level.
    """
    from services.road_corridor_sat.config import optional_deps_available, road_corridor_sat_enabled
    from services.road_corridor_sat.credentials import sentinel_credentials_configured

    if not road_corridor_sat_enabled():
        return False

    skip_reason = None
    if not optional_deps_available():
        skip_reason = "road_corridor_trends skipped — optional deps not installed"
    elif not sentinel_credentials_configured():
        skip_reason = "road_corridor_trends skipped — Sentinel credentials missing"

    if skip_reason is not None:
        logger.debug(skip_reason)
        return False
    return True
|
||||
|
||||
|
||||
def refresh_road_corridor_store() -> None:
    """Rebuild the trends payload and publish it as ``road_corridor_trends``.

    The payload comes from ``build_trends_payload()``; it is written to
    ``latest_data`` under ``_data_lock`` and the key is then marked fresh.
    """
    from services.road_corridor_sat.storage import build_trends_payload

    trends = build_trends_payload()
    with _data_lock:
        latest_data["road_corridor_trends"] = trends
    _mark_fresh("road_corridor_trends")
|
||||
|
||||
|
||||
def fetch_road_corridor_trends(force: bool = False) -> None:
    """Run the scheduled road-corridor analyses, then republish the trends store.

    Returns immediately unless ``is_any_active("road_corridor_trends")`` is
    true and ``_feature_ready()`` passes. Every id in ``SCHEDULED_PRESET_IDS``
    is then analyzed with ``analyze_preset`` unless its recorded refresh time
    is younger than ``_REFRESH_HOURS``. Unknown preset ids are logged and
    skipped; an analysis failure is logged and the loop continues with the
    next preset. ``refresh_road_corridor_store()`` runs once at the end.

    Args:
        force: When true, skip the freshness check and analyze every preset.
    """
    if not (is_any_active("road_corridor_trends") and _feature_ready()):
        return

    from services.road_corridor_sat.config import SCHEDULED_PRESET_IDS
    from services.road_corridor_sat.pipeline import analyze_preset
    from services.road_corridor_sat.presets import get_preset
    from services.road_corridor_sat.storage import load_refresh_state

    refresh_state = load_refresh_state()
    for preset_id in SCHEDULED_PRESET_IDS:
        if get_preset(preset_id) is None:
            logger.warning("Unknown scheduled road corridor preset: %s", preset_id)
            continue

        last_run = refresh_state.get(preset_id)
        # Age stays unknown (None) when forced, never run, or unparseable;
        # an unknown age always leads to a fresh analysis.
        age_h = None if (force or not last_run) else _hours_since(last_run)
        if age_h is not None and age_h < _REFRESH_HOURS:
            logger.info(
                "road_corridor %s fresh (%.1fh < %.1fh) — skipping",
                preset_id,
                age_h,
                _REFRESH_HOURS,
            )
            continue

        try:
            logger.info("road_corridor analysis starting for %s", preset_id)
            analyze_preset(preset_id)
        except Exception as exc:
            # Best effort: one failing preset must not stop the others.
            logger.exception("road_corridor analysis failed for %s: %s", preset_id, exc)

    refresh_road_corridor_store()
|
||||
@@ -17,6 +17,12 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _route_db_user_agent() -> str:
    """Return the outbound User-Agent string used for route-database downloads.

    ``outbound_user_agent`` is imported when the function is called rather
    than when the module is imported.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    purpose = "route-database"
    return _build_user_agent(purpose)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
||||
@@ -24,8 +30,6 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_HTTP_TIMEOUT_S = 60
|
||||
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
||||
_airports_by_icao: dict[str, dict[str, Any]] = {}
|
||||
@@ -37,7 +41,7 @@ def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
||||
response = requests.get(
|
||||
url,
|
||||
timeout=_HTTP_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
||||
headers={"User-Agent": _route_db_user_agent(), "Accept-Encoding": "gzip"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
text = gzip.decompress(response.content).decode("utf-8-sig")
|
||||
|
||||
@@ -21,12 +21,21 @@ def _merge_sigint_snapshot(
|
||||
because they include fresher region/channel metadata.
|
||||
"""
|
||||
|
||||
merged = list(live_signals)
|
||||
# Shallow-copy every entry so the published list owns its own dicts. The
|
||||
# inputs alias objects that other threads keep mutating in place: live
|
||||
# signals are the SIGINT bridge's own dicts (updated as packets arrive),
|
||||
# and api_nodes are the same objects published under latest_data
|
||||
# ["meshtastic_map_nodes"]. Publishing those references into
|
||||
# latest_data["sigint"] lets a concurrent mutation race the lock-free
|
||||
# deepcopy in get_latest_data_deepcopy_snapshot() (/api/health, /api/live-
|
||||
# data) and raise "dictionary changed size during iteration". Copying
|
||||
# honors the replace-don't-mutate contract in fetchers/_store.py.
|
||||
merged = [dict(s) for s in live_signals]
|
||||
live_callsigns = {s["callsign"] for s in merged if s.get("source") == "meshtastic"}
|
||||
for node in api_nodes:
|
||||
if node.get("callsign") in live_callsigns:
|
||||
continue
|
||||
merged.append(node)
|
||||
merged.append(dict(node))
|
||||
merged.sort(key=lambda item: str(item.get("timestamp", "") or ""), reverse=True)
|
||||
return merged
|
||||
|
||||
|
||||
@@ -0,0 +1,377 @@
|
||||
"""Telegram OSINT — public channel web previews (t.me/s) with keyword geoparsing."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import html
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.fetchers.news import resolve_coords_match
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.telegram_translate import apply_post_translation, apply_posts_translations
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_CHANNELS = (
|
||||
"osintdefender",
|
||||
"insiderpaper",
|
||||
"aljazeeraenglish",
|
||||
"nexta_live",
|
||||
"war_monitor",
|
||||
"OSINTtechnical",
|
||||
"Liveuamap",
|
||||
)
|
||||
|
||||
_MESSAGE_BLOCK_RE = re.compile(
|
||||
r'<div class="tgme_widget_message_wrap js-widget_message_wrap"[\s\S]*?</div>\s*</div>\s*</div>',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_TEXT_RE = re.compile(
|
||||
r'<div class="tgme_widget_message_text[^>]*>([\s\S]*?)</div>',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_DATE_RE = re.compile(
|
||||
r'<a class="tgme_widget_message_date" href="(https://t\.me/[^"]+)".*?<time datetime="([^"]+)"',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_HAS_VIDEO_RE = re.compile(
|
||||
r'tgme_widget_message_video|js-message_video|<video\s',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_HAS_PHOTO_RE = re.compile(r'tgme_widget_message_photo_wrap', re.IGNORECASE)
|
||||
_VIDEO_SRC_RE = re.compile(r'<video[^>]+src="([^"]+)"', re.IGNORECASE)
|
||||
_BG_IMAGE_RE = re.compile(r"background-image:url\('([^']+)'\)", re.IGNORECASE)
|
||||
|
||||
_TELEGRAM_MEDIA_HOST_SUFFIXES = (".telesco.pe", ".telegram-cdn.org")
|
||||
|
||||
# Cyrillic / Arabic aliases for war-reporting channels (merged after English resolver).
|
||||
_EXTRA_PLACE_KEYWORDS: dict[str, tuple[float, float]] = {
|
||||
"киев": (50.450, 30.523),
|
||||
"київ": (50.450, 30.523),
|
||||
"харьков": (49.993, 36.231),
|
||||
"харків": (49.993, 36.231),
|
||||
"одесса": (46.482, 30.724),
|
||||
"одеса": (46.482, 30.724),
|
||||
"донецк": (48.015, 37.803),
|
||||
"донецьк": (48.015, 37.803),
|
||||
"луганск": (48.574, 39.307),
|
||||
"луганськ": (48.574, 39.307),
|
||||
"москва": (55.755, 37.617),
|
||||
"крым": (45.000, 34.000),
|
||||
"крим": (45.000, 34.000),
|
||||
"бахмут": (48.595, 38.000),
|
||||
"запорожье": (47.838, 35.139),
|
||||
"запоріжжя": (47.838, 35.139),
|
||||
"غزة": (31.416, 34.333),
|
||||
"دمشق": (33.513, 36.276),
|
||||
"بيروت": (33.893, 35.501),
|
||||
"tel aviv": (32.085, 34.781),
|
||||
"תל אביב": (32.085, 34.781),
|
||||
}
|
||||
|
||||
# Country-level news geocodes sit on national centroids that stack with threat alerts.
|
||||
# Telegram uses major metro anchors so pins land on a different map cell than news.
|
||||
_TELEGRAM_ANCHOR_OVERRIDES: dict[str, tuple[float, float]] = {
|
||||
"israel": (32.085, 34.781), # Tel Aviv (news uses central Israel ~Jerusalem corridor)
|
||||
"middle east": (32.085, 34.781),
|
||||
"china": (39.904, 116.407), # Beijing (news uses country centroid)
|
||||
"united states": (40.712, -74.006), # New York (news uses Washington DC)
|
||||
"usa": (40.712, -74.006),
|
||||
"us": (40.712, -74.006),
|
||||
"america": (40.712, -74.006),
|
||||
"uk": (51.507, -0.127), # London
|
||||
"iran": (35.689, 51.389), # Tehran
|
||||
"russia": (55.755, 37.617), # Moscow
|
||||
"ukraine": (50.450, 30.523), # Kyiv
|
||||
"france": (48.856, 2.352), # Paris
|
||||
"germany": (52.520, 13.405), # Berlin
|
||||
"lebanon": (34.433, 35.844), # Tripoli (news uses Beirut corridor)
|
||||
}
|
||||
|
||||
_RISK_KEYWORDS = (
|
||||
"war",
|
||||
"missile",
|
||||
"strike",
|
||||
"attack",
|
||||
"crisis",
|
||||
"tension",
|
||||
"military",
|
||||
"conflict",
|
||||
"defense",
|
||||
"clash",
|
||||
"nuclear",
|
||||
"invasion",
|
||||
"bomb",
|
||||
"drone",
|
||||
"weapon",
|
||||
"sanctions",
|
||||
"ceasefire",
|
||||
"escalation",
|
||||
"killed",
|
||||
"destroyed",
|
||||
"operation",
|
||||
"casualty",
|
||||
"frontline",
|
||||
"threat",
|
||||
"explosion",
|
||||
"shelling",
|
||||
)
|
||||
|
||||
|
||||
def telegram_osint_enabled() -> bool:
    """Return whether ``TELEGRAM_OSINT_ENABLED`` leaves the fetcher switched on.

    The variable counts as ``"true"`` when unset. After trimming and
    lower-casing, the values ``0``, ``false``, ``no``, ``off`` and the empty
    string switch the fetcher off; any other value keeps it on.
    """
    setting = str(os.environ.get("TELEGRAM_OSINT_ENABLED", "true")).strip().lower()
    disabled_values = ("0", "false", "no", "off", "")
    return setting not in disabled_values
|
||||
|
||||
|
||||
def _configured_channels() -> list[str]:
|
||||
raw = str(os.environ.get("TELEGRAM_OSINT_CHANNELS", "")).strip()
|
||||
if raw:
|
||||
return [part.strip().lstrip("@") for part in raw.split(",") if part.strip()]
|
||||
return list(_DEFAULT_CHANNELS)
|
||||
|
||||
|
||||
def telegram_media_host_allowed(hostname: str | None) -> bool:
    """Return True when *hostname* ends with an allowed Telegram media suffix.

    The hostname is trimmed and lower-cased before the comparison; ``None``
    or a blank value is rejected. The accepted suffixes are the entries of
    ``_TELEGRAM_MEDIA_HOST_SUFFIXES``.
    """
    normalized = str(hostname or "").strip().lower()
    if normalized:
        for suffix in _TELEGRAM_MEDIA_HOST_SUFFIXES:
            if normalized.endswith(suffix):
                return True
    return False
|
||||
|
||||
|
||||
def _extract_media(block: str, link: str) -> dict[str, Any]:
    """Describe the media attached to one message block.

    Video markers take precedence over photo markers. The returned dict
    always has three keys:

    * ``media_type`` — ``"video"``, ``"photo"`` or ``None``.
    * ``media_url`` — the first URL captured by the matching source pattern,
      trimmed, or ``None`` when no URL is present.
    * ``embed_url`` — ``"<link>?embed=1"`` when media was detected and *link*
      is non-empty, otherwise ``None``.
    """
    media_type: str | None = None
    url_pattern = None
    if _HAS_VIDEO_RE.search(block):
        media_type, url_pattern = "video", _VIDEO_SRC_RE
    elif _HAS_PHOTO_RE.search(block):
        media_type, url_pattern = "photo", _BG_IMAGE_RE

    media_url: str | None = None
    if url_pattern is not None:
        found = url_pattern.search(block)
        if found:
            media_url = found.group(1).strip()

    embed_url = f"{link}?embed=1" if media_type and link else None
    return {
        "media_type": media_type,
        "media_url": media_url,
        "embed_url": embed_url,
    }
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
|
||||
cleaned = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
||||
cleaned = re.sub(r"<[^>]+>", "", cleaned)
|
||||
return html.unescape(cleaned).strip()
|
||||
|
||||
|
||||
def _score_risk(text: str) -> int:
    """Score *text* from 1 to 10 by counting risk keywords.

    The score starts at 1 and gains 2 for every entry of ``_RISK_KEYWORDS``
    that occurs as a substring of the lower-cased text, capped at 10.
    """
    haystack = text.lower()
    hits = sum(1 for keyword in _RISK_KEYWORDS if keyword in haystack)
    return min(10, 1 + 2 * hits)
|
||||
|
||||
|
||||
def _refresh_post_coords(post: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Re-apply geoparsing so stored posts pick up anchor updates."""
|
||||
text = "\n".join(
|
||||
str(part).strip()
|
||||
for part in (post.get("title"), post.get("description"))
|
||||
if part and str(part).strip()
|
||||
)
|
||||
if not text:
|
||||
return post
|
||||
coords = _resolve_telegram_coords(text)
|
||||
if not coords:
|
||||
return post
|
||||
updated = dict(post)
|
||||
updated["coords"] = [coords[0], coords[1]]
|
||||
return updated
|
||||
|
||||
|
||||
def _resolve_telegram_coords(text: str) -> tuple[float, float] | None:
    """Resolve free text to a coordinate pair, or ``None`` when nothing matches.

    ``resolve_coords_match`` is tried first on the lower-cased text. When it
    matches, a ``_TELEGRAM_ANCHOR_OVERRIDES`` entry for the matched keyword
    wins over the coordinates it returned. When it does not match, the
    longest key of ``_EXTRA_PLACE_KEYWORDS`` contained in the text supplies
    the coordinates.
    """
    lowered = text.lower()
    resolved = resolve_coords_match(lowered)
    if resolved:
        base_coords, matched_keyword = resolved
        override = _TELEGRAM_ANCHOR_OVERRIDES.get(matched_keyword.strip().lower())
        return override if override else base_coords

    present = [alias for alias in _EXTRA_PLACE_KEYWORDS if alias in lowered]
    if not present:
        return None
    # max() keeps the first of several equally long aliases, i.e. dict order.
    return _EXTRA_PLACE_KEYWORDS[max(present, key=len)]
|
||||
|
||||
|
||||
def _post_link(post: dict[str, Any]) -> str:
|
||||
return str(post.get("link") or "").strip()
|
||||
|
||||
|
||||
def _extract_new_channel_posts(
    html: str,
    channel: str,
    known_links: set[str],
    *,
    bootstrap_limit: int = 12,
) -> list[dict[str, Any]]:
    """Return the posts on a channel page that are not stored yet.

    Args:
        html: Raw channel preview page, handed to ``parse_telegram_channel_html``.
        channel: Channel handle the page belongs to.
        known_links: Links of posts that are already stored.
        bootstrap_limit: Maximum number of posts returned when *known_links*
            is empty. Zero or a negative value returns no posts.

    Returns:
        Unseen posts in the order the parser produced them. With an empty
        *known_links* this is the last *bootstrap_limit* parsed posts.
        Otherwise the parsed list is walked from its end and the walk stops
        at the first post whose link is already known; posts without a link
        are skipped.
    """
    parsed = parse_telegram_channel_html(html, channel)
    if not parsed:
        return []
    if not known_links:
        # parsed[-0:] is the whole list, so a limit of 0 needs its own branch.
        return parsed[-bootstrap_limit:] if bootstrap_limit > 0 else []

    fresh: list[dict[str, Any]] = []
    for post in reversed(parsed):
        link = _post_link(post)
        if not link:
            continue
        if link in known_links:
            break
        fresh.append(post)
    fresh.reverse()
    return fresh
|
||||
|
||||
|
||||
def _merge_telegram_posts(
    existing: list[dict[str, Any]],
    incoming: list[dict[str, Any]],
    *,
    max_posts: int = 120,
) -> tuple[list[dict[str, Any]], int]:
    """Merge new posts into the stored ones, newest first.

    Args:
        existing: Posts already stored. The list is not modified.
        incoming: Candidate posts; those without a link, or with a link that
            is already present, are ignored.
        max_posts: Maximum number of posts kept after sorting.

    Returns:
        ``(merged, added)`` where *merged* is a new list sorted by the
        ``published`` string in descending order and cut to *max_posts*, and
        *added* is the number of incoming posts accepted before the cut.
    """
    # Work on a copy: appending to and sorting the caller's list in place
    # would silently change data the caller may still be holding.
    merged = list(existing)
    known_links = {link for link in map(_post_link, merged) if link}
    added = 0
    for post in incoming:
        link = _post_link(post)
        if not link or link in known_links:
            continue
        known_links.add(link)
        merged.append(post)
        added += 1
    merged.sort(key=lambda p: str(p.get("published") or ""), reverse=True)
    return merged[:max_posts], added
|
||||
|
||||
|
||||
def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]]:
    """Turn a public ``t.me/s/<channel>`` preview page into post dicts.

    Each message block matched by ``_MESSAGE_BLOCK_RE`` becomes one post,
    provided its text is at least 10 characters long after HTML stripping.
    When the block has no date anchor, the link falls back to the channel URL
    and ``published`` to the current UTC time. Every post is passed through
    ``apply_post_translation`` before being collected.

    Returns:
        Posts in the order their blocks appear in *html*; an empty list for
        empty or ``None`` input.
    """
    parsed_posts: list[dict[str, Any]] = []
    for message_html in _MESSAGE_BLOCK_RE.findall(html or ""):
        body_match = _TEXT_RE.search(message_html)
        body = _strip_html(body_match.group(1)) if body_match else ""
        if len(body) < 10:
            # Covers both "no text element" and "text too short to be useful".
            continue

        stamp = _DATE_RE.search(message_html)
        if stamp:
            link, published = stamp.group(1), stamp.group(2)
        else:
            link = f"https://t.me/{channel}"
            published = datetime.now(timezone.utc).isoformat()

        located = _resolve_telegram_coords(body)
        post = {
            "id": hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16],
            "title": body.split("\n", 1)[0][:160],
            "description": body[:1200],
            "link": link,
            "published": published,
            "source": f"t.me/{channel}",
            "channel": channel,
            "risk_score": _score_risk(body),
            "coords": [located[0], located[1]] if located else None,
            **_extract_media(message_html, link),
        }
        parsed_posts.append(apply_post_translation(post))
    return parsed_posts
|
||||
|
||||
|
||||
def fetch_telegram_osint() -> dict[str, Any]:
    """Poll the configured channel previews and publish the merged post list.

    * When ``is_any_active("telegram_osint")`` is false, nothing is fetched
      and the stored payload (or an empty placeholder) is returned.
    * When the feature is disabled, an empty payload flagged
      ``"disabled": True`` is stored and returned.
    * Otherwise each channel page is fetched, unseen posts are merged with
      the stored ones, coordinates and translations are re-applied, and the
      resulting payload is stored, logged and returned.

    A failure on one channel is logged and does not stop the others.
    """
    store_key = "telegram_osint"
    if not is_any_active(store_key):
        return latest_data.get(store_key) or {"posts": [], "total": 0, "timestamp": None}

    if not telegram_osint_enabled():
        with _data_lock:
            latest_data[store_key] = {"posts": [], "total": 0, "timestamp": None, "disabled": True}
        # NOTE(review): the extracted source lost its indentation, so it is
        # not visible whether _mark_fresh belongs inside the lock block —
        # confirm against the original file (also applies further down).
        _mark_fresh(store_key)
        return latest_data[store_key]

    request_headers = {
        "User-Agent": (
            f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-osint')}) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml",
    }

    with _data_lock:
        previous = latest_data.get(store_key) or {}
        stored_posts = list(previous.get("posts") or [])

    seen_links = {link for link in map(_post_link, stored_posts) if link}
    new_posts: list[dict[str, Any]] = []

    for channel in _configured_channels():
        page_url = f"https://t.me/s/{channel}"
        try:
            response = fetch_with_curl(page_url, timeout=15, headers=request_headers)
            if not response or response.status_code != 200:
                logger.warning(
                    "Telegram channel %s fetch failed: HTTP %s",
                    channel,
                    response.status_code if response else "no response",
                )
                continue
            for post in _extract_new_channel_posts(response.text, channel, seen_links):
                link = _post_link(post)
                if link and link not in seen_links:
                    seen_links.add(link)
                    new_posts.append(post)
        except Exception as exc:
            # Best effort per channel: log and move on to the next one.
            logger.warning("Telegram channel %s parse failed: %s", channel, exc)

    merged_posts, added = _merge_telegram_posts(stored_posts, new_posts)
    merged_posts = apply_posts_translations([_refresh_post_coords(post) for post in merged_posts])
    geolocated = len([post for post in merged_posts if post.get("coords")])

    payload = {
        "posts": merged_posts,
        "total": len(merged_posts),
        "geolocated": geolocated,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "channels": _configured_channels(),
        "last_fetch_new": added,
    }

    with _data_lock:
        latest_data[store_key] = payload
    _mark_fresh(store_key)
    logger.info(
        "Telegram OSINT: +%s new, %s retained (%s geolocated)",
        added,
        len(merged_posts),
        geolocated,
    )
    return payload
|
||||
@@ -10,6 +10,12 @@ from datetime import datetime, timezone
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _trains_user_agent() -> str:
    """Return the outbound User-Agent string used for train-data requests.

    ``outbound_user_agent`` is imported when the function is called rather
    than when the module is imported.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    purpose = "trains"
    return _build_user_agent(purpose)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EARTH_RADIUS_KM = 6371.0
|
||||
@@ -379,7 +385,7 @@ def _fetch_digitraffic() -> list[dict]:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept-Encoding": "gzip",
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
"User-Agent": _trains_user_agent(),
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
|
||||
@@ -0,0 +1,457 @@
|
||||
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||
position publication.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||
- low-precision (audit issue #245 — false precision from text mentions),
|
||||
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||
certain network paths, including Docker Desktop on some Windows hosts).
|
||||
|
||||
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||
carrier is operating. The article body uses extremely consistent phrasing:
|
||||
|
||||
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||
Yokosuka, Japan."
|
||||
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||
|
||||
Those are deterministic to parse. This module:
|
||||
|
||||
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||
site-wide feed often has fresher posts before the category feed
|
||||
catches up, so we union them.
|
||||
2. Picks the most recent post by parsed ``pubDate``.
|
||||
3. For each carrier in the registry, scans the article body for a
|
||||
"is operating in / is in port in / departed from" pattern near
|
||||
the carrier's name.
|
||||
4. Maps the extracted region phrase to coordinates via the carrier
|
||||
tracker's existing REGION_COORDS.
|
||||
|
||||
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||
with ``position_source_at`` set to the article's actual publication
|
||||
timestamp (not ``now()``).
|
||||
|
||||
Politeness
|
||||
----------
|
||||
We send the per-install operator handle via ``outbound_user_agent``
|
||||
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||
but WordPress RSS feeds are open and serve the full article in
|
||||
``<content:encoded>`` — that's the supported path for aggregators and
|
||||
the one we use. We do not spoof browser headers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Iterable
|
||||
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_RSS_URLS: tuple[str, ...] = (
|
||||
# Site-wide feed often has the freshest posts before the category
|
||||
# feed catches up. We try this first.
|
||||
"https://news.usni.org/feed",
|
||||
# Category feed has older fleet trackers for backfill.
|
||||
"https://news.usni.org/category/fleet-tracker/feed",
|
||||
)
|
||||
|
||||
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}
|
||||
|
||||
_FLEET_TRACKER_TITLE_RE = re.compile(
|
||||
r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
|
||||
)
|
||||
|
||||
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
|
||||
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
    """Reduce an HTML fragment to single-spaced plain text.

    Tags are replaced by spaces, HTML entities (``&nbsp;``, ``&amp;``,
    ``&#8217;`` and so on) are decoded, and every run of whitespace —
    including the non-breaking spaces that decoding produces — collapses to
    one space. ``None`` or empty input yields ``""``.

    Entities are decoded because the patterns applied to this text expect
    plain whitespace between words; an undecoded ``&nbsp;`` between a verb
    and a location would keep them from matching.
    """
    # Imported locally under an alias: the parameter name shadows the
    # ``html`` module inside this function.
    from html import unescape as _unescape

    without_tags = _TAG_STRIP_RE.sub(" ", html or "")
    # Decode after tag removal so an encoded "&lt;b&gt;" survives as text
    # instead of being stripped as if it were markup.
    return _WHITESPACE_RE.sub(" ", _unescape(without_tags)).strip()
|
||||
|
||||
|
||||
def _request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with every USNI feed request.

    The ``User-Agent`` comes from ``outbound_user_agent``; no browser
    User-Agent is imitated. ``Accept`` prefers RSS/XML and falls back to
    anything else at low priority, and ``Referer`` points at the
    fleet-tracker category page.
    """
    headers: dict[str, str] = {}
    headers["User-Agent"] = outbound_user_agent("usni-fleet-tracker")
    headers["Accept"] = "application/rss+xml, application/xml;q=0.9, */*;q=0.1"
    headers["Accept-Language"] = "en-US,en;q=0.5"
    headers["Referer"] = "https://news.usni.org/category/fleet-tracker"
    return headers
|
||||
|
||||
|
||||
def _parse_pubdate(raw: str) -> datetime | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
dt = parsedate_to_datetime(raw)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
|
||||
"""Pull every fleet-tracker post visible across the given RSS feeds.
|
||||
|
||||
De-duplicates by article link. Returns a list of dicts:
|
||||
{"title", "link", "pub_date" (datetime), "body" (plain text)}
|
||||
"""
|
||||
items_by_link: dict[str, dict] = {}
|
||||
for url in rss_urls:
|
||||
try:
|
||||
r = fetch_with_curl(url, timeout=15, headers=_request_headers())
|
||||
except Exception as exc:
|
||||
logger.debug("USNI RSS %s exception: %s", url, exc)
|
||||
continue
|
||||
if not r or r.status_code != 200 or not r.text:
|
||||
logger.debug(
|
||||
"USNI RSS %s returned status=%s body=%d",
|
||||
url,
|
||||
getattr(r, "status_code", "?"),
|
||||
len(getattr(r, "text", "") or ""),
|
||||
)
|
||||
continue
|
||||
try:
|
||||
root = ET.fromstring(r.text)
|
||||
except ET.ParseError as exc:
|
||||
logger.warning("USNI RSS parse error from %s: %s", url, exc)
|
||||
continue
|
||||
for item in root.findall(".//item"):
|
||||
title = (item.findtext("title") or "").strip()
|
||||
if not _FLEET_TRACKER_TITLE_RE.search(title):
|
||||
continue
|
||||
link = (item.findtext("link") or "").strip()
|
||||
if not link or link in items_by_link:
|
||||
continue
|
||||
pub_dt = _parse_pubdate(item.findtext("pubDate") or "")
|
||||
body_html = (
|
||||
item.findtext("content:encoded", default="", namespaces=_RSS_NS)
|
||||
or item.findtext("description", default="")
|
||||
or ""
|
||||
)
|
||||
items_by_link[link] = {
|
||||
"title": title,
|
||||
"link": link,
|
||||
"pub_date": pub_dt,
|
||||
"body": _strip_html(body_html),
|
||||
}
|
||||
return list(items_by_link.values())
|
||||
|
||||
|
||||
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||
# The carrier_tracker table already covers most named bodies of water and
|
||||
# major ports — we just need to teach this module to RECOGNIZE the
|
||||
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||
# the same body of water differently.
|
||||
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||
("eastern mediterranean", "eastern mediterranean"),
|
||||
("western mediterranean", "western mediterranean"),
|
||||
("mediterranean sea", "mediterranean"),
|
||||
("the mediterranean", "mediterranean"),
|
||||
("red sea", "red sea"),
|
||||
("arabian sea area of responsibility", "arabian sea"),
|
||||
("north arabian sea", "north arabian sea"),
|
||||
("arabian sea", "arabian sea"),
|
||||
("persian gulf", "persian gulf"),
|
||||
("gulf of oman", "gulf of oman"),
|
||||
("strait of hormuz", "strait of hormuz"),
|
||||
("south china sea", "south china sea"),
|
||||
("east china sea", "east china sea"),
|
||||
("philippine sea", "philippine sea"),
|
||||
("sea of japan", "sea of japan"),
|
||||
("taiwan strait", "taiwan strait"),
|
||||
("western pacific", "western pacific"),
|
||||
("pacific ocean", "pacific"),
|
||||
("indian ocean", "indian ocean"),
|
||||
("north atlantic", "north atlantic"),
|
||||
("western atlantic", "atlantic"),
|
||||
("eastern atlantic", "atlantic"),
|
||||
("atlantic ocean", "atlantic"),
|
||||
("gulf of aden", "gulf of aden"),
|
||||
("horn of africa", "horn of africa"),
|
||||
("bab el-mandeb", "bab el-mandeb"),
|
||||
("suez canal", "suez canal"),
|
||||
("baltic sea", "baltic sea"),
|
||||
("north sea", "north sea"),
|
||||
("black sea", "black sea"),
|
||||
("south atlantic", "south atlantic"),
|
||||
("coral sea", "coral sea"),
|
||||
("gulf of mexico", "gulf of mexico"),
|
||||
("caribbean sea", "caribbean"),
|
||||
("caribbean", "caribbean"),
|
||||
# Specific ports
|
||||
("naval station norfolk", "norfolk"),
|
||||
("norfolk naval shipyard", "newport news"),
|
||||
("newport news shipbuilding", "newport news"),
|
||||
("newport news", "newport news"),
|
||||
# USNI tags Norfolk mentions with state suffix; match both.
|
||||
("norfolk, va", "norfolk"),
|
||||
("norfolk", "norfolk"),
|
||||
("naval station everett", "puget sound"),
|
||||
("naval base kitsap", "bremerton"),
|
||||
("bremerton", "bremerton"),
|
||||
("puget sound", "puget sound"),
|
||||
("naval base san diego", "san diego"),
|
||||
("san diego, calif", "san diego"),
|
||||
("san diego", "san diego"),
|
||||
("yokosuka, japan", "yokosuka"),
|
||||
("yokosuka", "yokosuka"),
|
||||
("pearl harbor", "pearl harbor"),
|
||||
("apra harbor, guam", "guam"),
|
||||
("guam", "guam"),
|
||||
("bahrain", "bahrain"),
|
||||
("naval station rota", "rota"),
|
||||
("rota, spain", "rota"),
|
||||
("naples, italy", "naples"),
|
||||
# Fleets / AORs
|
||||
("5th fleet", "5th fleet"),
|
||||
("6th fleet", "6th fleet"),
|
||||
("7th fleet", "7th fleet"),
|
||||
("3rd fleet", "3rd fleet"),
|
||||
("2nd fleet", "2nd fleet"),
|
||||
("centcom", "centcom"),
|
||||
("indo-pacific command", "indopacom"),
|
||||
("eucom", "eucom"),
|
||||
("southcom", "southcom"),
|
||||
)
|
||||
|
||||
|
||||
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
|
||||
"""Map a USNI region phrase to a ``(canonical_key, display)`` tuple,
|
||||
or ``None`` if we don't recognize it.
|
||||
|
||||
``canonical_key`` is what ``carrier_tracker.REGION_COORDS`` keys on.
|
||||
``display`` is the phrase we'll show in the dossier description.
|
||||
"""
|
||||
p = (phrase or "").lower().strip()
|
||||
if not p:
|
||||
return None
|
||||
for usni_phrase, canonical in _USNI_REGION_ALIASES:
|
||||
if usni_phrase in p:
|
||||
return canonical, usni_phrase
|
||||
return None
|
||||
|
||||
|
||||
# Operating-verb phrases USNI uses, with a capture group for the region
|
||||
# phrase that immediately follows. Each pattern is designed to swallow
|
||||
# the optional editorial filler that often appears between verb and
|
||||
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
|
||||
# filler; "Norfolk" is the location).
|
||||
#
|
||||
# Order matters: most-specific patterns first, so e.g. "is in port in"
|
||||
# wins over the generic "is".
|
||||
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?" # optional "Friday" / "Monday" / etc.
|
||||
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"
|
||||
|
||||
_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
|
||||
# "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
|
||||
re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is conducting <stuff> in [the] {REGION}"
|
||||
re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port in {LOCATION}"
|
||||
re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port" (no location — degenerate, use carrier's homeport via separate path)
|
||||
# → not captured here; falls through to homeport
|
||||
# "is underway in [the] {REGION}"
|
||||
re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is deployed to [the] {REGION}" / "deployed in"
|
||||
re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "arrived [Day] in/at {LOCATION}"
|
||||
re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "departed [Day] from {LOCATION}"
|
||||
re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "transiting [the] {REGION}" / "sailing through [the] {REGION}"
|
||||
re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is homeported at {LOCATION}"
|
||||
re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
)
|
||||
|
||||
|
||||
def _extract_region_for_carrier(
|
||||
body: str,
|
||||
carrier_names: list[str],
|
||||
hull_code: str,
|
||||
) -> str | None:
|
||||
"""Return the best-guess region phrase for one carrier from the
|
||||
article body, or None if no confident match.
|
||||
|
||||
Algorithm:
|
||||
1. Find every mention of the carrier (any name variant or the hull
|
||||
code) in the body.
|
||||
2. For each mention, look in the ~300-char window AFTER it for any
|
||||
of the operating-verb patterns.
|
||||
3. Return the first hit. If a more-confident match later turns up
|
||||
(e.g. "is operating in the X" beats "is homeported at Y"), the
|
||||
first one in document order still wins — USNI's structure puts
|
||||
the position-update sentence near the top of each carrier's
|
||||
section, and the homeport mention later.
|
||||
"""
|
||||
# Build a master mention regex covering every name variant + the hull.
|
||||
candidates: list[str] = []
|
||||
for name in carrier_names:
|
||||
if name and len(name) >= 4:
|
||||
candidates.append(re.escape(name))
|
||||
if hull_code:
|
||||
candidates.append(re.escape(hull_code))
|
||||
if not candidates:
|
||||
return None
|
||||
mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)
|
||||
|
||||
window_chars = 320
|
||||
seen_phrases: list[str] = []
|
||||
for mention in mention_re.finditer(body):
|
||||
end = mention.end()
|
||||
window = body[end : end + window_chars]
|
||||
# Cut window at the next sentence break for tighter context.
|
||||
# (We use the LAST period within the window so "Norfolk, Va." isn't
|
||||
# confused for a sentence end — USNI uses ", Va." prolifically.)
|
||||
# Sentence break candidates: ". " followed by uppercase OR newline.
|
||||
sent_break = re.search(r"[\.!?]\s+[A-Z]", window)
|
||||
if sent_break:
|
||||
window = window[: sent_break.start() + 1]
|
||||
# Try patterns in priority order.
|
||||
for pat in _OPERATING_PATTERNS:
|
||||
m = pat.search(window)
|
||||
if not m:
|
||||
continue
|
||||
phrase = m.group(1).strip().rstrip(",.;: ")
|
||||
if not phrase:
|
||||
continue
|
||||
# Strip trailing editorial filler — USNI often writes
|
||||
# "Norfolk, Va., according to ship spotters" or
|
||||
# "Yokosuka, Japan, according to..."
|
||||
phrase = re.split(
|
||||
r",\s+(?:according|as of|for|while|where|in support|in the)",
|
||||
phrase,
|
||||
maxsplit=1,
|
||||
)[0].strip()
|
||||
seen_phrases.append(phrase)
|
||||
return phrase
|
||||
return seen_phrases[0] if seen_phrases else None
|
||||
|
||||
|
||||
def _carrier_name_variants(full_name: str) -> list[str]:
    """Return the spellings of a carrier name to look for, most specific first.

    ``full_name`` is a registry display name such as
    ``"USS Gerald R. Ford (CVN-78)"``. Variants shorter than four characters
    are dropped and duplicates are removed while preserving order.
    """
    without_hull = full_name.split("(")[0].strip()  # "USS Gerald R. Ford"
    if not without_hull:
        return []
    # Strip the prefix only when it is actually there; slicing a fixed four
    # characters would mangle any name that does not start with "USS ".
    ship_only = without_hull.removeprefix("USS ").strip()  # "Gerald R. Ford"
    last_word = without_hull.split()[-1]  # "Ford"

    variants: list[str] = []
    for candidate in (without_hull, f"USS {ship_only}", ship_only, last_word):
        if len(candidate) >= 4 and candidate not in variants:
            variants.append(candidate)
    return variants


def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` for the latest USNI fleet tracker.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Carriers whose section can't be parsed (e.g. an off-week with no
    mention) are simply absent from the result — the caller keeps
    whatever position they had before.

    ``carrier_registry`` and ``region_coords`` default to the carrier_tracker
    module's own tables; passed in here for testability. Only ``None`` selects
    the default — an explicitly passed empty table is honoured as given.

    Returns an empty dict when the feed yields no items or the newest item has
    an empty body.
    """
    if carrier_registry is None or region_coords is None:
        # Imported lazily, and only when a default is actually needed.
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Pick the most recent by parsed pubDate. Items without a parseable date
    # rank as the epoch, so they only win when nothing else has a date.
    # max() returns the first of several equally-new items, which is the same
    # element a stable descending sort would have put at index 0.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    latest = max(items, key=lambda it: it["pub_date"] or epoch)

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        variants = _carrier_name_variants(info.get("name") or "")

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{display_phrase.title()} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||
@@ -21,9 +21,17 @@ _cache_lock = threading.Lock()
|
||||
_local_search_cache: List[Dict[str, Any]] | None = None
|
||||
_local_search_lock = threading.Lock()
|
||||
|
||||
_USER_AGENT = os.environ.get(
|
||||
"NOMINATIM_USER_AGENT", "ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)"
|
||||
)
|
||||
# Round 7a: per-install operator handle threads through every Nominatim
|
||||
# call. NOMINATIM_USER_AGENT env override is still honored for operators
|
||||
# who run a custom relay / known good identity, but the default uses the
|
||||
# per-install handle so OpenStreetMap can rate-limit per install instead
|
||||
# of treating "Shadowbroker" as one big offender.
|
||||
def _nominatim_user_agent() -> str:
|
||||
override = os.environ.get("NOMINATIM_USER_AGENT", "").strip()
|
||||
if override:
|
||||
return override
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("nominatim")
|
||||
|
||||
|
||||
def _get_cache(key: str):
|
||||
@@ -178,7 +186,7 @@ def search_geocode(query: str, limit: int = 5, local_only: bool = False) -> List
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
@@ -241,7 +249,7 @@ def reverse_geocode(lat: float, lng: float, local_only: bool = False) -> Dict[st
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
|
||||
+146
-77
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import zipfile
|
||||
@@ -8,11 +9,62 @@ from datetime import datetime
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _geopolitics_user_agent() -> str:
    """Round 7a: return the outbound User-Agent for the GDELT geopolitics fetcher.

    Delegates to ``outbound_user_agent`` with the ``"geopolitics-gdelt"`` label.
    """
    from services import network_utils

    return network_utils.outbound_user_agent("geopolitics-gdelt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
frontline_cache = TTLCache(maxsize=1, ttl=1800)
|
||||
|
||||
_DEFAULT_DEEPSTATE_MIRROR_REPO = "cyterat/deepstate-map-data"
|
||||
|
||||
|
||||
def _deepstate_mirror_ref() -> tuple[str, str]:
|
||||
"""Return (github_repo_slug, git_ref) for the DeepState mirror.
|
||||
|
||||
When ``DEEPSTATE_MIRROR_COMMIT`` is set, ingest is pinned to that immutable
|
||||
SHA instead of following the mutable ``main`` branch (#362).
|
||||
"""
|
||||
repo = (os.environ.get("DEEPSTATE_MIRROR_REPO") or _DEFAULT_DEEPSTATE_MIRROR_REPO).strip()
|
||||
if repo.count("/") != 1:
|
||||
repo = _DEFAULT_DEEPSTATE_MIRROR_REPO
|
||||
commit = (os.environ.get("DEEPSTATE_MIRROR_COMMIT") or "").strip()
|
||||
ref = commit if commit else "main"
|
||||
return repo, ref
|
||||
|
||||
|
||||
def _latest_deepstate_geo_path(tree_items: list) -> str | None:
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_items
|
||||
if isinstance(item, dict)
|
||||
and str(item.get("path", "")).startswith("data/deepstatemap_data_")
|
||||
and str(item.get("path", "")).endswith(".geojson")
|
||||
]
|
||||
return sorted(geo_files)[-1] if geo_files else None
|
||||
|
||||
|
||||
def _annotate_deepstate_geojson(data: dict) -> dict:
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
feature["properties"]["name"] = name_map.get(idx, "Russian-occupied areas")
|
||||
feature["properties"]["zone_id"] = idx
|
||||
return data
|
||||
|
||||
|
||||
@cached(frontline_cache)
|
||||
def fetch_ukraine_frontlines():
|
||||
@@ -20,67 +72,34 @@ def fetch_ukraine_frontlines():
|
||||
Fetches the latest GeoJSON data representing the Ukraine frontline.
|
||||
We use the cyterat/deepstate-map-data github mirror since the public API is locked.
|
||||
"""
|
||||
repo, ref = _deepstate_mirror_ref()
|
||||
try:
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror...")
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror (%s @ %s)...", repo, ref)
|
||||
|
||||
# First, query the repo tree to find the latest file name
|
||||
tree_url = (
|
||||
"https://api.github.com/repos/cyterat/deepstate-map-data/git/trees/main?recursive=1"
|
||||
)
|
||||
tree_url = f"https://api.github.com/repos/{repo}/git/trees/{ref}?recursive=1"
|
||||
res_tree = requests.get(tree_url, timeout=10)
|
||||
|
||||
if res_tree.status_code == 200:
|
||||
tree_data = res_tree.json().get("tree", [])
|
||||
# Filter for geojson files in data folder
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_data
|
||||
if item["path"].startswith("data/deepstatemap_data_")
|
||||
and item["path"].endswith(".geojson")
|
||||
]
|
||||
|
||||
if geo_files:
|
||||
# Get the alphabetically latest file (since it's named with YYYYMMDD)
|
||||
latest_file = sorted(geo_files)[-1]
|
||||
|
||||
raw_url = f"https://raw.githubusercontent.com/cyterat/deepstate-map-data/main/{latest_file}"
|
||||
logger.info(f"Downloading latest DeepStateMap: {raw_url}")
|
||||
latest_file = _latest_deepstate_geo_path(res_tree.json().get("tree", []))
|
||||
if latest_file:
|
||||
raw_url = f"https://raw.githubusercontent.com/{repo}/{ref}/{latest_file}"
|
||||
logger.info("Downloading DeepStateMap: %s", raw_url)
|
||||
|
||||
res_geo = requests.get(raw_url, timeout=20)
|
||||
if res_geo.status_code == 200:
|
||||
data = res_geo.json()
|
||||
|
||||
# The Cyterat GitHub mirror strips all properties and just provides a raw array of Feature polygons.
|
||||
# Based on DeepStateMap's frontend mapping, the array index corresponds to the zone type:
|
||||
# 0: Russian-occupied areas
|
||||
# 1: Russian advance
|
||||
# 2: Liberated area
|
||||
# 3: Uncontested/Crimea (often folded into occupied)
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
|
||||
feature["properties"]["name"] = name_map.get(
|
||||
idx, "Russian-occupied areas"
|
||||
)
|
||||
feature["properties"]["zone_id"] = idx
|
||||
|
||||
return data
|
||||
else:
|
||||
logger.error(
|
||||
f"Failed to fetch parsed Github Raw GeoJSON: {res_geo.status_code}"
|
||||
)
|
||||
return _annotate_deepstate_geojson(res_geo.json())
|
||||
logger.error(
|
||||
"Failed to fetch parsed Github Raw GeoJSON: %s", res_geo.status_code
|
||||
)
|
||||
else:
|
||||
logger.error("No deepstatemap_data_*.geojson files in mirror tree at %s", ref)
|
||||
else:
|
||||
logger.error(f"Failed to fetch Github Tree for Deepstatemap: {res_tree.status_code}")
|
||||
logger.error(
|
||||
"Failed to fetch Github tree for Deepstatemap (%s @ %s): %s",
|
||||
repo,
|
||||
ref,
|
||||
res_tree.status_code,
|
||||
)
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"Error fetching DeepStateMap: {e}")
|
||||
return None
|
||||
@@ -316,7 +335,7 @@ def _fetch_article_title(url):
|
||||
resp = requests.get(
|
||||
current_url,
|
||||
timeout=4,
|
||||
headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT Dashboard/1.0)"},
|
||||
headers={"User-Agent": _geopolitics_user_agent()},
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
)
|
||||
@@ -521,10 +540,29 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
|
||||
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
||||
|
||||
|
||||
# GDELT's data.gdeltproject.org is a CNAME to a Google Cloud Storage
|
||||
# bucket of the same name. GCS returns the wildcard ``*.storage.googleapis.com``
|
||||
# certificate, which legitimately does NOT cover the GDELT custom domain
|
||||
# — Python's TLS verification correctly refuses it. Some networks/POPs
|
||||
# happen to route through a path where this works; many do not (notably
|
||||
# Docker Desktop's outbound NAT on local installs).
|
||||
#
|
||||
# Fix: rewrite the URL to hit GCS directly with a path-style bucket
|
||||
# reference, where the standard GCS cert is genuinely valid. Same data,
|
||||
# verified TLS, no operator-side workaround needed.
|
||||
def _gcs_direct_gdelt_url(url: str) -> str:
|
||||
"""If ``url`` points at data.gdeltproject.org, return the equivalent
|
||||
GCS-direct URL. Otherwise return the URL unchanged."""
|
||||
prefix = "://data.gdeltproject.org/"
|
||||
if prefix in url:
|
||||
return url.replace(prefix, "://storage.googleapis.com/data.gdeltproject.org/", 1)
|
||||
return url
|
||||
|
||||
|
||||
def _download_gdelt_export(url):
|
||||
"""Download a single GDELT export file, return bytes or None."""
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=15)
|
||||
res = fetch_with_curl(_gcs_direct_gdelt_url(url), timeout=15)
|
||||
if res.status_code == 200:
|
||||
return res.content
|
||||
except (ConnectionError, TimeoutError, OSError): # non-critical
|
||||
@@ -568,8 +606,19 @@ def _build_feature_html(features, fetched_titles=None):
|
||||
|
||||
|
||||
def _enrich_gdelt_titles_background(features, all_article_urls):
|
||||
"""Background thread: fetch real article titles then update features in-place."""
|
||||
"""Background thread: fetch real article titles, then publish enriched COPIES.
|
||||
|
||||
The ``features`` handed to us were already published into
|
||||
``latest_data["gdelt"]`` by ``fetch_gdelt()``. Per the store's thread-safety
|
||||
contract (see ``get_latest_data_subset_refs`` in fetchers/_store.py), HTTP
|
||||
readers hold live references to these nested ``properties`` dicts and
|
||||
serialize them OUTSIDE the data lock. Mutating the published dicts in place
|
||||
here races that serialization and raises
|
||||
``RuntimeError: dictionary changed size during iteration``. So we enrich
|
||||
copies and atomically swap the top-level key under the lock instead.
|
||||
"""
|
||||
import html as html_mod
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
|
||||
try:
|
||||
logger.info(f"[BG] Fetching real article titles for {len(all_article_urls)} URLs...")
|
||||
@@ -577,28 +626,44 @@ def _enrich_gdelt_titles_background(features, all_article_urls):
|
||||
fetched_count = sum(1 for v in fetched_titles.values() if v)
|
||||
logger.info(f"[BG] Resolved {fetched_count}/{len(all_article_urls)} article titles")
|
||||
|
||||
# Update features in-place with real titles and snippets
|
||||
# Build enriched copies — never touch the already-published objects.
|
||||
enriched = []
|
||||
for f in features:
|
||||
urls = f["properties"].get("_urls_list", [])
|
||||
if not urls:
|
||||
continue
|
||||
headlines = []
|
||||
snippets = []
|
||||
for u in urls:
|
||||
real_title = fetched_titles.get(u)
|
||||
headlines.append(real_title if real_title else _url_to_headline(u))
|
||||
snippets.append(_article_snippet_cache.get(u) or "")
|
||||
f["properties"]["_headlines_list"] = headlines
|
||||
f["properties"]["_snippets_list"] = snippets
|
||||
links = []
|
||||
for u, h in zip(urls, headlines):
|
||||
safe_url = u if u.startswith(("http://", "https://")) else "about:blank"
|
||||
safe_h = html_mod.escape(h)
|
||||
links.append(
|
||||
f'<div style="margin-bottom:6px;"><a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_h}</a></div>'
|
||||
)
|
||||
f["properties"]["html"] = "".join(links)
|
||||
logger.info(f"[BG] GDELT title enrichment complete")
|
||||
nf = dict(f)
|
||||
props = dict(f.get("properties", {}))
|
||||
urls = props.get("_urls_list", [])
|
||||
if urls:
|
||||
headlines = []
|
||||
snippets = []
|
||||
for u in urls:
|
||||
real_title = fetched_titles.get(u)
|
||||
headlines.append(real_title if real_title else _url_to_headline(u))
|
||||
snippets.append(_article_snippet_cache.get(u) or "")
|
||||
props["_headlines_list"] = headlines
|
||||
props["_snippets_list"] = snippets
|
||||
links = []
|
||||
for u, h in zip(urls, headlines):
|
||||
safe_url = u if u.startswith(("http://", "https://")) else "about:blank"
|
||||
safe_h = html_mod.escape(h)
|
||||
links.append(
|
||||
f'<div style="margin-bottom:6px;"><a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_h}</a></div>'
|
||||
)
|
||||
props["html"] = "".join(links)
|
||||
nf["properties"] = props
|
||||
enriched.append(nf)
|
||||
|
||||
# Atomically publish — but only if a newer fetch_gdelt() hasn't already
|
||||
# replaced the layer while we were fetching titles (identity guard).
|
||||
published = False
|
||||
with _data_lock:
|
||||
if latest_data.get("gdelt") is features:
|
||||
latest_data["gdelt"] = enriched
|
||||
published = True
|
||||
if published:
|
||||
_mark_fresh("gdelt")
|
||||
logger.info(f"[BG] GDELT title enrichment complete ({len(enriched)} features)")
|
||||
else:
|
||||
logger.info("[BG] GDELT layer changed under us; skipping stale enrichment swap")
|
||||
except Exception as e:
|
||||
logger.error(f"[BG] GDELT title enrichment failed: {e}")
|
||||
|
||||
@@ -620,8 +685,12 @@ def fetch_global_military_incidents():
|
||||
# HTTPS is used to prevent passive network observers from injecting
|
||||
# poisoned export records into the global incident map via MITM.
|
||||
# GDELT serves the same content over HTTPS as HTTP.
|
||||
# Use the GCS-direct URL because data.gdeltproject.org's CNAME
|
||||
# serves a wildcard *.storage.googleapis.com cert that legitimately
|
||||
# doesn't cover the GDELT hostname. See _gcs_direct_gdelt_url above.
|
||||
index_res = fetch_with_curl(
|
||||
"https://data.gdeltproject.org/gdeltv2/lastupdate.txt", timeout=10
|
||||
_gcs_direct_gdelt_url("https://data.gdeltproject.org/gdeltv2/lastupdate.txt"),
|
||||
timeout=10,
|
||||
)
|
||||
if index_res.status_code != 200:
|
||||
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
"""Function Keys — anonymous citizenship proof.
|
||||
"""Function Keys — anonymous credential scaffolding.
|
||||
|
||||
Source of truth: ``infonet-economy/IMPLEMENTATION_PLAN.md`` §4.4,
|
||||
``infonet-economy/BRAINDUMP.md`` §11 item 9.
|
||||
|
||||
A citizen should be able to prove "I am a UBI-eligible Infonet
|
||||
citizen" to a real-world operator (food bank, community service)
|
||||
**without revealing their Infonet identity**. The naive approach
|
||||
(scramble a public key, record each redemption on chain) leaks
|
||||
identity through metadata correlation (time, location, operator,
|
||||
frequency).
|
||||
A citizen should eventually be able to prove "I am a UBI-eligible
|
||||
Infonet citizen" to a real-world operator (food bank, community
|
||||
service) **without revealing their Infonet identity**. The current
|
||||
Python implementation wires the accounting, nullifier, receipt, and
|
||||
operator flows, but its HMAC challenge-response is a placeholder for
|
||||
integration tests. It is not a production anonymous or zero-knowledge
|
||||
citizenship proof until blind signatures or anonymous credentials are
|
||||
selected and wired.
|
||||
|
||||
The naive approach (scramble a public key, record each redemption on
|
||||
chain) leaks identity through metadata correlation (time, location,
|
||||
operator, frequency).
|
||||
|
||||
The full design has six pieces; five are implemented in pure Python
|
||||
here. The remaining piece — issuance via blind signatures or
|
||||
@@ -27,7 +33,8 @@ Pieces:
|
||||
operator: tracked via ``NullifierTracker``.
|
||||
3. **Challenge-response** (`challenge_response.py`) — operator
|
||||
issues a fresh nonce, key-holder signs with the Function Key's
|
||||
secret. Prevents screenshot attacks, key sharing, replay.
|
||||
secret. This is HMAC placeholder plumbing for screenshot/replay
|
||||
resistance, not the final anonymous credential proof.
|
||||
4. **Two-phase commit receipts** (`receipt.py`) — Phase 1
|
||||
verification receipt (operator-signed, day-level date NOT
|
||||
timestamp, no node_id). Phase 2 fulfillment receipt (citizen
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Country risk index (static scores + USGS quake enrichment)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# Static per-country risk table, keyed by two-letter country code.
#   "base": starting score; build_country_risk_payload() adds any quake
#           magnitude to it, caps the sum at 100, and derives the risk level
#           from it using the 80 / 60 / 40 thresholds.
#   "tags": labels copied through unchanged into the payload.
RISK_FACTORS: dict[str, dict[str, Any]] = {
    "UA": {"base": 85, "tags": ["active_conflict", "infrastructure_damage"]},
    "RU": {"base": 72, "tags": ["sanctions", "military_mobilization"]},
    "IL": {"base": 78, "tags": ["active_conflict", "regional_instability"]},
    "PS": {"base": 90, "tags": ["active_conflict", "humanitarian_crisis"]},
    "SY": {"base": 82, "tags": ["post_conflict", "infrastructure_damage"]},
    "YE": {"base": 88, "tags": ["active_conflict", "humanitarian_crisis"]},
    "MM": {"base": 76, "tags": ["civil_unrest", "military_junta"]},
    "SD": {"base": 84, "tags": ["active_conflict", "humanitarian_crisis"]},
    "AF": {"base": 80, "tags": ["post_conflict", "governance_collapse"]},
    "KP": {"base": 70, "tags": ["nuclear_risk", "isolation"]},
    "IR": {"base": 68, "tags": ["sanctions", "nuclear_program", "regional_proxy"]},
    "CN": {"base": 35, "tags": ["strategic_competition", "taiwan_tensions"]},
    "TW": {"base": 45, "tags": ["invasion_risk", "semiconductor_dependency"]},
    "VE": {"base": 60, "tags": ["economic_collapse", "political_instability"]},
    "HT": {"base": 85, "tags": ["gang_violence", "governance_collapse"]},
    "LB": {"base": 65, "tags": ["economic_crisis", "political_deadlock"]},
    "PK": {"base": 55, "tags": ["terrorism", "political_instability"]},
    "SO": {"base": 82, "tags": ["terrorism", "state_fragility"]},
    "LY": {"base": 72, "tags": ["divided_government", "militia_control"]},
    "ET": {"base": 62, "tags": ["ethnic_tensions", "regional_conflicts"]},
}

# Stock exchanges reported in the payload.
#   "tz":             IANA zone name handed to ZoneInfo.
#   "open" / "close": local trading hours as decimal hours (9.5 == 09:30);
#                     _exchange_open() compares them to hour + minute / 60,
#                     with "open" inclusive and "close" exclusive.
EXCHANGES = [
    {"name": "NYSE", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
    {"name": "NASDAQ", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
    {"name": "LSE", "tz": "Europe/London", "open": 8, "close": 16.5, "country": "GB"},
    {"name": "TSE", "tz": "Asia/Tokyo", "open": 9, "close": 15, "country": "JP"},
    {"name": "SSE", "tz": "Asia/Shanghai", "open": 9.5, "close": 15, "country": "CN"},
    {"name": "HKEX", "tz": "Asia/Hong_Kong", "open": 9.5, "close": 16, "country": "HK"},
    {"name": "FRA", "tz": "Europe/Berlin", "open": 8, "close": 20, "country": "DE"},
    {"name": "TSX", "tz": "America/Toronto", "open": 9.5, "close": 16, "country": "CA"},
    {"name": "MOEX", "tz": "Europe/Moscow", "open": 10, "close": 18.5, "country": "RU"},
]
|
||||
|
||||
|
||||
def _exchange_open(ex: dict[str, Any]) -> bool:
|
||||
try:
|
||||
now = datetime.now(ZoneInfo(ex["tz"]))
|
||||
if now.weekday() >= 5:
|
||||
return False
|
||||
decimal = now.hour + now.minute / 60
|
||||
return ex["open"] <= decimal < ex["close"]
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# Lower-cased region names mapped to the country codes used in RISK_FACTORS.
# They are compared against the text after the last comma of a USGS ``place``
# string. NOTE(review): presumably places look like
# "23 km ENE of Kabul, Afghanistan" — confirm the spellings against the live
# feed and extend this table whenever RISK_FACTORS gains a country.
_QUAKE_REGION_TO_CODE: dict[str, str] = {
    "ukraine": "UA",
    "russia": "RU",
    "israel": "IL",
    "palestine": "PS",
    "west bank": "PS",
    "gaza strip": "PS",
    "syria": "SY",
    "yemen": "YE",
    "myanmar": "MM",
    "burma": "MM",
    "sudan": "SD",
    "afghanistan": "AF",
    "north korea": "KP",
    "iran": "IR",
    "china": "CN",
    "taiwan": "TW",
    "venezuela": "VE",
    "haiti": "HT",
    "lebanon": "LB",
    "pakistan": "PK",
    "somalia": "SO",
    "libya": "LY",
    "ethiopia": "ET",
}


def _quake_country_code(place: str) -> str | None:
    """Map a USGS ``place`` string to a country code, or ``None`` if unknown."""
    # Match the whole trailing region, never a substring: two-letter codes
    # occur inside unrelated names ("il" in "Chile", "ru" in "Peru").
    region = place.rsplit(",", 1)[-1].strip().lower()
    return _QUAKE_REGION_TO_CODE.get(region)


def _fetch_quake_risks() -> dict[str, float]:
    """Sum magnitudes from the USGS M4.5+ past-day feed per tracked country.

    Best-effort: any fetch or parse failure yields whatever was accumulated
    so far (possibly nothing) instead of raising.
    """
    quake_risks: dict[str, float] = {}
    try:
        resp = fetch_with_curl(
            "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_day.geojson",
            timeout=5,
        )
        if resp.status_code != 200:
            return quake_risks
        for feature in resp.json().get("features") or []:
            props = feature.get("properties") or {}
            mag = props.get("mag") or 0
            if isinstance(mag, bool) or not isinstance(mag, (int, float)):
                continue  # one malformed record must not discard the rest
            code = _quake_country_code(props.get("place") or "")
            if code is not None:
                quake_risks[code] = quake_risks.get(code, 0) + mag
    except Exception as exc:
        # Quake enrichment is optional; keep the payload alive but leave a trace.
        import logging

        logging.getLogger(__name__).debug("Country risk: USGS quake enrichment failed: %s", exc)
    return quake_risks


def build_country_risk_payload() -> dict[str, Any]:
    """Build the country-risk payload: static scores plus quake enrichment.

    Returns a dict with:
        countries: one entry per ``RISK_FACTORS`` code (``code``,
            ``risk_score``, ``risk_level``, ``tags``), sorted by
            ``risk_score`` descending.
        exchanges: name, country and current open flag of every ``EXCHANGES`` entry.
        open_exchanges / total_exchanges: counts derived from ``exchanges``.
        timestamp: current UTC time in ISO 8601 form.
    """
    quake_risks = _fetch_quake_risks()

    countries = []
    for code, data in RISK_FACTORS.items():
        base = data["base"]
        score = min(100, base + quake_risks.get(code, 0))
        # NOTE(review): the level is derived from the static base, not from
        # the quake-adjusted score — kept as-is; confirm this is intended.
        if base >= 80:
            level = "CRITICAL"
        elif base >= 60:
            level = "HIGH"
        elif base >= 40:
            level = "ELEVATED"
        else:
            level = "LOW"
        countries.append(
            {
                "code": code,
                "risk_score": score,
                "risk_level": level,
                "tags": data["tags"],
            }
        )
    countries.sort(key=lambda c: c["risk_score"], reverse=True)

    exchanges = [
        {"name": e["name"], "country": e["country"], "open": _exchange_open(e)}
        for e in EXCHANGES
    ]
    return {
        "countries": countries,
        "exchanges": exchanges,
        "open_exchanges": sum(1 for e in exchanges if e["open"]),
        "total_exchanges": len(exchanges),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
@@ -32,14 +32,14 @@ logger = logging.getLogger(__name__)
|
||||
_REFRESH_SECONDS = 24 * 3600
|
||||
kiwisdr_cache: TTLCache = TTLCache(maxsize=1, ttl=_REFRESH_SECONDS)
|
||||
|
||||
_SOURCE_URL = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTP = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTPS = "https://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_CACHE_FILE = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_cache.json"
|
||||
# Bundled fallback — shipped with the codebase so the KiwiSDR layer always
|
||||
# has something to render even when the upstream is unreachable, returns
|
||||
# garbage, or appears to have been tampered with. Issue #206: the upstream
|
||||
# only speaks HTTP, so we can't rely on TLS for integrity — instead we
|
||||
# validate the response's shape and fall back to this bundle if it doesn't
|
||||
# look right.
|
||||
# garbage, or appears to have been tampered with. Issue #206 / #364: try HTTPS
|
||||
# first, then HTTP; we still validate shape and fall back to this bundle if the
|
||||
# payload does not look right.
|
||||
_BUNDLED_FALLBACK = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_directory.json"
|
||||
|
||||
# Minimum number of receivers we expect from a healthy upstream response.
|
||||
@@ -184,6 +184,29 @@ def _validate_fetched_nodes(nodes: list[dict]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _fetch_mirror_payload_text() -> str | None:
    """Download the KiwiSDR mirror payload, HTTPS first and HTTP second (#364).

    Returns the response text of the first URL that answers with status 200,
    or ``None`` when both attempts fail. The text is returned as-is; shape
    validation is left to the caller.
    """
    from services.network_utils import fetch_with_curl

    failure: Exception | None = None
    for candidate_url in (_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP):
        try:
            response = fetch_with_curl(candidate_url, timeout=20)
            if not (response and response.status_code == 200):
                # Remember the status and move on to the next transport.
                failure = RuntimeError(f"HTTP {getattr(response, 'status_code', 'unknown')}")
                continue
            if candidate_url == _SOURCE_URL_HTTP:
                logger.info(
                    "KiwiSDR: HTTPS mirror unavailable; using HTTP with shape validation"
                )
            return response.text
        except Exception as exc:
            failure = exc
            logger.debug("KiwiSDR mirror fetch failed for %s: %s", candidate_url, exc)

    # Only the most recent failure is reported at warning level.
    if failure is not None:
        logger.warning("KiwiSDR mirror fetch failed: %s", failure)
    return None
|
||||
|
||||
|
||||
def _load_bundled_fallback() -> list[dict]:
|
||||
"""Last-resort directory shipped with the codebase. Always returns a
|
||||
list (may be empty if the bundle is missing in older deployments)."""
|
||||
@@ -202,9 +225,8 @@ def _load_bundled_fallback() -> list[dict]:
|
||||
def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
"""Return the KiwiSDR receiver list, refreshed at most once per day.
|
||||
|
||||
Layered fallback (issue #206 — upstream is HTTP-only, so we defend with
|
||||
content validation + bundled static directory rather than trying to
|
||||
upgrade the transport):
|
||||
Layered fallback (issue #206 / #364 — HTTPS first, HTTP fallback, plus
|
||||
content validation + bundled static directory):
|
||||
|
||||
1. In-memory cache (handled by @cached on this function)
|
||||
2. On-disk cache if <24h old
|
||||
@@ -216,8 +238,6 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
tampered upstream returning garbage is caught by _validate_fetched_nodes()
|
||||
and falls through to whatever previously-trusted snapshot we have.
|
||||
"""
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# 1. Trust on-disk cache if fresh.
|
||||
cached_nodes = _load_disk_cache()
|
||||
if cached_nodes is not None:
|
||||
@@ -230,14 +250,12 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
fresh_nodes: list[dict] = []
|
||||
fetch_succeeded = False
|
||||
try:
|
||||
res = fetch_with_curl(_SOURCE_URL, timeout=20)
|
||||
if res and res.status_code == 200:
|
||||
fresh_nodes = _parse_mirror_payload(res.text)
|
||||
body = _fetch_mirror_payload_text()
|
||||
if body:
|
||||
fresh_nodes = _parse_mirror_payload(body)
|
||||
fetch_succeeded = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"KiwiSDR fetch returned HTTP {res.status_code if res else 'no response'}"
|
||||
)
|
||||
logger.warning("KiwiSDR fetch returned no usable mirror payload")
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.warning(f"KiwiSDR fetch exception: {e}")
|
||||
|
||||
|
||||
@@ -27,11 +27,21 @@ def fetch_liveuamap():
|
||||
browser = p.chromium.launch(
|
||||
headless=True, args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
# Per-install handle (no shared Shadowbroker product token). Stealth remains
|
||||
# for Turnstile; see docs/OUTBOUND_DATA.md #348.
|
||||
playwright_ua = (
|
||||
f"Mozilla/5.0 (compatible; {outbound_user_agent('liveuamap')})"
|
||||
)
|
||||
context = browser.new_context(
|
||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
user_agent=playwright_ua,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
color_scheme="dark",
|
||||
)
|
||||
# Bound navigation and script evaluation so a stuck region cannot hang the slow pool.
|
||||
context.set_default_navigation_timeout(60_000)
|
||||
context.set_default_timeout(30_000)
|
||||
page = context.new_page()
|
||||
stealth_sync(page)
|
||||
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
"""LiveUAMap Playwright scraper opt-in (#348) — UI consent on Windows."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "liveuamap_scraper_opt_in.json"
|
||||
_OPT_IN_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
|
||||
return str(os.getenv(name, "")).strip().lower()
|
||||
|
||||
|
||||
def liveuamap_requires_ui_opt_in() -> bool:
    """Report whether this platform needs explicit UI consent for the scraper.

    True only on Windows (``os.name == "nt"``), where local installs must opt
    in before Playwright contacts LiveUAMap.
    """
    running_on_windows = os.name == "nt"
    return running_on_windows
|
||||
|
||||
|
||||
def get_liveuamap_ui_opt_in() -> bool:
    """Return the persisted UI opt-in flag for the LiveUAMap scraper.

    Reads ``_OPT_IN_FILE`` and returns the truthiness of its ``"opted_in"``
    key. A missing file means "never opted in" and returns ``False``
    silently. An unreadable, undecodable, or malformed file logs a warning
    and also returns ``False``, so a damaged file can never enable the
    scraper.
    """
    try:
        # Read directly instead of checking exists() first: the file can
        # disappear between the check and the read.
        payload = json.loads(_OPT_IN_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return False
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("LiveUAMap opt-in file unreadable: %s", e)
        return False
    if not isinstance(payload, dict):
        # Valid JSON with a non-object root ([], "x", 3) has no .get().
        logger.warning("LiveUAMap opt-in file unreadable: %s", "root is not a JSON object")
        return False
    return bool(payload.get("opted_in"))
|
||||
|
||||
|
||||
def set_liveuamap_ui_opt_in(opted_in: bool) -> None:
    """Persist the UI opt-in flag for the LiveUAMap scraper.

    Creates the data directory if needed, then writes
    ``{"opted_in": <bool>}`` to ``_OPT_IN_FILE`` while holding
    ``_OPT_IN_LOCK`` so concurrent writers in this process are serialized.
    """
    target_dir = _OPT_IN_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    document = {"opted_in": bool(opted_in)}
    with _OPT_IN_LOCK:
        serialized = json.dumps(document, indent=2)
        _OPT_IN_FILE.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
    """Whether the Playwright LiveUAMap scraper may run on this backend.

    ``SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER`` wins when it holds a recognised
    on/off value. Otherwise the scraper is enabled unless this platform
    requires UI consent, in which case the stored consent flag decides.
    """
    override = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    if override in ("1", "true", "yes", "on"):
        return True
    if override in ("0", "false", "no", "off"):
        return False
    # No explicit override: only consult the consent file where it is required.
    return get_liveuamap_ui_opt_in() if liveuamap_requires_ui_opt_in() else True
|
||||
|
||||
|
||||
def liveuamap_scraper_status() -> dict[str, Any]:
    """Summarise the inputs and outcome of the scraper enablement decision.

    Returns a dict with ``platform_requires_opt_in``, ``ui_opted_in``,
    ``scraper_enabled`` and ``env_override`` (``"on"``, ``"off"`` or ``None``
    when the environment variable holds no recognised value).
    """
    flag = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    if flag in {"1", "true", "yes", "on"}:
        override_label = "on"
    elif flag in {"0", "false", "no", "off"}:
        override_label = "off"
    else:
        override_label = None

    consent_given = get_liveuamap_ui_opt_in()
    consent_required = liveuamap_requires_ui_opt_in()
    enabled = liveuamap_scraper_enabled()
    return dict(
        platform_requires_opt_in=consent_required,
        ui_opted_in=consent_given,
        scraper_enabled=enabled,
        env_override=override_label,
    )
|
||||
@@ -287,28 +287,18 @@ def write_signed_bootstrap_manifest(
|
||||
return manifest
|
||||
|
||||
|
||||
def load_bootstrap_manifest(
|
||||
path: str | Path,
|
||||
def parse_bootstrap_manifest_dict(
|
||||
raw: dict[str, Any],
|
||||
*,
|
||||
signer_public_key_b64: str,
|
||||
now: float | None = None,
|
||||
) -> BootstrapManifest:
|
||||
manifest_path = _resolve_manifest_path(str(path))
|
||||
try:
|
||||
raw = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except FileNotFoundError as exc:
|
||||
raise BootstrapManifestError(f"bootstrap manifest not found: {manifest_path}") from exc
|
||||
except json.JSONDecodeError as exc:
|
||||
raise BootstrapManifestError("bootstrap manifest is not valid JSON") from exc
|
||||
|
||||
if not isinstance(raw, dict):
|
||||
raise BootstrapManifestError("bootstrap manifest root must be an object")
|
||||
|
||||
signature = str(raw.get("signature", "") or "").strip()
|
||||
payload = {key: value for key, value in raw.items() if key != "signature"}
|
||||
if not signature:
|
||||
raise BootstrapManifestError("bootstrap manifest signature is required")
|
||||
|
||||
_verify_manifest_signature(
|
||||
payload,
|
||||
signature_b64=signature,
|
||||
@@ -325,11 +315,36 @@ def load_bootstrap_manifest(
|
||||
)
|
||||
|
||||
|
||||
def load_bootstrap_manifest(
    path: str | Path,
    *,
    signer_public_key_b64: str,
    now: float | None = None,
) -> BootstrapManifest:
    """Read a bootstrap manifest file and hand it to the dict parser.

    The path is resolved with ``_resolve_manifest_path`` and the file is read
    as UTF-8 JSON. ``signer_public_key_b64`` and ``now`` are forwarded
    unchanged to ``parse_bootstrap_manifest_dict``.

    Raises:
        BootstrapManifestError: if the file does not exist, is not valid
            JSON, or its root is not a JSON object.
    """
    manifest_path = _resolve_manifest_path(str(path))
    try:
        manifest_text = manifest_path.read_text(encoding="utf-8")
        raw = json.loads(manifest_text)
    except FileNotFoundError as exc:
        raise BootstrapManifestError(f"bootstrap manifest not found: {manifest_path}") from exc
    except json.JSONDecodeError as exc:
        raise BootstrapManifestError("bootstrap manifest is not valid JSON") from exc

    if isinstance(raw, dict):
        return parse_bootstrap_manifest_dict(
            raw,
            signer_public_key_b64=signer_public_key_b64,
            now=now,
        )
    raise BootstrapManifestError("bootstrap manifest root must be an object")
|
||||
|
||||
|
||||
def load_bootstrap_manifest_from_settings(*, now: float | None = None) -> BootstrapManifest | None:
|
||||
settings = get_settings()
|
||||
if bool(getattr(settings, "MESH_BOOTSTRAP_DISABLED", False)):
|
||||
return None
|
||||
signer_public_key_b64 = str(getattr(settings, "MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY", "") or "").strip()
|
||||
from services.mesh.mesh_fleet_defaults import effective_bootstrap_signer_public_key_b64
|
||||
|
||||
signer_public_key_b64 = effective_bootstrap_signer_public_key_b64()
|
||||
if not signer_public_key_b64:
|
||||
return None
|
||||
manifest_path = _resolve_manifest_path(str(getattr(settings, "MESH_BOOTSTRAP_MANIFEST_PATH", "") or ""))
|
||||
|
||||
@@ -69,6 +69,115 @@ def _derive_peer_key(shared_secret: str, peer_url: str) -> bytes:
|
||||
).digest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Issue #256 (tg12): per-peer HMAC secrets
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Before this change, ALL peer-push HMACs were derived from a single
|
||||
# fleet-shared ``MESH_PEER_PUSH_SECRET``. The receiver could prove a
|
||||
# request was signed by *someone who knows the fleet secret*, but it
|
||||
# could NOT prove which peer signed it — any peer could compute the
|
||||
# expected HMAC for any other peer's URL and impersonate that peer.
|
||||
#
|
||||
# Fix: an optional ``MESH_PEER_SECRETS`` env var maps specific peer URLs
|
||||
# to per-peer secrets. When a peer URL is listed there, only that
|
||||
# per-peer secret is accepted for that URL — the global secret is
|
||||
# ignored for that peer. Peer A no longer learns peer B's secret, so
|
||||
# peer A cannot forge a request claiming to be peer B.
|
||||
#
|
||||
# Backwards-compatible by design:
|
||||
#
|
||||
# - Single-peer installs (``MESH_PEER_SECRETS`` empty) keep using the
|
||||
# global secret. Zero behavior change. Zero operator action required.
|
||||
# - Multi-peer installs that haven't migrated yet keep using the global
|
||||
# secret for every peer. Same behavior as before — same exposure.
|
||||
# - Multi-peer installs that have migrated configure
|
||||
# ``MESH_PEER_SECRETS=urlA=secretA,urlB=secretB`` and immediately get
|
||||
# per-peer identity. Migration is incremental: peers not yet listed
|
||||
# continue using the global secret until both sides of that peering
|
||||
# add their entry.
|
||||
|
||||
_PEER_SECRETS_CACHE: dict[str, str] = {}
|
||||
_PEER_SECRETS_CACHE_RAW: str = ""
|
||||
|
||||
|
||||
def _lookup_per_peer_secret(normalized_url: str) -> str:
|
||||
"""Return the per-peer secret for ``normalized_url`` from MESH_PEER_SECRETS.
|
||||
|
||||
Returns "" if no per-peer entry is configured for that URL. The parser
|
||||
is forgiving:
|
||||
|
||||
- Whitespace around items, URLs, and secrets is stripped.
|
||||
- Items without ``=`` or with empty URL/secret halves are skipped.
|
||||
- The URL half is normalized via ``normalize_peer_url`` so config
|
||||
authors don't have to match scheme/port/path quirks exactly.
|
||||
|
||||
The cache is invalidated whenever the env var's raw value changes,
|
||||
which keeps tests' ``monkeypatch.setenv`` calls effective without
|
||||
forcing a process restart.
|
||||
"""
|
||||
import os
|
||||
|
||||
raw = str(os.environ.get("MESH_PEER_SECRETS", "") or "").strip()
|
||||
|
||||
global _PEER_SECRETS_CACHE, _PEER_SECRETS_CACHE_RAW
|
||||
if raw != _PEER_SECRETS_CACHE_RAW:
|
||||
new_cache: dict[str, str] = {}
|
||||
for chunk in raw.split(","):
|
||||
chunk = chunk.strip()
|
||||
if not chunk or "=" not in chunk:
|
||||
continue
|
||||
url_part, _, secret_part = chunk.partition("=")
|
||||
normalized = normalize_peer_url(url_part.strip())
|
||||
secret = secret_part.strip()
|
||||
if normalized and secret:
|
||||
new_cache[normalized] = secret
|
||||
_PEER_SECRETS_CACHE = new_cache
|
||||
_PEER_SECRETS_CACHE_RAW = raw
|
||||
|
||||
return _PEER_SECRETS_CACHE.get(normalized_url, "")
|
||||
|
||||
|
||||
def resolve_peer_key_for_url(peer_url: str) -> bytes:
    """Return the HMAC key for ``peer_url``, preferring per-peer secret.

    Issue #256: this is the function every peer-push call site should
    use. It looks up the peer-specific secret first, falling back to the
    fleet-shared ``MESH_PEER_PUSH_SECRET`` only when the URL is NOT
    listed in ``MESH_PEER_SECRETS``.

    Both sender (computing X-Peer-HMAC) and receiver (verifying it) call
    this with the SENDER's URL — they must derive the same key, so
    operators on both ends of a peering need matching MESH_PEER_SECRETS
    entries for that URL to stay in sync.

    Returns empty bytes when no usable secret exists (URL does not
    normalize, the global secret cannot be loaded, or it is empty).
    Callers must treat that as fail-closed (skip the push, reject the
    verification).
    """
    normalized_url = normalize_peer_url(peer_url)
    if not normalized_url:
        return b""

    per_peer_secret = _lookup_per_peer_secret(normalized_url)
    if per_peer_secret:
        return _derive_peer_key(per_peer_secret, normalized_url)

    # No per-peer entry for this URL — fall back to the legacy global
    # secret. This is what preserves zero-hostility for single-peer
    # installs and the migration window for multi-peer installs.
    try:
        from services.mesh.mesh_fleet_defaults import effective_peer_push_secret

        global_secret = effective_peer_push_secret()
    except Exception:
        # Fail closed: any problem obtaining the secret means "no key".
        return b""
    if not global_secret:
        return b""
    return _derive_peer_key(global_secret, normalized_url)
|
||||
|
||||
|
||||
def _node_digest(public_key_b64: str) -> str:
|
||||
raw = base64.b64decode(public_key_b64)
|
||||
return hashlib.sha256(raw).hexdigest()
|
||||
|
||||
@@ -0,0 +1,179 @@
|
||||
"""Invite-scoped DM connect delivery: auto relay release and contact severance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# ``connect_intent`` values that mark a request-class DM as connect-scoped.
CONNECT_AUTO_RELEASE_INTENTS = frozenset(
    (
        "invite_short_address",
        "invite_import",
        "contact_request",
        "contact_accept",
        "contact_offer",
    )
)

# Contact ``trust_level`` values that count as invite-scoped.
INVITE_CONNECT_TRUST_LEVELS = frozenset(("invite_pinned", "sas_verified"))
|
||||
|
||||
|
||||
def _release_profile() -> str:
|
||||
try:
|
||||
from services.release_profiles import current_release_profile
|
||||
|
||||
return str(current_release_profile() or "dev")
|
||||
except Exception:
|
||||
return "dev"
|
||||
|
||||
|
||||
def grant_connect_relay_policy(
    recipient_id: str,
    *,
    reason: str = "connect_scoped_auto_release",
) -> dict[str, Any]:
    """Pre-authorize hidden relay delivery for an explicit connect target.

    Grants a ``dm_contact``-scoped relay policy for ``recipient_id`` under the
    current release profile and returns whatever ``grant_relay_policy``
    returns. A blank recipient, or any exception raised while granting, is
    reported as ``{"ok": False, "detail": ...}`` instead of being raised.
    """
    target = str(recipient_id or "").strip()
    if target == "":
        return {"ok": False, "detail": "recipient_id required"}
    try:
        from services.mesh.mesh_relay_policy import grant_relay_policy

        policy_kwargs = dict(
            scope_type="dm_contact",
            scope_id=target,
            profile=_release_profile(),
            hidden_transport_required=True,
            reason=str(reason or "connect_scoped_auto_release"),
        )
        return grant_relay_policy(**policy_kwargs)
    except Exception as exc:
        failure = str(exc) or type(exc).__name__
        return {"ok": False, "detail": failure}
|
||||
|
||||
|
||||
def revoke_connect_relay_policy(recipient_id: str) -> dict[str, Any]:
    """Revoke the ``dm_contact``-scoped relay policy for ``recipient_id``.

    Returns ``{"ok": True, "revoked": <int>}`` on success, where the count is
    the integer form of ``revoke_relay_policy``'s result (0 when falsy). A
    blank recipient or any exception yields ``{"ok": False, "detail": ...}``.
    """
    target = str(recipient_id or "").strip()
    if target == "":
        return {"ok": False, "detail": "recipient_id required"}
    try:
        from services.mesh.mesh_relay_policy import revoke_relay_policy

        outcome = revoke_relay_policy(
            scope_type="dm_contact",
            scope_id=target,
            profile=_release_profile(),
        )
        return {"ok": True, "revoked": int(outcome or 0)}
    except Exception as exc:
        failure = str(exc) or type(exc).__name__
        return {"ok": False, "detail": failure}
|
||||
|
||||
|
||||
def recipient_has_invite_connect_scope(recipient_id: str) -> bool:
    """Tell whether the stored contact for ``recipient_id`` is invite-scoped.

    True when the contact carries a non-blank ``invitePinnedPrekeyLookupHandle``
    or ``invitePinnedLookupPeerUrl``, or when its ``trust_level`` is one of
    ``INVITE_CONNECT_TRUST_LEVELS``. A blank id or a failed contact lookup
    yields False.
    """
    target = str(recipient_id or "").strip()
    if not target:
        return False
    try:
        from services.mesh.mesh_wormhole_contacts import get_wormhole_dm_contact

        contact = get_wormhole_dm_contact(target) or {}
    except Exception:
        return False

    def _field(key: str) -> str:
        # Contact value as stripped text; missing/None becomes "".
        return str(contact.get(key, "") or "").strip()

    if _field("invitePinnedPrekeyLookupHandle") or _field("invitePinnedLookupPeerUrl"):
        return True
    return _field("trust_level").lower() in INVITE_CONNECT_TRUST_LEVELS
|
||||
|
||||
|
||||
def relay_push_peer_urls_for_payload(payload: dict[str, Any]) -> list[str]:
    """Return the ordered, de-duplicated relay URLs a payload should be pushed to.

    Sources, in order:

    1. ``payload["relay_push_peer_urls"]`` — each entry is stripped and has
       trailing ``/`` removed; blanks and repeats are dropped. A bare string
       is treated as a single URL (not iterated character by character) and
       a non-iterable value is ignored.
    2. ``payload["lookup_peer_url"]`` — when present it is moved to the front.
    3. Only if the list is still empty and ``recipient_id`` is set: the
       contact's ``invitePinnedLookupPeerUrl``. This lookup is best-effort;
       any failure leaves the list empty.
    """
    hinted = payload.get("relay_push_peer_urls") or []
    if isinstance(hinted, str):
        hinted = [hinted]
    try:
        candidates = list(hinted)
    except TypeError:
        # Not iterable (e.g. a number): nothing usable was supplied.
        candidates = []

    urls: list[str] = []
    for raw in candidates:
        normalized = str(raw or "").strip().rstrip("/")
        if normalized and normalized not in urls:
            urls.append(normalized)

    lookup_peer_url = str(payload.get("lookup_peer_url", "") or "").strip().rstrip("/")
    if lookup_peer_url:
        # The explicit lookup peer always goes first and appears only once.
        urls = [url for url in urls if url != lookup_peer_url]
        urls.insert(0, lookup_peer_url)

    recipient_id = str(payload.get("recipient_id", "") or "").strip()
    if recipient_id and not urls:
        try:
            from services.mesh.mesh_wormhole_contacts import get_wormhole_dm_contact

            contact = get_wormhole_dm_contact(recipient_id) or {}
            pinned = str(contact.get("invitePinnedLookupPeerUrl", "") or "").strip().rstrip("/")
            if pinned:
                urls.append(pinned)
        except Exception:
            # Deliberate best-effort fallback: no contact store, no hint.
            pass
    return urls
|
||||
|
||||
|
||||
def should_auto_release_dm_payload(payload: dict[str, Any]) -> bool:
    """Decide whether a DM payload qualifies for connect-scoped auto release.

    Only ``delivery_class == "request"`` payloads qualify, and then only if
    one of these holds: the ``connect_intent`` is in
    ``CONNECT_AUTO_RELEASE_INTENTS``, a ``lookup_peer_url`` is present, or the
    recipient's stored contact is invite-scoped.
    """

    def _text(key: str) -> str:
        # Payload value as stripped text; missing/None becomes "".
        return str(payload.get(key, "") or "").strip()

    if _text("delivery_class").lower() != "request":
        return False
    if _text("connect_intent").lower() in CONNECT_AUTO_RELEASE_INTENTS:
        return True
    if _text("lookup_peer_url"):
        return True
    recipient_id = _text("recipient_id")
    return bool(recipient_id and recipient_has_invite_connect_scope(recipient_id))
|
||||
|
||||
|
||||
def enrich_connect_release_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Attach invite-owner relay hints used during private release.

    Works on a shallow copy of ``payload``. When no ``lookup_peer_url`` is
    given, the recipient contact's ``invitePinnedLookupPeerUrl`` is used if
    available (lookup failures are ignored). A non-empty result of
    ``relay_push_peer_urls_for_payload`` is stored under
    ``relay_push_peer_urls``.
    """
    result = dict(payload or {})
    recipient = str(result.get("recipient_id", "") or "").strip()
    peer_url = str(result.get("lookup_peer_url", "") or "").strip().rstrip("/")

    if recipient and not peer_url:
        try:
            from services.mesh.mesh_wormhole_contacts import get_wormhole_dm_contact

            contact = get_wormhole_dm_contact(recipient) or {}
            peer_url = str(contact.get("invitePinnedLookupPeerUrl", "") or "").strip().rstrip("/")
        except Exception:
            peer_url = ""

    if peer_url:
        result["lookup_peer_url"] = peer_url

    relay_urls = relay_push_peer_urls_for_payload(result)
    if relay_urls:
        result["relay_push_peer_urls"] = relay_urls
    return result
|
||||
|
||||
|
||||
def auto_release_connect_dm_outbox(*, outbox_id: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Grant scoped relay policy and approve release for invite-scoped connect traffic.

    Returns a result dict:

    - ``{"ok": False, "detail": ...}`` for a blank ``outbox_id``, a missing
      recipient, or a failure while approving the release / starting the
      worker (the latter also carries the ``grant`` result).
    - ``{"ok": True, "skipped": True, "reason": "not_connect_scoped"}`` when
      the enriched payload does not qualify for auto release.
    - ``{"ok": True, "auto_released": True, ...}`` otherwise, including the
      grant result and the relay push URLs for the enriched payload.
    """
    outbox_key = str(outbox_id or "").strip()
    release_payload = enrich_connect_release_payload(payload)
    if outbox_key == "":
        return {"ok": False, "detail": "missing outbox_id"}
    if not should_auto_release_dm_payload(release_payload):
        return {"ok": True, "skipped": True, "reason": "not_connect_scoped"}

    recipient_id = str(release_payload.get("recipient_id", "") or "").strip()
    if recipient_id == "":
        return {"ok": False, "detail": "missing recipient_id"}

    # The grant result is reported back but does not gate the release.
    grant = grant_connect_relay_policy(recipient_id)
    try:
        from services.mesh.mesh_private_outbox import private_delivery_outbox
        from services.mesh.mesh_private_release_worker import private_release_worker

        private_delivery_outbox.approve_relay_release(outbox_key)
        private_release_worker.ensure_started()
        private_release_worker.wake()
    except Exception as exc:
        failure = str(exc) or type(exc).__name__
        return {"ok": False, "detail": failure, "grant": grant}

    summary: dict[str, Any] = {"ok": True, "auto_released": True}
    summary["outbox_id"] = outbox_key
    summary["recipient_id"] = recipient_id
    summary["grant"] = grant
    summary["relay_push_peer_urls"] = relay_push_peer_urls_for_payload(release_payload)
    return summary
|
||||
@@ -317,6 +317,39 @@ class DMRelay:
|
||||
def _self_mailbox_limit(self) -> int:
|
||||
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
||||
|
||||
def _per_sender_pending_limit(self) -> int:
|
||||
"""Anti-spam cap on UNACKED messages a single sender can have parked
|
||||
in a single recipient mailbox at any one time. See ``config.py``
|
||||
``MESH_DM_PENDING_PER_SENDER_LIMIT`` for the threat model — this
|
||||
rule is enforced both at ``deposit`` (local) and at
|
||||
``accept_replica`` (peer push acceptance), making it a network
|
||||
rule rather than a client-side honor system."""
|
||||
try:
|
||||
limit = int(getattr(self._settings(), "MESH_DM_PENDING_PER_SENDER_LIMIT", 2) or 2)
|
||||
except (TypeError, ValueError):
|
||||
limit = 2
|
||||
return max(1, limit)
|
||||
|
||||
def _per_sender_pending_count(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
sender_block_ref: str,
|
||||
) -> int:
|
||||
"""Count UNACKED messages from ``sender_block_ref`` currently parked
|
||||
in ``mailbox_key``. Caller already holds ``self._lock``.
|
||||
|
||||
Messages that have been claimed/acked are removed from the mailbox
|
||||
list (see ``claim_message_ids``), so anything still here is by
|
||||
definition unacked. We count by exact ``sender_block_ref`` match
|
||||
— that's the per-pair sender identity used for blocking too, so
|
||||
the cap is naturally per-(sender, recipient).
|
||||
"""
|
||||
if not mailbox_key or not sender_block_ref:
|
||||
return 0
|
||||
messages = self._mailboxes.get(mailbox_key, [])
|
||||
return sum(1 for m in messages if m.sender_block_ref == sender_block_ref)
|
||||
|
||||
def _nonce_ttl_seconds(self) -> int:
|
||||
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
||||
|
||||
@@ -1473,6 +1506,7 @@ class DMRelay:
|
||||
sender_token_hash: str = "",
|
||||
payload_format: str = "dm1",
|
||||
session_welcome: str = "",
|
||||
replication_peer_urls: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
@@ -1515,32 +1549,426 @@ class DMRelay:
|
||||
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
||||
metrics_inc("dm_drop_full")
|
||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||
# Anti-spam: per-(sender, recipient) cap on unacked messages.
|
||||
# A sender who already has the configured number of messages
|
||||
# parked in this mailbox can't deposit more until the recipient
|
||||
# pulls (acks) at least one. The same cap is re-enforced on
|
||||
# inbound replication in ``accept_replica`` so this rule isn't
|
||||
# bypassable by patching out the local check on a hostile
|
||||
# sender's relay — see config.py
|
||||
# MESH_DM_PENDING_PER_SENDER_LIMIT for the threat model.
|
||||
per_sender_limit = self._per_sender_pending_limit()
|
||||
pending = self._per_sender_pending_count(
|
||||
mailbox_key=mailbox_key,
|
||||
sender_block_ref=sender_block_ref,
|
||||
)
|
||||
if pending >= per_sender_limit:
|
||||
metrics_inc("dm_drop_per_sender_cap")
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": (
|
||||
f"Recipient already has {pending} unread message"
|
||||
f"{'s' if pending != 1 else ''} from you. Wait for "
|
||||
"them to read your messages before sending more."
|
||||
),
|
||||
}
|
||||
if not msg_id:
|
||||
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
||||
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
||||
return {"ok": True, "msg_id": msg_id}
|
||||
relay_sender_id = (
|
||||
f"sender_token:{sender_token_hash}"
|
||||
if sender_token_hash
|
||||
else sender_id
|
||||
duplicate_hit = any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key])
|
||||
if not duplicate_hit:
|
||||
relay_sender_id = (
|
||||
f"sender_token:{sender_token_hash}"
|
||||
if sender_token_hash
|
||||
else sender_id
|
||||
)
|
||||
self._mailboxes[mailbox_key].append(
|
||||
DMMessage(
|
||||
sender_id=relay_sender_id,
|
||||
ciphertext=ciphertext,
|
||||
timestamp=time.time(),
|
||||
msg_id=msg_id,
|
||||
delivery_class=delivery_class,
|
||||
sender_seal=sender_seal,
|
||||
sender_block_ref=sender_block_ref,
|
||||
payload_format=str(payload_format or "dm1"),
|
||||
session_welcome=str(session_welcome or ""),
|
||||
)
|
||||
)
|
||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||
self._save()
|
||||
preferred_urls = list(replication_peer_urls or [])
|
||||
envelope_for_push: dict[str, Any] | None = None
|
||||
try:
|
||||
envelope_for_push = self.envelope_for_replication(
|
||||
mailbox_key=mailbox_key,
|
||||
msg_id=msg_id,
|
||||
recipient_id=recipient_id,
|
||||
recipient_token=recipient_token,
|
||||
)
|
||||
except Exception:
|
||||
metrics_inc("dm_replication_push_error")
|
||||
deposit_result = {"ok": True, "msg_id": msg_id}
|
||||
if duplicate_hit:
|
||||
deposit_result["duplicate"] = True
|
||||
|
||||
if envelope_for_push:
|
||||
# Invite-scoped connect traffic names an explicit recipient relay
|
||||
# (lookup_peer_url). Block until that push completes so the
|
||||
# recipient can poll their own node; fleet-wide fan-out stays
|
||||
# async so dead manifest peers cannot wedge deposit().
|
||||
if preferred_urls:
|
||||
logger.info(
|
||||
"DM deposit awaiting scoped replicate to %d peer(s)",
|
||||
len(preferred_urls),
|
||||
)
|
||||
deposit_result["replicate"] = self._replicate_envelope_to_peers(
|
||||
envelope=envelope_for_push,
|
||||
preferred_peer_urls=preferred_urls,
|
||||
)
|
||||
else:
|
||||
self._replicate_envelope_to_peers_async(
|
||||
envelope=envelope_for_push,
|
||||
preferred_peer_urls=[],
|
||||
)
|
||||
elif preferred_urls:
|
||||
logger.warning(
|
||||
"DM deposit skipped scoped replicate: envelope missing for msg_id=%s",
|
||||
msg_id,
|
||||
)
|
||||
return deposit_result
|
||||
|
||||
def _replicate_envelope_to_peers(
    self,
    *,
    envelope: dict[str, Any],
    preferred_peer_urls: list[str] | None = None,
) -> dict[str, Any]:
    """Push an envelope to relay peers. Returns per-peer results.

    Target selection: the normalized, de-duplicated ``preferred_peer_urls``
    are used when at least one survives normalization; otherwise the URLs
    from ``authenticated_push_peer_urls()`` are used.

    Each peer receives a POST to ``/api/mesh/dm/replicate-envelope`` with the
    compact JSON body ``{"envelope": ...}``. When a peer key resolves for the
    URL, ``X-Peer-Url`` and ``X-Peer-HMAC`` (HMAC-SHA256 of the body) headers
    are attached.

    Returns:
        ``{"ok", "scoped", "pushed", "failed"}`` — or, when there is no peer
        at all, ``{"ok": False, "detail": "no_relay_peers", "pushed": [],
        "failed": []}``. ``scoped`` is True when any preferred URL was
        supplied; ``ok`` is True when at least one peer accepted the push.
    """
    import hashlib
    import hmac
    import requests as _requests

    from services.mesh.mesh_crypto import (
        normalize_peer_url,
        resolve_peer_key_for_url,
    )
    from services.mesh.mesh_router import authenticated_push_peer_urls

    # Preferred (scoped) peers take priority over the fleet-wide list.
    peers: list[str] = []
    for raw_url in list(preferred_peer_urls or []):
        normalized_preferred = normalize_peer_url(str(raw_url or "").strip())
        if normalized_preferred and normalized_preferred not in peers:
            peers.append(normalized_preferred)
    if not peers:
        # Nothing usable was preferred: fall back to the authenticated push peers.
        for peer_url in authenticated_push_peer_urls():
            normalized_peer = normalize_peer_url(str(peer_url or "").strip())
            if normalized_peer and normalized_peer not in peers:
                peers.append(normalized_peer)
    if not peers:
        return {"ok": False, "detail": "no_relay_peers", "pushed": [], "failed": []}

    # Log at most the first three targets to keep the line short.
    logger.info(
        "DM replicate push starting for %d peer(s): %s",
        len(peers),
        ", ".join(peers[:3]) + ("..." if len(peers) > 3 else ""),
    )

    # Serialized once: the same bytes are both sent and HMAC-signed.
    payload = json.dumps(
        {"envelope": envelope},
        separators=(",", ":"),
        ensure_ascii=False,
    ).encode("utf-8")

    # Configured push timeout in seconds (default 10, minimum 1).
    base_timeout = max(
        1,
        int(getattr(self._settings(), "MESH_RELAY_PUSH_TIMEOUT_S", 10) or 10),
    )

    from main import _infonet_peer_requests_proxies

    # Normalized preferred URLs; membership selects the larger retry budget below.
    preferred_set = {
        normalize_peer_url(str(raw_url or "").strip())
        for raw_url in list(preferred_peer_urls or [])
    }
    preferred_set.discard("")

    pushed: list[str] = []
    failed: list[dict[str, str]] = []
    for peer_url in peers:
        try:
            normalized = normalize_peer_url(peer_url)
            # .onion peers get a timeout of at least 180 seconds.
            timeout = max(180 if ".onion" in normalized else 1, base_timeout)
            headers = {"Content-Type": "application/json"}
            peer_key = resolve_peer_key_for_url(normalized)
            if peer_key:
                # Authentication headers are only attached when a key resolves.
                headers["X-Peer-Url"] = normalized
                headers["X-Peer-HMAC"] = hmac.new(
                    peer_key, payload, hashlib.sha256
                ).hexdigest()
            url = f"{peer_url}/api/mesh/dm/replicate-envelope"
            request_kwargs: dict[str, Any] = {
                "data": payload,
                "timeout": timeout,
                "headers": headers,
            }
            proxies = _infonet_peer_requests_proxies(normalized)
            if proxies:
                request_kwargs["proxies"] = proxies
            resp = None
            # Preferred peers get three attempts, all others two.
            max_attempts = 3 if normalized in preferred_set else 2
            last_exc = ""
            for attempt in range(max_attempts):
                try:
                    resp = _requests.post(url, **request_kwargs)
                    break
                except Exception as exc:
                    last_exc = str(exc) or type(exc).__name__
                    if attempt + 1 < max_attempts:
                        # Linear back-off before the next attempt: 5 s, then 10 s.
                        time.sleep(5.0 * (attempt + 1))
                        continue
                    # Out of attempts: record the failure for this peer.
                    logger.warning(
                        "DM replicate push to %s failed: %s",
                        peer_url,
                        last_exc,
                    )
                    metrics_inc("dm_replication_push_error")
                    resp = None
                    break
            if resp is None:
                failed.append({"url": peer_url, "detail": last_exc or "request_failed"})
                continue
            if resp.status_code == 200:
                body_ok = True
                detail = ""
                try:
                    body = resp.json()
                    # A 200 whose JSON body says ok == False is still a rejection.
                    if isinstance(body, dict) and body.get("ok") is False:
                        body_ok = False
                        detail = str(body.get("detail", "") or "replicate rejected")[:200]
                except Exception:
                    # A body that cannot be parsed is treated as success.
                    body_ok = True
                if body_ok:
                    logger.info("DM replicate push to %s succeeded", peer_url)
                    metrics_inc("dm_replication_push_ok")
                    pushed.append(peer_url)
                else:
                    logger.warning(
                        "DM replicate push to %s rejected: %s",
                        peer_url,
                        detail,
                    )
                    metrics_inc("dm_replication_push_rejected")
                    failed.append({"url": peer_url, "detail": detail or "replicate_rejected"})
            else:
                # Non-200: keep at most 200 characters of the response text.
                detail = (resp.text or "")[:200]
                logger.warning(
                    "DM replicate push to %s -> %s: %s",
                    peer_url,
                    resp.status_code,
                    detail,
                )
                metrics_inc("dm_replication_push_rejected")
                failed.append({"url": peer_url, "detail": f"http_{resp.status_code}: {detail}"})
        except Exception as exc:
            # Anything unexpected for this peer is recorded; the loop continues.
            logger.warning("DM replicate push outer failure for %s: %s", peer_url, exc)
            metrics_inc("dm_replication_push_error")
            failed.append({"url": peer_url, "detail": str(exc) or type(exc).__name__})

    scoped = bool(preferred_set)
    # Parses as: bool(pushed) if scoped else (bool(pushed) or not failed).
    ok = bool(pushed) if scoped else bool(pushed) or not failed
    return {
        "ok": ok,
        "scoped": scoped,
        "pushed": pushed,
        "failed": failed,
    }
|
||||
|
||||
def accept_replica(
|
||||
self,
|
||||
*,
|
||||
envelope: dict[str, Any],
|
||||
originating_peer_url: str = "",
|
||||
) -> dict[str, Any]:
|
||||
"""Receive a DM envelope replicated from a peer relay.
|
||||
|
||||
Cross-node mailbox replication entry point. When a sender's local
|
||||
relay accepts a ``deposit`` and pushes the envelope to
|
||||
``MESH_RELAY_PEERS`` (so the recipient can log into any peer
|
||||
node and find their messages), each receiving peer calls
|
||||
``accept_replica`` to ingest it.
|
||||
|
||||
The per-(sender, recipient) cap is re-enforced HERE. That's what
|
||||
makes the rule a NETWORK rule rather than a client-side honor
|
||||
system: a hostile sender who patches out the local ``deposit``
|
||||
check still can't get a 3rd unacked message to spread, because
|
||||
every honest peer enforces the same cap on inbound replicas.
|
||||
Result: hostile relays can hold extras locally, but those extras
|
||||
never reach any node a legitimate recipient is polling from.
|
||||
|
||||
Returns the same shape as ``deposit`` so the calling endpoint can
|
||||
forward the result back to the originating peer.
|
||||
"""
|
||||
if not isinstance(envelope, dict):
|
||||
return {"ok": False, "detail": "envelope must be an object"}
|
||||
msg_id = str(envelope.get("msg_id", "") or "").strip()
|
||||
mailbox_key = str(envelope.get("mailbox_key", "") or "").strip()
|
||||
sender_block_ref = str(envelope.get("sender_block_ref", "") or "").strip()
|
||||
ciphertext = str(envelope.get("ciphertext", "") or "")
|
||||
delivery_class = str(envelope.get("delivery_class", "") or "").strip().lower()
|
||||
recipient_id = str(envelope.get("recipient_id", "") or "").strip()
|
||||
recipient_token = str(envelope.get("recipient_token", "") or "").strip()
|
||||
if delivery_class not in ("request", "shared", "self"):
|
||||
if recipient_id and not recipient_token:
|
||||
delivery_class = "request"
|
||||
elif recipient_token:
|
||||
delivery_class = "shared"
|
||||
if delivery_class == "request":
|
||||
if not recipient_id:
|
||||
try:
|
||||
from services.mesh.mesh_wormhole_persona import get_dm_identity
|
||||
|
||||
recipient_id = str((get_dm_identity() or {}).get("node_id") or "").strip()
|
||||
except Exception:
|
||||
recipient_id = ""
|
||||
if recipient_id:
|
||||
mailbox_key = self.mailbox_key_for_delivery(
|
||||
recipient_id=recipient_id,
|
||||
delivery_class="request",
|
||||
)
|
||||
elif delivery_class == "shared" and recipient_token:
|
||||
mailbox_key = self.mailbox_key_for_delivery(
|
||||
recipient_id=recipient_id,
|
||||
delivery_class="shared",
|
||||
recipient_token=recipient_token,
|
||||
)
|
||||
if not msg_id or not mailbox_key or not sender_block_ref or not ciphertext:
|
||||
return {"ok": False, "detail": "envelope missing required fields"}
|
||||
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
self._cleanup_expired()
|
||||
|
||||
# Idempotent — if we already hold this exact msg_id, the
|
||||
# replication round-tripped or a peer pushed the same
|
||||
# envelope through multiple paths. Accept silently.
|
||||
if any(m.msg_id == msg_id for m in self._mailboxes.get(mailbox_key, [])):
|
||||
metrics_inc("dm_replica_duplicate")
|
||||
return {"ok": True, "msg_id": msg_id, "duplicate": True}
|
||||
|
||||
# Same per-class cap as the deposit path — defense in depth
|
||||
# against a peer that wraps a "deposit" as a "replica" to
|
||||
# bypass the class limit.
|
||||
if delivery_class in ("request", "shared", "self"):
|
||||
class_limit = self._mailbox_limit_for_class(delivery_class)
|
||||
else:
|
||||
class_limit = self._shared_mailbox_limit()
|
||||
if len(self._mailboxes.get(mailbox_key, [])) >= class_limit:
|
||||
metrics_inc("dm_replica_drop_full")
|
||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||
|
||||
# THE network rule: per-(sender, recipient) anti-spam cap.
|
||||
per_sender_limit = self._per_sender_pending_limit()
|
||||
pending = self._per_sender_pending_count(
|
||||
mailbox_key=mailbox_key,
|
||||
sender_block_ref=sender_block_ref,
|
||||
)
|
||||
if pending >= per_sender_limit:
|
||||
metrics_inc("dm_replica_drop_per_sender_cap")
|
||||
# Returning a structured rejection — the sender's relay
|
||||
# learns its envelope was rejected by an honest peer and
|
||||
# can stop trying to push it.
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": (
|
||||
"Per-sender cap reached on this relay; refusing replica"
|
||||
),
|
||||
"cap_violation": True,
|
||||
"pending": pending,
|
||||
"limit": per_sender_limit,
|
||||
}
|
||||
|
||||
# Accept the replica into the local mailbox.
|
||||
self._mailboxes[mailbox_key].append(
|
||||
DMMessage(
|
||||
sender_id=relay_sender_id,
|
||||
sender_id=str(envelope.get("sender_id", "") or ""),
|
||||
ciphertext=ciphertext,
|
||||
timestamp=time.time(),
|
||||
timestamp=float(envelope.get("timestamp", time.time()) or time.time()),
|
||||
msg_id=msg_id,
|
||||
delivery_class=delivery_class,
|
||||
sender_seal=sender_seal,
|
||||
delivery_class=str(envelope.get("delivery_class", "shared") or "shared"),
|
||||
sender_seal=str(envelope.get("sender_seal", "") or ""),
|
||||
relay_salt=str(envelope.get("relay_salt", "") or ""),
|
||||
sender_block_ref=sender_block_ref,
|
||||
payload_format=str(payload_format or "dm1"),
|
||||
session_welcome=str(session_welcome or ""),
|
||||
payload_format=str(envelope.get("payload_format", "dm1") or "dm1"),
|
||||
session_welcome=str(envelope.get("session_welcome", "") or ""),
|
||||
)
|
||||
)
|
||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||
self._save()
|
||||
metrics_inc("dm_replica_accepted")
|
||||
return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def _replicate_envelope_to_peers_async(
|
||||
self,
|
||||
*,
|
||||
envelope: dict[str, Any],
|
||||
preferred_peer_urls: list[str] | None = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget fleet-wide replicate push (non-scoped traffic)."""
|
||||
import threading
|
||||
|
||||
def _do_push() -> None:
|
||||
try:
|
||||
self._replicate_envelope_to_peers(
|
||||
envelope=envelope,
|
||||
preferred_peer_urls=preferred_peer_urls,
|
||||
)
|
||||
except Exception:
|
||||
metrics_inc("dm_replication_push_error")
|
||||
|
||||
thread = threading.Thread(
|
||||
target=_do_push,
|
||||
name="dm-replicate-push",
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
|
||||
def envelope_for_replication(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
msg_id: str,
|
||||
recipient_id: str = "",
|
||||
recipient_token: str | None = None,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Return the wire-form envelope for a stored message, suitable
|
||||
for POSTing to a peer relay's replicate-envelope endpoint.
|
||||
|
||||
Returns ``None`` if the message isn't in the mailbox (already
|
||||
acked, expired, never existed). The caller holds the
|
||||
responsibility for transport security (Tor SOCKS for .onion
|
||||
peers, per-peer HMAC) and for not leaking the envelope to
|
||||
clearnet peers when private transport is required.
|
||||
"""
|
||||
with self._lock:
|
||||
for m in self._mailboxes.get(mailbox_key, []):
|
||||
if m.msg_id == msg_id:
|
||||
return {
|
||||
"msg_id": m.msg_id,
|
||||
"mailbox_key": mailbox_key,
|
||||
"recipient_id": str(recipient_id or "").strip(),
|
||||
"recipient_token": str(recipient_token or "").strip(),
|
||||
"sender_id": m.sender_id,
|
||||
"sender_block_ref": m.sender_block_ref,
|
||||
"sender_seal": m.sender_seal,
|
||||
"ciphertext": m.ciphertext,
|
||||
"timestamp": m.timestamp,
|
||||
"delivery_class": m.delivery_class,
|
||||
"relay_salt": m.relay_salt,
|
||||
"payload_format": m.payload_format,
|
||||
"session_welcome": m.session_welcome,
|
||||
}
|
||||
return None
|
||||
|
||||
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Public Infonet fleet defaults for sb-testnet-0 participants.
|
||||
|
||||
Operators who run private single-node installs can set ``MESH_INFONET_FLEET_JOIN=false``
|
||||
and provide their own signer keys / peer secrets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
FLEET_NETWORK_ID = "sb-testnet-0"
|
||||
FLEET_SEED_ONION_URL = (
|
||||
"http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||
)
|
||||
FLEET_BOOTSTRAP_SIGNER_PUBLIC_KEY_B64 = (
|
||||
"ul1d0kj/ODPIp0OhHzX8eLAVXzJ3CVvzW1vn2IC6q3I="
|
||||
)
|
||||
# Shared fleet HMAC for sb-testnet peer announce/push/sync. Public testnet join model.
|
||||
FLEET_PEER_PUSH_SECRET = "b7GoqsvoUD9MV7tyt0ZOzMptLA84QG6KCfaV9nDqz5Y"
|
||||
|
||||
|
||||
def infonet_fleet_join_enabled() -> bool:
    """Report whether this node should use the public fleet defaults.

    ``MESH_INFONET_FLEET_JOIN_DISABLED`` wins when truthy; otherwise the
    value of ``MESH_INFONET_FLEET_JOIN`` (default ``True``) is returned.
    If settings cannot be loaded for any reason the function answers
    ``True``.
    """
    try:
        from services.config import get_settings

        explicitly_disabled = bool(
            getattr(get_settings(), "MESH_INFONET_FLEET_JOIN_DISABLED", False)
        )
        if explicitly_disabled:
            return False
        join_flag = getattr(get_settings(), "MESH_INFONET_FLEET_JOIN", True)
        return bool(join_flag)
    except Exception:
        # Settings unavailable: fall back to joining the fleet.
        return True
|
||||
|
||||
|
||||
def effective_bootstrap_signer_public_key_b64() -> str:
    """Return the bootstrap signer public key this node should trust.

    An operator-configured ``MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY`` takes
    precedence. Without one, the fleet default is used when fleet join is
    enabled, and the empty string otherwise.
    """
    override = ""
    try:
        from services.config import get_settings

        override = str(
            getattr(get_settings(), "MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY", "") or ""
        ).strip()
    except Exception:
        # Settings could not be read; treat as "nothing configured".
        override = ""
    if override:
        return override
    return FLEET_BOOTSTRAP_SIGNER_PUBLIC_KEY_B64 if infonet_fleet_join_enabled() else ""
|
||||
|
||||
|
||||
def effective_peer_push_secret() -> str:
    """Return the peer push HMAC secret this node should use.

    An operator-configured ``MESH_PEER_PUSH_SECRET`` takes precedence.
    Without one, the shared fleet secret is used when fleet join is
    enabled, and the empty string otherwise.
    """
    override = ""
    try:
        from services.config import get_settings

        override = str(getattr(get_settings(), "MESH_PEER_PUSH_SECRET", "") or "").strip()
    except Exception:
        # Settings could not be read; treat as "nothing configured".
        override = ""
    if override:
        return override
    return FLEET_PEER_PUSH_SECRET if infonet_fleet_join_enabled() else ""
|
||||
|
||||
|
||||
def configured_bootstrap_seed_peers_with_fleet_default(peers: list[str]) -> list[str]:
    """Return ``peers``, or the fleet seed when none are configured.

    A non-empty ``peers`` list is returned unchanged (same object). An
    empty one yields the single fleet seed onion URL when fleet join is
    enabled, and an empty list otherwise.
    """
    if not peers and infonet_fleet_join_enabled():
        return [FLEET_SEED_ONION_URL]
    return peers if peers else []
|
||||
@@ -33,8 +33,9 @@ Each event contains:
|
||||
|
||||
Persistence: JSON file at backend/data/infonet.json
|
||||
|
||||
Encrypted gate chat events are intentionally kept off the public chain and
|
||||
persisted separately via GateMessageStore.
|
||||
Encrypted gate chat events are private-chain ciphertext records. They are
|
||||
excluded from public read surfaces and replicated only over private Infonet
|
||||
transports.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -64,6 +65,8 @@ from services.mesh.mesh_schema import (
|
||||
ACTIVE_PUBLIC_LEDGER_EVENT_TYPES,
|
||||
PUBLIC_LEDGER_EVENT_TYPES,
|
||||
validate_event_payload,
|
||||
validate_private_dm_ledger_payload,
|
||||
validate_private_gate_ledger_payload,
|
||||
validate_protocol_fields,
|
||||
validate_public_ledger_payload,
|
||||
)
|
||||
@@ -127,6 +130,12 @@ GATE_SEGMENT_MAX_COMPRESSED_BYTES = max(
|
||||
int(os.environ.get("MESH_GATE_SEGMENT_MAX_COMPRESSED_BYTES", str(2 * 1024 * 1024)) or str(2 * 1024 * 1024)),
|
||||
)
|
||||
GATE_SEGMENT_STORAGE_VERSION = 1
|
||||
DM_HASHCHAIN_SPOOL_LIMIT = max(1, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_LIMIT", "2") or "2"))
|
||||
DM_HASHCHAIN_SPOOL_SENDER_LIMIT = max(
|
||||
1,
|
||||
int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_SENDER_LIMIT", "1") or "1"),
|
||||
)
|
||||
DM_HASHCHAIN_SPOOL_TTL_S = max(60, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_TTL_S", "3600") or "3600"))
|
||||
_PUBLIC_EVENT_APPEND_HOOKS: list[Any] = []
|
||||
_PUBLIC_EVENT_APPEND_HOOKS_LOCK = threading.Lock()
|
||||
|
||||
@@ -216,18 +225,19 @@ def _peer_pair_ref_key(peer_url: str) -> bytes:
|
||||
Returns an empty key on misconfiguration so callers fail closed.
|
||||
"""
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
from services.mesh.mesh_crypto import (
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
except Exception:
|
||||
return b""
|
||||
if not secret:
|
||||
return b""
|
||||
normalized = normalize_peer_url(peer_url or "")
|
||||
if not normalized:
|
||||
return b""
|
||||
peer_key = _derive_peer_key(secret, normalized)
|
||||
# Issue #256: resolve_peer_key_for_url() prefers per-peer secrets
|
||||
# from MESH_PEER_SECRETS and falls back to the global
|
||||
# MESH_PEER_PUSH_SECRET only when the URL has no per-peer entry.
|
||||
peer_key = resolve_peer_key_for_url(normalized)
|
||||
if not peer_key:
|
||||
return b""
|
||||
# Domain-separate from the transport HMAC key so the two
|
||||
@@ -339,6 +349,32 @@ def _private_gate_event_id(
|
||||
).hexdigest()
|
||||
|
||||
|
||||
def _private_gate_signature_payload_variants(gate_id: str, event: dict[str, Any]) -> list[dict[str, Any]]:
    """List the distinct payload shapes a gate signature may cover.

    Candidates, in order: the full signature payload; the payload built
    with ``include_reply_to=False`` (only when the event carries a
    non-blank ``reply_to``); the full payload without ``epoch``; and the
    no-reply payload without ``epoch``. Candidates whose canonical JSON
    encoding is identical are collapsed, keeping the first occurrence.
    """
    full_payload = _private_gate_signature_payload(gate_id, event)
    raw_inner = event.get("payload")
    inner = raw_inner if isinstance(raw_inner, dict) else {}
    has_reply = bool(str(inner.get("reply_to", "") or "").strip())

    candidates: list[dict[str, Any]] = [full_payload]
    if has_reply:
        candidates.append(
            _private_gate_signature_payload(gate_id, event, include_reply_to=False)
        )
    if "epoch" in full_payload:
        candidates.append(
            {field: value for field, value in full_payload.items() if field != "epoch"}
        )
        if has_reply:
            stripped = _private_gate_signature_payload(gate_id, event, include_reply_to=False)
            stripped.pop("epoch", None)
            candidates.append(stripped)

    # Keyed by canonical JSON so structurally equal candidates appear once;
    # dict insertion order preserves the candidate priority above.
    unique: dict[str, dict[str, Any]] = {}
    for candidate in candidates:
        canonical = json.dumps(
            candidate, sort_keys=True, separators=(",", ":"), ensure_ascii=False
        )
        unique.setdefault(canonical, candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def _sanitize_private_gate_event(gate_id: str, event: dict[str, Any]) -> dict[str, Any]:
|
||||
payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
||||
sanitized = {
|
||||
@@ -1567,11 +1603,18 @@ class Infonet:
|
||||
def _rebuild_state(self) -> None:
|
||||
self.event_index = {}
|
||||
self.node_sequences = {}
|
||||
# Keep private signed-write replay domains across public-chain
|
||||
# rebuilds; these domains protect local side effects that are not
|
||||
# represented as public Infonet events.
|
||||
if not isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
self.sequence_domains = {}
|
||||
# Keep private signed-write replay domains that are not represented
|
||||
# on-chain, but rebuild the gate_message sequence domain from chain
|
||||
# events so reloads/fork application do not mix it with public
|
||||
# per-node message sequences.
|
||||
preserved_domains = {}
|
||||
if isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
preserved_domains = {
|
||||
key: value
|
||||
for key, value in self.sequence_domains.items()
|
||||
if not str(key or "").endswith("|gate_message")
|
||||
}
|
||||
self.sequence_domains = dict(preserved_domains)
|
||||
self.public_key_bindings = {}
|
||||
self.revocations = {}
|
||||
self._replay_filter = ReplayFilter()
|
||||
@@ -1583,9 +1626,12 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(
|
||||
evt.get("event_type", ""), node_id
|
||||
)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
if sequence > last:
|
||||
self.node_sequences[node_id] = sequence
|
||||
sequence_table[sequence_key] = sequence
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
existing = self.public_key_bindings.get(public_key)
|
||||
@@ -1897,6 +1943,295 @@ class Infonet:
|
||||
self._save()
|
||||
return True, "ok"
|
||||
|
||||
def _sequence_table_for_event(self, event_type: str, node_id: str) -> tuple[dict[str, int], str]:
    """Pick the sequence table and key used for replay checks.

    ``gate_message`` and ``dm_message`` events are tracked in
    ``self.sequence_domains`` under ``"<node_id>|<event_type>"``; every
    other event type uses ``self.node_sequences`` keyed by ``node_id``.
    The event type is matched after stripping and lower-casing.
    """
    kind = str(event_type or "").strip().lower()
    if kind in ("gate_message", "dm_message"):
        return self.sequence_domains, f"{node_id}|{kind}"
    return self.node_sequences, node_id
|
||||
|
||||
def _dm_spool_target_key(self, payload: dict[str, Any]) -> tuple[str, str]:
    """Return ``(delivery_class, target)`` identifying a DM spool bucket.

    The delivery class is stripped and lower-cased. For ``"shared"``
    delivery the target is the payload's ``recipient_token``; for any
    other class it is ``recipient_id``. Both are returned stripped.
    """
    delivery_class = str(payload.get("delivery_class", "") or "").strip().lower()
    target_field = "recipient_token" if delivery_class == "shared" else "recipient_id"
    return delivery_class, str(payload.get(target_field, "") or "").strip()
|
||||
|
||||
def _dm_spool_active_counts(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
sender_id: str = "",
|
||||
now: float | None = None,
|
||||
) -> tuple[int, int]:
|
||||
delivery_class, key = self._dm_spool_target_key(payload)
|
||||
if not key:
|
||||
return 0, 0
|
||||
sender_id = str(sender_id or "").strip()
|
||||
current = time.time() if now is None else float(now)
|
||||
total_count = 0
|
||||
sender_count = 0
|
||||
for evt in reversed(self.events):
|
||||
if evt.get("event_type") != "dm_message":
|
||||
continue
|
||||
evt_payload = evt.get("payload") if isinstance(evt.get("payload"), dict) else {}
|
||||
evt_delivery_class, evt_key = self._dm_spool_target_key(evt_payload)
|
||||
if evt_delivery_class != delivery_class:
|
||||
continue
|
||||
if evt_key != key:
|
||||
continue
|
||||
evt_ts = float(evt_payload.get("timestamp", evt.get("timestamp", 0)) or 0)
|
||||
if evt_ts > 0 and current - evt_ts > DM_HASHCHAIN_SPOOL_TTL_S:
|
||||
continue
|
||||
total_count += 1
|
||||
if sender_id and str(evt.get("node_id", "") or "").strip() == sender_id:
|
||||
sender_count += 1
|
||||
if total_count >= DM_HASHCHAIN_SPOOL_LIMIT and (
|
||||
not sender_id or sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT
|
||||
):
|
||||
break
|
||||
return total_count, sender_count
|
||||
|
||||
def _dm_spool_active_count(self, payload: dict[str, Any], *, now: float | None = None) -> int:
|
||||
total_count, _sender_count = self._dm_spool_active_counts(payload, now=now)
|
||||
return total_count
|
||||
|
||||
def append_private_dm_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Append an encrypted DM dead-drop message to the private Infonet ledger.

    The event acts as a small offline spool, capped per mailbox target
    and per sender, so the hashchain can carry a couple of sealed DMs
    without turning into an unbounded global mailbox.

    Checks run in this order, each raising ``ValueError`` on failure:
    sequence/replay, plaintext and ``transport_lock`` guards, payload
    validation, spool limits, payload size, protocol fields, signature
    fields and key binding, signature verification, key revocation.

    Returns:
        The appended event as a dict.
    """
    event_type = "dm_message"

    # DMs use their own per-node sequence domain for replay protection.
    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
    last = sequence_table.get(sequence_key, 0)
    if sequence <= last:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")

    # Reject anything that looks like cleartext before normalising.
    raw_payload = dict(payload or {})
    if any(field in raw_payload for field in ("message", "plaintext", "_local_plaintext")):
        raise ValueError("private DM ledger payload must not contain plaintext")
    transport_lock = str(raw_payload.get("transport_lock", "") or "").strip().lower()
    if transport_lock != "private_strong":
        raise ValueError("DM hashchain spool requires private_strong transport_lock")

    ledger_payload = normalize_payload(event_type, raw_payload)
    valid, why = validate_private_dm_ledger_payload(ledger_payload)
    if not valid:
        raise ValueError(why)

    # The per-sender cap is checked before the per-recipient cap.
    spool_total, spool_from_sender = self._dm_spool_active_counts(
        ledger_payload, sender_id=node_id
    )
    if spool_from_sender >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
        raise ValueError("DM hashchain sender spool full for recipient")
    if spool_total >= DM_HASHCHAIN_SPOOL_LIMIT:
        raise ValueError("DM hashchain spool full for recipient")

    encoded_payload = json.dumps(
        ledger_payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
    ).encode("utf-8")
    if len(encoded_payload) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    valid, why = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not valid:
        raise ValueError(why)

    if not signature or not public_key or not public_key_algo:
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)

    signed_material = build_signature_payload(
        event_type=event_type,
        node_id=node_id,
        sequence=sequence,
        payload=ledger_payload,
    )
    signature_valid = verify_signature(
        public_key_b64=public_key,
        public_key_algo=public_key_algo,
        signature_hex=signature,
        payload=signed_material,
    )
    if not signature_valid:
        raise ValueError("Invalid signature")

    is_revoked, _ = self._revocation_status(public_key)
    if is_revoked:
        raise ValueError("public key is revoked")

    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=ledger_payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    event_dict = event.to_dict()
    new_event_id = event.event_id

    # Commit: WAL first, then in-memory chain state, then persistence.
    self._write_wal(event_dict)
    self.events.append(event_dict)
    self.event_index[new_event_id] = len(self.events) - 1
    self.head_hash = new_event_id
    sequence_table[sequence_key] = sequence
    self._replay_filter.add(new_event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(event_dict)
    self._save()

    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(event_dict)
    except Exception:
        # Publishing over RNS is best-effort; the append already succeeded.
        pass
    _notify_public_event_append_hooks(event_dict)
    logger.info(
        f"Infonet append [dm_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return event_dict
|
||||
|
||||
def append_private_gate_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Append an encrypted gate message to the private Infonet ledger.

    Gate messages use their own sequence domain so a gate post cannot
    consume or replay-block the author's public broadcast sequence.

    Checks run in this order, each raising ``ValueError`` on failure:
    sequence/replay, plaintext and ``transport_lock`` guards, payload
    validation, payload size, protocol fields, signature fields and key
    binding, signature verification against every payload variant, key
    revocation.

    Returns:
        The appended event as a dict.
    """
    event_type = "gate_message"

    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
    last = sequence_table.get(sequence_key, 0)
    if sequence <= last:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")

    # Reject local-only / cleartext fields before normalising.
    raw_payload = dict(payload or {})
    if any(
        field in raw_payload
        for field in ("message", "_local_plaintext", "_local_reply_to")
    ):
        raise ValueError("private gate ledger payload must not contain plaintext")
    transport_lock = str(raw_payload.get("transport_lock", "") or "").strip().lower()
    if transport_lock != "private_strong":
        raise ValueError("gate messages require private_strong transport_lock")

    ledger_payload = normalize_payload(event_type, raw_payload)
    valid, why = validate_private_gate_ledger_payload(ledger_payload)
    if not valid:
        raise ValueError(why)

    encoded_payload = json.dumps(
        ledger_payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
    ).encode("utf-8")
    if len(encoded_payload) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    valid, why = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not valid:
        raise ValueError(why)

    if not signature or not public_key or not public_key_algo:
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)

    # The signature is accepted if it verifies against any candidate
    # payload shape; evaluation stops at the first match.
    gate_id = str(ledger_payload.get("gate", "") or "")
    signature_ok = any(
        verify_signature(
            public_key_b64=public_key,
            public_key_algo=public_key_algo,
            signature_hex=signature,
            payload=build_signature_payload(
                event_type=event_type,
                node_id=node_id,
                sequence=sequence,
                payload=candidate,
            ),
        )
        for candidate in _private_gate_signature_payload_variants(
            gate_id, {"payload": ledger_payload}
        )
    )
    if not signature_ok:
        raise ValueError("Invalid signature")

    is_revoked, _ = self._revocation_status(public_key)
    if is_revoked:
        raise ValueError("public key is revoked")

    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=ledger_payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    event_dict = event.to_dict()
    new_event_id = event.event_id

    # Commit: WAL first, then in-memory chain state, then persistence.
    self._write_wal(event_dict)
    self.events.append(event_dict)
    self.event_index[new_event_id] = len(self.events) - 1
    self.head_hash = new_event_id
    sequence_table[sequence_key] = sequence
    self._replay_filter.add(new_event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(event_dict)
    self._save()

    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(event_dict)
    except Exception:
        # Publishing over RNS is best-effort; the append already succeeded.
        pass
    _notify_public_event_append_hooks(event_dict)

    logger.info(
        f"Infonet append [gate_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return event_dict
|
||||
|
||||
def append(
|
||||
self,
|
||||
event_type: str,
|
||||
@@ -2077,6 +2412,18 @@ class Infonet:
|
||||
if not event_id or not prev_hash:
|
||||
rejected.append({"index": idx, "reason": "Missing event_id or prev_hash"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != expected_prev:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2095,25 +2442,14 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "network_id mismatch"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != self.head_hash:
|
||||
rejected.append({"index": idx, "reason": "prev_hash does not match head"})
|
||||
continue
|
||||
if sequence <= 0:
|
||||
rejected.append({"index": idx, "reason": "Invalid sequence"})
|
||||
continue
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
if sequence <= last:
|
||||
rejected.append({"index": idx, "reason": "Replay detected"})
|
||||
continue
|
||||
@@ -2148,7 +2484,18 @@ class Infonet:
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if ok:
|
||||
total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=str(evt.get("node_id", "") or ""))
|
||||
if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
|
||||
ok, reason = False, "DM hashchain sender spool full for recipient"
|
||||
elif total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
|
||||
ok, reason = False, "DM hashchain spool full for recipient"
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
@@ -2224,7 +2571,7 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "public key is revoked"})
|
||||
continue
|
||||
last_seq = self.node_sequences.get(node_id, 0)
|
||||
last_seq = sequence_table.get(sequence_key, 0)
|
||||
if sequence <= last_seq:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2260,18 +2607,30 @@ class Infonet:
|
||||
rejected.append({"index": idx, "reason": bind_reason})
|
||||
continue
|
||||
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
@@ -2301,7 +2660,7 @@ class Infonet:
|
||||
self.events.append(evt)
|
||||
self.event_index[event_id] = len(self.events) - 1
|
||||
self.head_hash = event_id
|
||||
self.node_sequences[node_id] = sequence
|
||||
sequence_table[sequence_key] = sequence
|
||||
self._update_counters_for_event(evt)
|
||||
accepted += 1
|
||||
expected_prev = event_id
|
||||
@@ -2364,6 +2723,7 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
@@ -2374,21 +2734,41 @@ class Infonet:
|
||||
return False, f"public key binding conflict at index {i}"
|
||||
seen_public_keys[public_key] = node_id
|
||||
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, f"Invalid signature at index {i}"
|
||||
|
||||
prev = evt_dict["event_id"]
|
||||
@@ -2453,27 +2833,48 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
if not verify_node_binding(node_id, public_key):
|
||||
return False, f"node_id mismatch at index {i}"
|
||||
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, f"Invalid signature at index {i}"
|
||||
prev = evt_dict["event_id"]
|
||||
|
||||
@@ -2537,7 +2938,14 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
last_seq[node_id] = max(last_seq.get(node_id, 0), sequence)
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last_seq[sequence_key] = max(last_seq.get(sequence_key, 0), sequence)
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
seen_public_keys.setdefault(public_key, node_id)
|
||||
@@ -2557,8 +2965,21 @@ class Infonet:
|
||||
existing_idx = self.event_index.get(event_id)
|
||||
if existing_idx is not None and existing_idx <= prev_index:
|
||||
return False, "duplicate event_id"
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
if event_type == "gate_message":
|
||||
payload = dict(payload or {})
|
||||
elif event_type == "dm_message":
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
else:
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
ok, reason = validate_event_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
proto = evt.get("protocol_version") or PROTOCOL_VERSION
|
||||
@@ -2572,7 +2993,14 @@ class Infonet:
|
||||
revoked, _info = self._revocation_status(public_key)
|
||||
if revoked and event_type != "key_revoke":
|
||||
return False, "public key revoked"
|
||||
last = last_seq.get(node_id, 0)
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if event_type == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if event_type == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last = last_seq.get(sequence_key, 0)
|
||||
if sequence <= last:
|
||||
return False, "sequence replay"
|
||||
from services.mesh.mesh_crypto import (
|
||||
@@ -2590,23 +3018,35 @@ class Infonet:
|
||||
if existing and existing != node_id:
|
||||
return False, "public key binding conflict"
|
||||
seen_public_keys[public_key] = node_id
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, "invalid signature"
|
||||
computed = ChainEvent.from_dict(evt).event_id
|
||||
if computed != event_id:
|
||||
return False, "event_id mismatch"
|
||||
last_seq[node_id] = sequence
|
||||
last_seq[sequence_key] = sequence
|
||||
|
||||
# Apply fork
|
||||
self.events = prefix + ordered
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
"""Auto-enable Tor wormhole transport on Infonet relay/seed nodes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from services.config import get_settings
|
||||
from services.wormhole_settings import read_wormhole_settings, write_wormhole_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def infonet_relay_auto_wormhole_requested() -> bool:
    """Return True when this node should auto-enable the Tor wormhole.

    Decision order, first match wins:

    1. ``MESH_INFONET_RELAY_AUTO_WORMHOLE_DISABLED`` truthy -> ``False``
       (the explicit opt-out always wins).
    2. ``MESH_INFONET_RELAY_AUTO_WORMHOLE`` truthy -> ``True``.
    3. ``MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY`` non-blank -> ``True``
       (presumably identifies a seed/bootstrap node; confirm with config docs).
    4. Otherwise ``False``.
    """
    settings = get_settings()
    if bool(settings.MESH_INFONET_RELAY_AUTO_WORMHOLE_DISABLED):
        return False
    if bool(settings.MESH_INFONET_RELAY_AUTO_WORMHOLE):
        return True
    signer_key = str(settings.MESH_BOOTSTRAP_SIGNER_PRIVATE_KEY or "").strip()
    return bool(signer_key)
|
||||
|
||||
|
||||
def _relay_tor_wormhole_target_settings() -> dict[str, Any]:
    """Build the wormhole settings a relay/seed node should be running with.

    The SOCKS proxy points at localhost on ``MESH_ARTI_SOCKS_PORT``; a
    missing or zero port falls back to 9050.  The ``socks5h`` scheme is
    used together with ``socks_dns=True``.
    """
    settings = get_settings()
    socks_port = int(settings.MESH_ARTI_SOCKS_PORT or 9050)
    return {
        "enabled": True,
        "transport": "tor_arti",
        "socks_proxy": f"socks5h://127.0.0.1:{socks_port}",
        "socks_dns": True,
        "anonymous_mode": True,
    }
|
||||
|
||||
|
||||
def _wormhole_settings_match(existing: dict[str, Any], target: dict[str, Any]) -> bool:
|
||||
return (
|
||||
bool(existing.get("enabled")) is bool(target["enabled"])
|
||||
and str(existing.get("transport", "")) == str(target["transport"])
|
||||
and str(existing.get("socks_proxy", "")) == str(target["socks_proxy"])
|
||||
and bool(existing.get("socks_dns", True)) is bool(target["socks_dns"])
|
||||
and bool(existing.get("anonymous_mode", False)) is bool(target["anonymous_mode"])
|
||||
)
|
||||
|
||||
|
||||
def ensure_infonet_relay_wormhole_ready(*, reason: str = "relay_auto") -> dict[str, Any]:
    """Persist Tor wormhole settings and connect on relay/seed startup.

    Args:
        reason: Free-form tag forwarded to the wormhole supervisor and
            included in log lines.

    Returns:
        ``{"ok": True, "skipped": True, "reason": "not_requested"}`` when
        auto-wormhole is not requested for this node.  Otherwise a dict with
        ``ok``, ``skipped`` (False), ``settings_updated``, ``tor`` (result of
        starting the Tor service, or an ``{"ok": False, "detail": ...}``
        placeholder on failure), ``runtime`` (supervisor result) and
        ``settings`` (the effective wormhole settings).

    Note:
        ``ok`` is ``True`` even when starting Tor failed; callers that care
        must inspect ``tor["ok"]`` and ``runtime``.
    """
    if not infonet_relay_auto_wormhole_requested():
        return {"ok": True, "skipped": True, "reason": "not_requested"}

    # Imported at call time rather than module import time
    # (presumably to avoid import cycles - confirm before hoisting).
    from routers.ai_intel import _write_env_value
    from services.tor_hidden_service import tor_service
    from services.wormhole_supervisor import connect_wormhole, restart_wormhole

    existing = read_wormhole_settings()
    target = _relay_tor_wormhole_target_settings()
    settings_updated = not _wormhole_settings_match(existing, target)
    updated = write_wormhole_settings(**target) if settings_updated else existing

    tor_result: dict[str, Any] = {"ok": False, "detail": "not started"}
    try:
        # NOTE(review): target_port is hard-coded to 8000; presumably the
        # local backend port - confirm it matches the deployment.
        tor_result = tor_service.start(target_port=8000)
        if tor_result.get("ok"):
            _write_env_value("MESH_ARTI_ENABLED", "true")
            # Drop the cached settings so the new env value is picked up.
            get_settings.cache_clear()
    except Exception as exc:
        # Best effort: a Tor failure must not abort startup, but it should
        # be visible.  ``str(exc)`` can be empty, so fall back to the
        # exception type name (an exception object itself is always truthy).
        detail = str(exc) or type(exc).__name__
        logger.warning("Infonet relay Tor start failed (%s): %s", reason, detail)
        tor_result = {"ok": False, "detail": detail}

    # Changed settings require a restart; otherwise a plain connect is used.
    if settings_updated:
        runtime = restart_wormhole(reason=reason)
    else:
        runtime = connect_wormhole(reason=reason)

    if settings_updated:
        logger.info("Infonet relay auto-wormhole enabled (%s)", reason)

    return {
        "ok": True,
        "skipped": False,
        "settings_updated": settings_updated,
        "tor": tor_result,
        "runtime": runtime,
        "settings": updated,
    }
|
||||
@@ -2,10 +2,64 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from email.utils import parsedate_to_datetime
|
||||
from datetime import timezone
|
||||
|
||||
from services.mesh.mesh_peer_store import PeerRecord
|
||||
|
||||
|
||||
class PeerSyncRateLimited(Exception):
    """Upstream peer returned HTTP 429 - Too Many Requests.

    Carries the ``Retry-After`` header value (parsed to seconds) so
    the caller can pass it to ``finish_sync(retry_after_s=...)`` and
    actually wait that long instead of hammering the upstream every
    60s and keeping its rate-limit bucket full.

    ``retry_after_s`` is 0 when the upstream didn't provide a header.
    Caller should still apply the exponential backoff in that case.

    Attributes are normalised on construction: ``retry_after_s`` is
    clamped to be non-negative and a falsy ``status`` becomes 429.
    """

    def __init__(self, message: str, retry_after_s: int = 0, status: int = 429):
        super().__init__(message)
        self.retry_after_s = max(0, int(retry_after_s or 0))
        self.status = int(status or 429)
|
||||
|
||||
|
||||
def parse_retry_after_header(header_value: str, *, now: float | None = None) -> int:
    """Parse the ``Retry-After`` HTTP header.

    Two valid forms per RFC 7231 section 7.1.3:

    * Delay-seconds: a non-negative integer (e.g. ``Retry-After: 120``)
    * HTTP-date: an absolute time (e.g. ``Retry-After: Wed, 21 Oct 2026 07:28:00 GMT``)

    Args:
        header_value: Raw header value; ``None``/empty is accepted.
        now: Reference time as a Unix timestamp; defaults to ``time.time()``.

    Returns:
        The wait in **seconds from now**, clamped to ``[0, 3600]``.
        Unparseable / empty headers return 0 (caller falls back to
        exponential backoff).  The one-hour ceiling means a typo'd or
        hostile peer can't pin us silent for days.

    This function never raises on malformed input: the header is
    peer-controlled, so every parse failure maps to 0.
    """
    value = str(header_value or "").strip()
    if not value:
        return 0
    upper_bound = 3600  # never trust a peer to silence us > 1h

    # Form 1: pure integer seconds.  ``str.isdigit`` alone also accepts
    # characters such as superscript digits that ``int()`` rejects, so
    # require ASCII (which is what the RFC's 1*DIGIT means anyway).
    if value.isascii() and value.isdigit():
        significant = value.lstrip("0")
        # More significant digits than the ceiling has is necessarily above
        # the ceiling; short-circuit so absurdly long digit runs never reach
        # ``int()`` (which refuses very long strings on newer Pythons).
        if len(significant) > len(str(upper_bound)):
            return upper_bound
        return min(int(significant or "0"), upper_bound)

    # Form 2: HTTP-date.
    try:
        target = parsedate_to_datetime(value)
        if target is None:
            return 0
        if target.tzinfo is None:
            # A date without usable zone information is treated as UTC.
            target = target.replace(tzinfo=timezone.utc)
        current = float(now if now is not None else time.time())
        delta = int(target.timestamp() - current)
        return min(max(0, delta), upper_bound)
    except (TypeError, ValueError, OverflowError):
        return 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SyncWorkerState:
|
||||
last_sync_started_at: int = 0
|
||||
@@ -72,6 +126,59 @@ def begin_sync(
|
||||
)
|
||||
|
||||
|
||||
def _failure_backoff_seconds(
|
||||
*,
|
||||
base_backoff_s: int,
|
||||
consecutive_failures: int,
|
||||
retry_after_s: int,
|
||||
cap_s: int = 1800,
|
||||
) -> int:
|
||||
"""Compute the next-attempt delay after a failed sync.
|
||||
|
||||
Two inputs combine:
|
||||
|
||||
* ``retry_after_s`` — when an upstream peer answered HTTP 429
|
||||
with a ``Retry-After`` header, we honor it exactly. Continuing
|
||||
to hammer the upstream every 60s is the bug this fix exists to
|
||||
close: it keeps the upstream's rate-limit bucket full
|
||||
indefinitely and no sync ever lands.
|
||||
|
||||
* Exponential growth on ``consecutive_failures`` — even without an
|
||||
explicit Retry-After, repeated failures should slow us down. The
|
||||
first failure waits ``base`` (preserves pre-fix behavior for
|
||||
one-off blips). Each subsequent failure doubles the wait, capped
|
||||
to ``cap_s`` (default 30 minutes). With base=60 and cap=1800,
|
||||
the schedule is 60s → 120s → 240s → 480s → 960s → 1800s →
|
||||
1800s → … .
|
||||
|
||||
The actual delay is the MAX of the two — whichever asks for more
|
||||
patience wins. ``retry_after_s == 0`` (no header) falls back to
|
||||
pure exponential. An aggressive ``Retry-After`` (say 600s while
|
||||
we're only at 1 failure) wins over the exponential ladder.
|
||||
"""
|
||||
base = max(0, int(base_backoff_s or 0))
|
||||
failures = max(0, int(consecutive_failures or 0))
|
||||
cap = max(0, int(cap_s or 0))
|
||||
retry_after = max(0, int(retry_after_s or 0))
|
||||
# ``cap_s=0`` explicitly disables the exponential ladder entirely
|
||||
# — operators who want the pre-fix "honor Retry-After only" behavior
|
||||
# can set this. The default cap of 1800s is what saturates the
|
||||
# ladder at the 5th-6th failure for base=60.
|
||||
if cap == 0:
|
||||
return retry_after
|
||||
# 2^(failures-1) — so failure #1 = base (preserves the pre-fix
|
||||
# default for transient blips), failure #2 = 2*base, etc. Cap on
|
||||
# the exponent (16) is defense against integer overflow on a
|
||||
# hostile or very large failures counter.
|
||||
if base > 0 and failures > 0:
|
||||
exponent = min(max(0, failures - 1), 16)
|
||||
grown = base * (2 ** exponent)
|
||||
else:
|
||||
grown = 0
|
||||
exponential = min(max(0, grown), cap)
|
||||
return max(exponential, retry_after)
|
||||
|
||||
|
||||
def finish_sync(
|
||||
state: SyncWorkerState,
|
||||
*,
|
||||
@@ -83,7 +190,26 @@ def finish_sync(
|
||||
now: float | None = None,
|
||||
interval_s: int = 300,
|
||||
failure_backoff_s: int = 60,
|
||||
retry_after_s: int = 0,
|
||||
failure_backoff_cap_s: int = 1800,
|
||||
) -> SyncWorkerState:
|
||||
"""Finalise a sync attempt and compute when the next one should run.
|
||||
|
||||
New args (added for the 429 retry storm fix):
|
||||
|
||||
* ``retry_after_s`` — if the peer responded with HTTP 429 + a
|
||||
``Retry-After`` header, pass that value here. ``finish_sync``
|
||||
will use ``max(exponential, retry_after_s)`` for the delay so
|
||||
we never hammer a peer that asked us to back off.
|
||||
* ``failure_backoff_cap_s`` — upper bound on the exponential
|
||||
ladder. Default 1800 (30 min) — keeps a sync queue from going
|
||||
silent for hours while still cutting the request rate to
|
||||
something the upstream can absorb.
|
||||
|
||||
The pre-fix behavior (constant 60s on every failure) is recoverable
|
||||
by passing ``failure_backoff_cap_s=0`` and ``retry_after_s=0``, but
|
||||
there's no reason to.
|
||||
"""
|
||||
timestamp = int(now if now is not None else time.time())
|
||||
if ok:
|
||||
return SyncWorkerState(
|
||||
@@ -99,17 +225,25 @@ def finish_sync(
|
||||
consecutive_failures=0,
|
||||
)
|
||||
|
||||
next_failures = state.consecutive_failures + 1
|
||||
delay_s = _failure_backoff_seconds(
|
||||
base_backoff_s=failure_backoff_s,
|
||||
consecutive_failures=next_failures,
|
||||
retry_after_s=retry_after_s,
|
||||
cap_s=failure_backoff_cap_s,
|
||||
)
|
||||
|
||||
return SyncWorkerState(
|
||||
last_sync_started_at=state.last_sync_started_at,
|
||||
last_sync_finished_at=timestamp,
|
||||
last_sync_ok_at=state.last_sync_ok_at,
|
||||
next_sync_due_at=timestamp + max(0, int(failure_backoff_s or 0)),
|
||||
next_sync_due_at=timestamp + delay_s,
|
||||
last_peer_url=peer_url or state.last_peer_url,
|
||||
last_error=str(error or "").strip(),
|
||||
last_outcome="fork" if fork_detected else "error",
|
||||
current_head=current_head or state.current_head,
|
||||
fork_detected=bool(fork_detected),
|
||||
consecutive_failures=state.consecutive_failures + 1,
|
||||
consecutive_failures=next_failures,
|
||||
)
|
||||
|
||||
|
||||
@@ -142,5 +276,6 @@ def should_run_sync(
|
||||
) -> bool:
|
||||
current_time = int(now if now is not None else time.time())
|
||||
if state.last_outcome == "running":
|
||||
return False
|
||||
started_at = int(state.last_sync_started_at or 0)
|
||||
return started_at <= 0 or current_time - started_at >= 300
|
||||
return int(state.next_sync_due_at or 0) <= current_time
|
||||
|
||||
@@ -125,8 +125,8 @@ def dm_lookup_response_view(
|
||||
view.pop("lookup_mode", None)
|
||||
view.pop("removal_target", None)
|
||||
return view
|
||||
if invite_lookup:
|
||||
view.pop("agent_id", None)
|
||||
# Successful invite lookups keep agent_id: the handle is the capability and
|
||||
# first-contact messaging needs a delivery target. Failures stay generic.
|
||||
return view
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""Operator-signed peer registry for private Infonet swarm discovery."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from services.mesh.mesh_crypto import normalize_peer_url
|
||||
from services.mesh.mesh_router import peer_transport_kind
|
||||
|
||||
BACKEND_DIR = Path(__file__).resolve().parents[2]
|
||||
DATA_DIR = BACKEND_DIR / "data"
|
||||
DEFAULT_PEER_REGISTRY_PATH = DATA_DIR / "peer_registry.json"
|
||||
REGISTRY_VERSION = 1
|
||||
ALLOWED_REGISTRY_ROLES = {"participant", "relay", "seed"}
|
||||
|
||||
|
||||
@dataclass
class RegistryPeer:
    """One peer entry held by the peer registry."""

    peer_url: str
    transport: str
    role: str
    node_id: str = ""
    label: str = ""
    # Timestamps are integer seconds; 0 means "not recorded".
    announced_at: int = 0
    last_seen_at: int = 0
    failure_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict (the form written to disk)."""
        return asdict(self)

    def manifest_peer(self) -> dict[str, str]:
        """Return the reduced view of this peer used for manifests.

        Only ``peer_url``, ``transport``, ``role`` and ``label`` are
        included.  When no label is set, the first 16 characters of
        ``node_id`` stand in for it (which may itself be empty).
        """
        return {
            "peer_url": self.peer_url,
            "transport": self.transport,
            "role": self.role,
            "label": self.label or self.node_id[:16],
        }
|
||||
|
||||
|
||||
class PeerRegistry:
    """JSON-file-backed set of :class:`RegistryPeer` records keyed by peer URL.

    The in-memory state lives in ``self._peers``; nothing touches the disk
    except :meth:`load` and :meth:`save`.
    """

    def __init__(self, path: str | Path = DEFAULT_PEER_REGISTRY_PATH):
        self.path = Path(path)
        self._peers: dict[str, RegistryPeer] = {}

    def load(self) -> list[RegistryPeer]:
        """Replace the in-memory state with the contents of ``self.path``.

        A missing file yields an empty registry.  Entries that are not
        dicts are skipped; when two entries share a URL the later one wins.

        Raises:
            ValueError: if the root is not an object, the version is not
                ``REGISTRY_VERSION``, ``peers`` is not a list, or a dict
                entry fails normalisation (this also covers malformed
                JSON, since ``json.JSONDecodeError`` is a ``ValueError``).
        """
        if not self.path.exists():
            self._peers = {}
            return []
        raw = json.loads(self.path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            raise ValueError("peer registry root must be an object")
        version = int(raw.get("version", 0) or 0)
        if version != REGISTRY_VERSION:
            raise ValueError(f"unsupported peer registry version: {version}")
        entries = raw.get("peers", [])
        if not isinstance(entries, list):
            raise ValueError("peer registry peers must be a list")
        peers: dict[str, RegistryPeer] = {}
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            peer = self._normalize_entry(entry)
            peers[peer.peer_url] = peer
        # Assign only after every entry parsed, so a failed load leaves
        # the previous in-memory state untouched.
        self._peers = peers
        return self.records()

    def save(self) -> None:
        """Write the registry to ``self.path`` as pretty-printed JSON.

        The payload is written to a sibling ``<name>.tmp`` file first and
        then renamed over the target, so an interrupted write cannot leave
        a truncated registry behind for the next :meth:`load`.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": REGISTRY_VERSION,
            "updated_at": int(time.time()),
            "peers": [peer.to_dict() for peer in self.records()],
        }
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        tmp_path.write_text(
            json.dumps(payload, sort_keys=True, indent=2) + "\n",
            encoding="utf-8",
        )
        tmp_path.replace(self.path)

    def records(self) -> list[RegistryPeer]:
        """Return all peers sorted by ``(role, peer_url)``."""
        return sorted(self._peers.values(), key=lambda item: (item.role, item.peer_url))

    def upsert_announcement(
        self,
        *,
        peer_url: str,
        transport: str,
        role: str,
        node_id: str = "",
        label: str = "",
        now: float | None = None,
    ) -> RegistryPeer:
        """Insert or refresh a peer from an announcement.

        For an already-known URL, ``announced_at`` and ``failure_count``
        are carried over, and ``node_id`` / ``label`` are kept when the
        announcement leaves them empty.  ``last_seen_at`` is always set to
        ``now`` (defaults to the current time).  An empty ``transport`` is
        derived from the URL; an empty ``role`` means ``"participant"``.

        Raises:
            ValueError: if the URL normalises to nothing, the transport is
                not ``"onion"``/``"clearnet"``, or the role is not allowed.
        """
        normalized = normalize_peer_url(peer_url)
        if not normalized:
            raise ValueError("peer_url is required")
        resolved_transport = str(transport or "").strip().lower() or str(peer_transport_kind(normalized) or "")
        if resolved_transport not in {"onion", "clearnet"}:
            raise ValueError("unsupported peer transport")
        resolved_role = str(role or "participant").strip().lower()
        if resolved_role not in ALLOWED_REGISTRY_ROLES:
            raise ValueError("unsupported peer role")
        timestamp = int(now if now is not None else time.time())
        existing = self._peers.get(normalized)
        peer = RegistryPeer(
            peer_url=normalized,
            transport=resolved_transport,
            role=resolved_role,
            node_id=str(node_id or (existing.node_id if existing else "") or "").strip(),
            label=str(label or (existing.label if existing else "") or "").strip(),
            announced_at=int(existing.announced_at if existing and existing.announced_at else timestamp),
            last_seen_at=timestamp,
            failure_count=int(existing.failure_count if existing else 0),
        )
        self._peers[normalized] = peer
        return peer

    def prune_stale(self, *, max_age_s: int, now: float | None = None) -> int:
        """Drop peers not seen for longer than ``max_age_s``; return the count.

        Peers with role ``"seed"`` are never pruned, nor are peers without
        any recorded timestamp.  The effective age limit is at least 60
        seconds regardless of ``max_age_s``.
        """
        timestamp = int(now if now is not None else time.time())
        age_limit = max(60, int(max_age_s or 0))
        removed = 0
        # Iterate over a snapshot because entries are deleted in the loop.
        for peer_url, peer in list(self._peers.items()):
            if peer.role == "seed":
                continue
            last_seen = int(peer.last_seen_at or peer.announced_at or 0)
            if last_seen > 0 and timestamp - last_seen > age_limit:
                del self._peers[peer_url]
                removed += 1
        return removed

    def manifest_peers(self) -> list[dict[str, str]]:
        """Return the manifest view of every peer, in :meth:`records` order."""
        return [peer.manifest_peer() for peer in self.records()]

    def _normalize_entry(self, entry: dict[str, Any]) -> RegistryPeer:
        """Convert one raw on-disk entry into a :class:`RegistryPeer`.

        A missing ``last_seen_at`` falls back to ``announced_at``.

        NOTE(review): unlike ``upsert_announcement`` the transport value is
        not checked against the supported set here - confirm whether
        loaded entries should be validated as strictly.

        Raises:
            ValueError: if the URL normalises to nothing or the role is
                not allowed.
        """
        peer_url = normalize_peer_url(str(entry.get("peer_url", "") or ""))
        if not peer_url:
            raise ValueError("registry peer_url is required")
        transport = str(entry.get("transport", "") or peer_transport_kind(peer_url) or "").strip().lower()
        role = str(entry.get("role", "participant") or "participant").strip().lower()
        if role not in ALLOWED_REGISTRY_ROLES:
            raise ValueError("registry role unsupported")
        return RegistryPeer(
            peer_url=peer_url,
            transport=transport,
            role=role,
            node_id=str(entry.get("node_id", "") or "").strip(),
            label=str(entry.get("label", "") or "").strip(),
            announced_at=int(entry.get("announced_at", 0) or 0),
            last_seen_at=int(entry.get("last_seen_at", 0) or entry.get("announced_at", 0) or 0),
            failure_count=int(entry.get("failure_count", 0) or 0),
        )
|
||||
@@ -16,7 +16,7 @@ DATA_DIR = BACKEND_DIR / "data"
|
||||
DEFAULT_PEER_STORE_PATH = DATA_DIR / "peer_store.json"
|
||||
PEER_STORE_VERSION = 1
|
||||
ALLOWED_PEER_BUCKETS = {"bootstrap", "sync", "push"}
|
||||
ALLOWED_PEER_SOURCES = {"bundle", "operator", "bootstrap_promoted", "runtime"}
|
||||
ALLOWED_PEER_SOURCES = {"bundle", "operator", "bootstrap_promoted", "runtime", "swarm"}
|
||||
ALLOWED_PEER_TRANSPORTS = {"clearnet", "onion"}
|
||||
ALLOWED_PEER_ROLES = {"participant", "relay", "seed"}
|
||||
|
||||
|
||||
@@ -140,10 +140,24 @@ def transport_tier_from_state(state: dict[str, Any] | None) -> str:
|
||||
snapshot = state or {}
|
||||
if not bool(snapshot.get("configured")):
|
||||
return "public_degraded"
|
||||
if not bool(snapshot.get("ready")):
|
||||
return "public_degraded"
|
||||
arti_ready = bool(snapshot.get("arti_ready"))
|
||||
rns_ready = bool(snapshot.get("rns_ready"))
|
||||
running = bool(snapshot.get("running"))
|
||||
transport_usable = bool(snapshot.get("ready"))
|
||||
if not transport_usable:
|
||||
try:
|
||||
from services.config import get_settings
|
||||
|
||||
if (
|
||||
bool(getattr(get_settings(), "MESH_WORMHOLE_TRUST_FILE_READY", False))
|
||||
and running
|
||||
and arti_ready
|
||||
):
|
||||
transport_usable = True
|
||||
except Exception:
|
||||
pass
|
||||
if not transport_usable:
|
||||
return "public_degraded"
|
||||
if arti_ready and rns_ready:
|
||||
return "private_strong"
|
||||
if arti_ready or rns_ready:
|
||||
@@ -157,8 +171,45 @@ def transport_tier_is_sufficient(current_tier: str | None, required_tier: str |
|
||||
return TRANSPORT_TIER_ORDER[current] >= TRANSPORT_TIER_ORDER[required]
|
||||
|
||||
|
||||
def release_lane_required_tier(lane: str) -> str:
|
||||
return network_release_required_tier(lane)
|
||||
# (METHOD, path) pairs whose required tier is resolved at runtime
# instead of using the static route tier as-is.
_DM_RUNTIME_ENFORCEMENT_ROUTES = {
    ("POST", "/api/mesh/dm/send"),
    ("POST", "/api/mesh/dm/poll"),
    ("GET", "/api/mesh/dm/poll"),
    ("GET", "/api/mesh/dm/count"),
    ("POST", "/api/mesh/dm/count"),
}


def runtime_route_enforcement_tier(path: str, method: str, *, static_tier: str) -> str:
    """Adjust static route tiers for Tor-only nodes that never reach private_strong.

    Args:
        path: Request path; matched exactly after stripping whitespace.
        method: HTTP method; matched case-insensitively.
        static_tier: The tier statically configured for the route.

    Returns:
        The normalised ``static_tier`` unless the route is one of the DM
        routes above AND its static tier is ``"private_strong"``; in that
        case the tier currently required by the ``"dm"`` release lane.
    """
    normalized_path = str(path or "").strip()
    normalized_method = str(method or "").strip().upper()
    static = normalize_transport_tier(static_tier)
    if (normalized_method, normalized_path) not in _DM_RUNTIME_ENFORCEMENT_ROUTES:
        return static
    if static != "private_strong":
        return static
    return release_lane_required_tier("dm")
|
||||
|
||||
|
||||
def release_lane_required_tier(lane: str, *, wormhole_state: dict[str, Any] | None = None) -> str:
    """Return the transport tier required to release traffic on ``lane``.

    Args:
        lane: Lane name; compared case-insensitively after stripping.
        wormhole_state: Optional state snapshot.  When omitted, the
            current state is fetched from the wormhole supervisor.

    Returns:
        The tier reported by ``network_release_required_tier`` for every
        lane except ``"dm"``.  For ``"dm"``, ``"private_transitional"``
        when the state does not have ``rns_enabled`` set, otherwise the
        regular required tier.
    """
    normalized_lane = str(lane or "").strip().lower()
    required = network_release_required_tier(normalized_lane)
    if normalized_lane != "dm":
        return required
    state = wormhole_state
    if state is None:
        try:
            # Imported at call time rather than at module import.
            from services.wormhole_supervisor import get_wormhole_state

            state = get_wormhole_state()
        except Exception:
            # NOTE(review): best-effort fallback.  An unreadable state is
            # treated as "RNS not enabled", which selects the relaxed tier
            # below - confirm that failing this way is intended.
            state = {}
    # Tor-only nodes never reach private_strong (needs Arti + RNS). Encrypted
    # relay over Arti still preserves ciphertext privacy for offline delivery.
    if not bool((state or {}).get("rns_enabled")):
        return "private_transitional"
    return required
|
||||
|
||||
|
||||
def private_delivery_status(status_code: str, *, reason_code: str = "", plain_reason: str = "") -> dict[str, str]:
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user