mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-06 06:13:54 +02:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6b86a6b461 |
+2
-8
@@ -39,8 +39,8 @@ ADMIN_KEY=
|
||||
# NUFORC_MAPBOX_TOKEN=
|
||||
|
||||
# Optional startup-risk controls.
|
||||
# On Windows, external curl fallback is off by default. LiveUAMap uses UI consent
|
||||
# when you enable Global Incidents (or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true).
|
||||
# On Windows, external curl fallback and the Playwright LiveUAMap scraper are
|
||||
# disabled by default so blocked upstream feeds cannot interrupt start.bat.
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=false
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false
|
||||
# AIS starts by default when AIS_API_KEY is set. Set to 0/false to force-disable.
|
||||
@@ -128,14 +128,8 @@ ADMIN_KEY=
|
||||
# MESH_DM_ROOT_TRANSPARENCY_LEDGER_READBACK_URI=backend/../ops/root_transparency_ledger.json
|
||||
|
||||
# ── Self Update ────────────────────────────────────────────────
|
||||
# Optional ZIP updater digest pin. The updater checks this first, then
|
||||
# backend/data/release_digests.json, then the release SHA256SUMS.txt asset.
|
||||
# MESH_UPDATE_SHA256=
|
||||
|
||||
# Optional strict nonce-only frontend CSP. Leave unset unless the exact build
|
||||
# has been verified to hydrate cleanly in your deployment.
|
||||
# SHADOWBROKER_STRICT_CSP=1
|
||||
|
||||
# ── Wormhole (Local Agent) ─────────────────────────────────────
|
||||
# WORMHOLE_URL=http://127.0.0.1:8787
|
||||
# WORMHOLE_TRANSPORT=direct
|
||||
|
||||
@@ -7,28 +7,6 @@ on:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
# CI flake mitigation:
|
||||
# ci.yml is triggered TWICE per PR on the same commit — once directly via
|
||||
# the `pull_request` trigger above ("Frontend Tests & Build" check) and once
|
||||
# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests &
|
||||
# Build" check). Both jobs land on the same Actions runner pool at the same
|
||||
# time and fight for CPU/RAM. Under contention, React's reconciliation in
|
||||
# `messagesViewFirstContact.test.tsx > removes an approved contact …`
|
||||
# overruns its 5s waitFor timeout — that's the single failure mode we've
|
||||
# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the
|
||||
# fd7d6fa push. Backend tests and every other frontend test pass under
|
||||
# the same conditions, which is what made this look random.
|
||||
#
|
||||
# Pinning a concurrency group on the SHA (PR head, or the pushed commit
|
||||
# for main) serializes the two invocations so neither starves the other.
|
||||
# We use cancel-in-progress: false so the second one queues instead of
|
||||
# cancelling — cancelling could leave the PR check stuck "Expected" if
|
||||
# only one of the two ever finishes. Total CI time grows by ~2 min in
|
||||
# exchange for deterministic outcomes.
|
||||
concurrency:
|
||||
group: ci-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
frontend:
|
||||
name: Frontend Tests & Build
|
||||
|
||||
-38
@@ -101,14 +101,6 @@ backend/data/*
|
||||
# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream
|
||||
# Let's Encrypt renewal failures without disabling TLS validation entirely.
|
||||
!backend/data/aisstream_spki_pins.json
|
||||
# Issue #231: pinned SHA-256 digests for known release archives. Used by
|
||||
# the self-updater as a second-line integrity check when the release's
|
||||
# SHA256SUMS.txt asset can't be fetched.
|
||||
!backend/data/release_digests.json
|
||||
# Issue #244/#245/#246: one-shot carrier-position seed shipped with each
|
||||
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||
# after that the cache reflects this install's own GDELT observations.
|
||||
!backend/data/carrier_seed.json
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
@@ -198,7 +190,6 @@ graphify-out/
|
||||
# Internal docs & brainstorming (never commit)
|
||||
# ========================
|
||||
docs/*
|
||||
!docs/OUTBOUND_DATA.md
|
||||
!docs/mesh/
|
||||
docs/mesh/*
|
||||
!docs/mesh/threat-model.md
|
||||
@@ -262,32 +253,3 @@ backend/data/wormhole_stdout.log
|
||||
|
||||
# Compressed snapshot archives (can be 100 MB+)
|
||||
*.json.gz
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# AI assistant / coding-agent scratch
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Per-tool config + scratch directories. These are private to whichever
|
||||
# coding agent the operator happens to be using and have no business in
|
||||
# the repo. If a tool's instructions need to be canonical for the project,
|
||||
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||
# repo root.
|
||||
|
||||
# OpenAI Codex CLI
|
||||
.codex/
|
||||
.codex-app-schema/
|
||||
.codex-app-ts/
|
||||
|
||||
# Per-agent instruction files dropped at repo root by various tools.
|
||||
# These are operator-side preferences, not part of the project contract.
|
||||
AGENTS.md
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
.github/copilot-instructions.md
|
||||
|
||||
# Stale AI-generated test file that referenced fields that don't exist in
|
||||
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||
# ignored so it doesn't accidentally get committed if it shows up again
|
||||
# from a tool that's working off an out-of-date understanding of the
|
||||
# module. If a real test for that function is needed, write it under a
|
||||
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||
backend/tests/test_carrier_tracker_region_centers.py
|
||||
|
||||
+12
-42
@@ -13,22 +13,13 @@
|
||||
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
||||
# on GitLab) so the two sources stay in sync.
|
||||
#
|
||||
# Pipelines on this repo were instant-failing for free-tier accounts until
|
||||
# identity verification was added — the May 2026 bump in this comment is
|
||||
# the marker commit that confirms runner allocation after verification.
|
||||
#
|
||||
# Auth notes:
|
||||
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
||||
# automatically. No credentials need to be configured.
|
||||
# - The reverse mirror authenticates to GitHub via a per-repo SSH
|
||||
# deploy key. The private half is stored as the File-type GitLab
|
||||
# CI/CD variable GITHUB_MIRROR_SSH_KEY (Protected). The matching
|
||||
# public key is added to github.com/BigBodyCobain/Shadowbroker/
|
||||
# settings/keys with write access. This is a tighter-scoped
|
||||
# replacement for a personal access token: it can ONLY push to
|
||||
# Shadowbroker, never expires, and rotating it is a one-click
|
||||
# delete on GitHub's deploy-keys page. If the variable isn't set,
|
||||
# the mirror job is skipped — image builds still run.
|
||||
# - The reverse mirror requires a GitHub personal access token stored
|
||||
# as the GitLab CI/CD variable GITHUB_MIRROR_TOKEN (Protected + Masked).
|
||||
# Scope: public_repo (or repo for private). If the variable isn't
|
||||
# set the mirror job is skipped — image builds still run.
|
||||
|
||||
stages:
|
||||
- build
|
||||
@@ -57,11 +48,7 @@ variables:
|
||||
- docker info
|
||||
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
||||
- docker run --privileged --rm tonistiigi/binfmt --install all
|
||||
# buildx --driver docker-container can't read TLS from the env vars
|
||||
# the GitLab dind service exports. Wrap them in a docker context and
|
||||
# bind buildx to it. See https://docs.gitlab.com/ee/ci/docker/using_docker_build.html#use-docker-buildx
|
||||
- docker context create tls-env
|
||||
- docker buildx create --use --name multiarch --driver docker-container tls-env
|
||||
- docker buildx create --use --name multiarch --driver docker-container
|
||||
|
||||
# ── Backend image ────────────────────────────────────────────────────────
|
||||
build-backend:
|
||||
@@ -106,35 +93,18 @@ build-frontend:
|
||||
- .gitlab-ci.yml
|
||||
|
||||
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker via SSH
|
||||
# using a per-repo deploy key. Fast-forward-only by default — if GitLab
|
||||
# main and GitHub main have diverged, the push fails loudly rather than
|
||||
# silently overwriting either side.
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker.
|
||||
# Fast-forward-only — if GitLab main and GitHub main have diverged, this
|
||||
# fails loudly rather than silently overwriting either side.
|
||||
#
|
||||
# Only runs if GITHUB_MIRROR_SSH_KEY is set as a File-type CI/CD variable.
|
||||
# See the header comment of this file for setup instructions.
|
||||
# Only runs if GITHUB_MIRROR_TOKEN is set as a CI/CD variable. See the
|
||||
# header comment of this file for setup instructions.
|
||||
mirror-to-github:
|
||||
stage: mirror
|
||||
image: alpine:3.20
|
||||
needs: []
|
||||
before_script:
|
||||
- apk add --no-cache git openssh-client ca-certificates
|
||||
- mkdir -p ~/.ssh
|
||||
- chmod 700 ~/.ssh
|
||||
# Install the deploy key. File-type CI variable exposes the path; copy
|
||||
# to ~/.ssh/id_ed25519 with restrictive perms so ssh accepts it.
|
||||
- cp "$GITHUB_MIRROR_SSH_KEY" ~/.ssh/id_ed25519
|
||||
- chmod 600 ~/.ssh/id_ed25519
|
||||
# Pin github.com's current host keys so we never trust a man-in-the-
|
||||
# middle. Sourced from https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
|
||||
# (rotated 2023-03-24 after the previous RSA key leak).
|
||||
- |
|
||||
cat > ~/.ssh/known_hosts <<'EOF'
|
||||
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
|
||||
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
|
||||
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
|
||||
EOF
|
||||
- chmod 644 ~/.ssh/known_hosts
|
||||
script:
|
||||
- git config --global user.email "ci-mirror@gitlab.com"
|
||||
- git config --global user.name "GitLab CI Mirror"
|
||||
@@ -145,7 +115,7 @@ mirror-to-github:
|
||||
- cd repo
|
||||
- >
|
||||
git push
|
||||
"git@github.com:BigBodyCobain/Shadowbroker.git"
|
||||
"https://x-access-token:${GITHUB_MIRROR_TOKEN}@github.com/BigBodyCobain/Shadowbroker.git"
|
||||
"${CI_COMMIT_SHA}:refs/heads/main"
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_SSH_KEY
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_TOKEN
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
**ShadowBroker** is a decentralized intelligence platform that aggregates real-time, multi-domain OSINT telemetry from 60+ live intelligence feeds into a single dark-ops map interface. Aircraft, ships, satellites, conflict zones, CCTV networks, GPS jamming, internet-connected devices, police scanners, mesh radio nodes, and breaking geopolitical events all update in real time on one screen, alongside an obfuscated communications protocol and information-exchange infrastructure.
|
||||
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers, including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. ShadowBroker has no accounts, product telemetry, or analytics; the dashboard talks to your self-hosted backend, while optional live OSINT panels may contact their configured public data providers when you use them.
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers, including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
|
||||
|
||||
Designed for analysts, researchers, radio operators, and anyone who wants to see what the world looks like when every public signal is on the same map.
|
||||
|
||||
@@ -28,7 +28,7 @@ Designed for analysts, researchers, radio operators, and anyone who wants to see
|
||||
|
||||
A surprising amount of global telemetry is already public — aircraft ADS-B broadcasts, maritime AIS signals, satellite orbital data, earthquake sensors, mesh radio networks, police scanner feeds, environmental monitoring stations, internet infrastructure telemetry, and more. This data is scattered across dozens of tools and APIs. ShadowBroker combines all of it into a single interface.
|
||||
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. ShadowBroker does not include product telemetry, analytics, or accounts. Operator-supplied keys stay in your local deployment, but live OSINT features necessarily make outbound requests to the public data providers you enable or query.
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. No user data is collected or transmitted — everything runs locally against a self-hosted backend. No telemetry, no analytics, no accounts.
|
||||
|
||||
### Shodan Connector
|
||||
|
||||
@@ -113,20 +113,6 @@ That's it. `pull` grabs the latest images, `up -d` restarts the containers.
|
||||
>
|
||||
> Podman users should run the equivalent provider command, for example `podman-compose pull` and `podman-compose up -d`, or use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d` from a bash-compatible shell.
|
||||
|
||||
### Update Integrity
|
||||
|
||||
Docker updates are delivered through signed container registries. The legacy ZIP self-updater verifies release archives through this chain, in order:
|
||||
|
||||
* `MESH_UPDATE_SHA256` when an operator pins a digest explicitly.
|
||||
* `backend/data/release_digests.json` for bundled release pins.
|
||||
* The release `SHA256SUMS.txt` asset on GitHub when a bundled pin is not present.
|
||||
|
||||
Release maintainers should run `python backend/scripts/release_helper.py hash <ShadowBroker_vX.Y.Z.zip>` before publishing, then publish `SHA256SUMS.txt` and update `backend/data/release_digests.json` when shipping a ZIP updater target. The updater keeps the operator override path intact instead of failing closed on missing bundled digests, so existing installs do not get stranded by a release-process mistake.
|
||||
|
||||
### CSP Hardening
|
||||
|
||||
The production frontend ships with a hydration-compatible CSP and a strict nonce-only CSP in `Content-Security-Policy-Report-Only`. Set `SHADOWBROKER_STRICT_CSP=1` only after verifying the exact build hydrates correctly in your deployment. Runtime Google Fonts are not required; the bundled Next font pipeline serves the dashboard font from the app build.
|
||||
|
||||
### ⚠️ **Stuck on the old version?**
|
||||
|
||||
**If `git pull` fails or `docker compose up` keeps building from source instead of pulling images**, your clone predates a March 2026 repository migration that rewrote commit history. A normal `git pull` cannot fix this. Run:
|
||||
@@ -188,7 +174,7 @@ ShadowBroker v0.9.7 ships **InfoNet** (decentralized intelligence mesh + Soverei
|
||||
| Channel | Privacy Status | Details |
|
||||
|---|---|---|
|
||||
| **Meshtastic / APRS** | **PUBLIC** | RF radio transmissions are public and interceptable by design. |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden, even though the transport is designed to run over Tor and Reticulum (work in progress). |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden. |
|
||||
| **Dead Drop DMs** | **STRONGEST CURRENT LANE** | Token-based epoch mailbox with SAS word verification. Strongest lane in this build, but not yet confidently private. |
|
||||
| **Sovereign Shell governance** | **PUBLIC LEDGER** | Petitions, votes, upgrade hashes, and dispute stakes are signed events on a public hashchain. Pseudonymous via gate persona, but governance actions are intentionally observable. |
|
||||
| **Privacy primitives (RingCT / stealth / DEX)** | **NOT YET WIRED** | Locked Protocol contracts are in place, but the cryptographic scheme has not been chosen. The privacy-core Rust crate is the integration target for a future sprint. |
|
||||
@@ -213,7 +199,7 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Communication layer (since v0.9.6):**
|
||||
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay using Tor and Reticulum. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **Mesh Chat Panel** — Three-tab interface: **INFONET** (gate chat with obfuscated transport), **MESH** (Meshtastic radio integration), **DEAD DROP** (peer-to-peer message exchange with token-based epoch mailboxes — strongest current lane).
|
||||
* **Gate Persona System** — Pseudonymous identities with Ed25519 signing keys, prekey bundles, SAS word contact verification, and abuse reporting.
|
||||
* **Mesh Terminal** — Built-in CLI: `send`, `dm`, market commands, gate state inspection. Draggable panel, minimizes to the top bar. Type `help` to see all commands.
|
||||
@@ -233,7 +219,7 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Privacy primitive runway (NEW in v0.9.7):**
|
||||
|
||||
* **Function Keys — Anonymous Credential Scaffolding** — The plumbing is in place for nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, and batched settlement. Today's challenge-response is an HMAC-based placeholder for integration testing, not a production anonymous or zero-knowledge citizenship proof. True unlinkable issuance still waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Function Keys — Anonymous Citizenship Proof** — A citizen proves "I am an Infonet citizen" without revealing their Infonet identity. 5 of 6 pieces shipped: nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, batched settlement. Issuance via blind signatures waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Locked Protocol Contracts** — Stable interfaces in `services/infonet/privacy/contracts.py` for ring signatures, stealth addresses, Pedersen commitments, range proofs, and DEX matching. The `privacy-core` Rust crate is the integration target — no caller of the privacy module needs to know which scheme is active.
|
||||
* **Sprint 11+ Path** — When the cryptographic scheme is chosen, primitives wire into the locked Protocols without API churn.
|
||||
|
||||
@@ -577,8 +563,6 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes — the **Ope
|
||||
| [OSM Nominatim](https://nominatim.openstreetmap.org) | Place name geocoding (LOCATE bar) | On-demand | No |
|
||||
| [CARTO Basemaps](https://carto.com) | Dark map tiles | Continuous | No |
|
||||
|
||||
**Outbound privacy & audit (#348–#366):** Each self-hosted install uses its own backend IP and per-install User-Agent handle. See [docs/OUTBOUND_DATA.md](docs/OUTBOUND_DATA.md) for what contacts third parties, opt-in/env controls, and accepted tradeoffs (CCTV Referer, basemap CDN, LiveUAMap, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Getting Started
|
||||
@@ -600,16 +584,9 @@ Open `http://localhost:3000` to view the dashboard.
|
||||
> **Deploying publicly or on a LAN?** No configuration needed for most setups.
|
||||
> The frontend proxies all API calls through the Next.js server to `BACKEND_URL`,
|
||||
> which defaults to `http://backend:8000` (Docker internal networking).
|
||||
> Host port `8000` is only published for local API/debug access (`127.0.0.1:8000`
|
||||
> in `docker-compose.yml`). If it conflicts with another service, set
|
||||
> `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL` as `http://backend:8000`
|
||||
> because that is the Docker-internal port.
|
||||
>
|
||||
> **Running the backend outside Docker** (`cd backend && python main.py`):
|
||||
> the dev server binds **loopback only** (`127.0.0.1:8000`) so other machines on
|
||||
> your LAN cannot hit admin/local-trust routes with an empty `ADMIN_KEY`. Set
|
||||
> `SHADOWBROKER_DEV_BIND_ALL=true` in `.env` only when you deliberately need
|
||||
> `0.0.0.0` and use a strong `ADMIN_KEY` for any non-local callers.
|
||||
> Host port `8000` is only published for local API/debug access. If it conflicts
|
||||
> with another service, set `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL`
|
||||
> as `http://backend:8000` because that is the Docker-internal port.
|
||||
> The backend memory cap is controlled by `BACKEND_MEMORY_LIMIT` and defaults
|
||||
> to `4G`. If Docker reports OOM events, the backend will restart and slow
|
||||
> layers can look empty until they repopulate.
|
||||
|
||||
+11
-62
@@ -11,22 +11,6 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
|
||||
# ── Optional ───────────────────────────────────────────────────
|
||||
|
||||
# AISHub REST fallback. Used when stream.aisstream.io is unreachable
|
||||
# (e.g. their cert expires or server goes offline). Free tier requires
|
||||
# registration at https://www.aishub.net/api. Poll cadence defaults to
|
||||
# 20 min to stay courteous; tunable via AISHUB_POLL_INTERVAL_MINUTES.
|
||||
# AISHUB_USERNAME=
|
||||
# AISHUB_POLL_INTERVAL_MINUTES=20
|
||||
|
||||
# `python main.py` (uvicorn reload) binds 127.0.0.1:8000 by default so LAN clients
|
||||
# cannot reach a dev server with empty ADMIN_KEY (#375). Set true only when you
|
||||
# intentionally need 0.0.0.0 and understand the local-trust implications.
|
||||
# SHADOWBROKER_DEV_BIND_ALL=false
|
||||
#
|
||||
# Thread pool for GDELT, LiveUAMap, CCTV ingest, and slow-tier refresh batches.
|
||||
# Keeps heavy jobs from starving fast flight/ship workers (default 2).
|
||||
# SHADOWBROKER_HEAVY_FETCH_WORKERS=2
|
||||
|
||||
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
||||
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
||||
|
||||
@@ -40,24 +24,14 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# Per-install operator handle. Round 7a: outbound third-party API calls send
|
||||
# this handle as the User-Agent (e.g. operator-7f3a92), not a shared app name,
|
||||
# so upstreams rate-limit one install instead of blocking every user.
|
||||
#
|
||||
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||
# Operators who want a meaningful handle (real name, org, GitHub login) can
|
||||
# set it here. Special characters are sanitized to dashes.
|
||||
# OPERATOR_HANDLE=
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers.
|
||||
# Project-generic by default — does NOT include any personal contact info or
|
||||
# operator-specific identifier. Override only if you run a public relay and
|
||||
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
|
||||
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
|
||||
|
||||
# Full User-Agent override (replaces the operator handle entirely). Rare;
|
||||
# most installs should use OPERATOR_HANDLE only.
|
||||
# SHADOWBROKER_USER_AGENT=
|
||||
|
||||
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||
# use the per-install handle (default) — set only if you have a registered
|
||||
# Nominatim relay identity.
|
||||
# NOMINATIM_USER_AGENT=
|
||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
|
||||
|
||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||
# These data sources phone home to politically/commercially sensitive
|
||||
@@ -71,29 +45,14 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# FIMI_ENABLED=false
|
||||
#
|
||||
# Polymarket + Kalshi — US political/election prediction markets.
|
||||
# Default off; enable from Global Threat Intercept (MKT toggle) or set true here.
|
||||
# PREDICTION_MARKETS_ENABLED=false
|
||||
# When enabled, polls use a jittered schedule (not the fixed 5-minute slow tier):
|
||||
# PREDICTION_MARKETS_INTERVAL_MINUTES=7
|
||||
# PREDICTION_MARKETS_SCHEDULER_JITTER_S=240
|
||||
# PREDICTION_MARKETS_INITIAL_DELAY_MAX_S=180
|
||||
# PREDICTION_MARKETS_PRE_FETCH_JITTER_S=90
|
||||
# PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S=45
|
||||
# MESH_POLYMARKET_PAGE_DELAY_JITTER_S=0.08
|
||||
# MESH_KALSHI_PAGE_DELAY_JITTER_S=0.2
|
||||
#
|
||||
# Finnhub fallback / yfinance — financial market data.
|
||||
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
|
||||
# the unauthenticated yfinance fallback to call Yahoo Finance.
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days).
|
||||
# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs.
|
||||
# NUFORC_RECENT_DAYS=60
|
||||
# NUFORC_CACHE_TTL_HOURS=168
|
||||
# On Windows, live scrape uses Python requests by default; optional:
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true
|
||||
# NUFORC enrichment index (HF dataset) is separate — opt-in only:
|
||||
# NUFORC UAP sightings — huggingface.co dataset download.
|
||||
# NUFORC_ENABLED=false
|
||||
#
|
||||
# News RSS aggregator — defaults ON. Set to "false" to disable all
|
||||
@@ -107,12 +66,6 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Free MAP_KEY from https://firms.modaps.eosdis.nasa.gov/map/#d:24hrs;@0.0,0.0,3.0z
|
||||
# FIRMS_MAP_KEY=
|
||||
|
||||
# Ukraine frontline mirror (GitHub). Default follows cyterat/deepstate-map-data@main.
|
||||
# Pin an immutable commit SHA so ingest cannot silently change if main is force-pushed (#362).
|
||||
# Example (verify on GitHub before use): main @ b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_COMMIT=b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_REPO=cyterat/deepstate-map-data
|
||||
|
||||
# Ukraine air raid alerts from alerts.in.ua — free token from https://alerts.in.ua/
|
||||
# ALERTS_IN_UA_TOKEN=
|
||||
|
||||
@@ -142,16 +95,12 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# can identify per-install traffic instead of aggregated "ShadowBroker" hits.
|
||||
# Leave blank to send a generic UA. If you set MESHTASTIC_OPERATOR_CALLSIGN,
|
||||
# it is included in outbound headers to meshtastic.org by default so they
|
||||
# can rate-limit per-operator. Callsign is NOT sent upstream unless you opt in.
|
||||
# can rate-limit per-operator. Set MESHTASTIC_SEND_CALLSIGN_HEADER=false to
|
||||
# suppress the callsign while still using it locally (e.g. for APRS).
|
||||
# MESHTASTIC_OPERATOR_CALLSIGN=
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=false
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=true
|
||||
# MESH_MQTT_PSK= # hex-encoded, empty = default LongFast key
|
||||
|
||||
# LiveUAMap Playwright scraper (#348). Linux/macOS: on by default when Global
|
||||
# Incidents layer is active. Windows: off until the operator enables Global
|
||||
# Incidents in the UI (consent dialog) or sets SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true.
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false forces off on all platforms.
|
||||
|
||||
# ── Mesh / Reticulum (RNS) ─────────────────────────────────────
|
||||
# Full-node / participant-node posture for public Infonet sync.
|
||||
# MESH_NODE_MODE=participant # participant | relay | perimeter
|
||||
|
||||
+40
-121
@@ -45,7 +45,6 @@ from services.mesh.mesh_compatibility import (
|
||||
from services.mesh.mesh_crypto import (
|
||||
_derive_peer_key,
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
verify_signature,
|
||||
verify_node_binding,
|
||||
parse_public_key_algo,
|
||||
@@ -113,14 +112,8 @@ def _scoped_admin_tokens() -> dict[str, list[str]]:
|
||||
return normalized
|
||||
|
||||
|
||||
def _request_scope_path(request: Request) -> str:
    """Return the ASGI request-line path, not the Host-derived URL path.

    The path is read from ``request.scope["path"]``.  A request object that
    has no ``scope`` attribute, an empty/``None`` scope, or a scope without a
    ``"path"`` entry yields an empty string instead of raising.

    Args:
        request: The incoming request; only its ``scope`` attribute is read.

    Returns:
        The scope's ``"path"`` value coerced to ``str``, or ``""`` when it is
        missing or falsy.
    """
    # ``or {}`` also covers a scope attribute that exists but is None/empty.
    scope = getattr(request, "scope", {}) or {}
    return str(scope.get("path") or "")
|
||||
|
||||
|
||||
def _required_scope_for_request(request: Request) -> str:
|
||||
path = _request_scope_path(request)
|
||||
path = str(request.url.path or "")
|
||||
if path.startswith("/api/wormhole/gate/"):
|
||||
return "gate"
|
||||
if path.startswith("/api/wormhole/dm/"):
|
||||
@@ -252,90 +245,15 @@ def _docker_bridge_local_operator_enabled() -> bool:
|
||||
}
|
||||
|
||||
|
||||
# Issue #250 (tg12): the previous implementation returned True for any IP
|
||||
# in the entire 172.16.0.0/12 range. Anyone with `docker run` access on
|
||||
# the same daemon could spin up a container that automatically passed
|
||||
# local-operator auth. The fix narrows trust to ONLY connections whose
|
||||
# source IP matches the configured frontend container's hostname.
|
||||
#
|
||||
# Docker DNS resolves both the compose service name (``frontend``) and
|
||||
# the explicit ``container_name`` (``shadowbroker-frontend``) to the
|
||||
# frontend container's bridge IP. We forward-resolve both, cache the
|
||||
# result for 30s, and only trust connections from those exact IPs.
|
||||
#
|
||||
# Operators on shared Docker hosts get the benefit of the narrower
|
||||
# surface. Operators on single-user installs see no behavior change —
|
||||
# their frontend container still resolves and is still trusted.
|
||||
# Process-local cache for the resolved frontend IPs.  "ips" is the last
# resolved set; "expires" is the time.time() value after which the set must be
# resolved again (0.0 forces a lookup on first use).
_DOCKER_BRIDGE_TRUST_CACHE: dict = {"ips": frozenset(), "expires": 0.0}
# Seconds a resolved IP set is reused before the hostnames are looked up again.
_DOCKER_BRIDGE_TRUST_TTL = 30.0


def _trusted_bridge_frontend_hostnames() -> list[str]:
    """Container hostnames whose IPs we treat as local-operator on the bridge.

    Default covers both Docker Compose service name (``frontend``) and the
    explicit ``container_name`` from the shipped docker-compose.yml
    (``shadowbroker-frontend``). Operators with non-default names can
    override via the ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS`` env var
    (comma-separated; whitespace around entries is trimmed and empty
    entries are dropped).

    Returns:
        The configured hostnames in order.  Setting the variable to an empty
        string yields an empty list, i.e. no hostname is trusted.
    """
    raw = str(
        os.environ.get(
            "SHADOWBROKER_TRUSTED_FRONTEND_HOSTS",
            "frontend,shadowbroker-frontend",
        )
    ).strip()
    return [h.strip() for h in raw.split(",") if h.strip()]


def _resolve_trusted_bridge_ips() -> frozenset[str]:
    """Resolve trusted frontend hostnames to a set of IPs, with caching.

    Cached for ``_DOCKER_BRIDGE_TRUST_TTL`` seconds so we don't hit DNS on
    every request. The cache is process-local — frontend container IP
    rotations during a backend's lifetime will be picked up once the cached
    entry expires.

    Returns:
        The IPv4 addresses the configured hostnames resolve to.  Returns
        ``frozenset()`` if none of the hostnames can be resolved
        (fail-closed — when in doubt, refuse to trust the bridge).  An empty
        result is cached like any other.
    """
    import socket
    import time as _time

    now = _time.time()
    cache = _DOCKER_BRIDGE_TRUST_CACHE
    if cache["expires"] > now:
        return cache["ips"]

    ips: set[str] = set()
    for hostname in _trusted_bridge_frontend_hostnames():
        try:
            _, _, addrs = socket.gethostbyname_ex(hostname)
        except (OSError, UnicodeError):
            # OSError covers socket.gaierror / socket.herror (lookup failed).
            # UnicodeError is raised when the name cannot be IDNA-encoded,
            # e.g. an empty or over-long label in an operator-supplied value;
            # skip it so a malformed entry fails closed instead of raising
            # out of the auth path.
            continue
        ips.update(addrs)

    resolved = frozenset(ips)
    cache["ips"] = resolved
    cache["expires"] = now + _DOCKER_BRIDGE_TRUST_TTL
    return resolved
|
||||
|
||||
|
||||
def _is_docker_bridge_host(host: str) -> bool:
|
||||
"""Return True only when the source IP matches our trusted frontend
|
||||
container hostname(s).
|
||||
|
||||
Previously trusted any 172.16.0.0/12 IP unconditionally. See the
|
||||
block comment above for the security rationale.
|
||||
"""
|
||||
try:
|
||||
ip = ipaddress.ip_address(host)
|
||||
except ValueError:
|
||||
return False
|
||||
# Public IPs are never our frontend container — skip DNS work for them.
|
||||
if not ip.is_private:
|
||||
return False
|
||||
return host in _resolve_trusted_bridge_ips()
|
||||
# Docker Desktop and the default compose bridge normally sit inside
|
||||
# 172.16.0.0/12. Keep this narrower than "any private IP" so a user who
|
||||
# intentionally binds the backend to LAN does not silently trust LAN clients.
|
||||
return ip in ipaddress.ip_network("172.16.0.0/12")
|
||||
|
||||
|
||||
def _is_trusted_local_runtime_host(host: str) -> bool:
|
||||
@@ -449,7 +367,7 @@ async def _verify_openclaw_hmac(request: Request) -> bool:
|
||||
|
||||
# Compute expected signature: HMAC-SHA256(secret, METHOD|path|ts|nonce|body_digest)
|
||||
method = str(request.method or "").upper()
|
||||
path = _request_scope_path(request)
|
||||
path = str(request.url.path or "")
|
||||
message = f"{method}|{path}|{ts_str}|{nonce}|{body_digest}"
|
||||
expected = hmac.new(
|
||||
secret.encode("utf-8"),
|
||||
@@ -521,32 +439,33 @@ _KNOWN_COMPROMISED_PEER_PUSH_SECRET_SHA256 = (
|
||||
def _validate_admin_startup() -> None:
|
||||
admin_key = _current_admin_key()
|
||||
|
||||
if not admin_key:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is not set. Local-operator/admin endpoints will reject "
|
||||
"remote callers until ADMIN_KEY is configured."
|
||||
)
|
||||
return
|
||||
if not admin_key or len(admin_key) < 32:
|
||||
import secrets
|
||||
|
||||
if len(admin_key) < 32:
|
||||
reason = f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
reason = "not set" if not admin_key else f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
new_key = secrets.token_hex(32) # 64-char hex string
|
||||
try:
|
||||
debug_mode = bool(getattr(get_settings(), "MESH_DEBUG_MODE", False))
|
||||
except Exception:
|
||||
debug_mode = False
|
||||
if debug_mode:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is %s. Debug mode is enabled, so startup will continue, "
|
||||
"but production deployments must use a 32+ character key.",
|
||||
from routers.ai_intel import _write_env_value
|
||||
|
||||
_write_env_value("ADMIN_KEY", new_key)
|
||||
os.environ["ADMIN_KEY"] = new_key
|
||||
logger.info(
|
||||
"ADMIN_KEY was %s — auto-generated a strong 64-character key and "
|
||||
"saved it to .env. Admin/mesh endpoints are now secured.",
|
||||
reason,
|
||||
)
|
||||
return
|
||||
logger.error(
|
||||
"ADMIN_KEY is %s. Refusing to start because auto-generating a backend-only "
|
||||
"replacement would desynchronize the frontend and backend containers.",
|
||||
reason,
|
||||
)
|
||||
raise SystemExit(1)
|
||||
# Clear settings cache so the rest of startup picks up the new key
|
||||
try:
|
||||
get_settings.cache_clear()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is %s and could not auto-generate: %s. "
|
||||
"Admin/mesh endpoints may be unavailable.",
|
||||
reason,
|
||||
exc,
|
||||
)
|
||||
|
||||
|
||||
def _validate_insecure_admin_startup() -> None:
|
||||
@@ -749,7 +668,8 @@ def _is_debug_test_request(request: Request) -> bool:
|
||||
if not _debug_mode_enabled():
|
||||
return False
|
||||
client_host = (request.client.host or "").lower() if request.client else ""
|
||||
return client_host == "test"
|
||||
url_host = (request.url.hostname or "").lower() if request.url else ""
|
||||
return client_host == "test" or url_host == "test"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1401,19 +1321,18 @@ def _peer_hmac_url_from_request(request: Request) -> str:
|
||||
header_url = normalize_peer_url(str(request.headers.get("x-peer-url", "") or ""))
|
||||
if header_url:
|
||||
return header_url
|
||||
return ""
|
||||
if not request.url:
|
||||
return ""
|
||||
base_url = f"{request.url.scheme}://{request.url.netloc}".rstrip("/")
|
||||
return normalize_peer_url(base_url)
|
||||
|
||||
|
||||
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests.
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests."""
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
if not secret:
|
||||
return False
|
||||
|
||||
Issue #256: ``resolve_peer_key_for_url`` looks up a per-peer secret
|
||||
in ``MESH_PEER_SECRETS`` first, then falls back to the global
|
||||
``MESH_PEER_PUSH_SECRET``. When a peer URL is listed in the per-peer
|
||||
map, only the listed secret is accepted for it — the global secret
|
||||
is ignored, so any peer that knows only the global secret cannot
|
||||
forge a request claiming to be that peer.
|
||||
"""
|
||||
provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
|
||||
if not provided:
|
||||
return False
|
||||
@@ -1422,7 +1341,7 @@ def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
allowed_peers = set(authenticated_push_peer_urls())
|
||||
if not peer_url or peer_url not in allowed_peers:
|
||||
return False
|
||||
peer_key = resolve_peer_key_for_url(peer_url)
|
||||
peer_key = _derive_peer_key(secret, peer_url)
|
||||
if not peer_key:
|
||||
return False
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
},
|
||||
{
|
||||
"name": "BBC",
|
||||
"url": "https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"url": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"weight": 3
|
||||
},
|
||||
{
|
||||
@@ -47,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"name": "Xinhua",
|
||||
"url": "https://www.news.cn/english/rss/worldrss.xml",
|
||||
"url": "http://www.news.cn/english/rss/worldrss.xml",
|
||||
"weight": 2
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
{
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026",
|
||||
"note": "One-shot bootstrap for first-run carrier positions. Once carrier_cache.json exists in the runtime data volume, this seed file is never read again. All subsequent updates come from GDELT (and any future sources) and are written to carrier_cache.json. A year from now, your runtime cache reflects whatever your install has observed since first launch — not these snapshot positions."
|
||||
},
|
||||
"carriers": {
|
||||
"CVN-68": {
|
||||
"lat": 47.5535,
|
||||
"lng": -122.6400,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-76": {
|
||||
"lat": 47.5580,
|
||||
"lng": -122.6360,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Decommissioning)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-69": {
|
||||
"lat": 36.9465,
|
||||
"lng": -76.3265,
|
||||
"heading": 0,
|
||||
"desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-78": {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-74": {
|
||||
"lat": 36.98,
|
||||
"lng": -76.43,
|
||||
"heading": 0,
|
||||
"desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-75": {
|
||||
"lat": 36.0,
|
||||
"lng": 15.0,
|
||||
"heading": 0,
|
||||
"desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-77": {
|
||||
"lat": 36.5,
|
||||
"lng": -74.0,
|
||||
"heading": 0,
|
||||
"desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-70": {
|
||||
"lat": 32.6840,
|
||||
"lng": -117.1290,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Homeport)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-71": {
|
||||
"lat": 32.6885,
|
||||
"lng": -117.1280,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-72": {
|
||||
"lat": 20.0,
|
||||
"lng": 64.0,
|
||||
"heading": 0,
|
||||
"desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-73": {
|
||||
"lat": 35.2830,
|
||||
"lng": 139.6700,
|
||||
"heading": 180,
|
||||
"desc": "Yokosuka, Japan (Forward deployed)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
{
|
||||
"_comment": [
|
||||
"Baked-in SHA-256 digests for known Shadowbroker release archives.",
|
||||
"",
|
||||
"Issue #231: the self-updater previously skipped integrity verification",
|
||||
"entirely whenever the MESH_UPDATE_SHA256 env var was unset (which is the",
|
||||
"default — nothing in the install docs tells operators to set it). That",
|
||||
"made the auto-update a supply-chain RCE on any compromise of the GitHub",
|
||||
"release pipeline.",
|
||||
"",
|
||||
"The fix uses a multi-source verification chain mirroring the Tor bundle",
|
||||
"digest approach in #201:",
|
||||
"",
|
||||
" 1. MESH_UPDATE_SHA256 env var (operator override, preserved)",
|
||||
" 2. SHA256SUMS.txt asset published alongside each release (primary —",
|
||||
" the maintainer's release process already publishes this)",
|
||||
" 3. This baked-in digest list (second line of defense for releases",
|
||||
" missing a SHA256SUMS asset, or when the asset can't be fetched)",
|
||||
" 4. HTTPS-only fallback with a loud warning (preserves auto-update",
|
||||
" flow during transient outages so users don't get stuck)",
|
||||
"",
|
||||
"Mismatch from a source that DID respond is fatal — the update is",
|
||||
"refused and the existing install keeps running. Only the 'no source",
|
||||
"reachable at all' case falls back to HTTPS-only.",
|
||||
"",
|
||||
"Format: each entry is keyed by release tag and maps asset filenames",
|
||||
"to their canonical SHA-256 digest (hex, lowercase). The updater",
|
||||
"compares the locally-computed digest of the downloaded asset against",
|
||||
"the value here.",
|
||||
"",
|
||||
"When the maintainer ships a new release, add its digests here BEFORE",
|
||||
"removing the old ones so operators on the old code still validate",
|
||||
"against the previous entries during the transition."
|
||||
],
|
||||
"v0.9.79": {
|
||||
"ShadowBroker_v0.9.79.zip": "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47",
|
||||
"ShadowBroker_0.9.79_x64-setup.exe": "f7b676ada45cac7da05868b0a353678c9ee700e3abcf456a7c0c038c36da446f",
|
||||
"ShadowBroker_0.9.79_x64_en-US.msi": "e0713c3cdda184cfbea750bfac0d62a35678fec00847e6476f2cac8e7e42046e"
|
||||
},
|
||||
"v0.9.8": {
|
||||
"ShadowBroker_v0.9.8.zip": "183bb5cd62b9b9349d95df5ef7696cb6ca810ab4b991fa9dab6f898af4c7a175",
|
||||
"ShadowBroker_0.9.8_x64-setup.exe": "94a0309862e9c81c92cdcbfea8eec9dbb97eef19ded82b26217b397defbc810c",
|
||||
"ShadowBroker_0.9.8_x64_en-US.msi": "fe22f9d51e4360d74c18a7250c2fbb9ed4fa4c7a884b3ac0d04a21115466386b"
|
||||
},
|
||||
"v0.9.81": {
|
||||
"ShadowBroker_v0.9.81.zip": "f81f454bdc88e9a32c351df38212b8cfa624704d65764b971bb091eef62259c6",
|
||||
"ShadowBroker_0.9.81_x64-setup.exe": "25e9a95d0d8ce959a7d08fe8e7406772ae24b596652793e81d1de5d02510a5a6",
|
||||
"ShadowBroker_0.9.81_x64_en-US.msi": "34e655fc0c0f195ee4ac978f228a4b2b9d5565253b8771aca9ef4693409e9e70"
|
||||
}
|
||||
}
|
||||
+1
-105
@@ -1,108 +1,4 @@
|
||||
"""Rate-limit key function for slowapi.
|
||||
|
||||
Issue #287 (tg12): the previous implementation used
|
||||
``slowapi.util.get_remote_address`` which only ever returns
|
||||
``request.client.host``. Behind the bundled Next.js proxy (or any other
|
||||
reverse proxy), every connected operator's ``client.host`` is the
|
||||
frontend container's bridge IP. ``@limiter.limit("120/minute")`` then
|
||||
collapses into one shared bucket for everybody on the same backend —
|
||||
one heavy tab can starve every other operator on the node.
|
||||
|
||||
This module replaces that key function with one that:
|
||||
|
||||
* Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted
|
||||
frontend container (same allowlist used by the Docker bridge
|
||||
local-operator trust path — see ``backend/auth.py`` ``#250``).
|
||||
* Picks the FIRST entry in the XFF chain. That's the client end of
|
||||
the proxy chain, which is the operator we want to bucket on.
|
||||
* Falls back to ``request.client.host`` for any peer that isn't on
|
||||
the trusted-frontend allowlist. Direct hits, unrelated containers,
|
||||
and unknown hosts are bucketed exactly like before — there is no
|
||||
way for an untrusted caller to spoof XFF and steal another
|
||||
operator's rate-limit bucket.
|
||||
|
||||
Single-operator nodes are unaffected: the frontend resolves to one IP,
|
||||
that IP is on the trust list, the XFF header is read, and you get one
|
||||
bucket per operator (i.e. you).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
|
||||
def _client_host(request: Any) -> str:
|
||||
"""Return the immediate peer's IP, normalised to a lowercase string."""
|
||||
client = getattr(request, "client", None)
|
||||
if client is None:
|
||||
return ""
|
||||
host = getattr(client, "host", "") or ""
|
||||
return host.lower()
|
||||
|
||||
|
||||
def _first_forwarded_for(value: str) -> str:
|
||||
"""Return the first non-empty entry from an ``X-Forwarded-For`` header.
|
||||
|
||||
RFC 7239 / de-facto XFF format is ``client, proxy1, proxy2, …``. The
|
||||
client end is what we want to bucket on. Empty parts (which appear
|
||||
in some malformed headers) are skipped so we don't end up keying on
|
||||
an empty string.
|
||||
"""
|
||||
for raw in value.split(","):
|
||||
candidate = raw.strip()
|
||||
if candidate:
|
||||
return candidate.lower()
|
||||
return ""
|
||||
|
||||
|
||||
def _is_trusted_frontend_peer(host: str) -> bool:
|
||||
"""True iff ``host`` is one of the resolved trusted-frontend IPs.
|
||||
|
||||
Imported lazily so this module stays usable in unit tests that
|
||||
don't want to pull the whole auth module into scope.
|
||||
"""
|
||||
if not host:
|
||||
return False
|
||||
try:
|
||||
from auth import _resolve_trusted_bridge_ips
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return False
|
||||
try:
|
||||
trusted_ips = _resolve_trusted_bridge_ips()
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return False
|
||||
return host in trusted_ips
|
||||
|
||||
|
||||
def shadowbroker_rate_limit_key(request: Any) -> str:
|
||||
"""slowapi key_func that is proxy-aware on trusted frontend peers only.
|
||||
|
||||
Behaviour matrix:
|
||||
|
||||
* Direct loopback / unknown peer → ``request.client.host``
|
||||
(identical to slowapi's default ``get_remote_address``).
|
||||
* Peer is a trusted frontend container AND ``X-Forwarded-For`` is
|
||||
present → first XFF entry (the actual operator).
|
||||
* Peer is a trusted frontend container but no XFF → fall back to
|
||||
``request.client.host`` (the bridge IP). One shared bucket for
|
||||
everyone in that case, same as before — but you only get there
|
||||
if the trusted frontend forgot to forward XFF, which it won't.
|
||||
"""
|
||||
peer = _client_host(request)
|
||||
if _is_trusted_frontend_peer(peer):
|
||||
headers = getattr(request, "headers", None)
|
||||
if headers is not None:
|
||||
xff = headers.get("x-forwarded-for") or headers.get("X-Forwarded-For")
|
||||
if xff:
|
||||
first = _first_forwarded_for(xff)
|
||||
if first:
|
||||
return first
|
||||
# Untrusted peer (or trusted peer without XFF): match the original
|
||||
# get_remote_address behaviour byte-for-byte.
|
||||
return get_remote_address(request)
|
||||
|
||||
|
||||
limiter = Limiter(key_func=shadowbroker_rate_limit_key)
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
+132
-555
File diff suppressed because it is too large
Load Diff
@@ -7,15 +7,16 @@ py-modules = []
|
||||
|
||||
[project]
|
||||
name = "backend"
|
||||
version = "0.9.81"
|
||||
version = "0.9.79"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"apscheduler==3.10.3",
|
||||
"beautifulsoup4>=4.9.0",
|
||||
"cachetools==5.5.2",
|
||||
"cryptography>=46.0.7",
|
||||
"cloudscraper==1.2.71",
|
||||
"cryptography>=41.0.0",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.136.3",
|
||||
"fastapi==0.115.12",
|
||||
"feedparser==6.0.10",
|
||||
"httpx==0.28.1",
|
||||
"playwright==1.59.0",
|
||||
@@ -24,7 +25,7 @@ dependencies = [
|
||||
"pydantic-settings==2.8.1",
|
||||
"pystac-client==0.8.6",
|
||||
"python-dotenv==1.2.2",
|
||||
"requests==2.33.0",
|
||||
"requests==2.31.0",
|
||||
"PySocks==1.7.1",
|
||||
"reverse-geocoder==1.5.1",
|
||||
"sgp4==2.25",
|
||||
@@ -33,18 +34,17 @@ dependencies = [
|
||||
"paho-mqtt>=1.6.0,<2.0.0",
|
||||
"PyNaCl>=1.5.0",
|
||||
"slowapi==0.1.9",
|
||||
"starlette==1.0.1",
|
||||
"vaderSentiment>=3.3.0",
|
||||
"uvicorn==0.34.0",
|
||||
"yfinance==1.3.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["pytest>=9.0.3", "pytest-asyncio>=1.4.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
dev = ["pytest>=8.3.4", "pytest-asyncio==0.25.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
# The current backend carries historical style debt in large legacy modules.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.81 release.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.79 release.
|
||||
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
||||
|
||||
[tool.black]
|
||||
|
||||
@@ -82,40 +82,9 @@ async def api_get_keys_meta(request: Request):
|
||||
return get_env_path_info()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/operator-handle",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("60/minute")
|
||||
async def api_get_operator_handle(request: Request):
|
||||
"""Round 7a: return the per-install operator handle so the frontend
|
||||
can include it in browser-direct third-party API calls (Wikipedia /
|
||||
Wikidata via lib/wikimediaClient). The handle is auto-generated on
|
||||
first use; operators can override it via the OPERATOR_HANDLE setting
|
||||
or the env var of the same name.
|
||||
|
||||
Gated on local-operator: legitimate browser usage goes through the
|
||||
Next.js proxy which auto-attaches the admin key; remote scanners get
|
||||
403. The handle itself isn't a secret (it's sent to every third-party
|
||||
API the operator touches), but admin-gating it matches the rest of
|
||||
the settings endpoints and follows least-privilege.
|
||||
"""
|
||||
from services.network_utils import get_operator_handle
|
||||
return {"handle": get_operator_handle()}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/news-feeds",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@router.get("/api/settings/news-feeds")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_news_feeds(request: Request):
|
||||
"""Issue #252 (tg12): the curated feed inventory is configuration
|
||||
state, not a public data feed. Gated on local-operator so the
|
||||
Tauri shell, the Docker bridge frontend, and any caller with an
|
||||
admin key all see the full list; anonymous LAN/internet callers
|
||||
can no longer enumerate operator source URLs.
|
||||
"""
|
||||
from services.news_feed_config import get_feeds
|
||||
return get_feeds()
|
||||
|
||||
@@ -149,18 +118,9 @@ async def api_reset_news_feeds(request: Request):
|
||||
@router.get("/api/settings/node")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_node_settings(request: Request):
|
||||
"""Issue #243 (tg12): node_mode and node_enabled are operational
|
||||
posture. Anonymous callers receive an empty stub; authenticated
|
||||
callers (local-operator or admin/scoped token) see the full
|
||||
state. See the canonical handler in backend/main.py for the full
|
||||
rationale.
|
||||
"""
|
||||
import asyncio
|
||||
from auth import _scoped_view_authenticated
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
if not _scoped_view_authenticated(request, "node"):
|
||||
return {}
|
||||
return {
|
||||
**data,
|
||||
"node_mode": _current_node_mode(),
|
||||
@@ -250,19 +210,9 @@ async def api_set_meshtastic_mqtt_settings(request: Request, body: MeshtasticMqt
|
||||
return _meshtastic_runtime_snapshot()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/timemachine",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@router.get("/api/settings/timemachine")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_timemachine_settings(request: Request):
|
||||
"""Issue #253 (tg12): archival-capture posture is operationally
|
||||
sensitive — it tells a remote caller whether this deployment is
|
||||
retaining replayable historical surveillance data. Gated on
|
||||
local-operator so the Tauri shell and Docker bridge frontend
|
||||
still see the toggle state, but anonymous LAN/internet callers
|
||||
can no longer fingerprint Time Machine state.
|
||||
"""
|
||||
import asyncio
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
|
||||
+42
-202
@@ -18,12 +18,6 @@ from auth import require_local_operator, require_openclaw_or_local
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import latest_data as _latest_data
|
||||
|
||||
|
||||
|
||||
def _ai_intel_user_agent() -> str:
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("ai-intel")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
@@ -453,7 +447,7 @@ async def ai_satellite_images(
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=10,
|
||||
headers={"User-Agent": _ai_intel_user_agent()},
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (ai-intel)"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
features = resp.json().get("features", [])
|
||||
@@ -1590,7 +1584,7 @@ async def agent_tool_manifest(request: Request):
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.81",
|
||||
"version": "0.9.79",
|
||||
"access_tier": access_tier,
|
||||
"available_commands": available_commands,
|
||||
"transport": {
|
||||
@@ -2226,7 +2220,7 @@ async def api_capabilities(request: Request):
|
||||
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.81",
|
||||
"version": "0.9.79",
|
||||
"auth": {
|
||||
"method": "HMAC-SHA256",
|
||||
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
||||
@@ -2521,85 +2515,45 @@ async def api_capabilities(request: Request):
|
||||
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
||||
# These endpoints manage the HMAC secret itself, so they MUST require
|
||||
# local operator access to prevent privilege escalation.
|
||||
#
|
||||
# Issue #302 (tg12): pre-fix, GET /api/ai/connect-info had two problems:
|
||||
#
|
||||
# 1. ``?reveal=true`` made the full secret travel through every operator
|
||||
# page-load that opened the Connect modal. Even gated to
|
||||
# ``require_local_operator``, that put the secret into browser
|
||||
# history, dev-tools network panels, browser disk caches, HAR
|
||||
# exports, and screen captures. Every time the modal opened.
|
||||
#
|
||||
# 2. The same GET endpoint auto-bootstrapped (generated + persisted)
|
||||
# the secret on first read. Side effects on a GET are a footgun:
|
||||
# browser prefetchers, mirror tools, and casual curl-from-history
|
||||
# would all silently mint+persist a fresh secret. (Gated, but
|
||||
# still surprising — and noisy in the audit log.)
|
||||
#
|
||||
# Resolution:
|
||||
#
|
||||
# GET /api/ai/connect-info — always returns the MASKED
|
||||
# secret. No ?reveal param.
|
||||
# No auto-bootstrap; if the
|
||||
# secret is missing,
|
||||
# ``hmac_secret_set: false``
|
||||
# tells the frontend to call
|
||||
# /bootstrap.
|
||||
#
|
||||
# POST /api/ai/connect-info/bootstrap — NEW. Generates + persists the
|
||||
# secret if missing. Idempotent.
|
||||
# Returns metadata only, never
|
||||
# the full secret.
|
||||
#
|
||||
# POST /api/ai/connect-info/reveal — NEW. Returns the full secret in
|
||||
# the body with strict
|
||||
# ``Cache-Control: no-store,
|
||||
# no-cache, must-revalidate``
|
||||
# + ``Pragma: no-cache`` so
|
||||
# it does not land in browser
|
||||
# caches. POST means it does
|
||||
# not land in URL history.
|
||||
#
|
||||
# POST /api/ai/connect-info/regenerate — keeps existing one-time-reveal
|
||||
# behavior (regenerate IS a
|
||||
# deliberate destructive action
|
||||
# the operator triggered, so
|
||||
# displaying the new secret
|
||||
# once is the only path that
|
||||
# makes the operation useful).
|
||||
# Same no-store headers added.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Cache-Control headers that should accompany every response carrying the
|
||||
# full HMAC secret. Reused across the reveal + regenerate endpoints so a
|
||||
# future refactor that splits or renames them can't forget the headers.
|
||||
_NO_STORE_HEADERS = {
|
||||
"Cache-Control": "no-store, no-cache, must-revalidate, private",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0",
|
||||
}
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
|
||||
|
||||
def _mask_hmac_secret(secret: str) -> str:
|
||||
"""Return a fingerprint-style mask (first6 + bullets + last4) suitable
|
||||
for display in the UI before the operator clicks Reveal."""
|
||||
if not secret:
|
||||
return ""
|
||||
if len(secret) > 10:
|
||||
return secret[:6] + "••••••••" + secret[-4:]
|
||||
return "••••••••"
|
||||
|
||||
|
||||
def _connect_info_metadata(settings) -> dict:
|
||||
"""Return everything the Connect modal needs EXCEPT the secret itself.
|
||||
|
||||
Shared between GET /api/ai/connect-info (where the full secret is
|
||||
masked) and POST /api/ai/connect-info/bootstrap (where the operator
|
||||
just generated a secret but we don't return it inline — they have to
|
||||
call /reveal to see it).
|
||||
The HMAC secret is masked by default. Pass ?reveal=true to see the full key.
|
||||
Private keys are NEVER returned.
|
||||
"""
|
||||
import os
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
|
||||
# Auto-generate if not set
|
||||
if not hmac_secret:
|
||||
hmac_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", hmac_secret)
|
||||
# Clear settings cache so next read picks up the new value
|
||||
get_settings.cache_clear()
|
||||
|
||||
masked = hmac_secret[:6] + "••••••••" + hmac_secret[-4:] if len(hmac_secret) > 10 else "••••••••"
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret if reveal else masked,
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": True,
|
||||
"auto_generated_this_call": not bool(settings.OPENCLAW_HMAC_SECRET or ""),
|
||||
"notes": [
|
||||
"If no HMAC secret exists yet, this endpoint bootstraps one and persists it to .env.",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
"access_tier": access_tier,
|
||||
"trust_model": {
|
||||
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
||||
@@ -2653,120 +2607,10 @@ def _connect_info_metadata(settings) -> dict:
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
|
||||
The HMAC secret is always returned as a fingerprint mask
|
||||
(``first6 + bullets + last4``); the full value is only ever served by
|
||||
``POST /api/ai/connect-info/reveal`` (see #302). When the secret has
|
||||
not been bootstrapped yet, ``hmac_secret_set`` is false and the
|
||||
frontend should call ``POST /api/ai/connect-info/bootstrap``.
|
||||
|
||||
Private keys are NEVER returned.
|
||||
"""
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(hmac_secret),
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": False,
|
||||
"notes": [
|
||||
"Call POST /api/ai/connect-info/bootstrap to mint a secret on first use.",
|
||||
"Call POST /api/ai/connect-info/reveal to see the full secret (no-store).",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
**_connect_info_metadata(settings),
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/bootstrap", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def bootstrap_hmac_secret(request: Request):
|
||||
"""Mint and persist the OpenClaw HMAC secret if it isn't already set.
|
||||
|
||||
Idempotent: if a secret already exists, returns ``generated: false``
|
||||
and leaves the existing secret untouched. Never returns the secret
|
||||
value in the response body — the operator calls
|
||||
``POST /api/ai/connect-info/reveal`` to see it.
|
||||
"""
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
existing = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
if existing:
|
||||
return {
|
||||
"ok": True,
|
||||
"generated": False,
|
||||
"hmac_secret_set": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(existing),
|
||||
"detail": "HMAC secret already configured. Use /reveal to see it.",
|
||||
}
|
||||
|
||||
new_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
||||
get_settings.cache_clear()
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"generated": True,
|
||||
"hmac_secret_set": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(new_secret),
|
||||
"detail": "HMAC secret generated. Call /reveal to copy it into your OpenClaw config.",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/reveal", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def reveal_hmac_secret(request: Request):
|
||||
"""Return the full HMAC secret in the response body.
|
||||
|
||||
POST (not GET) so the secret never lands in URL history, access logs,
|
||||
or browser visit history. Strict ``Cache-Control: no-store`` headers
|
||||
prevent intermediaries from persisting the response. Returns 404 if
|
||||
no secret has been bootstrapped — the frontend should call
|
||||
``POST /api/ai/connect-info/bootstrap`` first.
|
||||
"""
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
if not hmac_secret:
|
||||
raise HTTPException(
|
||||
404,
|
||||
"No HMAC secret configured. Call POST /api/ai/connect-info/bootstrap first.",
|
||||
)
|
||||
return JSONResponse(
|
||||
content={
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret,
|
||||
"masked_hmac_secret": _mask_hmac_secret(hmac_secret),
|
||||
},
|
||||
headers=_NO_STORE_HEADERS,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def regenerate_hmac_secret(request: Request):
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working.
|
||||
|
||||
Returns the new secret in the response body — this is the only
|
||||
operation where the full secret travels back through the response,
|
||||
because regenerating IS a deliberate destructive action the operator
|
||||
triggered and they need to see the new value once to update their
|
||||
OpenClaw configuration. Strict ``Cache-Control: no-store`` headers
|
||||
keep it from being persisted by browser caches, proxies, or HAR
|
||||
capture tooling.
|
||||
"""
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working."""
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
@@ -2774,15 +2618,11 @@ async def regenerate_hmac_secret(request: Request):
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
||||
get_settings.cache_clear()
|
||||
|
||||
return JSONResponse(
|
||||
content={
|
||||
"ok": True,
|
||||
"hmac_secret": new_secret,
|
||||
"masked_hmac_secret": _mask_hmac_secret(new_secret),
|
||||
"detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
|
||||
},
|
||||
headers=_NO_STORE_HEADERS,
|
||||
)
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": new_secret,
|
||||
"detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
|
||||
}
|
||||
|
||||
|
||||
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
||||
|
||||
@@ -165,13 +165,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
|
||||
|
||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
||||
# Round 7a: per-install operator handle. Mozilla/5.0 prefix retained
|
||||
# because many CCTV endpoints sniff for a browser-like prefix.
|
||||
from services.network_utils import outbound_user_agent
|
||||
headers = {
|
||||
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||
**profile.headers,
|
||||
}
|
||||
headers = {"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)", **profile.headers}
|
||||
range_header = request.headers.get("range")
|
||||
if range_header:
|
||||
headers["Range"] = range_header
|
||||
|
||||
+12
-215
@@ -1,7 +1,6 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
from fastapi import APIRouter, Request, Response, Query, Depends
|
||||
@@ -9,7 +8,7 @@ from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from limiter import limiter
|
||||
from auth import require_admin, require_local_operator
|
||||
from services.data_fetcher import update_all_data
|
||||
from services.data_fetcher import get_latest_data, update_all_data
|
||||
import orjson
|
||||
import json as json_mod
|
||||
|
||||
@@ -31,14 +30,6 @@ class LayerUpdate(BaseModel):
|
||||
layers: dict[str, bool]
|
||||
|
||||
|
||||
class LiveUamapOptInUpdate(BaseModel):
|
||||
opted_in: bool
|
||||
|
||||
|
||||
class PredictionMarketsOptInUpdate(BaseModel):
|
||||
opted_in: bool
|
||||
|
||||
|
||||
_LAST_VIEWPORT_UPDATE: tuple | None = None
|
||||
_LAST_VIEWPORT_UPDATE_TS = 0.0
|
||||
_VIEWPORT_UPDATE_LOCK = threading.Lock()
|
||||
@@ -107,88 +98,6 @@ def _current_etag(prefix: str = "") -> str:
|
||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||
|
||||
|
||||
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||
# filtering. Light reference layers (datacenters, military_bases,
|
||||
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||
# in these sets — they ship world-scale even when bounds are supplied so
|
||||
# panning never reveals an "empty world" of static infrastructure.
|
||||
#
|
||||
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||
# is byte-for-byte identical to the pre-#288 behavior.
|
||||
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"commercial_flights",
|
||||
"military_flights",
|
||||
"private_flights",
|
||||
"private_jets",
|
||||
"tracked_flights",
|
||||
"ships",
|
||||
"cctv",
|
||||
"uavs",
|
||||
"liveuamap",
|
||||
"gps_jamming",
|
||||
"sigint",
|
||||
"trains",
|
||||
)
|
||||
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"gdelt",
|
||||
"firms_fires",
|
||||
"kiwisdr",
|
||||
"scanners",
|
||||
"psk_reporter",
|
||||
)
|
||||
|
||||
|
||||
def _has_full_bbox(s, w, n, e) -> bool:
|
||||
return None not in (s, w, n, e)
|
||||
|
||||
|
||||
def _bbox_etag_suffix(s, w, n, e) -> str:
|
||||
"""Quantize bbox to 1° before mixing into the ETag.
|
||||
|
||||
The 20% padding inside _bbox_filter already absorbs sub-degree pans;
|
||||
quantizing here means small mouse drags don't blow the ETag cache
|
||||
on the client. Full-world bounds collapse to a single suffix.
|
||||
"""
|
||||
if not _has_full_bbox(s, w, n, e):
|
||||
return ""
|
||||
try:
|
||||
ss = math.floor(float(s))
|
||||
ww = math.floor(float(w))
|
||||
nn = math.ceil(float(n))
|
||||
ee = math.ceil(float(e))
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
# If the requested window covers basically the whole world, treat it as
|
||||
# "no bbox" for caching purposes so world-zoomed clients all hit the
|
||||
# same ETag and benefit from the existing 304 path.
|
||||
lat_span, lng_span = _bbox_spans(s, w, n, e)
|
||||
if lng_span >= 300 or lat_span >= 120:
|
||||
return ""
|
||||
return f"|bbox={ss},{ww},{nn},{ee}"
|
||||
|
||||
|
||||
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
|
||||
s: float, w: float, n: float, e: float) -> dict:
|
||||
"""In-place filter the heavy-key collections in *payload* to a viewport.
|
||||
|
||||
Items without lat/lng are passed through (so e.g. summary blobs aren't
|
||||
accidentally dropped). The existing _bbox_filter helper applies a 20%
|
||||
pad and handles antimeridian crossings.
|
||||
"""
|
||||
lat_span, lng_span = _bbox_spans(s, w, n, e)
|
||||
# World-scale request → skip filtering entirely. Spares the CPU and
|
||||
# guarantees the response matches the no-params shape.
|
||||
if lng_span >= 300 or lat_span >= 120:
|
||||
return payload
|
||||
for key in heavy_keys:
|
||||
items = payload.get(key)
|
||||
if not isinstance(items, list) or not items:
|
||||
continue
|
||||
payload[key] = _bbox_filter(items, s, w, n, e)
|
||||
return payload
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
@@ -395,95 +304,6 @@ async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.get("/api/liveuamap/scraper-status", dependencies=[Depends(require_local_operator)])
|
||||
async def api_liveuamap_scraper_status():
|
||||
"""Whether LiveUAMap Playwright may run (Windows needs UI opt-in unless env forces)."""
|
||||
from services.liveuamap_settings import liveuamap_scraper_status
|
||||
|
||||
return liveuamap_scraper_status()
|
||||
|
||||
|
||||
@router.post("/api/liveuamap/scraper-opt-in", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_liveuamap_scraper_opt_in(body: LiveUamapOptInUpdate, request: Request):
|
||||
"""Persist operator consent for LiveUAMap scraper (#348)."""
|
||||
from services.liveuamap_settings import liveuamap_scraper_status, set_liveuamap_ui_opt_in
|
||||
|
||||
set_liveuamap_ui_opt_in(body.opted_in)
|
||||
if body.opted_in:
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
if is_any_active("global_incidents"):
|
||||
threading.Thread(target=_run_liveuamap_refresh, daemon=True).start()
|
||||
return liveuamap_scraper_status()
|
||||
|
||||
|
||||
def _run_liveuamap_refresh() -> None:
|
||||
try:
|
||||
from services.fetchers.geo import update_liveuamap
|
||||
|
||||
update_liveuamap()
|
||||
except Exception as e:
|
||||
logger.warning("LiveUAMap refresh after opt-in failed: %s", e)
|
||||
|
||||
|
||||
@router.get("/api/prediction-markets/status", dependencies=[Depends(require_local_operator)])
|
||||
async def api_prediction_markets_status():
|
||||
"""Whether Polymarket/Kalshi fetches and news market correlation are enabled."""
|
||||
from services.prediction_markets_settings import prediction_markets_status
|
||||
|
||||
return prediction_markets_status()
|
||||
|
||||
|
||||
@router.post("/api/prediction-markets/opt-in", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_prediction_markets_opt_in(body: PredictionMarketsOptInUpdate, request: Request):
|
||||
"""Enable or disable prediction market fetches + intercept story correlation."""
|
||||
from services.config import get_settings
|
||||
from services.prediction_markets_settings import (
|
||||
prediction_markets_status,
|
||||
set_prediction_markets_ui_opt_in,
|
||||
)
|
||||
from routers.ai_intel import _write_env_value
|
||||
|
||||
set_prediction_markets_ui_opt_in(body.opted_in)
|
||||
_write_env_value("PREDICTION_MARKETS_ENABLED", "true" if body.opted_in else "false")
|
||||
os.environ["PREDICTION_MARKETS_ENABLED"] = "true" if body.opted_in else "false"
|
||||
get_settings.cache_clear()
|
||||
|
||||
if body.opted_in:
|
||||
threading.Thread(target=_run_prediction_markets_refresh, daemon=True).start()
|
||||
else:
|
||||
threading.Thread(target=_run_prediction_markets_disable, daemon=True).start()
|
||||
|
||||
return prediction_markets_status()
|
||||
|
||||
|
||||
def _run_prediction_markets_refresh() -> None:
|
||||
try:
|
||||
from services.fetchers.prediction_markets import fetch_prediction_markets
|
||||
from services.fetchers.news import fetch_news
|
||||
|
||||
fetch_prediction_markets()
|
||||
fetch_news()
|
||||
except Exception as e:
|
||||
logger.warning("Prediction markets refresh after opt-in failed: %s", e)
|
||||
|
||||
|
||||
def _run_prediction_markets_disable() -> None:
|
||||
try:
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
from services.fetchers.news import fetch_news
|
||||
|
||||
with _data_lock:
|
||||
latest_data["prediction_markets"] = []
|
||||
latest_data["trending_markets"] = []
|
||||
_mark_fresh("prediction_markets")
|
||||
fetch_news()
|
||||
except Exception as e:
|
||||
logger.warning("Prediction markets disable cleanup failed: %s", e)
|
||||
|
||||
|
||||
@router.post("/api/layers", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def update_layers(update: LayerUpdate, request: Request):
|
||||
@@ -554,17 +374,7 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
@router.get("/api/live-data")
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data(request: Request):
|
||||
etag = _current_etag(prefix="live|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import get_latest_data_deepcopy_snapshot
|
||||
|
||||
payload = get_latest_data_deepcopy_snapshot()
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload)),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
return get_latest_data()
|
||||
|
||||
|
||||
@router.get("/api/bootstrap/critical")
|
||||
@@ -669,14 +479,13 @@ async def bootstrap_critical(request: Request):
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data_fast(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||
):
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -716,11 +525,6 @@ async def live_data_fast(
|
||||
payload = _cap_fast_startup_payload(payload)
|
||||
else:
|
||||
payload = _cap_fast_dashboard_payload(payload)
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
|
||||
@@ -729,13 +533,12 @@ async def live_data_fast(
|
||||
@limiter.limit("60/minute")
|
||||
async def live_data_slow(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
):
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
etag = _current_etag(prefix="slow|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -789,12 +592,6 @@ async def live_data_slow(
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Static reference layers (datacenters, military bases,
|
||||
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||
# hides the infrastructure overlay the operator already has on screen.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
media_type="application/json",
|
||||
|
||||
@@ -8,7 +8,7 @@ from services.data_fetcher import get_latest_data
|
||||
from services.schemas import HealthResponse
|
||||
import os
|
||||
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.81")
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.79")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -59,12 +59,6 @@ async def health_check(request: Request):
|
||||
# when the SPKI-pinned fallback is in effect. The data plane keeps
|
||||
# flowing (this is by design — see ais_proxy.js comments) but observers
|
||||
# who care about MITM-protection posture deserve a visible signal.
|
||||
#
|
||||
# Plus connectivity health (added 2026-05-23 when stream.aisstream.io
|
||||
# went fully offline): ``connected`` tells the frontend whether ship
|
||||
# data is actually flowing. When false, a banner explains that ships
|
||||
# are unavailable due to an upstream outage — better than the user
|
||||
# silently seeing an empty ocean and assuming we broke something.
|
||||
ais_status: dict = {}
|
||||
try:
|
||||
from services.ais_stream import ais_proxy_status
|
||||
@@ -75,15 +69,6 @@ async def health_check(request: Request):
|
||||
# Don't override a worse top-level status if SLOs already failed,
|
||||
# but escalate ok -> degraded so the field surfaces in dashboards.
|
||||
top_status = "degraded"
|
||||
# AIS_API_KEY not configured is "feature off", not "system broken" —
|
||||
# so we only escalate when the operator opted into AIS (key set) AND
|
||||
# the stream is currently offline.
|
||||
if (
|
||||
os.environ.get("AIS_API_KEY")
|
||||
and ais_status.get("connected") is False
|
||||
and top_status == "ok"
|
||||
):
|
||||
top_status = "degraded"
|
||||
|
||||
return {
|
||||
"status": top_status,
|
||||
|
||||
@@ -223,21 +223,11 @@ async def oracle_markets_more(request: Request, category: str = "NEWS", offset:
|
||||
"has_more": offset + limit < len(cat_markets), "total": len(cat_markets)}
|
||||
|
||||
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@router.post("/api/mesh/oracle/resolve")
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve(request: Request):
|
||||
"""Resolve a prediction market.
|
||||
|
||||
Issue #240 (tg12): requires admin authentication. The
|
||||
``mesh_write_exempt`` decorator below is **metadata only** — it tags
|
||||
the route as not requiring a mesh signed-write envelope, it does
|
||||
NOT itself enforce caller authorization. The ``Depends(require_admin)``
|
||||
on the route decorator is what actually gates access.
|
||||
"""
|
||||
"""Resolve a prediction market."""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
body = await request.json()
|
||||
market_title = body.get("market_title", "")
|
||||
@@ -337,18 +327,11 @@ async def oracle_predictions(request: Request, node_id: str = ""):
|
||||
active_predictions, authenticated=_scoped_view_authenticated(request, "mesh.audit"))
|
||||
|
||||
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve-stakes",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@router.post("/api/mesh/oracle/resolve-stakes")
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve_stakes(request: Request):
|
||||
"""Resolve all expired stake contests.
|
||||
|
||||
Issue #241 (tg12): requires admin authentication. See the note on
|
||||
``oracle_resolve`` above — ``mesh_write_exempt`` is metadata only.
|
||||
"""
|
||||
"""Resolve all expired stake contests."""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
resolutions = oracle_ledger.resolve_expired_stakes()
|
||||
return {"ok": True, "resolutions": resolutions, "count": len(resolutions)}
|
||||
|
||||
@@ -55,12 +55,6 @@ def _hydrate_gate_store_from_chain(events: list) -> int:
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list) -> int:
|
||||
import main as _m
|
||||
|
||||
return int(_m._hydrate_dm_relay_from_chain(events))
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def infonet_peer_push(request: Request):
|
||||
@@ -88,68 +82,9 @@ async def infonet_peer_push(request: Request):
|
||||
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": []}
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@router.post("/api/mesh/dm/replicate-envelope")
|
||||
@limiter.limit("60/minute")
|
||||
async def dm_replicate_envelope(request: Request):
|
||||
"""Accept a DM envelope replicated from a peer relay (cross-node mailbox).
|
||||
|
||||
Companion endpoint to ``DMRelay.replicate_to_peers`` (outbound, in
|
||||
``mesh_dm_relay.py``). The sender's relay POSTs an encrypted DM
|
||||
envelope here after a successful local ``deposit``; this endpoint
|
||||
re-enforces the per-(sender, recipient) anti-spam cap and stores
|
||||
the envelope in the local mailbox if accepted.
|
||||
|
||||
The cap is the network rule: a hostile sender's relay can spool
|
||||
extras locally, but every honest peer enforces the cap on inbound
|
||||
replication. Recipient polling from any honest peer therefore
|
||||
never sees more than ``MESH_DM_PENDING_PER_SENDER_LIMIT`` pending
|
||||
from any one sender, no matter how many spam attempts were tried.
|
||||
|
||||
Same HMAC auth pattern as ``infonet_peer_push`` and ``gate_peer_push``.
|
||||
"""
|
||||
content_length = request.headers.get("content-length")
|
||||
if content_length:
|
||||
try:
|
||||
# DM envelopes are bounded by MESH_DM_MAX_MSG_BYTES + envelope
|
||||
# overhead; 64 KB is a generous ceiling.
|
||||
if int(content_length) > 65_536:
|
||||
return Response(
|
||||
content='{"ok":false,"detail":"Request body too large (max 64KB)"}',
|
||||
status_code=413, media_type="application/json",
|
||||
)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
body_bytes = await request.body()
|
||||
if not _verify_peer_push_hmac(request, body_bytes):
|
||||
return Response(
|
||||
content='{"ok":false,"detail":"Invalid or missing peer HMAC"}',
|
||||
status_code=403, media_type="application/json",
|
||||
)
|
||||
try:
|
||||
body = json_mod.loads(body_bytes or b"{}")
|
||||
except (ValueError, TypeError):
|
||||
return Response(
|
||||
content='{"ok":false,"detail":"Invalid JSON body"}',
|
||||
status_code=400, media_type="application/json",
|
||||
)
|
||||
envelope = body.get("envelope")
|
||||
if not isinstance(envelope, dict):
|
||||
return {"ok": False, "detail": "envelope must be an object"}
|
||||
|
||||
originating_peer = _peer_hmac_url_from_request(request) or ""
|
||||
|
||||
from services.mesh.mesh_dm_relay import dm_relay
|
||||
result = dm_relay.accept_replica(
|
||||
envelope=envelope,
|
||||
originating_peer_url=originating_peer,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
@router.post("/api/mesh/gate/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def gate_peer_push(request: Request):
|
||||
|
||||
@@ -65,7 +65,6 @@ from services.mesh.mesh_signed_events import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
_INFONET_SYNC_RATE_LIMIT = "600/minute"
|
||||
|
||||
|
||||
def _signed_body(request: Request) -> dict[str, Any]:
|
||||
@@ -264,19 +263,6 @@ def _redact_public_event(event: dict) -> dict:
|
||||
return _redact_vote_gate(_redact_key_rotate_payload(_redact_gate_metadata(event)))
|
||||
|
||||
|
||||
def _infonet_private_transport_required() -> bool:
|
||||
import main as _m
|
||||
|
||||
return bool(_m._infonet_private_transport_required())
|
||||
|
||||
|
||||
def _infonet_sync_response_events(events: list[dict], request=None) -> list[dict]:
|
||||
"""Build the sync event surface for the current transport policy."""
|
||||
import main as _m
|
||||
|
||||
return _m._infonet_sync_response_events(events, request=request)
|
||||
|
||||
|
||||
def _trusted_gate_reply_to(event: dict) -> str:
|
||||
if not isinstance(event, dict):
|
||||
return ""
|
||||
@@ -588,12 +574,6 @@ def _hydrate_gate_store_from_chain(events: list[dict]) -> int:
|
||||
pass
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list[dict]) -> int:
|
||||
import main as _m
|
||||
|
||||
return int(_m._hydrate_dm_relay_from_chain(events))
|
||||
|
||||
# --- Safe type helpers ---
|
||||
|
||||
def _safe_int(val, default=0):
|
||||
@@ -1551,7 +1531,7 @@ async def infonet_locator(request: Request, limit: int = Query(32, ge=4, le=128)
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/sync")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
@limiter.limit("30/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.PEER_GOSSIP)
|
||||
async def infonet_sync_post(
|
||||
request: Request,
|
||||
@@ -1604,7 +1584,8 @@ async def infonet_sync_post(
|
||||
elif matched_hash == GENESIS_HASH and len(locator) > 1:
|
||||
forked = True
|
||||
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
|
||||
response = {
|
||||
"events": events,
|
||||
@@ -1665,7 +1646,7 @@ async def mesh_rns_status(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/mesh/infonet/sync")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
@limiter.limit("30/minute")
|
||||
async def infonet_sync(
|
||||
request: Request,
|
||||
after_hash: str = "",
|
||||
@@ -1703,7 +1684,8 @@ async def infonet_sync(
|
||||
)
|
||||
base = after_hash or GENESIS_HASH
|
||||
events = infonet.get_events_after(base, limit=limit)
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
return {
|
||||
"events": events,
|
||||
"after_hash": base,
|
||||
@@ -1742,7 +1724,6 @@ async def infonet_ingest(request: Request):
|
||||
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@@ -2298,12 +2279,6 @@ async def infonet_event(request: Request, event_id: str):
|
||||
)
|
||||
return _strip_gate_for_access(evt, access)
|
||||
return {"ok": False, "detail": "Event not found"}
|
||||
if evt.get("event_type") == "dm_message":
|
||||
return await _private_plane_refusal_response(
|
||||
request,
|
||||
status_code=403,
|
||||
payload=_private_plane_access_denied_payload(),
|
||||
)
|
||||
if evt.get("event_type") == "gate_message":
|
||||
gate_id = str(evt.get("payload", {}).get("gate", "") or evt.get("gate", "") or "").strip()
|
||||
access = _verify_gate_access(request, gate_id) if gate_id else ""
|
||||
@@ -2328,7 +2303,7 @@ async def infonet_node_events(
|
||||
from services.mesh.mesh_hashchain import infonet
|
||||
|
||||
events = infonet.get_events_by_node(node_id, limit=limit)
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
events = _redact_public_node_history(
|
||||
events,
|
||||
@@ -2353,7 +2328,7 @@ async def infonet_events_by_type(
|
||||
else:
|
||||
events = list(reversed(infonet.events))
|
||||
events = events[offset : offset + limit]
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
return {
|
||||
"events": events,
|
||||
|
||||
+10
-120
@@ -85,63 +85,7 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ───
|
||||
@router.get("/api/wikipedia/summary")
@limiter.limit("60/minute")
def api_wikipedia_summary(
    request: Request,
    title: str = Query(..., min_length=1, max_length=256),
):
    """Serve a Wikipedia page summary through the self-hosted backend.

    Args:
        request: Incoming request. The body never reads it.
            NOTE(review): presumably required by the rate limiter — confirm.
        title: Page title to look up (1 to 256 characters).

    Returns:
        Whatever ``fetch_wikipedia_page_summary`` returns, or a 404 JSON
        response with ``{"detail": "not_found"}`` when it returns ``None``.
    """
    from services.region_dossier import fetch_wikipedia_page_summary

    page_summary = fetch_wikipedia_page_summary(title)
    if page_summary is not None:
        return page_summary
    return JSONResponse(status_code=404, content={"detail": "not_found"})
|
||||
|
||||
|
||||
# Request body accepted by POST /api/wikidata/sparql.
class WikidataSparqlRequest(BaseModel):
    # Raw SPARQL text; the route strips it and enforces a 12,000 character cap.
    query: str


@router.post("/api/wikidata/sparql")
@limiter.limit("30/minute")
def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest):
    """Run a Wikidata SPARQL query on the backend so the browser never contacts query.wikidata.org.

    Args:
        request: Incoming request. The body never reads it.
            NOTE(review): presumably required by the rate limiter — confirm.
        body: Parsed request body carrying the SPARQL text.

    Returns:
        ``{"bindings": ...}`` wrapping the result of
        ``fetch_wikidata_sparql_bindings``.

    Raises:
        HTTPException: 400 when the stripped query exceeds 12,000 characters.
    """
    from services.region_dossier import fetch_wikidata_sparql_bindings

    sparql_text = (body.query or "").strip()
    # Size guard runs before the upstream helper is called.
    if len(sparql_text) > 12_000:
        raise HTTPException(400, "SPARQL query too large")
    return {"bindings": fetch_wikidata_sparql_bindings(sparql_text)}
|
||||
|
||||
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
# operator-only helpers — they MUST NOT be callable by anonymous remote
|
||||
# users, because:
|
||||
#
|
||||
# * /api/sentinel/token — caller supplies their own Sentinel client_id +
|
||||
# client_secret. Without operator gating, the backend becomes a free
|
||||
# anonymous OAuth-mint relay for any Copernicus account.
|
||||
# * /api/sentinel/tile — same shape as the token route but for tile
|
||||
# imagery. Without gating, the backend acts as an anonymous quota and
|
||||
# bandwidth relay for Sentinel Hub Process API calls.
|
||||
# * /api/sentinel2/search — hits the Planetary Computer STAC search API
|
||||
# and falls back to Esri imagery. No caller credentials are involved,
|
||||
# but the route is still an anonymous external-search relay. We gate
|
||||
# it the same way for consistency with the rest of the operator-only
|
||||
# helper surface.
|
||||
#
|
||||
# Gating is via require_local_operator (loopback / bridge / admin key),
|
||||
# matching the same allowlist already used by /api/region-dossier and
|
||||
# the other operator helpers further up this file. Single-operator nodes
|
||||
# see no behavior change — their dashboard already lives on loopback or
|
||||
# the trusted Docker bridge, so it still resolves.
|
||||
@router.get("/api/sentinel2/search", dependencies=[Depends(require_local_operator)])
|
||||
@router.get("/api/sentinel2/search")
|
||||
@limiter.limit("30/minute")
|
||||
def api_sentinel2_search(
|
||||
request: Request,
|
||||
@@ -153,60 +97,18 @@ def api_sentinel2_search(
|
||||
return search_sentinel2_scene(lat, lng)
|
||||
|
||||
|
||||
# Issue #298 (tg12): Sentinel credentials moved server-side
|
||||
# ---------------------------------------------------------------------------
|
||||
# Previously the frontend kept Copernicus CDSE client_id + client_secret in
|
||||
# browser localStorage / sessionStorage and forwarded them on every tile
|
||||
# request through this proxy. That exposed real third-party credentials to
|
||||
# any same-origin script (XSS, malicious browser extension, dev-tools HAR
|
||||
# export).
|
||||
#
|
||||
# Resolution order (first match wins):
|
||||
# 1. Request body — kept for back-compat. A small number of legacy
|
||||
# operator setups may still post credentials; we don't break them.
|
||||
# 2. Backend .env — SENTINEL_CLIENT_ID / SENTINEL_CLIENT_SECRET, managed
|
||||
# through the existing /api/settings/api-keys flow (admin-gated).
|
||||
#
|
||||
# The frontend in ``sentinelHub.ts`` no longer reads browser storage and no
|
||||
# longer forwards credentials — every dashboard request now lands in (2).
|
||||
# The require_local_operator gate (added in #303/PR #303) stays — both layers
|
||||
# are independent: the gate blocks anonymous callers, the env fallback lets
|
||||
# legitimate (gated) callers omit credentials from the body.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _resolve_sentinel_credentials(body_id: str, body_secret: str) -> tuple[str, str]:
    """Resolve the Sentinel (client_id, client_secret) pair for a proxy call.

    Each value is resolved independently: a non-blank request-body value
    wins, otherwise the matching backend environment variable
    (``SENTINEL_CLIENT_ID`` / ``SENTINEL_CLIENT_SECRET``) is used.

    Args:
        body_id: ``client_id`` taken from the request body; may be empty,
            ``None``, or a non-string value decoded from JSON.
        body_secret: ``client_secret`` taken from the request body, with the
            same tolerance.

    Returns:
        ``(client_id, client_secret)``, both whitespace-stripped. Either is
        an empty string when neither the body nor the environment supplies it.
    """
    import os as _os

    def _pick(body_value, env_key: str) -> str:
        # Coerce before stripping: a JSON body can carry a number here, and
        # calling .strip() on it directly would raise AttributeError.
        supplied = str(body_value or "").strip()
        if supplied:
            return supplied
        return str(_os.environ.get(env_key, "") or "").strip()

    return (
        _pick(body_id, "SENTINEL_CLIENT_ID"),
        _pick(body_secret, "SENTINEL_CLIENT_SECRET"),
    )
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token", dependencies=[Depends(require_local_operator)])
|
||||
@router.post("/api/sentinel/token")
|
||||
@limiter.limit("60/minute")
|
||||
async def api_sentinel_token(request: Request):
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block).
|
||||
|
||||
Credentials are resolved by ``_resolve_sentinel_credentials`` — body
|
||||
fields are honored for back-compat, otherwise the backend .env values
|
||||
populated through ``/api/settings/api-keys`` are used.
|
||||
"""
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block)."""
|
||||
import requests as req
|
||||
body = await request.body()
|
||||
from urllib.parse import parse_qs
|
||||
params = parse_qs(body.decode("utf-8"))
|
||||
body_id = params.get("client_id", [""])[0]
|
||||
body_secret = params.get("client_secret", [""])[0]
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
client_id = params.get("client_id", [""])[0]
|
||||
client_secret = params.get("client_secret", [""])[0]
|
||||
if not client_id or not client_secret:
|
||||
# Friendly, non-hostile error — points the operator at the place
|
||||
# they configure other API keys instead of just saying "required".
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
raise HTTPException(400, "client_id and client_secret required")
|
||||
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
||||
try:
|
||||
resp = await asyncio.to_thread(req.post, token_url,
|
||||
@@ -250,7 +152,7 @@ import os as _os
|
||||
_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/tile", dependencies=[Depends(require_local_operator)])
|
||||
@router.post("/api/sentinel/tile")
|
||||
@limiter.limit("300/minute")
|
||||
async def api_sentinel_tile(request: Request):
|
||||
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
||||
@@ -261,11 +163,8 @@ async def api_sentinel_tile(request: Request):
|
||||
except Exception:
|
||||
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
||||
|
||||
# Issue #298: same resolution order as /api/sentinel/token — body
|
||||
# values for back-compat, otherwise backend .env.
|
||||
body_id = body.get("client_id", "")
|
||||
body_secret = body.get("client_secret", "")
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
client_id = body.get("client_id", "")
|
||||
client_secret = body.get("client_secret", "")
|
||||
preset = body.get("preset", "TRUE-COLOR")
|
||||
date_str = body.get("date", "")
|
||||
z = body.get("z", 0)
|
||||
@@ -273,16 +172,7 @@ async def api_sentinel_tile(request: Request):
|
||||
y = body.get("y", 0)
|
||||
|
||||
if not client_id or not client_secret or not date_str:
|
||||
# Distinguish "no creds" from "no date" so the operator knows
|
||||
# what to fix. Same friendly pointer as the /token route.
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
raise HTTPException(400, "date required")
|
||||
raise HTTPException(400, "client_id, client_secret, and date required")
|
||||
|
||||
now = _time.time()
|
||||
credential_fp = _credential_fingerprint(client_id, client_secret)
|
||||
|
||||
@@ -160,13 +160,8 @@ router = APIRouter()
|
||||
|
||||
# --- Constants ---
|
||||
|
||||
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||
# "is this on?" boolean. Transport choice, anonymous-mode state, and
|
||||
# the named privacy profile were all leaking actionable recon to
|
||||
# unauthenticated callers and are now gated behind authenticated reads.
|
||||
# See the matching block in backend/main.py for the full rationale.
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||
|
||||
@@ -20,17 +20,7 @@ OUT_PATH = Path(__file__).parent.parent / "data" / "power_plants.json"
|
||||
|
||||
def main() -> None:
|
||||
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
||||
# Round 7a: release-time data refresher. Uses the per-operator UA if
|
||||
# available, otherwise a release-script-specific identifier. This
|
||||
# script is run by the maintainer at release time, NOT at runtime,
|
||||
# so an aggregate UA is acceptable; we still use the helper so the
|
||||
# behavior matches the rest of the project.
|
||||
try:
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua = outbound_user_agent("release-script-power-plants")
|
||||
except Exception:
|
||||
ua = "operator-release-script (purpose: power-plants)"
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
|
||||
|
||||
@@ -167,11 +167,6 @@ def cmd_hash(args: argparse.Namespace) -> int:
|
||||
print("")
|
||||
print("Updater pin:")
|
||||
print(f"MESH_UPDATE_SHA256={digest}")
|
||||
print("")
|
||||
print("Release checklist:")
|
||||
print(" - add this digest to SHA256SUMS.txt for the GitHub release")
|
||||
print(" - add/update backend/data/release_digests.json for bundled updater verification")
|
||||
print(" - keep MESH_UPDATE_SHA256 available as the operator override path")
|
||||
return 0 if asset_matches else 2
|
||||
|
||||
|
||||
|
||||
@@ -92,37 +92,18 @@ SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token
|
||||
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
||||
|
||||
if [[ -n "$TEXT_FILES" ]]; then
|
||||
# Known-public exclusions: lines matching `<host-or-ip> ssh-<algo> <key>`
|
||||
# are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
|
||||
# by definition safe to commit (the whole point of pinning known_hosts
|
||||
# is to publish the fingerprint widely so MITM is detectable). Filter
|
||||
# these out before flagging the file.
|
||||
KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
|
||||
|
||||
# Use grep with file list, skip missing/binary, limit output
|
||||
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
||||
if [[ -n "$CONTENT_HITS" ]]; then
|
||||
REAL_HITS=""
|
||||
REAL_REPORT=""
|
||||
while IFS= read -r f; do
|
||||
[[ -z "$f" ]] && continue
|
||||
# Re-grep this file, but filter out known_hosts-style lines.
|
||||
FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "$KNOWN_HOSTS_LINE" || true)
|
||||
if [[ -n "$FILE_HITS" ]]; then
|
||||
REAL_HITS+="$f"$'\n'
|
||||
REAL_REPORT+=" ${RED}$f${NC}"$'\n'
|
||||
# Show first 2 matching lines for context
|
||||
while IFS= read -r line; do
|
||||
[[ -z "$line" ]] && continue
|
||||
REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
|
||||
done < <(echo "$FILE_HITS" | head -2)
|
||||
fi
|
||||
done <<< "$CONTENT_HITS"
|
||||
if [[ -n "$REAL_HITS" ]]; then
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo -en "$REAL_REPORT"
|
||||
FOUND=1
|
||||
fi
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo "$CONTENT_HITS" | while read -r f; do
|
||||
echo -e " ${RED}$f${NC}"
|
||||
# Show first matching line for context
|
||||
grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | head -2 | while read -r line; do
|
||||
echo -e " ${YELLOW}$line${NC}"
|
||||
done
|
||||
done
|
||||
FOUND=1
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
@@ -350,58 +350,19 @@ _proxy_process = None
|
||||
# path during an upstream cert outage. Surfaced via ais_proxy_status() for
|
||||
# /api/health.
|
||||
_proxy_status: dict = {}
|
||||
# Upstream-connectivity telemetry (added when stream.aisstream.io went fully
|
||||
# offline on 2026-05-23). ``_last_msg_at`` is the unix timestamp of the most
|
||||
# recent vessel message received from the proxy. ``_proxy_spawn_count`` is
|
||||
# how many times we've started the node proxy; combined with no recent
|
||||
# messages it tells us the proxy is respawning in a tight loop because the
|
||||
# upstream is unreachable. Surfaced via ais_proxy_status() so the operator
|
||||
# can see "AIS is dead" instead of guessing whether it's their map filter,
|
||||
# their api key, or upstream.
|
||||
_last_msg_at: float = 0.0
|
||||
_proxy_spawn_count: int = 0
|
||||
_VESSEL_TRAIL_INTERVAL_S = 120
|
||||
_VESSEL_TRAIL_MAX_POINTS = 240
|
||||
|
||||
|
||||
# How stale "last vessel message" can be before we consider the stream
|
||||
# disconnected. AISStream typically pushes multiple messages/sec, so a 60s
|
||||
# gap means something's wrong upstream or in transit.
|
||||
_AIS_CONNECTED_FRESHNESS_S = 60
|
||||
|
||||
|
||||
def ais_proxy_status() -> dict:
|
||||
"""Return a copy of the latest ais_proxy.js status + connectivity health.
|
||||
"""Return a copy of the latest ais_proxy.js status (issue #258).
|
||||
|
||||
Fields:
|
||||
* ``degraded_tls`` (bool, issue #258) — true when the proxy is using
|
||||
SPKI-pinned fallback because AISStream's cert expired.
|
||||
* ``connected`` (bool) — true when we received a vessel message in
|
||||
the last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
|
||||
* ``last_msg_age_seconds`` (int | None) — seconds since the last
|
||||
vessel message; None if we've never received one.
|
||||
* ``proxy_spawn_count`` (int) — how many times we've spawned the
|
||||
node proxy. Sustained increases here without ``connected`` means
|
||||
we're respawning in a tight loop because upstream is dead.
|
||||
|
||||
Returns an empty dict when called before the AIS subsystem starts
|
||||
(e.g. during tests or when no API key is set).
|
||||
Currently surfaces ``degraded_tls`` (bool) which is true when the
|
||||
proxy is using SPKI-pinned fallback because AISStream's cert expired.
|
||||
Returns an empty dict when no status has been received yet.
|
||||
"""
|
||||
with _vessels_lock:
|
||||
status = dict(_proxy_status)
|
||||
last = _last_msg_at
|
||||
spawns = _proxy_spawn_count
|
||||
|
||||
now = time.time()
|
||||
if last > 0:
|
||||
last_age = int(now - last)
|
||||
status["last_msg_age_seconds"] = last_age
|
||||
status["connected"] = last_age <= _AIS_CONNECTED_FRESHNESS_S
|
||||
else:
|
||||
status["last_msg_age_seconds"] = None
|
||||
status["connected"] = False
|
||||
status["proxy_spawn_count"] = spawns
|
||||
return status
|
||||
return dict(_proxy_status)
|
||||
|
||||
import os
|
||||
|
||||
@@ -627,10 +588,8 @@ def _ais_stream_loop():
|
||||
env=proxy_env,
|
||||
**popen_kwargs,
|
||||
)
|
||||
global _proxy_spawn_count
|
||||
with _vessels_lock:
|
||||
_proxy_process = process
|
||||
_proxy_spawn_count += 1
|
||||
|
||||
# Drain stderr in a background thread to prevent deadlock
|
||||
import threading
|
||||
@@ -686,15 +645,9 @@ def _ais_stream_loop():
|
||||
if not mmsi:
|
||||
continue
|
||||
|
||||
# Telemetry: stamp the timestamp of the most recent real
|
||||
# vessel message. ais_proxy_status() reads this to decide
|
||||
# whether the stream is currently "connected" — i.e. has
|
||||
# any data flowed in the last 60s.
|
||||
global _last_msg_at
|
||||
with _vessels_lock:
|
||||
_last_msg_at = time.time()
|
||||
if mmsi not in _vessels:
|
||||
_vessels[mmsi] = {"_updated": _last_msg_at}
|
||||
_vessels[mmsi] = {"_updated": time.time()}
|
||||
vessel = _vessels[mmsi]
|
||||
|
||||
# Update position from PositionReport or StandardClassBPositionReport
|
||||
|
||||
@@ -150,31 +150,6 @@ API_REGISTRY = [
|
||||
"url": "https://finnhub.io/register",
|
||||
"required": False,
|
||||
},
|
||||
# Issue #298 (tg12): Sentinel Hub / Copernicus Data Space Ecosystem
|
||||
# credentials were previously held in browser localStorage / sessionStorage
|
||||
# by the Settings panel. Moved server-side to the same .env-backed
|
||||
# store every other third-party API key lives in. The Sentinel proxy
|
||||
# routes (POST /api/sentinel/token, /tile) now fall back to these
|
||||
# env values when the request body omits credentials — see
|
||||
# backend/routers/tools.py for the resolution order.
|
||||
{
|
||||
"id": "sentinel_client_id",
|
||||
"env_key": "SENTINEL_CLIENT_ID",
|
||||
"name": "Sentinel Hub / Copernicus — Client ID",
|
||||
"description": "OAuth2 client ID for Copernicus Data Space Ecosystem (CDSE). Required for the Sentinel-2 imagery overlay and the right-click Sentinel-2 Intel Card. Sign in at dataspace.copernicus.eu and create OAuth credentials.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "sentinel_client_secret",
|
||||
"env_key": "SENTINEL_CLIENT_SECRET",
|
||||
"name": "Sentinel Hub / Copernicus — Client Secret",
|
||||
"description": "OAuth2 client secret paired with the Client ID above. Used by the backend to mint short-lived access tokens against the CDSE identity provider. Stored in the backend .env; never sent to the browser.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
|
||||
ALLOWED_ENV_KEYS = {
|
||||
|
||||
+173
-407
@@ -1,90 +1,46 @@
|
||||
"""
|
||||
Carrier Strike Group OSINT Tracker
|
||||
===================================
|
||||
Maintains estimated positions for US Navy Carrier Strike Groups with
|
||||
honest provenance and freshness signals.
|
||||
Scrapes multiple OSINT sources to maintain current estimated positions
|
||||
for US Navy Carrier Strike Groups. Updates on startup + 00:00 & 12:00 UTC.
|
||||
|
||||
Issues #244 / #245 / #246 (tg12 external audit):
|
||||
|
||||
The previous implementation baked a snapshot of USNI News Fleet &
|
||||
Marine Tracker positions (March 9, 2026) into the registry as
|
||||
``fallback_lat``/``fallback_lng`` and stamped ``updated = now()``
|
||||
every time the dossier was rendered. That presented stale editorial
|
||||
data as live state. It also persisted GDELT-derived positions to the
|
||||
on-disk cache with no freshness signal, so a single news mention from
|
||||
months ago could keep overriding the (already-stale) registry default
|
||||
indefinitely.
|
||||
|
||||
Architecture after this PR:
|
||||
|
||||
::
|
||||
|
||||
backend/data/carrier_seed.json read-only, shipped with image,
|
||||
used ONCE on first-ever startup
|
||||
to bootstrap carrier_cache.json.
|
||||
|
||||
backend/data/carrier_cache.json mutable, lives in the runtime data
|
||||
volume, written by every GDELT
|
||||
refresh + any future source.
|
||||
|
||||
Startup flow:
|
||||
|
||||
1. ``carrier_cache.json`` exists? → load it.
|
||||
2. Otherwise, copy ``carrier_seed.json`` → ``carrier_cache.json``,
|
||||
then load it. (This happens once, ever, per install.)
|
||||
3. Background: GDELT fetch runs. Any carrier mentioned in fresh news
|
||||
gets its entry replaced with the news-derived position.
|
||||
``position_source_at`` is set to the news article timestamp.
|
||||
|
||||
Freshness is a *labelling* decision, not an eviction decision:
|
||||
|
||||
- ``position_source_at`` within the configurable freshness window
|
||||
(default 14 days) → ``position_confidence = "recent"``.
|
||||
- Older than that → ``position_confidence = "stale"``.
|
||||
- Bootstrapped from the seed file (never updated) → ``"seed"``.
|
||||
- No cache entry at all (e.g. a carrier added to the registry after
|
||||
first install) → carrier renders at its homeport with
|
||||
``"homeport_default"``.
|
||||
|
||||
Carriers are never hidden, never teleported, never disappeared. The
|
||||
position the user sees is always the last position the system actually
|
||||
observed, with an honest "as-of" timestamp the UI can render however
|
||||
it likes. A year from now, the runtime cache reflects whatever this
|
||||
install has observed via GDELT — not the seed snapshot.
|
||||
Sources:
|
||||
1. GDELT News API — recent carrier movement headlines
|
||||
2. WikiVoyage / public port-call databases
|
||||
3. Fallback — last-known or static OSINT estimates
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import random
|
||||
import shutil
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Carrier registry: hull number → identity only.
|
||||
#
|
||||
# Issue #244 (tg12): the previous registry carried hard-coded
|
||||
# ``fallback_lat``/``fallback_lng`` that were dated editorial
|
||||
# snapshots from a 2026-03-09 article. Those fields are DELETED. The
|
||||
# registry is now identity + homeport only; positions are sourced
|
||||
# exclusively from carrier_cache.json (and via that, from the
|
||||
# bootstrap seed or live OSINT).
|
||||
# Carrier registry: hull number → metadata + fallback position
|
||||
# -----------------------------------------------------------------
|
||||
CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
|
||||
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
|
||||
# --- Bremerton, WA (Naval Base Kitsap) ---
|
||||
# Distinct pier positions along Sinclair Inlet so carriers don't stack
|
||||
"CVN-68": {
|
||||
"name": "USS Nimitz (CVN-68)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5535,
|
||||
"homeport_lng": -122.6400,
|
||||
"fallback_lat": 47.5535,
|
||||
"fallback_lng": -122.6400,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Maintenance)",
|
||||
},
|
||||
"CVN-76": {
|
||||
"name": "USS Ronald Reagan (CVN-76)",
|
||||
@@ -92,14 +48,23 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5580,
|
||||
"homeport_lng": -122.6360,
|
||||
"fallback_lat": 47.5580,
|
||||
"fallback_lng": -122.6360,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Decommissioning)",
|
||||
},
|
||||
# --- Norfolk, VA (Naval Station Norfolk) ---
|
||||
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
|
||||
"CVN-69": {
|
||||
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9465,
|
||||
"homeport_lng": -76.3265,
|
||||
"fallback_lat": 36.9465,
|
||||
"fallback_lng": -76.3265,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
},
|
||||
"CVN-78": {
|
||||
"name": "USS Gerald R. Ford (CVN-78)",
|
||||
@@ -107,6 +72,10 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9505,
|
||||
"homeport_lng": -76.3250,
|
||||
"fallback_lat": 18.0,
|
||||
"fallback_lng": 39.5,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
"CVN-74": {
|
||||
"name": "USS John C. Stennis (CVN-74)",
|
||||
@@ -114,6 +83,10 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9540,
|
||||
"homeport_lng": -76.3235,
|
||||
"fallback_lat": 36.98,
|
||||
"fallback_lng": -76.43,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
},
|
||||
"CVN-75": {
|
||||
"name": "USS Harry S. Truman (CVN-75)",
|
||||
@@ -121,6 +94,10 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9580,
|
||||
"homeport_lng": -76.3220,
|
||||
"fallback_lat": 36.0,
|
||||
"fallback_lng": 15.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
},
|
||||
"CVN-77": {
|
||||
"name": "USS George H.W. Bush (CVN-77)",
|
||||
@@ -128,14 +105,23 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9620,
|
||||
"homeport_lng": -76.3210,
|
||||
"fallback_lat": 36.5,
|
||||
"fallback_lng": -74.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
},
|
||||
# --- San Diego, CA (Naval Base San Diego) ---
|
||||
# Carrier piers along the east shore of San Diego Bay, spread N-S
|
||||
"CVN-70": {
|
||||
"name": "USS Carl Vinson (CVN-70)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6840,
|
||||
"homeport_lng": -117.1290,
|
||||
"fallback_lat": 32.6840,
|
||||
"fallback_lng": -117.1290,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Homeport)",
|
||||
},
|
||||
"CVN-71": {
|
||||
"name": "USS Theodore Roosevelt (CVN-71)",
|
||||
@@ -143,6 +129,10 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6885,
|
||||
"homeport_lng": -117.1280,
|
||||
"fallback_lat": 32.6885,
|
||||
"fallback_lng": -117.1280,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Maintenance)",
|
||||
},
|
||||
"CVN-72": {
|
||||
"name": "USS Abraham Lincoln (CVN-72)",
|
||||
@@ -150,6 +140,10 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6925,
|
||||
"homeport_lng": -117.1275,
|
||||
"fallback_lat": 20.0,
|
||||
"fallback_lng": 64.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
# --- Yokosuka, Japan (CFAY) ---
|
||||
"CVN-73": {
|
||||
@@ -158,18 +152,16 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Yokosuka, Japan",
|
||||
"homeport_lat": 35.2830,
|
||||
"homeport_lng": 139.6700,
|
||||
"fallback_lat": 35.2830,
|
||||
"fallback_lng": 139.6700,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "Yokosuka, Japan (Forward deployed)",
|
||||
},
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Region → approximate center coordinates.
|
||||
#
|
||||
# Issue #245 (tg12): converting a region name straight into precise
|
||||
# map coordinates is false precision. We still use this table to
|
||||
# infer a coarse position from a headline mention, but the resulting
|
||||
# carrier object is now stamped ``position_confidence = "approximate"``
|
||||
# so the UI can render an uncertainty radius / dimmed icon. The
|
||||
# centroid is a best-effort midpoint of the named body of water.
|
||||
# Region → approximate center coordinates
|
||||
# Used to map textual geographic descriptions to lat/lng
|
||||
# -----------------------------------------------------------------
|
||||
REGION_COORDS: Dict[str, tuple] = {
|
||||
# Oceans & Seas
|
||||
@@ -228,39 +220,9 @@ REGION_COORDS: Dict[str, tuple] = {
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Files
|
||||
# Cache file for persisting positions between restarts
|
||||
# -----------------------------------------------------------------
|
||||
#
|
||||
# The seed lives in the read-only image data dir (it ships with each
|
||||
# release). The cache lives in the same data dir but is written at
|
||||
# runtime; under Docker compose this dir is volume-mounted so the
|
||||
# cache persists across container restarts, which is the whole point
|
||||
# of the seed-then-observe model — the user's runtime observations
|
||||
# survive image upgrades.
|
||||
SEED_FILE = Path(__file__).parent.parent / "data" / "carrier_seed.json"
|
||||
CACHE_FILE = Path(__file__).parent.parent / "data" / "carrier_cache.json"
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Freshness window for position_confidence labeling. Issue #246 (tg12):
|
||||
# previously persisted cache entries had no freshness signal at all.
|
||||
# After this change, the position itself is preserved (we never lose
|
||||
# what was last observed) but the confidence label flips from
|
||||
# "recent" to "stale" once the underlying source is older than this
|
||||
# window. Operator-overridable via env var.
|
||||
# -----------------------------------------------------------------
|
||||
_DEFAULT_FRESHNESS_WINDOW_DAYS = 14


def _freshness_window_days() -> int:
    """Return the freshness window, in days, used for position-confidence labels.

    Reads ``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` from the environment. A
    missing, blank, non-integer, zero, or negative value yields
    ``_DEFAULT_FRESHNESS_WINDOW_DAYS``.
    """
    configured = str(os.environ.get("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "") or "").strip()
    if configured:
        try:
            days = int(configured)
        except (TypeError, ValueError):
            # Unparseable override: fall through to the default below.
            days = 0
        if days > 0:
            return days
    return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
|
||||
CACHE_FILE = Path(__file__).parent.parent / "carrier_cache.json"
|
||||
|
||||
_carrier_positions: Dict[str, dict] = {}
|
||||
_positions_lock = threading.Lock()
|
||||
@@ -272,159 +234,25 @@ _GDELT_REQUEST_DELAY_SECONDS = 1.25
|
||||
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _parse_iso(ts: str) -> Optional[datetime]:
|
||||
if not ts:
|
||||
return None
|
||||
try:
|
||||
# Python's fromisoformat accepts +00:00 but not 'Z' until 3.11.
|
||||
normalized = ts.replace("Z", "+00:00")
|
||||
dt = datetime.fromisoformat(normalized)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _compute_position_confidence(entry: dict, *, now: Optional[datetime] = None) -> str:
|
||||
"""Return the public confidence label for a carrier cache entry.
|
||||
|
||||
Order of precedence:
|
||||
- explicit "homeport_default" / "seed" labels are preserved.
|
||||
- dated entries (with position_source_at) are "recent" if within
|
||||
the configured freshness window, else "stale".
|
||||
- missing position_source_at falls through to "stale".
|
||||
"""
|
||||
raw_label = str(entry.get("position_confidence", "") or "").strip()
|
||||
# Explicit "kind of provenance" labels are preserved as-is. They
|
||||
# describe HOW we got the position, not WHEN — a fresh headline-to-
|
||||
# centroid match (#245) is still imprecise no matter how recently
|
||||
# it was observed, and the seed (#244) is always the seed.
|
||||
if raw_label in {"seed", "homeport_default", "approximate"}:
|
||||
# Approximate entries can still age into "stale_approximate" if
|
||||
# they fall out of the freshness window — that distinction lets
|
||||
# the UI render a different badge for old-and-imprecise vs
|
||||
# recent-and-imprecise. seed/homeport_default never age (they
|
||||
# were never timestamped against real observations).
|
||||
if raw_label == "approximate":
|
||||
source_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
|
||||
if source_at is not None:
|
||||
reference = now or datetime.now(timezone.utc)
|
||||
if reference - source_at > timedelta(days=_freshness_window_days()):
|
||||
return "stale_approximate"
|
||||
return raw_label
|
||||
|
||||
source_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
|
||||
if not source_at:
|
||||
return "stale"
|
||||
|
||||
reference = now or datetime.now(timezone.utc)
|
||||
window = timedelta(days=_freshness_window_days())
|
||||
if reference - source_at <= window:
|
||||
return "recent"
|
||||
return "stale"
|
||||
|
||||
|
||||
def _load_seed() -> Dict[str, dict]:
    """Load the read-only seed file shipped with the image.

    Returns:
        A hull→entry dict (no ``_meta`` wrapper) holding only usable
        entries: each value must be a dict carrying both ``lat`` and
        ``lng``. A missing or malformed seed file yields an empty dict, and
        the caller falls back to homeport defaults.
    """
    try:
        # Read directly instead of exists()-then-read so a file removed in
        # between is handled the same way as one that was never there.
        text = SEED_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        logger.info("Carrier seed file not present at %s; first-run will fall back to homeport defaults", SEED_FILE)
        return {}
    except (IOError, OSError, ValueError) as e:
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, e)
        return {}

    try:
        raw = json.loads(text)
    except (json.JSONDecodeError, ValueError) as e:
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, e)
        return {}

    carriers = raw.get("carriers", {}) if isinstance(raw, dict) else {}
    if not isinstance(carriers, dict):
        return {}

    # Entries are later read with entry.get(...) and entry["lat"] /
    # entry["lng"]; drop anything that would raise there so one bad row
    # cannot take down the whole carrier layer.
    usable = {
        hull: entry
        for hull, entry in carriers.items()
        if isinstance(entry, dict) and "lat" in entry and "lng" in entry
    }
    skipped = len(carriers) - len(usable)
    if skipped:
        logger.warning(
            "Carrier seed: skipped %d malformed entries in %s (each needs lat and lng)",
            skipped,
            SEED_FILE,
        )
    logger.info("Carrier seed loaded: %d entries from %s", len(usable), SEED_FILE)
    return usable
|
||||
|
||||
|
||||
def _load_cache() -> Dict[str, dict]:
|
||||
"""Load the mutable cache (last-known positions persisted between restarts)."""
|
||||
"""Load cached carrier positions from disk."""
|
||||
try:
|
||||
if CACHE_FILE.exists():
|
||||
data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict):
|
||||
logger.info("Carrier cache loaded: %d carriers from %s", len(data), CACHE_FILE)
|
||||
return data
|
||||
data = json.loads(CACHE_FILE.read_text())
|
||||
logger.info(f"Carrier cache loaded: {len(data)} carriers from {CACHE_FILE}")
|
||||
return data
|
||||
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
||||
logger.warning("Failed to load carrier cache: %s", e)
|
||||
logger.warning(f"Failed to load carrier cache: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def _save_cache(positions: Dict[str, dict]) -> None:
|
||||
"""Persist the mutable cache. Atomic write (temp + rename) so a crash
|
||||
mid-write can't leave the file truncated."""
|
||||
def _save_cache(positions: Dict[str, dict]):
|
||||
"""Persist carrier positions to disk."""
|
||||
try:
|
||||
CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = CACHE_FILE.with_suffix(CACHE_FILE.suffix + ".tmp")
|
||||
tmp.write_text(json.dumps(positions, indent=2), encoding="utf-8")
|
||||
# On Windows os.replace is atomic and overwrites existing files.
|
||||
os.replace(tmp, CACHE_FILE)
|
||||
logger.info("Carrier cache saved: %d carriers", len(positions))
|
||||
CACHE_FILE.write_text(json.dumps(positions, indent=2))
|
||||
logger.info(f"Carrier cache saved: {len(positions)} carriers")
|
||||
except (IOError, OSError) as e:
|
||||
logger.warning("Failed to save carrier cache: %s", e)
|
||||
|
||||
|
||||
def _homeport_entry_for(hull: str) -> Optional[dict]:
    """Build a homeport-default cache entry for ``hull``.

    Returns ``None`` when the hull has no (or an empty) record in
    ``CARRIER_REGISTRY``. Otherwise the entry places the carrier at its
    registry homeport coordinates, labels it ``"homeport_default"``, and
    stamps ``position_source_at`` with the current time.
    """
    registry_row = CARRIER_REGISTRY.get(hull)
    if registry_row:
        lat = registry_row["homeport_lat"]
        lng = registry_row["homeport_lng"]
        port_name = registry_row["homeport"]
        return {
            "lat": lat,
            "lng": lng,
            "heading": 0,
            "desc": f"{port_name} (no observations yet)",
            "source": f"Homeport default ({port_name})",
            "source_url": registry_row.get("wiki", ""),
            "position_source_at": _now_iso(),
            "position_confidence": "homeport_default",
        }
    return None
|
||||
|
||||
|
||||
def _bootstrap_cache_if_missing() -> Dict[str, dict]:
    """Return the cache contents, creating the cache file on first run.

    When ``CACHE_FILE`` already exists it is simply loaded. Otherwise the
    seed is written out as the first cache, so later restarts never consult
    the seed again. With no usable seed, a cache of homeport-default entries
    is written instead. Deleting the cache file re-triggers this bootstrap,
    which gives operators an explicit way to reset carrier positions.
    """
    if CACHE_FILE.exists():
        return _load_cache()

    seed_entries = _load_seed()
    if seed_entries:
        # Persist the seed so subsequent runs take the early return above.
        _save_cache(seed_entries)
        logger.info("Carrier cache bootstrapped from seed (first-ever startup)")
        return dict(seed_entries)

    # No seed either: fall back to one homeport entry per registered hull.
    candidates = ((hull, _homeport_entry_for(hull)) for hull in CARRIER_REGISTRY)
    fallback: Dict[str, dict] = {
        hull: entry for hull, entry in candidates if entry is not None
    }
    if fallback:
        _save_cache(fallback)
    return fallback
|
||||
logger.warning(f"Failed to save carrier cache: {e}")
|
||||
|
||||
|
||||
def _match_region(text: str) -> Optional[tuple]:
|
||||
@@ -442,8 +270,10 @@ def _match_carrier(text: str) -> Optional[str]:
|
||||
for hull, info in CARRIER_REGISTRY.items():
|
||||
hull_check = hull.lower().replace("-", "")
|
||||
name_parts = info["name"].lower()
|
||||
# Match hull number (e.g., "CVN-78", "CVN78")
|
||||
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
||||
return hull
|
||||
# Match ship name (e.g., "Ford", "Eisenhower", "Vinson")
|
||||
ship_name = name_parts.split("(")[0].strip()
|
||||
last_name = ship_name.split()[-1] if ship_name else ""
|
||||
if last_name and len(last_name) > 3 and last_name in text_lower:
|
||||
@@ -493,9 +323,8 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
articles = data.get("articles", [])
|
||||
for art in articles:
|
||||
title = art.get("title", "")
|
||||
article_url = art.get("url", "")
|
||||
article_at = art.get("seendate") or art.get("date") or ""
|
||||
results.append({"title": title, "url": article_url, "seendate": article_at})
|
||||
url = art.get("url", "")
|
||||
results.append({"title": title, "url": url})
|
||||
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
||||
logger.debug(f"GDELT search failed for '{term}': {e}")
|
||||
continue
|
||||
@@ -511,175 +340,108 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
return results
|
||||
|
||||
|
||||
def _gdelt_seendate_to_iso(seendate: str) -> Optional[str]:
|
||||
"""GDELT returns YYYYMMDDhhmmss (UTC). Convert to ISO8601 for
|
||||
position_source_at. Returns None if the input is unparseable."""
|
||||
raw = (seendate or "").strip()
|
||||
if len(raw) < 8 or not raw.isdigit():
|
||||
return None
|
||||
try:
|
||||
dt = datetime.strptime(raw[:14] if len(raw) >= 14 else raw[:8] + "000000", "%Y%m%d%H%M%S")
|
||||
return dt.replace(tzinfo=timezone.utc).isoformat()
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
|
||||
"""Parse carrier positions from news article titles.
|
||||
|
||||
Issue #245 (tg12): the position is a region centroid, which is
|
||||
coarse — we now stamp ``position_confidence = "approximate"`` so
|
||||
the UI can render that uncertainty. Issue #244: the
|
||||
``position_source_at`` field is the news article's actual seen
|
||||
date, NOT now(), so the freshness check correctly flips entries
|
||||
to "stale" once they age past the configured window.
|
||||
"""
|
||||
"""Parse carrier positions from news article titles and descriptions."""
|
||||
updates: Dict[str, dict] = {}
|
||||
|
||||
for article in articles:
|
||||
title = article.get("title", "")
|
||||
|
||||
# Try to match a carrier from the title
|
||||
hull = _match_carrier(title)
|
||||
if not hull:
|
||||
continue
|
||||
|
||||
# Try to match a region from the title
|
||||
coords = _match_region(title)
|
||||
if not coords:
|
||||
continue
|
||||
|
||||
# First match wins (most recent article, GDELT returns newest first
|
||||
# per term).
|
||||
# Only update if we haven't seen this carrier yet (first match wins — most recent)
|
||||
if hull not in updates:
|
||||
iso_at = _gdelt_seendate_to_iso(str(article.get("seendate", ""))) or _now_iso()
|
||||
updates[hull] = {
|
||||
"lat": coords[0],
|
||||
"lng": coords[1],
|
||||
"heading": 0,
|
||||
"desc": title[:100],
|
||||
"source": "GDELT News API (headline region match — approximate)",
|
||||
"source": "GDELT News API",
|
||||
"source_url": article.get("url", "https://api.gdeltproject.org"),
|
||||
"position_source_at": iso_at,
|
||||
# Headline-to-centroid match is explicitly approximate.
|
||||
"position_confidence": "approximate",
|
||||
"updated": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
logger.info(
|
||||
"Carrier update: %s → %s (from: %s)",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
coords,
|
||||
title[:80],
|
||||
f"Carrier update: {CARRIER_REGISTRY[hull]['name']} → {coords} (from: {title[:80]})"
|
||||
)
|
||||
|
||||
return updates
|
||||
|
||||
|
||||
def _enrich_for_rendering(hull: str, entry: dict, *, now: Optional[datetime] = None) -> dict:
    """Build the public-facing carrier object from a persisted cache entry.

    The persisted ``entry`` is not modified. The returned dict adds the
    live-computed confidence label and an ``is_fallback`` flag, and fills
    ``name`` / ``wiki`` from ``CARRIER_REGISTRY`` when the entry lacks them.

    Args:
        hull: Registry key for the carrier.
        entry: Cache entry; must contain ``lat`` and ``lng``.
        now: Reference time forwarded to the confidence computation.
    """
    registry_row = CARRIER_REGISTRY.get(hull, {})
    label = _compute_position_confidence(entry, now=now)
    rendered = {
        "name": entry.get("name", registry_row.get("name", hull)),
        "lat": entry["lat"],
        "lng": entry["lng"],
        "heading": entry.get("heading", 0),
        "desc": entry.get("desc", ""),
        "wiki": entry.get("wiki", registry_row.get("wiki", "")),
        "source": entry.get("source", "OSINT estimated position"),
        "source_url": entry.get("source_url", ""),
        "position_source_at": entry.get("position_source_at", ""),
        "position_confidence": label,
        # Kept for the existing frontend popup; carries the source's own
        # observation time rather than the time of this call.
        "last_osint_update": entry.get("position_source_at", ""),
        # True when the position is not live OSINT, so the UI can dim it.
        "is_fallback": label in ("seed", "stale", "stale_approximate", "homeport_default"),
    }
    return rendered
|
||||
def _load_carrier_fallbacks() -> Dict[str, dict]:
    """Assemble carrier positions from registry fallbacks plus the disk cache.

    Every hull in ``CARRIER_REGISTRY`` starts at its static fallback
    position. A cached entry then overrides position, description, source
    and update time, but only for registered hulls and only when its
    ``source`` starts with ``"GDELT"`` or ``"News"``.
    """
    baseline: Dict[str, dict] = {
        hull: {
            "name": info["name"],
            "lat": info["fallback_lat"],
            "lng": info["fallback_lng"],
            "heading": info["fallback_heading"],
            "desc": info["fallback_desc"],
            "wiki": info["wiki"],
            "source": "USNI News Fleet & Marine Tracker",
            "source_url": "https://news.usni.org/category/fleet-tracker",
            "updated": datetime.now(timezone.utc).isoformat(),
        }
        for hull, info in CARRIER_REGISTRY.items()
    }

    # Layer positions persisted by earlier runs on top of the static table.
    for hull, cached_entry in _load_cache().items():
        current = baseline.get(hull)
        if current is None:
            continue
        if not cached_entry.get("source", "").startswith(("GDELT", "News")):
            continue
        current.update(
            lat=cached_entry["lat"],
            lng=cached_entry["lng"],
            desc=cached_entry.get("desc", current["desc"]),
            source=cached_entry.get("source", "Cached OSINT"),
            updated=cached_entry.get("updated", ""),
        )
    return baseline
|
||||
|
||||
|
||||
def update_carrier_positions() -> None:
|
||||
"""Refresh carrier positions.
|
||||
def update_carrier_positions():
|
||||
"""Main update function — called on startup and every 12h.
|
||||
|
||||
Phase 1 (instant): publish whatever's in carrier_cache.json (or
|
||||
bootstrap from seed on first-ever run), so the map has carriers
|
||||
immediately.
|
||||
|
||||
Phase 2 (slow): query GDELT and replace position entries for any
|
||||
carrier mentioned in fresh news. Persist back to cache.
|
||||
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
|
||||
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
|
||||
"""
|
||||
global _last_update
|
||||
|
||||
# --- Phase 1: instant cache (bootstrap from seed on first-ever run) ---
|
||||
positions = _bootstrap_cache_if_missing()
|
||||
|
||||
# Ensure every registered hull has SOMETHING in the cache. A hull
|
||||
# the seed didn't cover (e.g. added after install) renders at its
|
||||
# homeport with "homeport_default" confidence.
|
||||
for hull in CARRIER_REGISTRY:
|
||||
if hull not in positions:
|
||||
entry = _homeport_entry_for(hull)
|
||||
if entry is not None:
|
||||
positions[hull] = entry
|
||||
# --- Phase 1: instant fallback + cache ---
|
||||
positions = _load_carrier_fallbacks()
|
||||
|
||||
with _positions_lock:
|
||||
# Only overwrite if positions are currently empty (first startup).
|
||||
# If we already have data from a previous cycle, keep it while GDELT runs.
|
||||
if not _carrier_positions:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)"
|
||||
)
|
||||
|
||||
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||
#
|
||||
# USNI publishes a weekly editorial tracker with each carrier's
|
||||
# actual operating area, parsed from explicit prose like
|
||||
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
# These positions are tagged ``position_confidence: "recent"`` because
|
||||
# they reflect actual reporting, not headline-keyword centroids.
|
||||
# USNI updates are preferred over GDELT — they're authoritative on
|
||||
# US Navy positions where GDELT is just article-title text mining.
|
||||
try:
|
||||
from services.fetchers.usni_fleet_tracker import (
|
||||
fetch_latest_fleet_tracker_positions,
|
||||
)
|
||||
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||
for hull, pos in usni_positions.items():
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier USNI update: %s → %s",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
pos.get("desc", ""),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||
|
||||
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||
#
|
||||
# Used only to backfill carriers USNI didn't mention this week. The
|
||||
# position is stamped ``approximate`` so the UI knows it's a
|
||||
# headline-centroid match (Issue #245).
|
||||
# --- Phase 2: slow GDELT enrichment ---
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
# Only overwrite if the existing entry is NOT a recent USNI
|
||||
# observation. A "recent" USNI position is higher-confidence
|
||||
# than a GDELT headline-centroid match — don't let GDELT
|
||||
# demote a real position to an approximate one.
|
||||
existing = positions.get(hull, {})
|
||||
existing_conf = _compute_position_confidence(existing)
|
||||
if existing_conf == "recent":
|
||||
continue
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier OSINT: updated %s from GDELT news",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
)
|
||||
if hull in positions:
|
||||
positions[hull].update(pos)
|
||||
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
logger.warning(f"GDELT carrier fetch failed: {e}")
|
||||
|
||||
# Save and update the global state with enriched positions
|
||||
with _positions_lock:
|
||||
_carrier_positions.clear()
|
||||
_carrier_positions.update(positions)
|
||||
@@ -687,15 +449,21 @@ def update_carrier_positions() -> None:
|
||||
|
||||
_save_cache(positions)
|
||||
|
||||
confidences: Dict[str, int] = {}
|
||||
for entry in positions.values():
|
||||
label = _compute_position_confidence(entry)
|
||||
confidences[label] = confidences.get(label, 0) + 1
|
||||
logger.info("Carrier tracker: %d carriers updated. Confidence: %s", len(positions), confidences)
|
||||
sources = {}
|
||||
for p in positions.values():
|
||||
src = p.get("source", "unknown")
|
||||
sources[src] = sources.get(src, 0) + 1
|
||||
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
|
||||
|
||||
|
||||
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
"""Offset carriers that share identical coordinates so they don't stack."""
|
||||
"""Offset carriers that share identical coordinates so they don't stack.
|
||||
|
||||
At port: offset along the pier axis (~500m / 0.004° apart).
|
||||
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
|
||||
so they're visibly separate but clearly operating together.
|
||||
"""
|
||||
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
|
||||
from collections import defaultdict
|
||||
|
||||
groups: dict[str, list[int]] = defaultdict(list)
|
||||
@@ -707,6 +475,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
if len(indices) < 2:
|
||||
continue
|
||||
n = len(indices)
|
||||
# Determine if this is a port (near a homeport) or at sea
|
||||
sample = result[indices[0]]
|
||||
at_port = any(
|
||||
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
||||
@@ -715,6 +484,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
)
|
||||
|
||||
if at_port:
|
||||
# Use each carrier's distinct homeport pier coordinates
|
||||
for idx in indices:
|
||||
carrier = result[idx]
|
||||
hull = None
|
||||
@@ -727,7 +497,8 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
carrier["lat"] = info["homeport_lat"]
|
||||
carrier["lng"] = info["homeport_lng"]
|
||||
else:
|
||||
spacing = 0.08
|
||||
# At sea: spread in a line perpendicular to travel (~0.08° apart)
|
||||
spacing = 0.08 # ~9km — close enough to see they're together
|
||||
start_offset = -(n - 1) * spacing / 2
|
||||
for j, idx in enumerate(indices):
|
||||
result[idx]["lng"] += start_offset + j * spacing
|
||||
@@ -736,44 +507,36 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
|
||||
|
||||
def get_carrier_positions() -> List[dict]:
|
||||
"""Return current carrier positions for the data pipeline.
|
||||
|
||||
Each entry has the full provenance + freshness fields; the UI can
|
||||
decide how to render them. Carriers are never hidden — only
|
||||
labeled.
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
"""Return current carrier positions for the data pipeline."""
|
||||
with _positions_lock:
|
||||
result: List[dict] = []
|
||||
for hull, entry in _carrier_positions.items():
|
||||
enriched = _enrich_for_rendering(hull, entry, now=now)
|
||||
result = []
|
||||
for hull, pos in _carrier_positions.items():
|
||||
info = CARRIER_REGISTRY.get(hull, {})
|
||||
result.append(
|
||||
{
|
||||
"name": enriched["name"],
|
||||
"name": pos.get("name", info.get("name", hull)),
|
||||
"type": "carrier",
|
||||
"lat": enriched["lat"],
|
||||
"lng": enriched["lng"],
|
||||
"heading": None, # OSINT cannot determine true heading.
|
||||
"lat": pos["lat"],
|
||||
"lng": pos["lng"],
|
||||
"heading": None, # Heading unknown for carriers — OSINT cannot determine true heading
|
||||
"sog": 0,
|
||||
"cog": 0,
|
||||
"country": "United States",
|
||||
"desc": enriched["desc"],
|
||||
"wiki": enriched["wiki"],
|
||||
"desc": pos.get("desc", ""),
|
||||
"wiki": pos.get("wiki", info.get("wiki", "")),
|
||||
"estimated": True,
|
||||
"source": enriched["source"],
|
||||
"source_url": enriched["source_url"],
|
||||
"last_osint_update": enriched["last_osint_update"],
|
||||
# New fields (additive — existing UI continues to work):
|
||||
"position_source_at": enriched["position_source_at"],
|
||||
"position_confidence": enriched["position_confidence"],
|
||||
"is_fallback": enriched["is_fallback"],
|
||||
"source": pos.get("source", "OSINT estimated position"),
|
||||
"source_url": pos.get(
|
||||
"source_url", "https://news.usni.org/category/fleet-tracker"
|
||||
),
|
||||
"last_osint_update": pos.get("updated", ""),
|
||||
}
|
||||
)
|
||||
return _deconflict_positions(result)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily.
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily
|
||||
# -----------------------------------------------------------------
|
||||
_scheduler_thread: Optional[threading.Thread] = None
|
||||
_scheduler_stop = threading.Event()
|
||||
@@ -781,6 +544,7 @@ _scheduler_stop = threading.Event()
|
||||
|
||||
def _scheduler_loop():
|
||||
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
||||
# Initial update on startup
|
||||
try:
|
||||
update_carrier_positions()
|
||||
except Exception as e:
|
||||
@@ -788,6 +552,7 @@ def _scheduler_loop():
|
||||
|
||||
while not _scheduler_stop.is_set():
|
||||
now = datetime.now(timezone.utc)
|
||||
# Next target: 00:00 or 12:00 UTC, whichever is sooner
|
||||
hour = now.hour
|
||||
if hour < 12:
|
||||
next_hour = 12
|
||||
@@ -796,17 +561,18 @@ def _scheduler_loop():
|
||||
|
||||
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
||||
if next_hour == 24:
|
||||
from datetime import timedelta
|
||||
|
||||
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
wait_seconds = (next_run - now).total_seconds()
|
||||
logger.info(
|
||||
"Carrier tracker: next update at %s (%.1fh)",
|
||||
next_run.isoformat(),
|
||||
wait_seconds / 3600,
|
||||
f"Carrier tracker: next update at {next_run.isoformat()} ({wait_seconds/3600:.1f}h)"
|
||||
)
|
||||
|
||||
# Wait until next scheduled time, or until stop event
|
||||
if _scheduler_stop.wait(timeout=wait_seconds):
|
||||
break
|
||||
break # Stop event was set
|
||||
|
||||
try:
|
||||
update_carrier_positions()
|
||||
|
||||
@@ -1012,33 +1012,14 @@ def _extract_img_src(html_fragment: str):
|
||||
class MadridCityIngestor(BaseCCTVIngestor):
|
||||
"""Madrid City Hall traffic cameras from datos.madrid.es KML feed."""
|
||||
|
||||
KML_URL_HTTPS = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL_HTTP = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def _fetch_kml(self):
    """Fetch the Madrid camera KML, trying HTTPS first and HTTP second (#363).

    Returns the first response whose ``raise_for_status()`` succeeds. A
    warning is logged when the HTTP URL had to be used. If both attempts
    fail, the most recent exception is re-raised.
    """
    failure: Exception | None = None
    for candidate_url in (self.KML_URL_HTTPS, self.KML_URL_HTTP):
        try:
            reply = fetch_with_curl(candidate_url, timeout=20)
            reply.raise_for_status()
            if candidate_url == self.KML_URL_HTTP:
                logger.warning(
                    "MadridCityIngestor: HTTPS KML unavailable, using HTTP catalog feed"
                )
            return reply
        except Exception as e:
            # Remember the failure and move on to the next URL.
            failure = e
            logger.debug("MadridCityIngestor: KML fetch failed for %s: %s", candidate_url, e)
    if failure is not None:
        raise failure
    raise RuntimeError("Madrid KML fetch failed")
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = self._fetch_kml()
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
except Exception as e:
|
||||
logger.error(f"MadridCityIngestor: failed to fetch KML: {e}")
|
||||
return []
|
||||
|
||||
@@ -32,7 +32,6 @@ class Settings(BaseSettings):
|
||||
MESH_ARTI_ENABLED: bool = False
|
||||
MESH_ARTI_SOCKS_PORT: int = 9050
|
||||
MESH_RELAY_PEERS: str = ""
|
||||
MESH_PUBLIC_PEER_URL: str = ""
|
||||
# Bootstrap seeds are discovery hints, not authoritative network roots.
|
||||
# Nodes promote healthy discovered peers from the store/manifest over time.
|
||||
MESH_BOOTSTRAP_SEED_PEERS: str = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||
@@ -54,12 +53,6 @@ class Settings(BaseSettings):
|
||||
MESH_RELAY_FAILURE_COOLDOWN_S: int = 120
|
||||
MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S: int = 15
|
||||
MESH_PEER_PUSH_SECRET: str = ""
|
||||
# Issue #256 (tg12): optional per-peer HMAC secret map. Comma-separated
|
||||
# `url=secret` pairs. When a peer URL appears here, only that per-peer
|
||||
# secret is accepted for it — the global MESH_PEER_PUSH_SECRET above is
|
||||
# ignored for that specific URL. Single-peer installs and unmigrated
|
||||
# multi-peer installs leave this empty and behavior is unchanged.
|
||||
MESH_PEER_SECRETS: str = ""
|
||||
MESH_RNS_APP_NAME: str = "shadowbroker"
|
||||
MESH_RNS_ASPECT: str = "infonet"
|
||||
MESH_RNS_IDENTITY_PATH: str = ""
|
||||
@@ -117,21 +110,6 @@ class Settings(BaseSettings):
|
||||
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
||||
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
||||
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
||||
# Anti-spam: cap on distinct UNACKED messages a single sender can have
|
||||
# parked in a single recipient's mailbox at any one time. Once the
|
||||
# recipient pulls (acks) a message, the sender's quota for that pair
|
||||
# frees up. Default 2 — a sender who wants to deliver more must wait
|
||||
# for the recipient to actually read the prior messages.
|
||||
#
|
||||
# This cap is enforced TWICE: once on the local deposit path (the
|
||||
# sender's own node refuses to spool the 3rd message) AND once on
|
||||
# the replication-acceptance path (honest peer relays refuse to
|
||||
# accept inbound replicas that would put them over the cap). The
|
||||
# double enforcement makes the rule a NETWORK rule — patching out
|
||||
# the local check on a hostile sender's relay doesn't let extras
|
||||
# propagate, because every honest peer enforces the same cap on
|
||||
# inbound replication.
|
||||
MESH_DM_PENDING_PER_SENDER_LIMIT: int = 2
|
||||
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
||||
@@ -311,19 +289,6 @@ class Settings(BaseSettings):
|
||||
# service operator can identify per-install traffic instead of a generic
|
||||
# "ShadowBroker" aggregate.
|
||||
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
||||
# Per-install operator handle used in the User-Agent for EVERY third-party
|
||||
# API the backend calls (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz,
|
||||
# Broadcastify, weather.gov, NUFORC, etc.). The default is empty, in which
|
||||
# case backend/services/network_utils.py auto-generates a stable
|
||||
# pseudonymous handle like "operator-7f3a92" on first use and caches it.
|
||||
# Operators who want to identify themselves with a real handle can set
|
||||
# this; operators who want to stay pseudonymous can leave it empty.
|
||||
#
|
||||
# The handle is sent ONLY to public third-party APIs. It is NEVER mixed
|
||||
# into mesh / Wormhole / Infonet identity (those have their own crypto
|
||||
# identity layer; conflating the two would leak public attribution into
|
||||
# private mesh state).
|
||||
OPERATOR_HANDLE: str = ""
|
||||
|
||||
# SAR (Synthetic Aperture Radar) data layer
|
||||
# Mode A — free catalog metadata, no account, default-on
|
||||
|
||||
@@ -11,13 +11,8 @@ DEFAULT_TRAIL_TTL_S = 300 # 5 min - trail TTL for non-tracked flights
|
||||
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
|
||||
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
|
||||
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
|
||||
# Tuned 2026-05: previously 0.30 / 5 aircraft which — combined with the
|
||||
# -1 noise cushion in the detector AND the pre-fix nac_p==0 filter that
|
||||
# discarded jamming victims — meant the layer almost never lit up.
|
||||
# Lowering the bar so genuine jamming zones with sparser ADS-B coverage
|
||||
# clear (eastern Med, Russia/Ukraine border, Iran/Iraq).
|
||||
GPS_JAMMING_MIN_RATIO = 0.20 # 20% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 3 # Min aircraft in grid cell for statistical significance
|
||||
GPS_JAMMING_MIN_RATIO = 0.30 # 30% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 5 # Min aircraft in grid cell for statistical significance
|
||||
|
||||
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
|
||||
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
|
||||
|
||||
@@ -19,7 +19,6 @@ import concurrent.futures
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
@@ -145,18 +144,13 @@ _STARTUP_HEAVY_REFRESH_DELAY_S = float(os.environ.get("SHADOWBROKER_STARTUP_HEAV
|
||||
_STARTUP_HEAVY_REFRESH_STARTED = False
|
||||
_STARTUP_HEAVY_REFRESH_LOCK = threading.Lock()
|
||||
_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_FETCH_WORKERS", "8"))
|
||||
_HEAVY_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_HEAVY_FETCH_WORKERS", "2"))
|
||||
_SLOW_FETCH_CONCURRENCY = int(os.environ.get("SHADOWBROKER_SLOW_FETCH_CONCURRENCY", "4"))
|
||||
_STARTUP_HEAVY_CONCURRENCY = int(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY", "2"))
|
||||
|
||||
# Fast-tier pool (flights, ships, sigint, …). Slow / heavy work uses a separate pool
|
||||
# so Playwright, GDELT, CCTV ingest, etc. cannot starve the 60s refresh path (#375).
|
||||
# Shared thread pool — reused across all fetch cycles instead of creating/destroying per tick
|
||||
_SHARED_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(2, _FETCH_WORKERS), thread_name_prefix="fetch"
|
||||
)
|
||||
_SLOW_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(1, _HEAVY_FETCH_WORKERS), thread_name_prefix="fetch-slow"
|
||||
)
|
||||
|
||||
|
||||
def _cache_json_safe(value):
|
||||
@@ -325,42 +319,10 @@ def seed_startup_caches() -> None:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler & Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
def _executor_for_task_label(label: str) -> concurrent.futures.ThreadPoolExecutor:
    """Pick the thread pool that runs tasks submitted under *label*.

    Labels starting with ``"slow-tier"`` or ``"startup-heavy"`` are routed to
    ``_SLOW_EXECUTOR``; every other label uses ``_SHARED_EXECUTOR``.
    """
    is_heavy = label.startswith("slow-tier") or label.startswith("startup-heavy")
    return _SLOW_EXECUTOR if is_heavy else _SHARED_EXECUTOR
|
||||
|
||||
|
||||
def _run_task_with_health_on_executor(
    executor: concurrent.futures.ThreadPoolExecutor,
    func,
    name: str | None = None,
) -> None:
    """Run a scheduled job on the given pool so it cannot starve fast-tier workers.

    The job is submitted to *executor* and the calling thread waits up to
    ``_TASK_HARD_TIMEOUT_S`` seconds for it to finish. The outcome is reported
    to ``services.fetch_health`` and nothing is re-raised to the caller.

    Args:
        executor: Thread pool that executes ``func``.
        func: Zero-argument callable to run.
        name: Label used in health records and log lines. Defaults to
            ``func.__name__``, or ``"task"`` when the callable has no name.
    """
    task_name = name or getattr(func, "__name__", "task")
    future = executor.submit(func)
    start = time.perf_counter()
    try:
        # NOTE: on timeout only the wait is abandoned; the worker thread keeps
        # running the job, and the timeout is recorded as a failure below.
        future.result(timeout=_TASK_HARD_TIMEOUT_S)
        duration = time.perf_counter() - start
        from services.fetch_health import record_success

        record_success(task_name, duration_s=duration)
        if duration > _SLOW_FETCH_S:
            # Format fixed: the original "%.2f}s" printed a stray brace.
            logger.warning("task slow: %s took %.2fs", task_name, duration)
    except Exception as e:
        duration = time.perf_counter() - start
        from services.fetch_health import record_failure

        record_failure(task_name, error=e, duration_s=duration)
        logger.exception("task failed: %s", task_name)
|
||||
|
||||
|
||||
def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
"""Run tasks concurrently and log any exceptions (do not fail silently)."""
|
||||
if not funcs:
|
||||
return
|
||||
executor = _executor_for_task_label(label)
|
||||
if max_concurrency is None:
|
||||
if label.startswith("slow-tier"):
|
||||
max_concurrency = _SLOW_FETCH_CONCURRENCY
|
||||
@@ -373,7 +335,7 @@ def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
remaining_funcs = list(funcs)
|
||||
while remaining_funcs:
|
||||
batch, remaining_funcs = remaining_funcs[:max_concurrency], remaining_funcs[max_concurrency:]
|
||||
futures = {executor.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
futures = {_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
_drain_task_futures(label, futures)
|
||||
|
||||
|
||||
@@ -443,6 +405,7 @@ def update_slow_data():
|
||||
logger.info("Slow-tier data update starting...")
|
||||
slow_funcs = [
|
||||
fetch_news,
|
||||
fetch_prediction_markets,
|
||||
fetch_earthquakes,
|
||||
fetch_firms_fires,
|
||||
fetch_firms_country_fires,
|
||||
@@ -784,27 +747,6 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Prediction markets — own jittered cadence (Polymarket/Kalshi clearnet egress).
|
||||
# Kept off the fixed 5-minute slow tier so poll timing is less fingerprintable.
|
||||
from services.fetchers.prediction_markets import fetch_prediction_markets
|
||||
|
||||
_pm_interval_m = max(5, int(os.environ.get("PREDICTION_MARKETS_INTERVAL_MINUTES", "7")))
|
||||
_pm_jitter_s = max(0, int(os.environ.get("PREDICTION_MARKETS_SCHEDULER_JITTER_S", "240")))
|
||||
_pm_initial_max_s = max(0, int(os.environ.get("PREDICTION_MARKETS_INITIAL_DELAY_MAX_S", "180")))
|
||||
_pm_first_run = datetime.utcnow() + timedelta(
|
||||
seconds=random.randint(30, max(30, _pm_initial_max_s))
|
||||
)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_prediction_markets, "fetch_prediction_markets"),
|
||||
"interval",
|
||||
minutes=_pm_interval_m,
|
||||
jitter=_pm_jitter_s,
|
||||
next_run_time=_pm_first_run,
|
||||
id="prediction_markets",
|
||||
max_instances=1,
|
||||
misfire_grace_time=300,
|
||||
)
|
||||
|
||||
# Weather alerts — every 5 minutes (time-critical, separate from slow tier)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_weather_alerts, "fetch_weather_alerts"),
|
||||
@@ -835,39 +777,6 @@ def start_scheduler():
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# Flight observation pruning — drops icao24 → first_seen_at entries we
|
||||
# haven't seen in an hour. Same cadence as AIS prune for symmetry; the
|
||||
# per-tick scan is O(in-flight aircraft) so it's cheap.
|
||||
from services.fetchers.flight_observations import prune as _prune_flight_observations
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(_prune_flight_observations, "prune_flight_observations"),
|
||||
"interval",
|
||||
minutes=5,
|
||||
id="flight_observation_prune",
|
||||
max_instances=1,
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# AISHub REST fallback — slow polling when the AISStream WebSocket
|
||||
# primary is offline. Configurable interval via
|
||||
# AISHUB_POLL_INTERVAL_MINUTES env (default 20 min). Operator must
|
||||
# set AISHUB_USERNAME to opt in. The fetcher is gated internally on
|
||||
# the primary being disconnected, so this job is cheap when the
|
||||
# WebSocket is healthy (early-returns after a status check).
|
||||
from services.fetchers.aishub_fallback import (
|
||||
aishub_poll_interval_minutes,
|
||||
fetch_aishub_vessels,
|
||||
)
|
||||
_aishub_interval = aishub_poll_interval_minutes()
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_aishub_vessels, "fetch_aishub_vessels"),
|
||||
"interval",
|
||||
minutes=_aishub_interval,
|
||||
id="aishub_fallback",
|
||||
max_instances=1,
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
|
||||
# days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
|
||||
# and broken upstream). Airline schedules change on a quarterly cycle,
|
||||
@@ -902,7 +811,7 @@ def start_scheduler():
|
||||
|
||||
# GDELT — every 30 minutes (downloads 32 ZIP files per call, avoid rate limits)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health_on_executor(_SLOW_EXECUTOR, fetch_gdelt, "fetch_gdelt"),
|
||||
lambda: _run_task_with_health(fetch_gdelt, "fetch_gdelt"),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="gdelt",
|
||||
@@ -910,9 +819,7 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, update_liveuamap, "update_liveuamap"
|
||||
),
|
||||
lambda: _run_task_with_health(update_liveuamap, "update_liveuamap"),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="liveuamap",
|
||||
@@ -973,9 +880,7 @@ def start_scheduler():
|
||||
logger.warning(f"CCTV post-ingest refresh failed: {e}")
|
||||
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, _run_cctv_ingest_cycle, "cctv_ingest_cycle"
|
||||
),
|
||||
_run_cctv_ingest_cycle,
|
||||
"interval",
|
||||
minutes=10,
|
||||
id="cctv_ingest",
|
||||
@@ -1055,19 +960,16 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window;
|
||||
# each self-hosted install pulls live nuforc.org so operators see current
|
||||
# reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d.
|
||||
# UAP sightings (NUFORC) — daily at 12:00 UTC
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(
|
||||
lambda: fetch_uap_sightings(force_refresh=True),
|
||||
"fetch_uap_sightings",
|
||||
),
|
||||
"cron",
|
||||
day_of_week="mon",
|
||||
hour=12,
|
||||
minute=0,
|
||||
id="uap_sightings_weekly",
|
||||
id="uap_sightings_daily",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
@@ -1192,10 +1094,7 @@ def start_scheduler():
|
||||
def stop_scheduler():
|
||||
if _scheduler:
|
||||
_scheduler.shutdown(wait=False)
|
||||
_SLOW_EXECUTOR.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
|
||||
def get_latest_data():
|
||||
from services.fetchers._store import get_latest_data_deepcopy_snapshot
|
||||
|
||||
return get_latest_data_deepcopy_snapshot()
|
||||
return get_latest_data_subset(*latest_data.keys())
|
||||
|
||||
@@ -16,15 +16,8 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _feed_ingester_user_agent() -> str:
    """Return the User-Agent header value for operator-curated feed requests.

    Round 7a: built by ``outbound_user_agent`` with the ``"feed-ingester"``
    component, for per-install attribution.
    """
    user_agent = outbound_user_agent("feed-ingester")
    return user_agent
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -164,7 +157,7 @@ def _fetch_layer_feed(layer: dict[str, Any]) -> None:
|
||||
resp = requests.get(
|
||||
feed_url,
|
||||
timeout=_FETCH_TIMEOUT,
|
||||
headers={"User-Agent": _feed_ingester_user_agent()},
|
||||
headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -241,22 +241,16 @@ def get_active_layers_version() -> int:
|
||||
def get_latest_data_subset(*keys: str) -> DashboardData:
|
||||
"""Return a deep snapshot of only the requested top-level keys.
|
||||
|
||||
Grabs references under the lock, then deep-copies outside it so fetcher
|
||||
writers are not blocked for the duration of a large clone (#375).
|
||||
This avoids cloning the entire dashboard store for endpoints that only need
|
||||
a small tier-specific subset. Deep copy ensures callers cannot mutate
|
||||
nested structures (e.g. individual flight dicts) and affect the live store.
|
||||
"""
|
||||
with _data_lock:
|
||||
items = [(key, latest_data.get(key)) for key in keys]
|
||||
snap: DashboardData = {}
|
||||
for key, value in items:
|
||||
snap[key] = copy.deepcopy(value)
|
||||
return snap
|
||||
|
||||
|
||||
def get_latest_data_deepcopy_snapshot() -> DashboardData:
|
||||
"""Deep-copy the full dashboard for legacy /api/live-data consumers."""
|
||||
with _data_lock:
|
||||
items = list(latest_data.items())
|
||||
return {key: copy.deepcopy(value) for key, value in items}
|
||||
snap: DashboardData = {}
|
||||
for key in keys:
|
||||
value = latest_data.get(key)
|
||||
snap[key] = copy.deepcopy(value)
|
||||
return snap
|
||||
|
||||
|
||||
def get_latest_data_subset_refs(*keys: str) -> DashboardData:
|
||||
|
||||
@@ -21,13 +21,6 @@ from typing import Any
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _aircraft_db_user_agent() -> str:
    """Return the User-Agent string for aircraft-database requests.

    Round 7a: ``outbound_user_agent`` is imported lazily, at call time, so
    the per-install operator handle is included.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("aircraft-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUCKET_LIST_URL = (
|
||||
@@ -38,6 +31,8 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_LIST_TIMEOUT_S = 30
|
||||
_DOWNLOAD_TIMEOUT_S = 600
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_aircraft_by_hex: dict[str, dict[str, str]] = {}
|
||||
_last_refresh = 0.0
|
||||
@@ -49,7 +44,7 @@ def _latest_snapshot_key() -> str:
|
||||
response = requests.get(
|
||||
_BUCKET_LIST_URL,
|
||||
timeout=_LIST_TIMEOUT_S,
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.text)
|
||||
@@ -76,7 +71,7 @@ def _stream_csv_index(url: str) -> dict[str, dict[str, str]]:
|
||||
url,
|
||||
timeout=_DOWNLOAD_TIMEOUT_S,
|
||||
stream=True,
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
line_iter = (
|
||||
|
||||
@@ -1,290 +0,0 @@
|
||||
"""AISHub REST fallback for ship tracking when AISStream is unreachable.
|
||||
|
||||
Background
|
||||
----------
|
||||
On 2026-05-23 ``stream.aisstream.io`` (the primary live AIS WebSocket feed)
|
||||
went fully offline. Backend's only ship signal vanished. This module polls
|
||||
``data.aishub.net``'s free REST API on a slow cadence (default 20 min) when
|
||||
the WebSocket primary is disconnected, so the ships layer doesn't go fully
|
||||
dark during upstream outages.
|
||||
|
||||
Why 20 minutes
|
||||
--------------
|
||||
AISHub's free tier is rate-limited and explicitly asks consumers to be
|
||||
courteous. 20 minutes is well inside their limits, gives ships time to
|
||||
move enough to look "alive" on the map, and won't drain their service.
|
||||
Configurable via the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (clamped to
|
||||
[1, 360]).
|
||||
|
||||
Why slow vs primary
|
||||
-------------------
|
||||
This is degraded mode, not a replacement. A ship at 20 knots moves about
|
||||
6 nautical miles in 20 minutes — visible on the map but coarser than the
|
||||
real-time WebSocket signal. When AISStream comes back online, the
|
||||
WebSocket data will overwrite these records via the same ``_vessels``
|
||||
dict and ``source`` will flip from ``"aishub"`` back to upstream-live.
|
||||
|
||||
Opt-in
|
||||
------
|
||||
Operator must set ``AISHUB_USERNAME`` (free registration at
|
||||
https://www.aishub.net/api). If unset, this fetcher is a no-op.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
AISHUB_URL = "https://data.aishub.net/ws.php"
|
||||
|
||||
|
||||
def aishub_username() -> str:
    """Return the ``AISHUB_USERNAME`` environment value, whitespace-trimmed.

    An unset variable yields the empty string.
    """
    raw_value = os.environ.get("AISHUB_USERNAME", "")
    return str(raw_value).strip()
|
||||
|
||||
|
||||
def aishub_fallback_enabled() -> bool:
    """Report whether the operator has opted in to the AISHub fallback.

    A non-empty ``AISHUB_USERNAME`` is the opt-in; there is no separate
    enable flag.
    """
    return aishub_username() != ""
|
||||
|
||||
|
||||
def aishub_poll_interval_minutes() -> int:
    """Return the AISHub polling interval in minutes.

    Reads ``AISHUB_POLL_INTERVAL_MINUTES`` (default 20). A value that does not
    parse as an integer falls back to 20, and the result is clamped to
    [1, 360] so a hostile or misconfigured env var can neither hammer the
    upstream nor silence the fallback for a day.
    """
    default_minutes = 20
    configured = os.environ.get("AISHUB_POLL_INTERVAL_MINUTES", "20")
    try:
        minutes = int(str(configured).strip())
    except (TypeError, ValueError):
        minutes = default_minutes
    if minutes < 1:
        return 1
    if minutes > 360:
        return 360
    return minutes
|
||||
|
||||
|
||||
def _should_run_fallback() -> bool:
    """Decide whether the AISHub poll should run on this tick.

    Returns False when:
      * AISHub isn't configured (no ``AISHUB_USERNAME``), or
      * ``ais_proxy_status()`` reports ``connected`` as ``True``, so the
        fallback does not stomp over fresher live data.

    Returns True in every other case, including when the primary's status
    cannot be obtained at all (import or call failure). There is no check on
    whether the primary was ever started, so an operator who sets only
    ``AISHUB_USERNAME`` gets AISHub polling on its own slow cadence.
    """
    if not aishub_fallback_enabled():
        return False
    try:
        from services.ais_stream import ais_proxy_status

        status = ais_proxy_status() or {}
    except Exception:
        # Deliberate best-effort: an unavailable primary must not disable the
        # fallback. Leave a trace so the reason is diagnosable.
        logger.debug(
            "AISHub: AIS primary status unavailable; running fallback",
            exc_info=True,
        )
        return True
    # Only an explicit ``True`` counts as connected; a missing or falsy
    # value is treated as "primary down".
    return status.get("connected") is not True
|
||||
|
||||
|
||||
def _parse_aishub_response(payload: str) -> list[dict]:
|
||||
"""Parse the AISHub JSON response into a list of vessel records.
|
||||
|
||||
Successful response shape::
|
||||
|
||||
[
|
||||
{"ERROR": false, "USERNAME": "...", "FORMAT": "1", "RECORDS": N},
|
||||
[{"MMSI": ..., "LATITUDE": ..., "LONGITUDE": ..., ...}, ...]
|
||||
]
|
||||
|
||||
Error response shape::
|
||||
|
||||
[{"ERROR": true, "ERROR_MESSAGE": "..."}]
|
||||
|
||||
Empty payload (e.g. silent rate-limit drop) returns ``[]``.
|
||||
"""
|
||||
if not payload or not payload.strip():
|
||||
return []
|
||||
try:
|
||||
data = json.loads(payload)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("AISHub: response is not JSON: %s", e)
|
||||
return []
|
||||
if not isinstance(data, list) or not data:
|
||||
return []
|
||||
header = data[0] if isinstance(data[0], dict) else {}
|
||||
if header.get("ERROR") is True:
|
||||
logger.warning(
|
||||
"AISHub: upstream error: %s",
|
||||
header.get("ERROR_MESSAGE", "<unspecified>"),
|
||||
)
|
||||
return []
|
||||
if len(data) < 2 or not isinstance(data[1], list):
|
||||
return []
|
||||
return [row for row in data[1] if isinstance(row, dict)]
|
||||
|
||||
|
||||
def _normalize_record(row: dict) -> dict | None:
|
||||
"""Map an AISHub vessel record to our internal vessel schema.
|
||||
|
||||
Returns None when the record can't be used (no MMSI, bad position,
|
||||
sentinel "not available" lat/lng).
|
||||
"""
|
||||
try:
|
||||
mmsi = int(row.get("MMSI") or 0)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if not mmsi:
|
||||
return None
|
||||
try:
|
||||
lat = float(row.get("LATITUDE"))
|
||||
lng = float(row.get("LONGITUDE"))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
# AIS uses 91/181 as "no position available" sentinels.
|
||||
if abs(lat) > 90 or abs(lng) > 180:
|
||||
return None
|
||||
if lat == 91.0 or lng == 181.0:
|
||||
return None
|
||||
# SOG raw 102.3 is "speed not available"; sanitize to 0.
|
||||
try:
|
||||
sog_raw = float(row.get("SOG") or 0)
|
||||
except (TypeError, ValueError):
|
||||
sog_raw = 0.0
|
||||
sog = 0.0 if sog_raw >= 102.2 else sog_raw
|
||||
try:
|
||||
cog = float(row.get("COG") or 0)
|
||||
except (TypeError, ValueError):
|
||||
cog = 0.0
|
||||
try:
|
||||
heading_raw = int(row.get("HEADING") or 511)
|
||||
except (TypeError, ValueError):
|
||||
heading_raw = 511
|
||||
# AIS heading sentinel 511 = "not available" — fall back to COG.
|
||||
heading = heading_raw if heading_raw != 511 else cog
|
||||
try:
|
||||
ais_type = int(row.get("TYPE") or 0)
|
||||
except (TypeError, ValueError):
|
||||
ais_type = 0
|
||||
return {
|
||||
"mmsi": mmsi,
|
||||
"lat": lat,
|
||||
"lng": lng,
|
||||
"sog": sog,
|
||||
"cog": cog,
|
||||
"heading": heading,
|
||||
"name": str(row.get("NAME") or "").strip() or "UNKNOWN",
|
||||
"callsign": str(row.get("CALLSIGN") or "").strip(),
|
||||
"destination": str(row.get("DEST") or "").strip().replace("@", "") or "",
|
||||
"imo": int(row.get("IMO") or 0),
|
||||
"ais_type_code": ais_type,
|
||||
}
|
||||
|
||||
|
||||
def fetch_aishub_vessels() -> int:
    """Poll AISHub and merge vessels into the shared ``_vessels`` store.

    Returns the number of vessels updated (0 on skip, error, or no data).
    Designed to be called by the APScheduler tier — see ``data_fetcher.py``
    for the interval job that wraps this.

    Fetch failures and non-200 responses are logged and reported as 0
    rather than raised.
    """
    if not _should_run_fallback():
        logger.debug("AISHub fallback skipped: primary connected or not configured")
        return 0

    # Function-scope import keeps this edit self-contained.
    from urllib.parse import urlencode

    # URL-encode the operator-supplied username so characters such as
    # "&", "#" or spaces cannot corrupt the query string.
    query = urlencode(
        {
            "username": aishub_username(),
            "format": 1,
            "output": "json",
            "compress": 0,
        }
    )
    url = f"{AISHUB_URL}?{query}"

    try:
        response = fetch_with_curl(url, timeout=30)
    except Exception as e:
        logger.warning("AISHub fetch failed: %s", e)
        return 0

    if not response or response.status_code != 200:
        logger.warning(
            "AISHub HTTP %s",
            getattr(response, "status_code", "None"),
        )
        return 0

    rows = _parse_aishub_response(getattr(response, "text", "") or "")
    if not rows:
        return 0

    # Inline imports to avoid a circular dependency at module load time
    # (ais_stream imports lots of things and is loaded by main.py).
    from services.ais_stream import (
        _vessels,
        _vessels_lock,
        _record_vessel_trail_locked,
        classify_vessel,
        get_country_from_mmsi,
    )

    # Normalization only reads each row, so do it before taking the lock to
    # keep the critical section short.
    usable = [
        record
        for record in (_normalize_record(row) for row in rows)
        if record is not None
    ]

    now = time.time()
    count = 0
    with _vessels_lock:
        for normalized in usable:
            mmsi = normalized["mmsi"]
            vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
            # Don't overwrite fresher live data: if the WebSocket pushed an
            # update for this MMSI more recently than now-1s (race during
            # the brief reconnection window) keep the live one.
            last = float(vessel.get("_updated") or 0)
            if last > now - 1:
                continue
            vessel.update(
                {
                    "lat": normalized["lat"],
                    "lng": normalized["lng"],
                    "sog": normalized["sog"],
                    "cog": normalized["cog"],
                    "heading": normalized["heading"],
                    "_updated": now,
                    "source": "aishub",
                }
            )
            # Static fields are only written when AISHub supplied a value,
            # so anything already stored is not blanked out.
            if normalized["name"] and normalized["name"] != "UNKNOWN":
                vessel["name"] = normalized["name"]
            if normalized["callsign"]:
                vessel["callsign"] = normalized["callsign"]
            if normalized["destination"]:
                vessel["destination"] = normalized["destination"]
            if normalized["imo"]:
                vessel["imo"] = normalized["imo"]
            if normalized["ais_type_code"]:
                vessel["ais_type_code"] = normalized["ais_type_code"]
                vessel["type"] = classify_vessel(normalized["ais_type_code"], mmsi)
            if not vessel.get("country"):
                vessel["country"] = get_country_from_mmsi(mmsi)
            _record_vessel_trail_locked(
                mmsi,
                normalized["lat"],
                normalized["lng"],
                normalized["sog"],
                now,
            )
            count += 1

    if count:
        logger.info(
            "AISHub fallback: merged %d vessels (poll interval %d min)",
            count,
            aishub_poll_interval_minutes(),
        )
    return count
|
||||
@@ -15,11 +15,7 @@ import time
|
||||
import heapq
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from services.network_utils import (
|
||||
external_curl_fallback_enabled,
|
||||
fetch_with_curl,
|
||||
outbound_user_agent,
|
||||
)
|
||||
from services.network_utils import external_curl_fallback_enabled, fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.nuforc_enrichment import enrich_sighting
|
||||
from services.fetchers.retry import with_retry
|
||||
@@ -283,13 +279,13 @@ def fetch_weather_alerts():
|
||||
return
|
||||
alerts = []
|
||||
try:
|
||||
# weather.gov requires a User-Agent per their API policy. Round 7a:
|
||||
# send the per-install operator handle so they can rate-limit per
|
||||
# operator instead of treating "Shadowbroker" as one entity.
|
||||
from services.network_utils import outbound_user_agent
|
||||
# weather.gov requires a User-Agent per their API policy, but it
|
||||
# need not identify the operator. Use a project-generic string and
|
||||
# let the user override via SHADOWBROKER_USER_AGENT if needed.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||
headers = {
|
||||
"User-Agent": outbound_user_agent("weather-gov"),
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Accept": "application/geo+json",
|
||||
}
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
@@ -692,8 +688,7 @@ _NUFORC_TILESET = "nuforc.cmm18aqea06bu1mmselhpnano-0ce5v"
|
||||
_NUFORC_TOKEN = os.environ.get("NUFORC_MAPBOX_TOKEN", "").strip()
|
||||
_NUFORC_RADIUS_M = 200_000 # 200 km query radius
|
||||
_NUFORC_LIMIT = 50 # max features per tilequery call
|
||||
# Rolling window shown on the map (~2 calendar months). Override via NUFORC_RECENT_DAYS.
|
||||
_NUFORC_RECENT_DAYS = max(1, int(os.environ.get("NUFORC_RECENT_DAYS", "60")))
|
||||
_NUFORC_RECENT_DAYS = int(os.environ.get("NUFORC_RECENT_DAYS", "60"))
|
||||
_NUFORC_HF_FALLBACK_LIMIT = max(25, int(os.environ.get("NUFORC_HF_FALLBACK_LIMIT", "250")))
|
||||
_NUFORC_HF_GEOCODE_LIMIT = max(25, int(os.environ.get("NUFORC_HF_GEOCODE_LIMIT", "150")))
|
||||
_NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1")))
|
||||
@@ -701,12 +696,6 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1
|
||||
# practice, so a 0.3s spacing keeps us well under any soft throttle while
|
||||
# still rebuilding a full 12-month window in ~10 minutes.
|
||||
_NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3"))
|
||||
# Disk cache TTL — match the weekly scheduler so restarts between fetches still
|
||||
# serve the same rolling 60-day snapshot without hammering nuforc.org daily.
|
||||
_NUFORC_CACHE_TTL_S = max(
|
||||
3600,
|
||||
int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600,
|
||||
)
|
||||
_NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
||||
_NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json"
|
||||
_NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json"
|
||||
@@ -724,12 +713,7 @@ _NUFORC_LIVE_NONCE_RE = re.compile(
|
||||
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
||||
)
|
||||
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
||||
# Round 7a: NUFORC's site is sensitive to non-browser UAs but we send a
|
||||
# per-install operator handle prefixed by Mozilla/5.0 so we're identifiable
|
||||
# without being aggregately blocked. Operators who want stricter privacy
|
||||
# can override the entire UA via SHADOWBROKER_USER_AGENT.
|
||||
def _nuforc_live_user_agent() -> str:
    """Return the browser-style User-Agent sent on live NUFORC requests.

    The ``outbound_user_agent('nuforc-live')`` value is wrapped as the
    parenthesised comment of a ``Mozilla/5.0`` product token.
    """
    identity = outbound_user_agent("nuforc-live")
    return "Mozilla/5.0 ({})".format(identity)
|
||||
_NUFORC_LIVE_USER_AGENT = "Mozilla/5.0 (ShadowBroker-OSINT NUFORC-fetcher)"
|
||||
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
||||
|
||||
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
||||
@@ -773,35 +757,6 @@ def _fetch_nuforc_tilequery(lng: float, lat: float) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def _uap_cutoff_date_str() -> str:
    """Return the oldest date inside the rolling window as ``YYYY-MM-DD``.

    The window is ``_NUFORC_RECENT_DAYS`` days back from the current UTC time.
    """
    window = timedelta(days=_NUFORC_RECENT_DAYS)
    oldest = datetime.utcnow() - window
    return oldest.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _uap_sighting_date_str(sighting: dict) -> str | None:
    """Normalize a sighting row to ``YYYY-MM-DD`` for window filtering.

    Reads ``date_time``, falling back to ``occurred``. The value goes through
    ``nuforc_enrichment._parse_date`` first. If that yields nothing and the
    text has dashes at positions 4 and 7, its first ten characters are
    returned. Returns None when no date can be derived.
    """
    from services.fetchers.nuforc_enrichment import _parse_date

    text = str(sighting.get("date_time") or sighting.get("occurred") or "").strip()
    if not text:
        return None
    normalized = _parse_date(text)
    if normalized:
        return normalized
    looks_iso = len(text) >= 10 and text[4] == "-" and text[7] == "-"
    return text[:10] if looks_iso else None
|
||||
|
||||
|
||||
def _filter_uap_sightings_recent(sightings: list[dict]) -> list[dict]:
    """Keep only sightings dated inside the rolling NUFORC_RECENT_DAYS window.

    Dates are compared as ``YYYY-MM-DD`` strings against the cutoff. A row
    with no derivable date compares as the empty string and is dropped.
    """
    cutoff = _uap_cutoff_date_str()
    recent = []
    for entry in sightings:
        entry_date = _uap_sighting_date_str(entry) or ""
        if entry_date >= cutoff:
            recent.append(entry)
    return recent
|
||||
|
||||
|
||||
def _parse_nuforc_tile_date(value: str) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
@@ -838,41 +793,19 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] |
|
||||
built_dt = datetime.fromisoformat(built) if built else None
|
||||
if built_dt is None:
|
||||
return None
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S:
|
||||
return None
|
||||
if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS:
|
||||
logger.info(
|
||||
"UAP sightings: cache cutoff_days mismatch (%s != %s); rebuilding",
|
||||
raw.get("cutoff_days"),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > 86400:
|
||||
return None
|
||||
sightings = raw.get("sightings")
|
||||
if isinstance(sightings, list):
|
||||
if len(sightings) <= 0:
|
||||
logger.info("UAP sightings: cache is fresh but empty; rebuilding")
|
||||
return None
|
||||
filtered = _filter_uap_sightings_recent(sightings)
|
||||
if not filtered:
|
||||
logger.warning(
|
||||
"UAP sightings: cache had %d rows but none within last %d days; rebuilding",
|
||||
len(sightings),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return None
|
||||
if len(filtered) < len(sightings):
|
||||
logger.info(
|
||||
"UAP sightings: dropped %d stale cached rows outside %d-day window",
|
||||
len(sightings) - len(filtered),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
logger.info(
|
||||
"UAP sightings: loaded %d cached reports from %s (within %d-day window)",
|
||||
len(filtered),
|
||||
"UAP sightings: loaded %d cached reports from %s",
|
||||
len(sightings),
|
||||
built,
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return filtered
|
||||
return sightings
|
||||
except Exception as e:
|
||||
logger.warning("UAP sightings: cache load error: %s", e)
|
||||
return None
|
||||
@@ -886,7 +819,6 @@ def _save_nuforc_sightings_cache(sightings: list[dict]) -> None:
|
||||
_NUFORC_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
payload = {
|
||||
"built": datetime.utcnow().isoformat(),
|
||||
"cutoff_days": _NUFORC_RECENT_DAYS,
|
||||
"count": len(sightings),
|
||||
"sightings": sightings,
|
||||
}
|
||||
@@ -1025,7 +957,7 @@ def _photon_lookup(query: str) -> list[float] | None:
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": outbound_user_agent("nuforc-uap-geocode"),
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (NUFORC-UAP-layer)",
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=10,
|
||||
@@ -1094,10 +1026,97 @@ def _nuforc_months_for_window(days: int) -> list[str]:
|
||||
return months
|
||||
|
||||
|
||||
def _parse_nuforc_live_datatables_rows(raw_rows: list) -> list[dict]:
|
||||
"""Parse wpDataTables ``data`` array into normalized row dicts."""
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
"""Pull one month of NUFORC sightings via the live wpDataTables AJAX.
|
||||
|
||||
Returns a list of raw row dicts with the fields we care about:
|
||||
id, occurred (YYYY-MM-DD), posted (YYYY-MM-DD), city, state, country,
|
||||
shape_raw, summary, explanation. Empty list on any failure — caller
|
||||
decides whether a failure is fatal.
|
||||
"""
|
||||
from services.fetchers.nuforc_enrichment import _parse_date
|
||||
|
||||
curl_bin = shutil.which("curl") or "curl"
|
||||
index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
|
||||
ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
|
||||
|
||||
if not external_curl_fallback_enabled():
|
||||
logger.warning(
|
||||
"NUFORC live: external curl disabled on Windows for %s; "
|
||||
"set SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=1 to opt in.",
|
||||
yyyymm,
|
||||
)
|
||||
return []
|
||||
|
||||
# Step 1: GET the month index to capture session cookies + fresh nonce.
|
||||
try:
|
||||
index_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
index_url,
|
||||
],
|
||||
capture_output=True, text=True, timeout=60,
|
||||
encoding="utf-8", errors="replace",
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("NUFORC live: index fetch failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
if index_res.returncode != 0 or not index_res.stdout:
|
||||
logger.warning(
|
||||
"NUFORC live: index fetch exit=%s for %s", index_res.returncode, yyyymm,
|
||||
)
|
||||
return []
|
||||
nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.stdout)
|
||||
if not nonce_match:
|
||||
logger.warning("NUFORC live: wdtNonce not found on index page for %s", yyyymm)
|
||||
return []
|
||||
nonce = nonce_match.group(1)
|
||||
|
||||
# Step 2: POST to admin-ajax.php with length=-1 to pull the whole month.
|
||||
post_data = (
|
||||
"draw=1"
|
||||
"&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
|
||||
"&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
|
||||
"&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
|
||||
"&start=0&length=-1"
|
||||
"&search%5Bvalue%5D=&search%5Bregex%5D=false"
|
||||
f"&wdtNonce={nonce}"
|
||||
)
|
||||
try:
|
||||
ajax_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
"-X", "POST",
|
||||
"-H", f"Referer: {index_url}",
|
||||
"-H", "X-Requested-With: XMLHttpRequest",
|
||||
"-H", "Content-Type: application/x-www-form-urlencoded",
|
||||
"--data", post_data,
|
||||
ajax_url,
|
||||
],
|
||||
capture_output=True, text=True, timeout=120,
|
||||
encoding="utf-8", errors="replace",
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("NUFORC live: ajax fetch failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
if ajax_res.returncode != 0 or not ajax_res.stdout:
|
||||
logger.warning(
|
||||
"NUFORC live: ajax fetch exit=%s for %s", ajax_res.returncode, yyyymm,
|
||||
)
|
||||
return []
|
||||
try:
|
||||
payload = json.loads(ajax_res.stdout)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("NUFORC live: ajax JSON decode failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
|
||||
raw_rows = payload.get("data") or []
|
||||
out: list[dict] = []
|
||||
for raw in raw_rows:
|
||||
if not isinstance(raw, list) or len(raw) < 8:
|
||||
@@ -1146,166 +1165,16 @@ def _parse_nuforc_live_datatables_rows(raw_rows: list) -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_requests(yyyymm: str) -> list[dict]:
    """Fetch one month of live NUFORC sightings using ``requests``.

    This is the curl-free variant (usable on Windows when the external curl
    fallback is disabled). It GETs the month index page to pick up session
    cookies and the ``wdtNonce`` token, then POSTs a DataTables query with
    ``length=-1`` (whole month) to the AJAX endpoint.

    Args:
        yyyymm: Month identifier substituted into the index/AJAX URL templates.

    Returns:
        Rows normalized by ``_parse_nuforc_live_datatables_rows``. An empty
        list is returned (and a warning logged) on any network error,
        non-200 status, missing nonce, undecodable JSON, or a JSON body that
        is not an object.
    """
    import requests

    index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
    ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
    headers = {"User-Agent": _nuforc_live_user_agent()}

    # The context manager releases the session's pooled connections on every
    # exit path; the session is only needed to carry cookies between the GET
    # and the POST.
    with requests.Session() as session:
        session.headers.update(headers)

        # Step 1: GET the month index to capture session cookies + fresh nonce.
        try:
            index_res = session.get(index_url, timeout=60)
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): index fetch failed for %s: %s", yyyymm, e)
            return []
        if index_res.status_code != 200 or not index_res.text:
            logger.warning(
                "NUFORC live (requests): index HTTP %s for %s",
                index_res.status_code,
                yyyymm,
            )
            return []
        nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.text)
        if not nonce_match:
            logger.warning("NUFORC live (requests): wdtNonce not found for %s", yyyymm)
            return []
        nonce = nonce_match.group(1)

        # Step 2: POST the pre-encoded DataTables form body (length=-1 pulls
        # the whole month in a single response).
        post_data = (
            "draw=1"
            "&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
            "&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
            "&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
            "&start=0&length=-1"
            "&search%5Bvalue%5D=&search%5Bregex%5D=false"
            f"&wdtNonce={nonce}"
        )
        try:
            ajax_res = session.post(
                ajax_url,
                data=post_data,
                headers={
                    **headers,
                    "Referer": index_url,
                    "X-Requested-With": "XMLHttpRequest",
                    "Content-Type": "application/x-www-form-urlencoded",
                },
                timeout=120,
            )
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): ajax failed for %s: %s", yyyymm, e)
            return []
        if ajax_res.status_code != 200 or not ajax_res.text:
            logger.warning(
                "NUFORC live (requests): ajax HTTP %s for %s",
                ajax_res.status_code,
                yyyymm,
            )
            return []
        try:
            payload = ajax_res.json()
        except ValueError as e:
            # ValueError is the common base of json.JSONDecodeError and of the
            # decode error raised by Response.json(), so it covers both.
            logger.warning("NUFORC live (requests): ajax JSON decode failed for %s: %s", yyyymm, e)
            return []

    # A 200 response can still carry a JSON scalar or list instead of the
    # expected object; treat that as a failed fetch rather than crashing.
    if not isinstance(payload, dict):
        logger.warning(
            "NUFORC live (requests): unexpected ajax payload type %s for %s",
            type(payload).__name__,
            yyyymm,
        )
        return []
    return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_curl(yyyymm: str, cookie_jar: Path) -> list[dict]:
    """Pull one month of NUFORC sightings via curl + wpDataTables AJAX.

    Runs two curl invocations that share ``cookie_jar``: a GET of the month
    index page (to obtain cookies and the ``wdtNonce`` token) followed by a
    POST of a DataTables query with ``length=-1`` to the AJAX endpoint.

    Args:
        yyyymm: Month identifier substituted into the index/AJAX URL templates.
        cookie_jar: File curl reads cookies from (``-b``) and writes them to
            (``-c``), so the POST reuses the session set up by the GET.

    Returns:
        Rows normalized by ``_parse_nuforc_live_datatables_rows``. An empty
        list is returned (and a warning logged) when curl fails or times out,
        the nonce is missing, the body is not valid JSON, or the JSON body is
        not an object.
    """
    curl_bin = shutil.which("curl") or "curl"
    user_agent = _nuforc_live_user_agent()
    cookie_path = str(cookie_jar)
    index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
    ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)

    # Step 1: GET the month index to capture session cookies + fresh nonce.
    try:
        index_res = subprocess.run(
            [
                curl_bin, "-sL",
                "-A", user_agent,
                "-c", cookie_path,
                "-b", cookie_path,
                index_url,
            ],
            capture_output=True, text=True, timeout=60,
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("NUFORC live: index fetch failed for %s: %s", yyyymm, e)
        return []
    if index_res.returncode != 0 or not index_res.stdout:
        logger.warning(
            "NUFORC live: index fetch exit=%s for %s", index_res.returncode, yyyymm,
        )
        return []
    nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.stdout)
    if not nonce_match:
        logger.warning("NUFORC live: wdtNonce not found on index page for %s", yyyymm)
        return []
    nonce = nonce_match.group(1)

    # Step 2: POST to admin-ajax.php with length=-1 to pull the whole month.
    post_data = (
        "draw=1"
        "&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
        "&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
        "&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
        "&start=0&length=-1"
        "&search%5Bvalue%5D=&search%5Bregex%5D=false"
        f"&wdtNonce={nonce}"
    )
    try:
        ajax_res = subprocess.run(
            [
                curl_bin, "-sL",
                "-A", user_agent,
                "-c", cookie_path,
                "-b", cookie_path,
                "-X", "POST",
                "-H", f"Referer: {index_url}",
                "-H", "X-Requested-With: XMLHttpRequest",
                "-H", "Content-Type: application/x-www-form-urlencoded",
                "--data", post_data,
                ajax_url,
            ],
            capture_output=True, text=True, timeout=120,
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("NUFORC live: ajax fetch failed for %s: %s", yyyymm, e)
        return []
    if ajax_res.returncode != 0 or not ajax_res.stdout:
        logger.warning(
            "NUFORC live: ajax fetch exit=%s for %s", ajax_res.returncode, yyyymm,
        )
        return []
    try:
        payload = json.loads(ajax_res.stdout)
    except json.JSONDecodeError as e:
        logger.warning("NUFORC live: ajax JSON decode failed for %s: %s", yyyymm, e)
        return []

    # The body can be valid JSON without being an object (for example a bare
    # ``0`` or ``-1``); report that as a failed fetch instead of letting
    # ``payload.get`` raise AttributeError.
    if not isinstance(payload, dict):
        logger.warning(
            "NUFORC live: unexpected ajax payload type %s for %s",
            type(payload).__name__,
            yyyymm,
        )
        return []
    return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
    """Fetch one month of live NUFORC rows, preferring curl when permitted.

    The curl fetcher is tried only when ``external_curl_fallback_enabled()``
    is true. If it is skipped or yields nothing, the ``requests``-based
    fetcher is used and its result is returned as-is.
    """
    curl_rows = (
        _nuforc_fetch_month_live_curl(yyyymm, cookie_jar)
        if external_curl_fallback_enabled()
        else []
    )
    # An empty curl result falls through to the requests implementation.
    return curl_rows or _nuforc_fetch_month_live_requests(yyyymm)
|
||||
|
||||
|
||||
def _build_recent_uap_sightings() -> list[dict]:
|
||||
"""Build the rolling UAP sightings layer from live NUFORC data.
|
||||
"""Build the rolling 1-year UAP sightings layer from live NUFORC data.
|
||||
|
||||
Hits nuforc.org's public sub-index once per month in the window, drops
|
||||
anything outside the exact day-precision cutoff, dedupes by sighting id,
|
||||
geocodes city+state via the existing location cache, and returns rows
|
||||
keyed to the same schema the frontend already renders.
|
||||
"""
|
||||
cutoff_str = _uap_cutoff_date_str()
|
||||
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||
months = _nuforc_months_for_window(_NUFORC_RECENT_DAYS)
|
||||
|
||||
try:
|
||||
@@ -1505,21 +1374,10 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
This is a resilience fallback for local/Windows runs where nuforc.org is
|
||||
Cloudflare-gated and the Mapbox token is not configured. It is not as fresh
|
||||
as the live NUFORC AJAX feed, but it keeps the layer visible and cached.
|
||||
|
||||
Date-cutoff guard: the kcimc/NUFORC HF dataset is a static snapshot whose
|
||||
maintainer refreshes it sporadically. Without a cutoff, sorting by
|
||||
occurred-desc and taking the top N rows returns whatever the mirror's
|
||||
newest rows happen to be — which can be years old if the snapshot is
|
||||
stale. We apply the same ``_NUFORC_RECENT_DAYS`` window the live path
|
||||
uses (60 days). If the HF mirror has nothing inside the window we return
|
||||
``[]`` rather than silently serving 3-year-old "newest" rows.
|
||||
"""
|
||||
from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
|
||||
from services.geocode_validate import coord_in_country
|
||||
|
||||
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
|
||||
if not response or response.status_code != 200:
|
||||
@@ -1533,7 +1391,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
return []
|
||||
|
||||
candidates: list[dict] = []
|
||||
stale_rows_dropped = 0
|
||||
try:
|
||||
reader = csv.DictReader(io.StringIO(response.text))
|
||||
for row in reader:
|
||||
@@ -1544,9 +1401,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
)
|
||||
if not occurred:
|
||||
continue
|
||||
if occurred < cutoff_str:
|
||||
stale_rows_dropped += 1
|
||||
continue
|
||||
raw_location = _normalize_uap_location(
|
||||
row.get("Location", "")
|
||||
or row.get("City", "")
|
||||
@@ -1581,19 +1435,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
logger.warning("UAP sightings: HF fallback parse failed: %s", e)
|
||||
return []
|
||||
|
||||
if not candidates:
|
||||
# HF mirror returned rows, but none inside the rolling window. This is
|
||||
# the smoking gun for "the public HF dataset hasn't been refreshed in
|
||||
# years" — log loudly so the operator sees it instead of guessing.
|
||||
logger.error(
|
||||
"UAP sightings: HF fallback yielded 0 rows within last %d days "
|
||||
"(dropped %d stale rows). HF mirror is likely stale; the layer "
|
||||
"will be empty until the live NUFORC path recovers.",
|
||||
_NUFORC_RECENT_DAYS,
|
||||
stale_rows_dropped,
|
||||
)
|
||||
return []
|
||||
|
||||
candidates.sort(key=lambda row: (row["occurred"], row["posted"], row["id"]), reverse=True)
|
||||
candidates = candidates[:_NUFORC_HF_FALLBACK_LIMIT]
|
||||
|
||||
@@ -1652,12 +1493,11 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
"""Fetch rolling-window UAP sightings from live NUFORC.
|
||||
"""Fetch last-year UAP sightings from NUFORC.
|
||||
|
||||
Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS
|
||||
(default 168h / one week). The weekly scheduler forces a rebuild so every
|
||||
install refreshes the same ~60-day layer without daily load on nuforc.org.
|
||||
Operators can also POST /api/refresh (admin) to pull immediately.
|
||||
Startup reads the cached daily snapshot when it is still fresh. The daily
|
||||
scheduler forces a rebuild so this layer updates once per day instead of
|
||||
churning continuously.
|
||||
"""
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
@@ -1666,32 +1506,13 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
|
||||
sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
|
||||
if sightings is None:
|
||||
live_error: Exception | None = None
|
||||
try:
|
||||
sightings = _build_recent_uap_sightings()
|
||||
except Exception as e:
|
||||
live_error = e
|
||||
logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
|
||||
sightings = _build_uap_sightings_from_hf_mirror()
|
||||
if sightings:
|
||||
_save_nuforc_sightings_cache(sightings)
|
||||
elif live_error is not None:
|
||||
# Both paths failed: live raised AND HF fallback returned empty
|
||||
# (either the HF mirror is stale beyond the cutoff or the network
|
||||
# is gone entirely). The previous code silently set the layer to
|
||||
# ``[]`` and kept marking it fresh; that masked the failure for
|
||||
# days. Surface it via assert_canary so the health registry shows
|
||||
# the layer as broken instead of "fresh and empty".
|
||||
from services.slo import assert_canary
|
||||
assert_canary("uap_sightings", 0)
|
||||
logger.error(
|
||||
"UAP sightings: both live NUFORC and HF fallback produced 0 "
|
||||
"rows; layer is unavailable. Live error: %s",
|
||||
live_error,
|
||||
)
|
||||
|
||||
if sightings:
|
||||
sightings = _filter_uap_sightings_recent(sightings)
|
||||
|
||||
with _data_lock:
|
||||
latest_data["uap_sightings"] = sightings or []
|
||||
@@ -1699,7 +1520,6 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
_mark_fresh("uap_sightings")
|
||||
return
|
||||
|
||||
# Unreachable legacy Mapbox tilequery path (kept for reference).
|
||||
cutoff = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
|
||||
# Query the grid concurrently (up to 8 threads)
|
||||
|
||||
@@ -1,148 +0,0 @@
|
||||
"""Per-aircraft observation tracking for cumulative fuel/CO2 estimates.
|
||||
|
||||
Background
|
||||
----------
|
||||
The pre-existing emissions enrichment attached a *rate* to each flight
|
||||
(GPH and kg/hr) based on aircraft model. Users — reasonably — wanted the
|
||||
running total: how much fuel HAS this plane burned since we started
|
||||
seeing it? Multiplying the rate by elapsed observation time gets us
|
||||
there, but it requires somewhere to remember "when did this icao24
|
||||
first appear on our radar?"
|
||||
|
||||
Why this lives outside ``flight_trails``
|
||||
----------------------------------------
|
||||
``flight_trails`` is sized and pruned aggressively for map rendering
|
||||
(5-minute TTL for untracked aircraft, 200 trail points max). That's
|
||||
wrong for cumulative burn: if a plane has been airborne 2 hours but
|
||||
its trail was pruned 30 min in, the "first trail point" timestamp is
|
||||
30 min ago, not 2h ago. Worse, when the trail expires and re-creates,
|
||||
the cumulative counter would reset mid-flight.
|
||||
|
||||
This module tracks observation lifecycle separately:
|
||||
|
||||
* When a hex is first observed: start a new flight session.
|
||||
* While observed regularly (gap < ``REOPEN_GAP_S``): keep accumulating.
|
||||
* When unseen for longer than ``REOPEN_GAP_S``: treat next sighting as
|
||||
a new session (the plane landed and took off again, or it's a
|
||||
different leg). Reset ``first_seen_at``.
|
||||
* Sessions unseen for longer than ``PRUNE_AFTER_S`` are dropped by ``prune()`` so memory stays
|
||||
bounded.
|
||||
|
||||
The user explicitly asked for this counting semantic: "as soon as a
|
||||
plane appears there should be a counter that keeps a running count of
|
||||
the fuel being burned... If there is no estimate take off time then it
|
||||
can just be from the time the server starts to keep a log of whats in
|
||||
the air."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
# Gap between sightings that resets the session. ADS-B refreshes the
|
||||
# whole aircraft list every minute or two, so anything over a few
|
||||
# minutes means the plane left our coverage window (landed, transit
|
||||
# through dead zone, etc). 15 minutes is conservative.
|
||||
REOPEN_GAP_S = 15 * 60
|
||||
|
||||
# Don't accumulate runaway memory: drop entries unseen for an hour.
|
||||
PRUNE_AFTER_S = 60 * 60
|
||||
|
||||
# Cap on accumulated airtime per session so a single bug elsewhere
|
||||
# (e.g. ts clock skew) can't produce comically large numbers.
|
||||
MAX_SESSION_SECONDS = 24 * 3600 # 24h — longest realistic civilian leg
|
||||
|
||||
|
||||
_observations: dict[str, dict[str, float]] = {}
|
||||
_lock = threading.Lock()
|
||||
_last_prune_at = 0.0
|
||||
|
||||
|
||||
def record_observation(icao_hex: str, *, now: float | None = None) -> int:
    """Register a sighting of ``icao_hex`` and report the session's airtime.

    Args:
        icao_hex: Aircraft identifier; it is stripped and lower-cased before
            being used as the session key.
        now: Timestamp in seconds to use instead of ``time.time()``.

    Returns:
        Whole seconds between the session's first sighting and this one,
        clamped to ``[0, MAX_SESSION_SECONDS]``. Returns 0 when the identifier
        is empty, when this is the first sighting, or when the gap since the
        previous sighting exceeds ``REOPEN_GAP_S`` (a new session is started).
        Multiply by ``rate_per_hour / 3600`` to get cumulative consumption.
    """
    key = str(icao_hex).strip().lower() if icao_hex else ""
    if not key:
        return 0
    current = float(time.time() if now is None else now)

    with _lock:
        session = _observations.get(key)
        if session is not None:
            # Compare against None explicitly: 0.0 is a valid timestamp but
            # is falsy, so an ``or`` fallback would silently discard it.
            last_raw = session.get("last_seen_at")
            last_seen = current if last_raw is None else float(last_raw)
            if not (current - last_seen > REOPEN_GAP_S):
                first_raw = session.get("first_seen_at")
                first_seen = current if first_raw is None else float(first_raw)
                # Clamp so clock skew or bad input cannot yield a negative or
                # absurdly large airtime.
                airtime = max(0, min(int(current - first_seen), MAX_SESSION_SECONDS))
                session["last_seen_at"] = current
                return airtime
        # Either never seen before, or unseen for longer than REOPEN_GAP_S:
        # open a fresh session starting now.
        _observations[key] = {"first_seen_at": current, "last_seen_at": current}
        return 0
|
||||
|
||||
|
||||
def prune(*, now: float | None = None) -> int:
    """Remove sessions whose last sighting is older than ``PRUNE_AFTER_S``.

    Args:
        now: Timestamp in seconds to use instead of ``time.time()``.

    Returns:
        The number of sessions removed. The work is one pass over the
        observation table while holding the module lock.
    """
    current = float(time.time() if now is None else now)
    with _lock:
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated. A missing ``last_seen_at`` counts as 0.0.
        expired = [
            key
            for key, session in _observations.items()
            if current
            - (0.0 if (raw := session.get("last_seen_at")) is None else float(raw))
            > PRUNE_AFTER_S
        ]
        for key in expired:
            del _observations[key]
    return len(expired)
|
||||
|
||||
|
||||
def get_session_seconds(icao_hex: str, *, now: float | None = None) -> int:
    """Return the current session airtime for ``icao_hex`` without mutating it.

    Unlike ``record_observation`` this never updates ``last_seen_at`` and
    never starts or resets a session.

    Args:
        icao_hex: Aircraft identifier; stripped and lower-cased for lookup.
        now: Timestamp in seconds to use instead of ``time.time()``.

    Returns:
        Whole seconds since the session's ``first_seen_at``, clamped to
        ``[0, MAX_SESSION_SECONDS]``; 0 for an empty or unknown identifier.
    """
    if not icao_hex:
        return 0
    lookup_key = str(icao_hex).strip().lower()
    with _lock:
        session = _observations.get(lookup_key)
        if session is None:
            return 0
        current = float(time.time() if now is None else now)
        first_raw = session.get("first_seen_at")
        # Explicit None test so a stored 0.0 timestamp is honoured.
        first_seen = current if first_raw is None else float(first_raw)
        airtime = int(current - first_seen)
        return max(0, min(airtime, MAX_SESSION_SECONDS))
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
    """Empty the observation table under the module lock (test helper only)."""
    with _lock:
        _observations.clear()
|
||||
@@ -17,7 +17,6 @@ from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.retry import with_retry
|
||||
from services.fetchers.route_database import lookup_route
|
||||
from services.fetchers.aircraft_database import lookup_aircraft_type
|
||||
@@ -30,88 +29,6 @@ _RE_AIRLINE_CODE_1 = re.compile(r"^([A-Z]{3})\d")
|
||||
_RE_AIRLINE_CODE_2 = re.compile(r"^([A-Z]{3})[A-Z\d]")
|
||||
|
||||
|
||||
def detect_gps_jamming_zones(
    raw_flights: list[dict],
    *,
    min_aircraft: int | None = None,
    min_ratio: float | None = None,
    nacp_threshold: int | None = None,
) -> list[dict]:
    """Detect GPS interference zones from a snapshot of raw ADS-B aircraft.

    Aircraft are binned into 1°x1° grid cells and a cell is flagged when the
    share of aircraft reporting degraded NACp exceeds a threshold (the
    original author describes this as mirroring GPSJam.org / Flightradar24).

    Cells are indexed with ``math.floor`` so that every cell is exactly one
    degree wide and its reported centre lies inside it. Truncating with
    ``int()`` merged the cells on either side of the equator / prime meridian
    and placed the centre of every negative cell one degree off.

    Args:
        raw_flights: Iterable of dicts carrying ``lat``, ``lng`` (or ``lon``)
            and ``nac_p``. Records missing a position or ``nac_p``, or whose
            values are non-numeric or non-finite, are skipped: absent data is
            not evidence of anything. ``None`` is treated as an empty list.
        min_aircraft: Minimum aircraft per cell for the cell to be considered.
        min_ratio: The adjusted degraded ratio must be strictly greater than
            this for the cell to be reported.
        nacp_threshold: ``nac_p`` values strictly below this are "degraded".
            ``nac_p == 0`` counts as degraded (it is not filtered out).

        Each threshold falls back to its ``GPS_JAMMING_*`` module constant
        when left as ``None``.

    Returns:
        One dict per flagged cell with keys ``lat``/``lng`` (cell centre),
        ``severity`` (``"low"`` below 0.5, ``"medium"`` below 0.75, otherwise
        ``"high"``), ``ratio`` (rounded to 2 places), ``degraded`` (raw,
        unadjusted count) and ``total``.
    """
    import math  # function-scope import keeps the block self-contained

    min_aircraft = GPS_JAMMING_MIN_AIRCRAFT if min_aircraft is None else int(min_aircraft)
    min_ratio = GPS_JAMMING_MIN_RATIO if min_ratio is None else float(min_ratio)
    nacp_threshold = (
        GPS_JAMMING_NACP_THRESHOLD if nacp_threshold is None else int(nacp_threshold)
    )

    # (floor(lat), floor(lng)) -> per-cell counters
    jamming_grid: dict[tuple[int, int], dict[str, int]] = {}
    for rf in raw_flights or []:
        rlat = rf.get("lat")
        # ``is not None`` rather than ``or``: a longitude of 0 is valid.
        rlng = rf.get("lng") if rf.get("lng") is not None else rf.get("lon")
        nacp = rf.get("nac_p")
        if rlat is None or rlng is None or nacp is None:
            continue
        try:
            lat_f, lng_f, nacp_f = float(rlat), float(rlng), float(nacp)
        except (TypeError, ValueError):
            continue  # malformed record: skip it rather than abort the scan
        if not (math.isfinite(lat_f) and math.isfinite(lng_f) and math.isfinite(nacp_f)):
            continue
        cell = jamming_grid.setdefault(
            (math.floor(lat_f), math.floor(lng_f)), {"degraded": 0, "total": 0}
        )
        cell["total"] += 1
        if nacp_f < nacp_threshold:
            cell["degraded"] += 1

    jamming_zones: list[dict] = []
    for (lat_i, lng_i), counts in jamming_grid.items():
        if counts["total"] < min_aircraft:
            continue
        # Discount one degraded aircraft per cell so a single quirky
        # transponder cannot flag an entire zone.
        adjusted_degraded = max(counts["degraded"] - 1, 0)
        if adjusted_degraded == 0:
            continue
        ratio = adjusted_degraded / counts["total"]
        if ratio > min_ratio:
            severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
            jamming_zones.append(
                {
                    "lat": lat_i + 0.5,
                    "lng": lng_i + 0.5,
                    "severity": severity,
                    "ratio": round(ratio, 2),
                    "degraded": counts["degraded"],
                    "total": counts["total"],
                }
            )
    return jamming_zones
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenSky Network API Client (OAuth2)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -542,18 +459,6 @@ def _classify_and_publish(all_adsb_flights):
|
||||
|
||||
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
||||
|
||||
# Source attribution: prefer the explicit ``source`` tag stamped
|
||||
# at fetch time (adsb.lol, OpenSky). If absent, fall back to the
|
||||
# legacy ``supplemental_source`` (airplanes.live, adsb.fi) so
|
||||
# supplementals are still attributed without changing their
|
||||
# tagger. Final fallback "adsb.lol" preserves prior behavior for
|
||||
# any caller that synthesizes records without going through one
|
||||
# of our fetchers (e.g. tests).
|
||||
source = (
|
||||
f.get("source")
|
||||
or f.get("supplemental_source")
|
||||
or "adsb.lol"
|
||||
)
|
||||
flights.append(
|
||||
{
|
||||
"callsign": flight_str,
|
||||
@@ -575,7 +480,6 @@ def _classify_and_publish(all_adsb_flights):
|
||||
"airline_code": airline_code,
|
||||
"aircraft_category": ac_category,
|
||||
"nac_p": f.get("nac_p"),
|
||||
"source": source,
|
||||
}
|
||||
)
|
||||
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
||||
@@ -602,22 +506,6 @@ def _classify_and_publish(all_adsb_flights):
|
||||
if model:
|
||||
emi = get_emissions_info(model)
|
||||
if emi:
|
||||
# Cumulative fuel/CO2: multiply the per-hour rate by how
|
||||
# long we've been observing this airframe. Users want to
|
||||
# see the *amount* burned, not just the rate. If we've
|
||||
# never seen this hex before, observed_seconds is 0 and
|
||||
# the cumulative values are 0 until the next refresh —
|
||||
# the rate is still useful info on its own.
|
||||
observed_seconds = _record_flight_observation(
|
||||
f.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emi = {
|
||||
**emi,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emi["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emi["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
f["emissions"] = emi
|
||||
|
||||
callsign = f.get("callsign", "").strip().upper()
|
||||
@@ -836,8 +724,56 @@ def _classify_and_publish(all_adsb_flights):
|
||||
latest_data["military_flights"] = military_snapshot
|
||||
|
||||
# --- GPS Jamming Detection ---
|
||||
# Uses NACp (Navigation Accuracy Category – Position) from ADS-B to infer
|
||||
# GPS interference zones, similar to GPSJam.org / Flightradar24.
|
||||
# NACp < 8 = position accuracy worse than the FAA-mandated 0.05 NM.
|
||||
#
|
||||
# Denoising (to suppress false positives from old GA transponders):
|
||||
# 1. Skip nac_p == 0 ("unknown accuracy") — old transponders that never
|
||||
# computed accuracy, NOT evidence of jamming. Real jamming shows 1-7.
|
||||
# 2. Require minimum aircraft per grid cell for statistical validity.
|
||||
# 3. Subtract 1 from degraded count per cell (GPSJam's technique) so a
|
||||
# single quirky transponder can't flag an entire zone.
|
||||
# 4. Require the adjusted ratio to exceed the threshold.
|
||||
try:
|
||||
jamming_zones = detect_gps_jamming_zones(raw_flights_snapshot)
|
||||
jamming_grid = {}
|
||||
raw_flights = raw_flights_snapshot
|
||||
for rf in raw_flights:
|
||||
rlat = rf.get("lat")
|
||||
rlng = rf.get("lng") or rf.get("lon")
|
||||
if rlat is None or rlng is None:
|
||||
continue
|
||||
nacp = rf.get("nac_p")
|
||||
if nacp is None or nacp == 0:
|
||||
continue
|
||||
grid_key = f"{int(rlat)},{int(rlng)}"
|
||||
if grid_key not in jamming_grid:
|
||||
jamming_grid[grid_key] = {"degraded": 0, "total": 0}
|
||||
jamming_grid[grid_key]["total"] += 1
|
||||
if nacp < GPS_JAMMING_NACP_THRESHOLD:
|
||||
jamming_grid[grid_key]["degraded"] += 1
|
||||
|
||||
jamming_zones = []
|
||||
for gk, counts in jamming_grid.items():
|
||||
if counts["total"] < GPS_JAMMING_MIN_AIRCRAFT:
|
||||
continue
|
||||
adjusted_degraded = max(counts["degraded"] - 1, 0)
|
||||
if adjusted_degraded == 0:
|
||||
continue
|
||||
ratio = adjusted_degraded / counts["total"]
|
||||
if ratio > GPS_JAMMING_MIN_RATIO:
|
||||
lat_i, lng_i = gk.split(",")
|
||||
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
|
||||
jamming_zones.append(
|
||||
{
|
||||
"lat": int(lat_i) + 0.5,
|
||||
"lng": int(lng_i) + 0.5,
|
||||
"severity": severity,
|
||||
"ratio": round(ratio, 2),
|
||||
"degraded": counts["degraded"],
|
||||
"total": counts["total"],
|
||||
}
|
||||
)
|
||||
with _data_lock:
|
||||
latest_data["gps_jamming"] = jamming_zones
|
||||
if jamming_zones:
|
||||
@@ -913,15 +849,7 @@ def _fetch_adsb_lol_regions():
|
||||
res = fetch_with_curl(url, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
aircraft = data.get("ac", [])
|
||||
# Stamp the source at the fetch site so attribution survives
|
||||
# the OpenSky/supplemental dedupe-by-hex merge downstream.
|
||||
# Previously adsb.lol records carried no marker while OpenSky
|
||||
# records got ``is_opensky: True`` — which made flight tooltips
|
||||
# look like everything came from OpenSky.
|
||||
for a in aircraft:
|
||||
a["source"] = "adsb.lol"
|
||||
return aircraft
|
||||
return data.get("ac", [])
|
||||
except (
|
||||
requests.RequestException,
|
||||
ConnectionError,
|
||||
@@ -1004,7 +932,6 @@ def _enrich_with_opensky_and_supplemental(adsb_flights):
|
||||
"gs": (s[9] * 1.94384) if s[9] else 0,
|
||||
"t": "Unknown",
|
||||
"is_opensky": True,
|
||||
"source": "OpenSky",
|
||||
}
|
||||
)
|
||||
elif os_res.status_code == 429:
|
||||
|
||||
@@ -20,9 +20,17 @@ def _env_flag(name: str) -> str:
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
|
||||
from services.liveuamap_settings import liveuamap_scraper_enabled as _enabled
|
||||
"""Return whether the Playwright-based LiveUAMap scraper should run.
|
||||
|
||||
return _enabled()
|
||||
It is useful enrichment, but it starts a browser/Node driver and must not be
|
||||
allowed to destabilize Windows local startup.
|
||||
"""
|
||||
setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
|
||||
if setting in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if setting in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
return os.name != "nt"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -202,17 +210,10 @@ def update_liveuamap():
|
||||
if not is_any_active("global_incidents"):
|
||||
return
|
||||
if not liveuamap_scraper_enabled():
|
||||
from services.liveuamap_settings import liveuamap_requires_ui_opt_in
|
||||
|
||||
if liveuamap_requires_ui_opt_in():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled: enable Global Incidents in the UI to "
|
||||
"consent, or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1."
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled; set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled for this runtime; set "
|
||||
"SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
return
|
||||
logger.info("Running scheduled Liveuamap scraper...")
|
||||
try:
|
||||
|
||||
@@ -6,7 +6,7 @@ import heapq
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
|
||||
@@ -29,7 +29,7 @@ def _geocode_region(region_name: str, country_name: str) -> tuple:
|
||||
|
||||
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
||||
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": outbound_user_agent("infrastructure-data")})
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
if response.status_code == 200:
|
||||
results = response.json()
|
||||
if results:
|
||||
|
||||
@@ -188,16 +188,11 @@ def fetch_meshtastic_nodes():
|
||||
callsign = ""
|
||||
|
||||
send_callsign_header = str(
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")
|
||||
).strip().lower() in {"1", "true", "yes", "on"}
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||
|
||||
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||
# The optional Meshtastic callsign is appended as additional context so
|
||||
# meshtastic.liamcottle.net's operator can identify both the install AND
|
||||
# the registered radio operator (when MESHTASTIC_OPERATOR_CALLSIGN is set
|
||||
# and MESHTASTIC_SEND_CALLSIGN_HEADER is true; see issue #203).
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua_base = f"{outbound_user_agent('meshtastic-map')}; 24h polling"
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
|
||||
if callsign and send_callsign_header:
|
||||
user_agent = f"{ua_base}; node={callsign}"
|
||||
else:
|
||||
|
||||
@@ -7,7 +7,6 @@ import requests
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert
|
||||
|
||||
logger = logging.getLogger("services.data_fetcher")
|
||||
@@ -172,7 +171,6 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "adsb.lol"
|
||||
all_mil_ac.append(a)
|
||||
except Exception as e:
|
||||
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
||||
@@ -184,7 +182,6 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "airplanes.live"
|
||||
all_mil_ac.append(a)
|
||||
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
||||
except Exception as e:
|
||||
@@ -237,7 +234,6 @@ def fetch_military_flights():
|
||||
"registration": f.get("r", "N/A"),
|
||||
"icao24": icao_hex,
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -262,8 +258,7 @@ def fetch_military_flights():
|
||||
"model": f.get("t", "Unknown"),
|
||||
"icao24": icao_hex,
|
||||
"speed_knots": speed_knots,
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
"squawk": f.get("squawk", "")
|
||||
})
|
||||
except Exception as loop_e:
|
||||
logger.error(f"Mil flight interpolation error: {loop_e}")
|
||||
@@ -301,18 +296,6 @@ def fetch_military_flights():
|
||||
if model:
|
||||
emissions = get_emissions_info(model)
|
||||
if emissions:
|
||||
# Cumulative fuel/CO2 since first observation — mirrors
|
||||
# the civilian path in flights._classify_and_publish.
|
||||
observed_seconds = _record_flight_observation(
|
||||
mf.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emissions = {
|
||||
**emissions,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emissions["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emissions["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
mf["emissions"] = emissions
|
||||
if mf.get("alert_category"):
|
||||
mf["type"] = "tracked_flight"
|
||||
|
||||
@@ -9,7 +9,6 @@ import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from urllib.parse import urlencode
|
||||
@@ -22,34 +21,23 @@ _prev_probabilities: dict[str, float] = {}
|
||||
_market_cache = TTLCache(maxsize=1, ttl=300)
|
||||
_POLYMARKET_PAGE_DELAY_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_S", "0.02"))
|
||||
_KALSHI_PAGE_DELAY_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_S", "0.08"))
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_JITTER_S", "0.08"))
|
||||
_KALSHI_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_JITTER_S", "0.2"))
|
||||
# Random delay before each full Polymarket+Kalshi cycle (decorrelates from other slow-tier jobs).
|
||||
_PRE_FETCH_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PRE_FETCH_JITTER_S", "90"))
|
||||
# Random pause between finishing Polymarket pagination and starting Kalshi.
|
||||
_PROVIDER_GAP_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S", "45"))
|
||||
_provider_pace_lock = threading.Lock()
|
||||
_provider_last_request_at: dict[str, float] = {}
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
|
||||
"""Return True when UI opt-in or PREDICTION_MARKETS_ENABLED enables pulls."""
|
||||
from services.prediction_markets_settings import prediction_markets_fetch_enabled as _enabled
|
||||
|
||||
return _enabled()
|
||||
"""Return True only when the operator explicitly opts into Polymarket/Kalshi pulls."""
|
||||
return str(os.environ.get("PREDICTION_MARKETS_ENABLED", "")).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
|
||||
def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
if min_interval_s <= 0:
|
||||
return
|
||||
jitter_s = (
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S
|
||||
if provider == "polymarket"
|
||||
else _KALSHI_PAGE_DELAY_JITTER_S
|
||||
if provider == "kalshi"
|
||||
else 0.0
|
||||
)
|
||||
min_interval_s += random.uniform(0.0, jitter_s) if jitter_s > 0 else 0.0
|
||||
with _provider_pace_lock:
|
||||
now = time.monotonic()
|
||||
wait_s = min_interval_s - (now - _provider_last_request_at.get(provider, 0.0))
|
||||
@@ -59,24 +47,6 @@ def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
_provider_last_request_at[provider] = now
|
||||
|
||||
|
||||
def _sleep_random_jitter(max_delay_s: float, label: str) -> None:
    """Sleep for a uniformly random duration between 0 and ``max_delay_s``.

    Args:
        max_delay_s: Upper bound of the delay in seconds. A value of zero
            or less disables the delay entirely.
        label: Short name of the jitter kind, used only in the debug log.
    """
    if max_delay_s <= 0:
        return
    delay = random.uniform(0.0, max_delay_s)
    # Sub-second delays are not worth a log line.
    if delay >= 1.0:
        logger.debug("Prediction markets: %s jitter %.1fs", label, delay)
    time.sleep(delay)


def _apply_pre_fetch_jitter() -> None:
    """Pause a random time (up to ``_PRE_FETCH_JITTER_S``) before a fetch cycle."""
    _sleep_random_jitter(_PRE_FETCH_JITTER_S, "pre-fetch")


def _apply_provider_gap_jitter() -> None:
    """Pause a random time (up to ``_PROVIDER_GAP_JITTER_S``) between providers."""
    _sleep_random_jitter(_PROVIDER_GAP_JITTER_S, "provider gap")
|
||||
|
||||
|
||||
def _finite_or_none(value):
|
||||
try:
|
||||
n = float(value)
|
||||
@@ -780,9 +750,7 @@ def _merge_markets(poly_events: list[dict], kalshi_events: list[dict]) -> list[d
|
||||
@cached(_market_cache)
|
||||
def fetch_prediction_markets_raw() -> list[dict]:
|
||||
"""Fetch and merge prediction markets from both sources. Cached 5 min."""
|
||||
_apply_pre_fetch_jitter()
|
||||
poly = _fetch_polymarket_events()
|
||||
_apply_provider_gap_jitter()
|
||||
kalshi = _fetch_kalshi_events()
|
||||
merged = _merge_markets(poly, kalshi)
|
||||
logger.info(
|
||||
|
||||
@@ -17,12 +17,6 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _route_db_user_agent() -> str:
    """Return the User-Agent built by ``outbound_user_agent`` for "route-database"."""
    # The import is resolved at call time rather than at module import.
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("route-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
||||
@@ -30,6 +24,8 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_HTTP_TIMEOUT_S = 60
|
||||
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
||||
_airports_by_icao: dict[str, dict[str, Any]] = {}
|
||||
@@ -41,7 +37,7 @@ def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
||||
response = requests.get(
|
||||
url,
|
||||
timeout=_HTTP_TIMEOUT_S,
|
||||
headers={"User-Agent": _route_db_user_agent(), "Accept-Encoding": "gzip"},
|
||||
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
text = gzip.decompress(response.content).decode("utf-8-sig")
|
||||
|
||||
@@ -10,12 +10,6 @@ from datetime import datetime, timezone
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _trains_user_agent() -> str:
    """Return the User-Agent built by ``outbound_user_agent`` for "trains"."""
    # The import is resolved at call time rather than at module import.
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("trains")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EARTH_RADIUS_KM = 6371.0
|
||||
@@ -385,7 +379,7 @@ def _fetch_digitraffic() -> list[dict]:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept-Encoding": "gzip",
|
||||
"User-Agent": _trains_user_agent(),
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
|
||||
@@ -1,457 +0,0 @@
|
||||
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||
position publication.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||
- low-precision (audit issue #245 — false precision from text mentions),
|
||||
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||
certain network paths, including Docker Desktop on some Windows hosts).
|
||||
|
||||
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||
carrier is operating. The article body uses extremely consistent phrasing:
|
||||
|
||||
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||
Yokosuka, Japan."
|
||||
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||
|
||||
Those are deterministic to parse. This module:
|
||||
|
||||
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||
site-wide feed often has fresher posts before the category feed
|
||||
catches up, so we union them.
|
||||
2. Picks the most recent post by parsed ``pubDate``.
|
||||
3. For each carrier in the registry, scans the article body for a
|
||||
"is operating in / is in port in / departed from" pattern near
|
||||
the carrier's name.
|
||||
4. Maps the extracted region phrase to coordinates via the carrier
|
||||
tracker's existing REGION_COORDS.
|
||||
|
||||
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||
with ``position_source_at`` set to the article's actual publication
|
||||
timestamp (not ``now()``).
|
||||
|
||||
Politeness
|
||||
----------
|
||||
We send the per-install operator handle via ``outbound_user_agent``
|
||||
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||
but WordPress RSS feeds are open and serve the full article in
|
||||
``<content:encoded>`` — that's the supported path for aggregators and
|
||||
the one we use. We do not spoof browser headers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Iterable
|
||||
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_RSS_URLS: tuple[str, ...] = (
|
||||
# Site-wide feed often has the freshest posts before the category
|
||||
# feed catches up. We try this first.
|
||||
"https://news.usni.org/feed",
|
||||
# Category feed has older fleet trackers for backfill.
|
||||
"https://news.usni.org/category/fleet-tracker/feed",
|
||||
)
|
||||
|
||||
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}
|
||||
|
||||
_FLEET_TRACKER_TITLE_RE = re.compile(
|
||||
r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
|
||||
)
|
||||
|
||||
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
|
||||
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
text = _TAG_STRIP_RE.sub(" ", html or "")
|
||||
return _WHITESPACE_RE.sub(" ", text).strip()
|
||||
|
||||
|
||||
def _request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with each USNI RSS request.

    The ``User-Agent`` comes from ``outbound_user_agent`` (no browser
    spoofing), ``Accept`` prefers RSS/XML with a low-priority wildcard
    fallback, and ``Referer`` points at the fleet-tracker category page.
    """
    user_agent = outbound_user_agent("usni-fleet-tracker")
    return {
        "User-Agent": user_agent,
        "Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.1",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://news.usni.org/category/fleet-tracker",
    }
|
||||
|
||||
|
||||
def _parse_pubdate(raw: str) -> datetime | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
dt = parsedate_to_datetime(raw)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
    """Collect the fleet-tracker posts found in the given RSS feeds.

    Feeds are fetched in the order given. A feed that raises, returns a
    non-200 status, has an empty body, or is not well-formed XML is
    skipped. Only items whose title matches ``_FLEET_TRACKER_TITLE_RE``
    are kept, and the first item seen for a given link wins.

    Returns:
        A list of dicts with keys ``title``, ``link``, ``pub_date``
        (a datetime, or ``None`` when unparseable) and ``body`` (plain
        text taken from ``content:encoded``, else ``description``).
    """
    collected: dict[str, dict] = {}
    for feed_url in rss_urls:
        try:
            resp = fetch_with_curl(feed_url, timeout=15, headers=_request_headers())
        except Exception as exc:
            logger.debug("USNI RSS %s exception: %s", feed_url, exc)
            continue

        if not resp or resp.status_code != 200 or not resp.text:
            logger.debug(
                "USNI RSS %s returned status=%s body=%d",
                feed_url,
                getattr(resp, "status_code", "?"),
                len(getattr(resp, "text", "") or ""),
            )
            continue

        try:
            root = ET.fromstring(resp.text)
        except ET.ParseError as exc:
            logger.warning("USNI RSS parse error from %s: %s", feed_url, exc)
            continue

        for entry in root.findall(".//item"):
            headline = (entry.findtext("title") or "").strip()
            if _FLEET_TRACKER_TITLE_RE.search(headline) is None:
                continue

            article_url = (entry.findtext("link") or "").strip()
            if not article_url or article_url in collected:
                continue

            # Prefer the full article body; fall back to the summary.
            encoded = entry.findtext("content:encoded", default="", namespaces=_RSS_NS)
            raw_body = encoded or entry.findtext("description", default="") or ""

            collected[article_url] = {
                "title": headline,
                "link": article_url,
                "pub_date": _parse_pubdate(entry.findtext("pubDate") or ""),
                "body": _strip_html(raw_body),
            }
    return list(collected.values())
|
||||
|
||||
|
||||
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||
# The carrier_tracker table already covers most named bodies of water and
|
||||
# major ports — we just need to teach this module to RECOGNIZE the
|
||||
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||
# the same body of water differently.
|
||||
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||
("eastern mediterranean", "eastern mediterranean"),
|
||||
("western mediterranean", "western mediterranean"),
|
||||
("mediterranean sea", "mediterranean"),
|
||||
("the mediterranean", "mediterranean"),
|
||||
("red sea", "red sea"),
|
||||
("arabian sea area of responsibility", "arabian sea"),
|
||||
("north arabian sea", "north arabian sea"),
|
||||
("arabian sea", "arabian sea"),
|
||||
("persian gulf", "persian gulf"),
|
||||
("gulf of oman", "gulf of oman"),
|
||||
("strait of hormuz", "strait of hormuz"),
|
||||
("south china sea", "south china sea"),
|
||||
("east china sea", "east china sea"),
|
||||
("philippine sea", "philippine sea"),
|
||||
("sea of japan", "sea of japan"),
|
||||
("taiwan strait", "taiwan strait"),
|
||||
("western pacific", "western pacific"),
|
||||
("pacific ocean", "pacific"),
|
||||
("indian ocean", "indian ocean"),
|
||||
("north atlantic", "north atlantic"),
|
||||
("western atlantic", "atlantic"),
|
||||
("eastern atlantic", "atlantic"),
|
||||
("atlantic ocean", "atlantic"),
|
||||
("gulf of aden", "gulf of aden"),
|
||||
("horn of africa", "horn of africa"),
|
||||
("bab el-mandeb", "bab el-mandeb"),
|
||||
("suez canal", "suez canal"),
|
||||
("baltic sea", "baltic sea"),
|
||||
("north sea", "north sea"),
|
||||
("black sea", "black sea"),
|
||||
("south atlantic", "south atlantic"),
|
||||
("coral sea", "coral sea"),
|
||||
("gulf of mexico", "gulf of mexico"),
|
||||
("caribbean sea", "caribbean"),
|
||||
("caribbean", "caribbean"),
|
||||
# Specific ports
|
||||
("naval station norfolk", "norfolk"),
|
||||
("norfolk naval shipyard", "newport news"),
|
||||
("newport news shipbuilding", "newport news"),
|
||||
("newport news", "newport news"),
|
||||
# USNI tags Norfolk mentions with state suffix; match both.
|
||||
("norfolk, va", "norfolk"),
|
||||
("norfolk", "norfolk"),
|
||||
("naval station everett", "puget sound"),
|
||||
("naval base kitsap", "bremerton"),
|
||||
("bremerton", "bremerton"),
|
||||
("puget sound", "puget sound"),
|
||||
("naval base san diego", "san diego"),
|
||||
("san diego, calif", "san diego"),
|
||||
("san diego", "san diego"),
|
||||
("yokosuka, japan", "yokosuka"),
|
||||
("yokosuka", "yokosuka"),
|
||||
("pearl harbor", "pearl harbor"),
|
||||
("apra harbor, guam", "guam"),
|
||||
("guam", "guam"),
|
||||
("bahrain", "bahrain"),
|
||||
("naval station rota", "rota"),
|
||||
("rota, spain", "rota"),
|
||||
("naples, italy", "naples"),
|
||||
# Fleets / AORs
|
||||
("5th fleet", "5th fleet"),
|
||||
("6th fleet", "6th fleet"),
|
||||
("7th fleet", "7th fleet"),
|
||||
("3rd fleet", "3rd fleet"),
|
||||
("2nd fleet", "2nd fleet"),
|
||||
("centcom", "centcom"),
|
||||
("indo-pacific command", "indopacom"),
|
||||
("eucom", "eucom"),
|
||||
("southcom", "southcom"),
|
||||
)
|
||||
|
||||
|
||||
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
|
||||
"""Map a USNI region phrase to a ``(canonical_key, display)`` tuple,
|
||||
or ``None`` if we don't recognize it.
|
||||
|
||||
``canonical_key`` is what ``carrier_tracker.REGION_COORDS`` keys on.
|
||||
``display`` is the phrase we'll show in the dossier description.
|
||||
"""
|
||||
p = (phrase or "").lower().strip()
|
||||
if not p:
|
||||
return None
|
||||
for usni_phrase, canonical in _USNI_REGION_ALIASES:
|
||||
if usni_phrase in p:
|
||||
return canonical, usni_phrase
|
||||
return None
|
||||
|
||||
|
||||
# Operating-verb phrases USNI uses, with a capture group for the region
|
||||
# phrase that immediately follows. Each pattern is designed to swallow
|
||||
# the optional editorial filler that often appears between verb and
|
||||
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
|
||||
# filler; "Norfolk" is the location).
|
||||
#
|
||||
# Order matters: most-specific patterns first, so e.g. "is in port in"
|
||||
# wins over the generic "is".
|
||||
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?" # optional "Friday" / "Monday" / etc.
|
||||
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"
|
||||
|
||||
_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
|
||||
# "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
|
||||
re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is conducting <stuff> in [the] {REGION}"
|
||||
re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port in {LOCATION}"
|
||||
re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port" (no location — degenerate, use carrier's homeport via separate path)
|
||||
# → not captured here; falls through to homeport
|
||||
# "is underway in [the] {REGION}"
|
||||
re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is deployed to [the] {REGION}" / "deployed in"
|
||||
re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "arrived [Day] in/at {LOCATION}"
|
||||
re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "departed [Day] from {LOCATION}"
|
||||
re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "transiting [the] {REGION}" / "sailing through [the] {REGION}"
|
||||
re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is homeported at {LOCATION}"
|
||||
re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
)
|
||||
|
||||
|
||||
def _extract_region_for_carrier(
|
||||
body: str,
|
||||
carrier_names: list[str],
|
||||
hull_code: str,
|
||||
) -> str | None:
|
||||
"""Return the best-guess region phrase for one carrier from the
|
||||
article body, or None if no confident match.
|
||||
|
||||
Algorithm:
|
||||
1. Find every mention of the carrier (any name variant or the hull
|
||||
code) in the body.
|
||||
2. For each mention, look in the ~300-char window AFTER it for any
|
||||
of the operating-verb patterns.
|
||||
3. Return the first hit. If a more-confident match later turns up
|
||||
(e.g. "is operating in the X" beats "is homeported at Y"), the
|
||||
first one in document order still wins — USNI's structure puts
|
||||
the position-update sentence near the top of each carrier's
|
||||
section, and the homeport mention later.
|
||||
"""
|
||||
# Build a master mention regex covering every name variant + the hull.
|
||||
candidates: list[str] = []
|
||||
for name in carrier_names:
|
||||
if name and len(name) >= 4:
|
||||
candidates.append(re.escape(name))
|
||||
if hull_code:
|
||||
candidates.append(re.escape(hull_code))
|
||||
if not candidates:
|
||||
return None
|
||||
mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)
|
||||
|
||||
window_chars = 320
|
||||
seen_phrases: list[str] = []
|
||||
for mention in mention_re.finditer(body):
|
||||
end = mention.end()
|
||||
window = body[end : end + window_chars]
|
||||
# Cut window at the next sentence break for tighter context.
|
||||
# (We use the LAST period within the window so "Norfolk, Va." isn't
|
||||
# confused for a sentence end — USNI uses ", Va." prolifically.)
|
||||
# Sentence break candidates: ". " followed by uppercase OR newline.
|
||||
sent_break = re.search(r"[\.!?]\s+[A-Z]", window)
|
||||
if sent_break:
|
||||
window = window[: sent_break.start() + 1]
|
||||
# Try patterns in priority order.
|
||||
for pat in _OPERATING_PATTERNS:
|
||||
m = pat.search(window)
|
||||
if not m:
|
||||
continue
|
||||
phrase = m.group(1).strip().rstrip(",.;: ")
|
||||
if not phrase:
|
||||
continue
|
||||
# Strip trailing editorial filler — USNI often writes
|
||||
# "Norfolk, Va., according to ship spotters" or
|
||||
# "Yokosuka, Japan, according to..."
|
||||
phrase = re.split(
|
||||
r",\s+(?:according|as of|for|while|where|in support|in the)",
|
||||
phrase,
|
||||
maxsplit=1,
|
||||
)[0].strip()
|
||||
seen_phrases.append(phrase)
|
||||
return phrase
|
||||
return seen_phrases[0] if seen_phrases else None
|
||||
|
||||
|
||||
def _carrier_name_variants(full_name: str) -> list[str]:
    """Return the name variants to search for, most specific first.

    For ``"USS Gerald R. Ford (CVN-78)"`` this yields
    ``["USS Gerald R. Ford", "Gerald R. Ford", "Ford"]``. Variants shorter
    than four characters are dropped; an empty name yields ``[]``.
    """
    without_hull = full_name.split("(")[0].strip()
    if not without_hull:
        return []
    # Only strip the prefix when it is really there, so a name without
    # "USS " is not truncated by four characters.
    if without_hull.upper().startswith("USS "):
        ship_only = without_hull[4:].strip()
    else:
        ship_only = without_hull
    last_word = without_hull.split()[-1]

    variants: list[str] = []
    for v in (without_hull, ship_only, last_word):
        if v and v not in variants and len(v) >= 4:
            variants.append(v)
    return variants


def _display_region(phrase: str) -> str:
    """Capitalise each word of a lowercase region phrase for display.

    Unlike ``str.title()``, a letter that follows a digit is left alone,
    so ``"5th fleet"`` becomes ``"5th Fleet"`` rather than ``"5Th Fleet"``.
    """
    return re.sub(r"(?<![A-Za-z0-9])[a-z]", lambda m: m.group(0).upper(), phrase)


def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` for the latest USNI fleet tracker.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Carriers whose section can't be parsed (e.g. an off-week with no
    mention), whose phrase matches no known region, or whose region has no
    coordinates are simply absent from the result — the caller keeps
    whatever position they had before.

    Args:
        carrier_registry: ``{hull: {"name": ...}}`` table. Defaults to
            ``carrier_tracker.CARRIER_REGISTRY`` when ``None``; an
            explicitly passed empty dict is honoured.
        region_coords: ``{region_key: (lat, lng)}`` table. Defaults to
            ``carrier_tracker.REGION_COORDS`` when ``None``.

    Returns:
        The mapping described above; empty when no feed item could be
        fetched or the latest item has no body.
    """
    if carrier_registry is None or region_coords is None:
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        # Compare against None (not truthiness) so a caller-supplied
        # empty table is not silently replaced by the production one.
        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Pick the most recent by parsed pubDate. Items without a parseable
    # date fall to the back of the list.
    items.sort(
        key=lambda it: it["pub_date"] or datetime(1970, 1, 1, tzinfo=timezone.utc),
        reverse=True,
    )
    latest = items[0]

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        variants = _carrier_name_variants(info.get("name") or "")

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{_display_region(display_phrase)} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            # Publication time of the article, not the time of this fetch.
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||
@@ -21,17 +21,9 @@ _cache_lock = threading.Lock()
|
||||
_local_search_cache: List[Dict[str, Any]] | None = None
|
||||
_local_search_lock = threading.Lock()
|
||||
|
||||
# Round 7a: per-install operator handle threads through every Nominatim
|
||||
# call. NOMINATIM_USER_AGENT env override is still honored for operators
|
||||
# who run a custom relay / known good identity, but the default uses the
|
||||
# per-install handle so OpenStreetMap can rate-limit per install instead
|
||||
# of treating "Shadowbroker" as one big offender.
|
||||
def _nominatim_user_agent() -> str:
|
||||
override = os.environ.get("NOMINATIM_USER_AGENT", "").strip()
|
||||
if override:
|
||||
return override
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("nominatim")
|
||||
_USER_AGENT = os.environ.get(
|
||||
"NOMINATIM_USER_AGENT", "ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)"
|
||||
)
|
||||
|
||||
|
||||
def _get_cache(key: str):
|
||||
@@ -186,7 +178,7 @@ def search_geocode(query: str, limit: int = 5, local_only: bool = False) -> List
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"User-Agent": _USER_AGENT,
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
@@ -249,7 +241,7 @@ def reverse_geocode(lat: float, lng: float, local_only: bool = False) -> Dict[st
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"User-Agent": _USER_AGENT,
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import zipfile
|
||||
@@ -9,62 +8,11 @@ from datetime import datetime
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _geopolitics_user_agent() -> str:
    """Return the outbound User-Agent for the GDELT geopolitics fetcher (Round 7a).

    Delegates to ``outbound_user_agent`` with the ``"geopolitics-gdelt"`` tag.
    """
    # Function-scope import, mirroring how the helper is pulled in elsewhere.
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("geopolitics-gdelt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
frontline_cache = TTLCache(maxsize=1, ttl=1800)
|
||||
|
||||
_DEFAULT_DEEPSTATE_MIRROR_REPO = "cyterat/deepstate-map-data"
|
||||
|
||||
|
||||
def _deepstate_mirror_ref() -> tuple[str, str]:
|
||||
"""Return (github_repo_slug, git_ref) for the DeepState mirror.
|
||||
|
||||
When ``DEEPSTATE_MIRROR_COMMIT`` is set, ingest is pinned to that immutable
|
||||
SHA instead of following the mutable ``main`` branch (#362).
|
||||
"""
|
||||
repo = (os.environ.get("DEEPSTATE_MIRROR_REPO") or _DEFAULT_DEEPSTATE_MIRROR_REPO).strip()
|
||||
if repo.count("/") != 1:
|
||||
repo = _DEFAULT_DEEPSTATE_MIRROR_REPO
|
||||
commit = (os.environ.get("DEEPSTATE_MIRROR_COMMIT") or "").strip()
|
||||
ref = commit if commit else "main"
|
||||
return repo, ref
|
||||
|
||||
|
||||
def _latest_deepstate_geo_path(tree_items: list) -> str | None:
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_items
|
||||
if isinstance(item, dict)
|
||||
and str(item.get("path", "")).startswith("data/deepstatemap_data_")
|
||||
and str(item.get("path", "")).endswith(".geojson")
|
||||
]
|
||||
return sorted(geo_files)[-1] if geo_files else None
|
||||
|
||||
|
||||
def _annotate_deepstate_geojson(data: dict) -> dict:
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
feature["properties"]["name"] = name_map.get(idx, "Russian-occupied areas")
|
||||
feature["properties"]["zone_id"] = idx
|
||||
return data
|
||||
|
||||
|
||||
@cached(frontline_cache)
|
||||
def fetch_ukraine_frontlines():
|
||||
@@ -72,34 +20,67 @@ def fetch_ukraine_frontlines():
|
||||
Fetches the latest GeoJSON data representing the Ukraine frontline.
|
||||
We use the cyterat/deepstate-map-data github mirror since the public API is locked.
|
||||
"""
|
||||
repo, ref = _deepstate_mirror_ref()
|
||||
try:
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror (%s @ %s)...", repo, ref)
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror...")
|
||||
|
||||
tree_url = f"https://api.github.com/repos/{repo}/git/trees/{ref}?recursive=1"
|
||||
# First, query the repo tree to find the latest file name
|
||||
tree_url = (
|
||||
"https://api.github.com/repos/cyterat/deepstate-map-data/git/trees/main?recursive=1"
|
||||
)
|
||||
res_tree = requests.get(tree_url, timeout=10)
|
||||
|
||||
if res_tree.status_code == 200:
|
||||
latest_file = _latest_deepstate_geo_path(res_tree.json().get("tree", []))
|
||||
if latest_file:
|
||||
raw_url = f"https://raw.githubusercontent.com/{repo}/{ref}/{latest_file}"
|
||||
logger.info("Downloading DeepStateMap: %s", raw_url)
|
||||
tree_data = res_tree.json().get("tree", [])
|
||||
# Filter for geojson files in data folder
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_data
|
||||
if item["path"].startswith("data/deepstatemap_data_")
|
||||
and item["path"].endswith(".geojson")
|
||||
]
|
||||
|
||||
if geo_files:
|
||||
# Get the alphabetically latest file (since it's named with YYYYMMDD)
|
||||
latest_file = sorted(geo_files)[-1]
|
||||
|
||||
raw_url = f"https://raw.githubusercontent.com/cyterat/deepstate-map-data/main/{latest_file}"
|
||||
logger.info(f"Downloading latest DeepStateMap: {raw_url}")
|
||||
|
||||
res_geo = requests.get(raw_url, timeout=20)
|
||||
if res_geo.status_code == 200:
|
||||
return _annotate_deepstate_geojson(res_geo.json())
|
||||
logger.error(
|
||||
"Failed to fetch parsed Github Raw GeoJSON: %s", res_geo.status_code
|
||||
)
|
||||
else:
|
||||
logger.error("No deepstatemap_data_*.geojson files in mirror tree at %s", ref)
|
||||
data = res_geo.json()
|
||||
|
||||
# The Cyterat GitHub mirror strips all properties and just provides a raw array of Feature polygons.
|
||||
# Based on DeepStateMap's frontend mapping, the array index corresponds to the zone type:
|
||||
# 0: Russian-occupied areas
|
||||
# 1: Russian advance
|
||||
# 2: Liberated area
|
||||
# 3: Uncontested/Crimea (often folded into occupied)
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
|
||||
feature["properties"]["name"] = name_map.get(
|
||||
idx, "Russian-occupied areas"
|
||||
)
|
||||
feature["properties"]["zone_id"] = idx
|
||||
|
||||
return data
|
||||
else:
|
||||
logger.error(
|
||||
f"Failed to fetch parsed Github Raw GeoJSON: {res_geo.status_code}"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"Failed to fetch Github tree for Deepstatemap (%s @ %s): %s",
|
||||
repo,
|
||||
ref,
|
||||
res_tree.status_code,
|
||||
)
|
||||
logger.error(f"Failed to fetch Github Tree for Deepstatemap: {res_tree.status_code}")
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"Error fetching DeepStateMap: {e}")
|
||||
return None
|
||||
@@ -335,7 +316,7 @@ def _fetch_article_title(url):
|
||||
resp = requests.get(
|
||||
current_url,
|
||||
timeout=4,
|
||||
headers={"User-Agent": _geopolitics_user_agent()},
|
||||
headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT Dashboard/1.0)"},
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
)
|
||||
@@ -540,29 +521,10 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
|
||||
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
||||
|
||||
|
||||
# GDELT's data.gdeltproject.org is a CNAME to a Google Cloud Storage
|
||||
# bucket of the same name. GCS returns the wildcard ``*.storage.googleapis.com``
|
||||
# certificate, which legitimately does NOT cover the GDELT custom domain
|
||||
# — Python's TLS verification correctly refuses it. Some networks/POPs
|
||||
# happen to route through a path where this works; many do not (notably
|
||||
# Docker Desktop's outbound NAT on local installs).
|
||||
#
|
||||
# Fix: rewrite the URL to hit GCS directly with a path-style bucket
|
||||
# reference, where the standard GCS cert is genuinely valid. Same data,
|
||||
# verified TLS, no operator-side workaround needed.
|
||||
def _gcs_direct_gdelt_url(url: str) -> str:
|
||||
"""If ``url`` points at data.gdeltproject.org, return the equivalent
|
||||
GCS-direct URL. Otherwise return the URL unchanged."""
|
||||
prefix = "://data.gdeltproject.org/"
|
||||
if prefix in url:
|
||||
return url.replace(prefix, "://storage.googleapis.com/data.gdeltproject.org/", 1)
|
||||
return url
|
||||
|
||||
|
||||
def _download_gdelt_export(url):
|
||||
"""Download a single GDELT export file, return bytes or None."""
|
||||
try:
|
||||
res = fetch_with_curl(_gcs_direct_gdelt_url(url), timeout=15)
|
||||
res = fetch_with_curl(url, timeout=15)
|
||||
if res.status_code == 200:
|
||||
return res.content
|
||||
except (ConnectionError, TimeoutError, OSError): # non-critical
|
||||
@@ -658,12 +620,8 @@ def fetch_global_military_incidents():
|
||||
# HTTPS is used to prevent passive network observers from injecting
|
||||
# poisoned export records into the global incident map via MITM.
|
||||
# GDELT serves the same content over HTTPS as HTTP.
|
||||
# Use the GCS-direct URL because data.gdeltproject.org's CNAME
|
||||
# serves a wildcard *.storage.googleapis.com cert that legitimately
|
||||
# doesn't cover the GDELT hostname. See _gcs_direct_gdelt_url above.
|
||||
index_res = fetch_with_curl(
|
||||
_gcs_direct_gdelt_url("https://data.gdeltproject.org/gdeltv2/lastupdate.txt"),
|
||||
timeout=10,
|
||||
"https://data.gdeltproject.org/gdeltv2/lastupdate.txt", timeout=10
|
||||
)
|
||||
if index_res.status_code != 200:
|
||||
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
"""Function Keys — anonymous credential scaffolding.
|
||||
"""Function Keys — anonymous citizenship proof.
|
||||
|
||||
Source of truth: ``infonet-economy/IMPLEMENTATION_PLAN.md`` §4.4,
|
||||
``infonet-economy/BRAINDUMP.md`` §11 item 9.
|
||||
|
||||
A citizen should eventually be able to prove "I am a UBI-eligible
|
||||
Infonet citizen" to a real-world operator (food bank, community
|
||||
service) **without revealing their Infonet identity**. The current
|
||||
Python implementation wires the accounting, nullifier, receipt, and
|
||||
operator flows, but its HMAC challenge-response is a placeholder for
|
||||
integration tests. It is not a production anonymous or zero-knowledge
|
||||
citizenship proof until blind signatures or anonymous credentials are
|
||||
selected and wired.
|
||||
|
||||
The naive approach (scramble a public key, record each redemption on
|
||||
chain) leaks identity through metadata correlation (time, location,
|
||||
operator, frequency).
|
||||
A citizen should be able to prove "I am a UBI-eligible Infonet
|
||||
citizen" to a real-world operator (food bank, community service)
|
||||
**without revealing their Infonet identity**. The naive approach
|
||||
(scramble a public key, record each redemption on chain) leaks
|
||||
identity through metadata correlation (time, location, operator,
|
||||
frequency).
|
||||
|
||||
The full design has six pieces; five are implemented in pure Python
|
||||
here. The remaining piece — issuance via blind signatures or
|
||||
@@ -33,8 +27,7 @@ Pieces:
|
||||
operator: tracked via ``NullifierTracker``.
|
||||
3. **Challenge-response** (`challenge_response.py`) — operator
|
||||
issues a fresh nonce, key-holder signs with the Function Key's
|
||||
secret. This is HMAC placeholder plumbing for screenshot/replay
|
||||
resistance, not the final anonymous credential proof.
|
||||
secret. Prevents screenshot attacks, key sharing, replay.
|
||||
4. **Two-phase commit receipts** (`receipt.py`) — Phase 1
|
||||
verification receipt (operator-signed, day-level date NOT
|
||||
timestamp, no node_id). Phase 2 fulfillment receipt (citizen
|
||||
|
||||
@@ -32,14 +32,14 @@ logger = logging.getLogger(__name__)
|
||||
_REFRESH_SECONDS = 24 * 3600
|
||||
kiwisdr_cache: TTLCache = TTLCache(maxsize=1, ttl=_REFRESH_SECONDS)
|
||||
|
||||
_SOURCE_URL_HTTP = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTPS = "https://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_CACHE_FILE = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_cache.json"
|
||||
# Bundled fallback — shipped with the codebase so the KiwiSDR layer always
|
||||
# has something to render even when the upstream is unreachable, returns
|
||||
# garbage, or appears to have been tampered with. Issue #206 / #364: try HTTPS
|
||||
# first, then HTTP; we still validate shape and fall back to this bundle if the
|
||||
# payload does not look right.
|
||||
# garbage, or appears to have been tampered with. Issue #206: the upstream
|
||||
# only speaks HTTP, so we can't rely on TLS for integrity — instead we
|
||||
# validate the response's shape and fall back to this bundle if it doesn't
|
||||
# look right.
|
||||
_BUNDLED_FALLBACK = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_directory.json"
|
||||
|
||||
# Minimum number of receivers we expect from a healthy upstream response.
|
||||
@@ -184,29 +184,6 @@ def _validate_fetched_nodes(nodes: list[dict]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _fetch_mirror_payload_text() -> str | None:
    """Download the KiwiSDR mirror payload, trying HTTPS before HTTP (#364).

    Returns the response text of the first URL that answers with HTTP 200,
    or ``None`` when both attempts fail. Each failed attempt is logged at
    debug level when it raised, and the last failure is logged once as a
    warning. No validation of the payload happens here.
    """
    from services.network_utils import fetch_with_curl

    failure: Exception | None = None
    for candidate in (_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP):
        try:
            response = fetch_with_curl(candidate, timeout=20)
            if not response or response.status_code != 200:
                failure = RuntimeError(f"HTTP {getattr(response, 'status_code', 'unknown')}")
                continue
            if candidate == _SOURCE_URL_HTTP:
                # Reaching the HTTP URL means the HTTPS attempt did not succeed.
                logger.info(
                    "KiwiSDR: HTTPS mirror unavailable; using HTTP with shape validation"
                )
            return response.text
        except Exception as exc:
            # Best-effort fetch: remember the error and move to the next URL.
            failure = exc
            logger.debug("KiwiSDR mirror fetch failed for %s: %s", candidate, exc)
    if failure is not None:
        logger.warning("KiwiSDR mirror fetch failed: %s", failure)
    return None
|
||||
|
||||
|
||||
def _load_bundled_fallback() -> list[dict]:
|
||||
"""Last-resort directory shipped with the codebase. Always returns a
|
||||
list (may be empty if the bundle is missing in older deployments)."""
|
||||
@@ -225,8 +202,9 @@ def _load_bundled_fallback() -> list[dict]:
|
||||
def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
"""Return the KiwiSDR receiver list, refreshed at most once per day.
|
||||
|
||||
Layered fallback (issue #206 / #364 — HTTPS first, HTTP fallback, plus
|
||||
content validation + bundled static directory):
|
||||
Layered fallback (issue #206 — upstream is HTTP-only, so we defend with
|
||||
content validation + bundled static directory rather than trying to
|
||||
upgrade the transport):
|
||||
|
||||
1. In-memory cache (handled by @cached on this function)
|
||||
2. On-disk cache if <24h old
|
||||
@@ -238,6 +216,8 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
tampered upstream returning garbage is caught by _validate_fetched_nodes()
|
||||
and falls through to whatever previously-trusted snapshot we have.
|
||||
"""
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# 1. Trust on-disk cache if fresh.
|
||||
cached_nodes = _load_disk_cache()
|
||||
if cached_nodes is not None:
|
||||
@@ -250,12 +230,14 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
fresh_nodes: list[dict] = []
|
||||
fetch_succeeded = False
|
||||
try:
|
||||
body = _fetch_mirror_payload_text()
|
||||
if body:
|
||||
fresh_nodes = _parse_mirror_payload(body)
|
||||
res = fetch_with_curl(_SOURCE_URL, timeout=20)
|
||||
if res and res.status_code == 200:
|
||||
fresh_nodes = _parse_mirror_payload(res.text)
|
||||
fetch_succeeded = True
|
||||
else:
|
||||
logger.warning("KiwiSDR fetch returned no usable mirror payload")
|
||||
logger.warning(
|
||||
f"KiwiSDR fetch returned HTTP {res.status_code if res else 'no response'}"
|
||||
)
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.warning(f"KiwiSDR fetch exception: {e}")
|
||||
|
||||
|
||||
@@ -27,15 +27,8 @@ def fetch_liveuamap():
|
||||
browser = p.chromium.launch(
|
||||
headless=True, args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
# Per-install handle (no shared Shadowbroker product token). Stealth remains
|
||||
# for Turnstile; see docs/OUTBOUND_DATA.md #348.
|
||||
playwright_ua = (
|
||||
f"Mozilla/5.0 (compatible; {outbound_user_agent('liveuamap')})"
|
||||
)
|
||||
context = browser.new_context(
|
||||
user_agent=playwright_ua,
|
||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
color_scheme="dark",
|
||||
)
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
"""LiveUAMap Playwright scraper opt-in (#348) — UI consent on Windows."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "liveuamap_scraper_opt_in.json"
|
||||
_OPT_IN_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
|
||||
return str(os.getenv(name, "")).strip().lower()
|
||||
|
||||
|
||||
def liveuamap_requires_ui_opt_in() -> bool:
    """Return True when running on Windows (``os.name == "nt"``).

    Windows local installs need explicit consent before Playwright contacts
    LiveUAMap.
    """
    running_on_windows = os.name == "nt"
    return running_on_windows
|
||||
|
||||
|
||||
def get_liveuamap_ui_opt_in() -> bool:
    """Return True only if the opt-in file records a truthy ``opted_in`` flag.

    A missing file means "not opted in". Any failure to read, decode or
    interpret the file (I/O error, invalid UTF-8, malformed JSON, or a JSON
    value that is not an object) is logged as a warning and also treated as
    "not opted in", so a corrupted file can never enable the scraper or
    crash the caller.
    """
    if not _OPT_IN_FILE.exists():
        return False
    try:
        payload = json.loads(_OPT_IN_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError alike.
        logger.warning("LiveUAMap opt-in file unreadable: %s", e)
        return False
    if not isinstance(payload, dict):
        # Valid JSON but not an object (e.g. "[]"): there is no flag to read.
        logger.warning(
            "LiveUAMap opt-in file unreadable: %s",
            f"expected a JSON object, got {type(payload).__name__}",
        )
        return False
    return bool(payload.get("opted_in"))
|
||||
|
||||
|
||||
def set_liveuamap_ui_opt_in(opted_in: bool) -> None:
    """Persist the consent flag as ``{"opted_in": <bool>}`` in the opt-in file.

    Creates the parent directory if needed. The write itself happens under
    the module lock.
    """
    _OPT_IN_FILE.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps({"opted_in": bool(opted_in)}, indent=2)
    with _OPT_IN_LOCK:
        _OPT_IN_FILE.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
    """Whether the Playwright LiveUAMap scraper may run on this backend.

    ``SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER`` decides outright when it holds
    a recognised on/off value. Otherwise the scraper is allowed everywhere
    except on platforms that require UI consent, where the persisted opt-in
    decides.
    """
    flag = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    forced_on = flag in {"1", "true", "yes", "on"}
    forced_off = flag in {"0", "false", "no", "off"}
    if forced_on or forced_off:
        return forced_on
    # No explicit override: defer to platform policy, then to stored consent.
    return get_liveuamap_ui_opt_in() if liveuamap_requires_ui_opt_in() else True
|
||||
|
||||
|
||||
def liveuamap_scraper_status() -> dict[str, Any]:
    """Summarise the scraper gating state.

    Keys: ``platform_requires_opt_in``, ``ui_opted_in``, ``scraper_enabled``
    and ``env_override`` (``"on"``, ``"off"`` or ``None`` when
    ``SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER`` holds no recognised value).
    """
    override_by_flag = {
        **dict.fromkeys(("1", "true", "yes", "on"), "on"),
        **dict.fromkeys(("0", "false", "no", "off"), "off"),
    }
    flag = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    stored_consent = get_liveuamap_ui_opt_in()
    platform_gate = liveuamap_requires_ui_opt_in()
    return {
        "platform_requires_opt_in": platform_gate,
        "ui_opted_in": stored_consent,
        "scraper_enabled": liveuamap_scraper_enabled(),
        "env_override": override_by_flag.get(flag),
    }
|
||||
@@ -69,115 +69,6 @@ def _derive_peer_key(shared_secret: str, peer_url: str) -> bytes:
|
||||
).digest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Issue #256 (tg12): per-peer HMAC secrets
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Before this change, ALL peer-push HMACs were derived from a single
|
||||
# fleet-shared ``MESH_PEER_PUSH_SECRET``. The receiver could prove a
|
||||
# request was signed by *someone who knows the fleet secret*, but it
|
||||
# could NOT prove which peer signed it — any peer could compute the
|
||||
# expected HMAC for any other peer's URL and impersonate that peer.
|
||||
#
|
||||
# Fix: an optional ``MESH_PEER_SECRETS`` env var maps specific peer URLs
|
||||
# to per-peer secrets. When a peer URL is listed there, only that
|
||||
# per-peer secret is accepted for that URL — the global secret is
|
||||
# ignored for that peer. Peer A no longer learns peer B's secret, so
|
||||
# peer A cannot forge a request claiming to be peer B.
|
||||
#
|
||||
# Backwards-compatible by design:
|
||||
#
|
||||
# - Single-peer installs (``MESH_PEER_SECRETS`` empty) keep using the
|
||||
# global secret. Zero behavior change. Zero operator action required.
|
||||
# - Multi-peer installs that haven't migrated yet keep using the global
|
||||
# secret for every peer. Same behavior as before — same exposure.
|
||||
# - Multi-peer installs that have migrated configure
|
||||
# ``MESH_PEER_SECRETS=urlA=secretA,urlB=secretB`` and immediately get
|
||||
# per-peer identity. Migration is incremental: peers not yet listed
|
||||
# continue using the global secret until both sides of that peering
|
||||
# add their entry.
|
||||
|
||||
_PEER_SECRETS_CACHE: dict[str, str] = {}
|
||||
_PEER_SECRETS_CACHE_RAW: str = ""
|
||||
|
||||
|
||||
def _lookup_per_peer_secret(normalized_url: str) -> str:
    """Return the secret configured for ``normalized_url`` in MESH_PEER_SECRETS.

    The variable is a comma-separated list of ``url=secret`` items. Parsing
    is lenient:

    - whitespace around items, URLs and secrets is stripped;
    - items without ``=``, or whose URL or secret half is empty, are skipped;
    - the URL half is passed through ``normalize_peer_url`` before being
      used as the lookup key;
    - only the first ``=`` splits an item, so a secret may itself contain
      ``=`` characters.

    Returns "" when no entry exists for the URL. The parsed mapping is cached
    in module globals and rebuilt whenever the raw environment value changes,
    so changes to the variable take effect without a process restart.
    """
    import os

    global _PEER_SECRETS_CACHE, _PEER_SECRETS_CACHE_RAW

    raw = str(os.environ.get("MESH_PEER_SECRETS", "") or "").strip()
    if raw != _PEER_SECRETS_CACHE_RAW:
        parsed: dict[str, str] = {}
        for item in (piece.strip() for piece in raw.split(",")):
            url_text, separator, secret_text = item.partition("=")
            if not separator:
                # Blank item or no "=" at all: nothing to record.
                continue
            peer = normalize_peer_url(url_text.strip())
            secret = secret_text.strip()
            if peer and secret:
                parsed[peer] = secret
        _PEER_SECRETS_CACHE = parsed
        _PEER_SECRETS_CACHE_RAW = raw

    return _PEER_SECRETS_CACHE.get(normalized_url, "")
|
||||
|
||||
|
||||
def resolve_peer_key_for_url(peer_url: str) -> bytes:
    """Return the HMAC key for ``peer_url``, preferring a per-peer secret.

    Issue #256: the URL is normalized, then looked up in
    ``MESH_PEER_SECRETS``. Only when no per-peer secret is listed for it does
    the fleet-shared ``MESH_PEER_PUSH_SECRET`` from settings apply. Whichever
    secret is chosen is turned into a key by ``_derive_peer_key`` together
    with the normalized URL.

    Sender and receiver must both resolve the key from the same URL and the
    same secret, so operators on both ends of a peering need matching
    configuration.

    Returns empty bytes when the URL does not normalize, settings cannot be
    loaded, or no secret is configured. Callers must treat that as
    fail-closed (skip the push, reject the verification).
    """
    target = normalize_peer_url(peer_url)
    if not target:
        return b""

    secret = _lookup_per_peer_secret(target)
    if not secret:
        # No per-peer entry: fall back to the legacy global secret, which
        # keeps single-peer and not-yet-migrated installs working.
        try:
            from services.config import get_settings

            configured = getattr(get_settings(), "MESH_PEER_PUSH_SECRET", "")
            secret = str(configured or "").strip()
        except Exception:
            return b""

    return _derive_peer_key(secret, target) if secret else b""
|
||||
|
||||
|
||||
def _node_digest(public_key_b64: str) -> str:
|
||||
raw = base64.b64decode(public_key_b64)
|
||||
return hashlib.sha256(raw).hexdigest()
|
||||
|
||||
@@ -317,39 +317,6 @@ class DMRelay:
|
||||
def _self_mailbox_limit(self) -> int:
|
||||
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
||||
|
||||
def _per_sender_pending_limit(self) -> int:
|
||||
"""Anti-spam cap on UNACKED messages a single sender can have parked
|
||||
in a single recipient mailbox at any one time. See ``config.py``
|
||||
``MESH_DM_PENDING_PER_SENDER_LIMIT`` for the threat model — this
|
||||
rule is enforced both at ``deposit`` (local) and at
|
||||
``accept_replica`` (peer push acceptance), making it a network
|
||||
rule rather than a client-side honor system."""
|
||||
try:
|
||||
limit = int(getattr(self._settings(), "MESH_DM_PENDING_PER_SENDER_LIMIT", 2) or 2)
|
||||
except (TypeError, ValueError):
|
||||
limit = 2
|
||||
return max(1, limit)
|
||||
|
||||
def _per_sender_pending_count(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
sender_block_ref: str,
|
||||
) -> int:
|
||||
"""Count UNACKED messages from ``sender_block_ref`` currently parked
|
||||
in ``mailbox_key``. Caller already holds ``self._lock``.
|
||||
|
||||
Messages that have been claimed/acked are removed from the mailbox
|
||||
list (see ``claim_message_ids``), so anything still here is by
|
||||
definition unacked. We count by exact ``sender_block_ref`` match
|
||||
— that's the per-pair sender identity used for blocking too, so
|
||||
the cap is naturally per-(sender, recipient).
|
||||
"""
|
||||
if not mailbox_key or not sender_block_ref:
|
||||
return 0
|
||||
messages = self._mailboxes.get(mailbox_key, [])
|
||||
return sum(1 for m in messages if m.sender_block_ref == sender_block_ref)
|
||||
|
||||
def _nonce_ttl_seconds(self) -> int:
|
||||
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
||||
|
||||
@@ -1548,29 +1515,6 @@ class DMRelay:
|
||||
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
||||
metrics_inc("dm_drop_full")
|
||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||
# Anti-spam: per-(sender, recipient) cap on unacked messages.
|
||||
# A sender who already has the configured number of messages
|
||||
# parked in this mailbox can't deposit more until the recipient
|
||||
# pulls (acks) at least one. The same cap is re-enforced on
|
||||
# inbound replication in ``accept_replica`` so this rule isn't
|
||||
# bypassable by patching out the local check on a hostile
|
||||
# sender's relay — see config.py
|
||||
# MESH_DM_PENDING_PER_SENDER_LIMIT for the threat model.
|
||||
per_sender_limit = self._per_sender_pending_limit()
|
||||
pending = self._per_sender_pending_count(
|
||||
mailbox_key=mailbox_key,
|
||||
sender_block_ref=sender_block_ref,
|
||||
)
|
||||
if pending >= per_sender_limit:
|
||||
metrics_inc("dm_drop_per_sender_cap")
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": (
|
||||
f"Recipient already has {pending} unread message"
|
||||
f"{'s' if pending != 1 else ''} from you. Wait for "
|
||||
"them to read your messages before sending more."
|
||||
),
|
||||
}
|
||||
if not msg_id:
|
||||
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
||||
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
||||
@@ -1595,245 +1539,8 @@ class DMRelay:
|
||||
)
|
||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||
self._save()
|
||||
# Cross-node mailbox replication: push the freshly-stored
|
||||
# envelope to every authenticated relay peer so the recipient
|
||||
# can log into ANY node and find their messages. The push is
|
||||
# async (fire-and-forget thread) so deposit() returns
|
||||
# immediately — slow Tor peers can't block the sender's UX.
|
||||
# Each receiving peer re-enforces the per-sender cap on
|
||||
# acceptance, so hostile relays can't widen the cap.
|
||||
try:
|
||||
envelope_for_push = self.envelope_for_replication(
|
||||
mailbox_key=mailbox_key, msg_id=msg_id,
|
||||
)
|
||||
if envelope_for_push:
|
||||
self._replicate_envelope_to_peers_async(
|
||||
envelope=envelope_for_push,
|
||||
)
|
||||
except Exception:
|
||||
metrics_inc("dm_replication_push_error")
|
||||
return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def accept_replica(
    self,
    *,
    envelope: dict[str, Any],
    originating_peer_url: str = "",
) -> dict[str, Any]:
    """Receive a DM envelope replicated from a peer relay.

    Cross-node mailbox replication entry point. When a sender's local
    relay accepts a ``deposit`` and pushes the envelope to its relay peers
    (so the recipient can log into any peer node and find their messages),
    each receiving peer calls ``accept_replica`` to ingest it.

    The per-(sender, recipient) cap is re-enforced HERE. That's what makes
    the rule a NETWORK rule rather than a client-side honor system: a
    hostile sender who patches out the local ``deposit`` check still can't
    get extra unacked messages to spread, because every honest peer
    enforces the same cap on inbound replicas.

    Args:
        envelope: Replicated message. ``msg_id``, ``mailbox_key``,
            ``sender_block_ref`` and ``ciphertext`` are required;
            ``timestamp`` defaults to the current time when absent or falsy
            and must otherwise be convertible to ``float``.
        originating_peer_url: Accepted for interface compatibility; not
            consulted by this method.

    Returns:
        The same shape as ``deposit``: ``{"ok": True, "msg_id": ...}`` on
        acceptance (plus ``"duplicate": True`` when the message is already
        held), or ``{"ok": False, "detail": ...}`` on rejection. Cap
        rejections also carry ``cap_violation``, ``pending`` and ``limit``.
    """
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}
    msg_id = str(envelope.get("msg_id", "") or "").strip()
    mailbox_key = str(envelope.get("mailbox_key", "") or "").strip()
    sender_block_ref = str(envelope.get("sender_block_ref", "") or "").strip()
    ciphertext = str(envelope.get("ciphertext", "") or "")
    if not msg_id or not mailbox_key or not sender_block_ref or not ciphertext:
        return {"ok": False, "detail": "envelope missing required fields"}

    # The envelope comes from another relay, so its timestamp is untrusted.
    # Validate it before taking the lock so a malformed value produces a
    # structured rejection instead of an exception out of the handler.
    raw_timestamp = envelope.get("timestamp")
    try:
        timestamp = float(raw_timestamp) if raw_timestamp else time.time()
    except (TypeError, ValueError, OverflowError):
        return {"ok": False, "detail": "envelope timestamp invalid"}

    with self._lock:
        self._refresh_from_shared_relay()
        self._cleanup_expired()

        # Idempotent — if we already hold this exact msg_id, the
        # replication round-tripped or a peer pushed the same
        # envelope through multiple paths. Accept silently.
        if any(m.msg_id == msg_id for m in self._mailboxes.get(mailbox_key, [])):
            metrics_inc("dm_replica_duplicate")
            return {"ok": True, "msg_id": msg_id, "duplicate": True}

        # Same per-class cap as the deposit path — defense in depth
        # against a peer that wraps a "deposit" as a "replica" to
        # bypass the class limit. Unknown classes get the shared limit.
        delivery_class = str(envelope.get("delivery_class", "") or "")
        if delivery_class in ("request", "shared", "self"):
            class_limit = self._mailbox_limit_for_class(delivery_class)
        else:
            class_limit = self._shared_mailbox_limit()
        if len(self._mailboxes.get(mailbox_key, [])) >= class_limit:
            metrics_inc("dm_replica_drop_full")
            return {"ok": False, "detail": "Recipient mailbox full"}

        # THE network rule: per-(sender, recipient) anti-spam cap.
        per_sender_limit = self._per_sender_pending_limit()
        pending = self._per_sender_pending_count(
            mailbox_key=mailbox_key,
            sender_block_ref=sender_block_ref,
        )
        if pending >= per_sender_limit:
            metrics_inc("dm_replica_drop_per_sender_cap")
            # Structured rejection — the sender's relay learns its
            # envelope was rejected by an honest peer and can stop
            # trying to push it.
            return {
                "ok": False,
                "detail": (
                    "Per-sender cap reached on this relay; refusing replica"
                ),
                "cap_violation": True,
                "pending": pending,
                "limit": per_sender_limit,
            }

        # Accept the replica into the local mailbox.
        self._mailboxes[mailbox_key].append(
            DMMessage(
                sender_id=str(envelope.get("sender_id", "") or ""),
                ciphertext=ciphertext,
                timestamp=timestamp,
                msg_id=msg_id,
                delivery_class=str(envelope.get("delivery_class", "shared") or "shared"),
                sender_seal=str(envelope.get("sender_seal", "") or ""),
                relay_salt=str(envelope.get("relay_salt", "") or ""),
                sender_block_ref=sender_block_ref,
                payload_format=str(envelope.get("payload_format", "dm1") or "dm1"),
                session_welcome=str(envelope.get("session_welcome", "") or ""),
            )
        )
        self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
        self._save()
        metrics_inc("dm_replica_accepted")
        return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def _replicate_envelope_to_peers_async(
    self,
    *,
    envelope: dict[str, Any],
) -> None:
    """Push an outbound DM envelope to relay peers from a background thread.

    The work runs on a daemon thread named ``dm-replicate-push`` so the
    caller returns immediately and never waits on the network. Inside
    that thread the envelope is serialised once as compact UTF-8 JSON
    (``{"envelope": ...}``) and POSTed to
    ``<peer_url>/api/mesh/dm/replicate-envelope`` for every URL returned
    by ``authenticated_push_peer_urls()``.

    When ``resolve_peer_key_for_url`` yields a key for the normalised
    peer URL, the request also carries ``X-Peer-Url`` and ``X-Peer-HMAC``
    (hex HMAC-SHA256 of the request body under that key).

    Nothing is raised and nothing is returned. Outcomes are visible only
    through metrics: ``dm_replication_push_ok`` for HTTP 200,
    ``dm_replication_push_rejected`` for any other status, and
    ``dm_replication_push_error`` for an exception.

    Args:
        envelope: Wire-form DM envelope to replicate.
    """
    import threading

    def _push_to_all_peers():
        try:
            import hashlib
            import hmac
            import requests as _requests

            from services.mesh.mesh_crypto import (
                normalize_peer_url,
                resolve_peer_key_for_url,
            )
            from services.mesh.mesh_router import (
                authenticated_push_peer_urls,
            )

            targets = authenticated_push_peer_urls()
            if not targets:
                return

            # Serialise once; the same bytes are both sent and HMAC'd.
            body = json.dumps(
                {"envelope": envelope},
                separators=(",", ":"),
                ensure_ascii=False,
            ).encode("utf-8")

            # Missing/zero setting falls back to 10s; never below 1s.
            configured = getattr(self._settings(), "MESH_RELAY_PUSH_TIMEOUT_S", 10)
            timeout_s = max(1, int(configured or 10))

            for peer_url in targets:
                try:
                    canonical = normalize_peer_url(peer_url)
                    request_headers = {"Content-Type": "application/json"}
                    key = resolve_peer_key_for_url(canonical)
                    if key:
                        request_headers["X-Peer-Url"] = canonical
                        request_headers["X-Peer-HMAC"] = hmac.new(
                            key, body, hashlib.sha256
                        ).hexdigest()
                    response = _requests.post(
                        f"{peer_url}/api/mesh/dm/replicate-envelope",
                        data=body,
                        timeout=timeout_s,
                        headers=request_headers,
                    )
                    # Any non-200 status is counted as a rejection.
                    metrics_inc(
                        "dm_replication_push_ok"
                        if response.status_code == 200
                        else "dm_replication_push_rejected"
                    )
                except Exception:
                    # One failing peer must not stop delivery to the rest.
                    metrics_inc("dm_replication_push_error")
        except Exception:
            # Last-resort guard: setup failures (imports, peer listing,
            # serialisation) are counted, never propagated.
            metrics_inc("dm_replication_push_error")

    threading.Thread(
        target=_push_to_all_peers,
        name="dm-replicate-push",
        daemon=True,
    ).start()
|
||||
|
||||
def envelope_for_replication(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
msg_id: str,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Return the wire-form envelope for a stored message, suitable
|
||||
for POSTing to a peer relay's replicate-envelope endpoint.
|
||||
|
||||
Returns ``None`` if the message isn't in the mailbox (already
|
||||
acked, expired, never existed). The caller holds the
|
||||
responsibility for transport security (Tor SOCKS for .onion
|
||||
peers, per-peer HMAC) and for not leaking the envelope to
|
||||
clearnet peers when private transport is required.
|
||||
"""
|
||||
with self._lock:
|
||||
for m in self._mailboxes.get(mailbox_key, []):
|
||||
if m.msg_id == msg_id:
|
||||
return {
|
||||
"msg_id": m.msg_id,
|
||||
"mailbox_key": mailbox_key,
|
||||
"sender_id": m.sender_id,
|
||||
"sender_block_ref": m.sender_block_ref,
|
||||
"sender_seal": m.sender_seal,
|
||||
"ciphertext": m.ciphertext,
|
||||
"timestamp": m.timestamp,
|
||||
"delivery_class": m.delivery_class,
|
||||
"relay_salt": m.relay_salt,
|
||||
"payload_format": m.payload_format,
|
||||
"session_welcome": m.session_welcome,
|
||||
}
|
||||
return None
|
||||
|
||||
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
|
||||
@@ -33,9 +33,8 @@ Each event contains:
|
||||
|
||||
Persistence: JSON file at backend/data/infonet.json
|
||||
|
||||
Encrypted gate chat events are private-chain ciphertext records. They are
|
||||
excluded from public read surfaces and replicated only over private Infonet
|
||||
transports.
|
||||
Encrypted gate chat events are intentionally kept off the public chain and
|
||||
persisted separately via GateMessageStore.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -65,8 +64,6 @@ from services.mesh.mesh_schema import (
|
||||
ACTIVE_PUBLIC_LEDGER_EVENT_TYPES,
|
||||
PUBLIC_LEDGER_EVENT_TYPES,
|
||||
validate_event_payload,
|
||||
validate_private_dm_ledger_payload,
|
||||
validate_private_gate_ledger_payload,
|
||||
validate_protocol_fields,
|
||||
validate_public_ledger_payload,
|
||||
)
|
||||
@@ -130,12 +127,6 @@ GATE_SEGMENT_MAX_COMPRESSED_BYTES = max(
|
||||
int(os.environ.get("MESH_GATE_SEGMENT_MAX_COMPRESSED_BYTES", str(2 * 1024 * 1024)) or str(2 * 1024 * 1024)),
|
||||
)
|
||||
GATE_SEGMENT_STORAGE_VERSION = 1
|
||||
DM_HASHCHAIN_SPOOL_LIMIT = max(1, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_LIMIT", "2") or "2"))
|
||||
DM_HASHCHAIN_SPOOL_SENDER_LIMIT = max(
|
||||
1,
|
||||
int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_SENDER_LIMIT", "1") or "1"),
|
||||
)
|
||||
DM_HASHCHAIN_SPOOL_TTL_S = max(60, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_TTL_S", "3600") or "3600"))
|
||||
_PUBLIC_EVENT_APPEND_HOOKS: list[Any] = []
|
||||
_PUBLIC_EVENT_APPEND_HOOKS_LOCK = threading.Lock()
|
||||
|
||||
@@ -225,19 +216,18 @@ def _peer_pair_ref_key(peer_url: str) -> bytes:
|
||||
Returns an empty key on misconfiguration so callers fail closed.
|
||||
"""
|
||||
try:
|
||||
from services.mesh.mesh_crypto import (
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
except Exception:
|
||||
return b""
|
||||
if not secret:
|
||||
return b""
|
||||
normalized = normalize_peer_url(peer_url or "")
|
||||
if not normalized:
|
||||
return b""
|
||||
# Issue #256: resolve_peer_key_for_url() prefers per-peer secrets
|
||||
# from MESH_PEER_SECRETS and falls back to the global
|
||||
# MESH_PEER_PUSH_SECRET only when the URL has no per-peer entry.
|
||||
peer_key = resolve_peer_key_for_url(normalized)
|
||||
peer_key = _derive_peer_key(secret, normalized)
|
||||
if not peer_key:
|
||||
return b""
|
||||
# Domain-separate from the transport HMAC key so the two
|
||||
@@ -349,32 +339,6 @@ def _private_gate_event_id(
|
||||
).hexdigest()
|
||||
|
||||
|
||||
def _private_gate_signature_payload_variants(gate_id: str, event: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the distinct signature payloads to try for a gate event.

    Variants are produced in this order:

    1. the payload from ``_private_gate_signature_payload(gate_id, event)``;
    2. the same built with ``include_reply_to=False``, only when the
       event payload has a non-blank ``reply_to``;
    3. a copy of (1) without its ``epoch`` key, only when (1) has one;
    4. (2) without ``epoch``, when both conditions above hold.

    Variants that serialise to the same canonical JSON are collapsed,
    keeping the first occurrence.

    Args:
        gate_id: Gate identifier passed through to the payload builder.
        event: Event dict; only its ``payload`` mapping is read here.

    Returns:
        Ordered list of unique payload dicts.
    """
    primary = _private_gate_signature_payload(gate_id, event)
    candidates: list[dict[str, Any]] = [primary]

    inner = event.get("payload")
    if not isinstance(inner, dict):
        inner = {}
    has_reply = bool(str(inner.get("reply_to", "") or "").strip())

    if has_reply:
        candidates.append(_private_gate_signature_payload(gate_id, event, include_reply_to=False))
    if "epoch" in primary:
        candidates.append({name: value for name, value in primary.items() if name != "epoch"})
        if has_reply:
            stripped = _private_gate_signature_payload(gate_id, event, include_reply_to=False)
            stripped.pop("epoch", None)
            candidates.append(stripped)

    # Key by canonical JSON; setdefault keeps the first variant per key
    # and dict ordering preserves the order in which they were produced.
    unique: dict[str, dict[str, Any]] = {}
    for candidate in candidates:
        fingerprint = json.dumps(candidate, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
        unique.setdefault(fingerprint, candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def _sanitize_private_gate_event(gate_id: str, event: dict[str, Any]) -> dict[str, Any]:
|
||||
payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
||||
sanitized = {
|
||||
@@ -1603,18 +1567,11 @@ class Infonet:
|
||||
def _rebuild_state(self) -> None:
|
||||
self.event_index = {}
|
||||
self.node_sequences = {}
|
||||
# Keep private signed-write replay domains that are not represented
|
||||
# on-chain, but rebuild the gate_message sequence domain from chain
|
||||
# events so reloads/fork application do not mix it with public
|
||||
# per-node message sequences.
|
||||
preserved_domains = {}
|
||||
if isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
preserved_domains = {
|
||||
key: value
|
||||
for key, value in self.sequence_domains.items()
|
||||
if not str(key or "").endswith("|gate_message")
|
||||
}
|
||||
self.sequence_domains = dict(preserved_domains)
|
||||
# Keep private signed-write replay domains across public-chain
|
||||
# rebuilds; these domains protect local side effects that are not
|
||||
# represented as public Infonet events.
|
||||
if not isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
self.sequence_domains = {}
|
||||
self.public_key_bindings = {}
|
||||
self.revocations = {}
|
||||
self._replay_filter = ReplayFilter()
|
||||
@@ -1626,12 +1583,9 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(
|
||||
evt.get("event_type", ""), node_id
|
||||
)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
if sequence > last:
|
||||
sequence_table[sequence_key] = sequence
|
||||
self.node_sequences[node_id] = sequence
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
existing = self.public_key_bindings.get(public_key)
|
||||
@@ -1943,295 +1897,6 @@ class Infonet:
|
||||
self._save()
|
||||
return True, "ok"
|
||||
|
||||
def _sequence_table_for_event(self, event_type: str, node_id: str) -> tuple[dict[str, int], str]:
|
||||
normalized = str(event_type or "").strip().lower()
|
||||
if normalized == "gate_message":
|
||||
return self.sequence_domains, f"{node_id}|gate_message"
|
||||
if normalized == "dm_message":
|
||||
return self.sequence_domains, f"{node_id}|dm_message"
|
||||
return self.node_sequences, node_id
|
||||
|
||||
def _dm_spool_target_key(self, payload: dict[str, Any]) -> tuple[str, str]:
|
||||
delivery_class = str(payload.get("delivery_class", "") or "").strip().lower()
|
||||
if delivery_class == "shared":
|
||||
key = str(payload.get("recipient_token", "") or "").strip()
|
||||
else:
|
||||
key = str(payload.get("recipient_id", "") or "").strip()
|
||||
return delivery_class, key
|
||||
|
||||
def _dm_spool_active_counts(
    self,
    payload: dict[str, Any],
    *,
    sender_id: str = "",
    now: float | None = None,
) -> tuple[int, int]:
    """Count unexpired ``dm_message`` events spooled for a payload's target.

    ``self.events`` is scanned newest-first. An event counts when it is a
    ``dm_message`` whose ``_dm_spool_target_key`` equals the one derived
    from ``payload`` and whose timestamp is either non-positive or no
    older than ``DM_HASHCHAIN_SPOOL_TTL_S`` seconds. The timestamp comes
    from the event payload, falling back to the event itself.

    The scan stops early once the total has reached
    ``DM_HASHCHAIN_SPOOL_LIMIT`` and, when a sender was supplied, that
    sender's count has reached ``DM_HASHCHAIN_SPOOL_SENDER_LIMIT``. The
    returned numbers may therefore be lower bounds, not exact totals.

    Args:
        payload: DM payload identifying the delivery class and target.
        sender_id: Optional node id whose events are counted separately.
        now: Reference time in seconds; defaults to ``time.time()``.

    Returns:
        ``(total_count, sender_count)``; ``(0, 0)`` when the payload has
        no target key.
    """
    target = self._dm_spool_target_key(payload)
    if not target[1]:
        return 0, 0

    wanted_sender = str(sender_id or "").strip()
    reference_time = float(now) if now is not None else time.time()
    active_total = 0
    active_from_sender = 0

    for candidate in reversed(self.events):
        if candidate.get("event_type") != "dm_message":
            continue
        inner = candidate.get("payload")
        if not isinstance(inner, dict):
            inner = {}
        if self._dm_spool_target_key(inner) != target:
            continue

        stamp = float(inner.get("timestamp", candidate.get("timestamp", 0)) or 0)
        # A non-positive timestamp is never treated as expired.
        if stamp > 0 and reference_time - stamp > DM_HASHCHAIN_SPOOL_TTL_S:
            continue

        active_total += 1
        if wanted_sender and str(candidate.get("node_id", "") or "").strip() == wanted_sender:
            active_from_sender += 1

        sender_settled = not wanted_sender or active_from_sender >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT
        if active_total >= DM_HASHCHAIN_SPOOL_LIMIT and sender_settled:
            # Both caps are already known to be hit; no need to scan further.
            break

    return active_total, active_from_sender
|
||||
|
||||
def _dm_spool_active_count(self, payload: dict[str, Any], *, now: float | None = None) -> int:
|
||||
total_count, _sender_count = self._dm_spool_active_counts(payload, now=now)
|
||||
return total_count
|
||||
|
||||
def append_private_dm_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Validate and append a sealed ``dm_message`` event to the ledger.

    Checks run in a fixed order and the first failure raises:
    sequence/replay, plaintext keys, ``transport_lock``, payload schema,
    spool caps, payload size, protocol fields, signature fields, key
    algorithm, node binding, public-key binding, signature, revocation.

    On success the event is written to the WAL, appended to
    ``self.events``, indexed, made the new head, recorded in the
    ``dm_message`` sequence domain and replay filter, and saved. It is
    then offered to ``rns_bridge.publish_event`` (failures ignored) and
    to the public append hooks.

    Args:
        node_id: Author node identifier.
        payload: Sealed DM payload; must not carry plaintext keys.
        signature: Hex signature over the signature payload.
        sequence: Strictly increasing, positive sequence number.
        public_key: Author public key (base64).
        public_key_algo: Algorithm name for ``public_key``.
        protocol_version: Defaults to ``PROTOCOL_VERSION`` when empty.
        timestamp: Event time; current time is used when falsy.

    Returns:
        The appended event as a dict.

    Raises:
        ValueError: If any validation step fails.
    """
    event_type = "dm_message"

    # Replay protection within this node's dm_message sequence domain.
    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    seq_table, seq_key = self._sequence_table_for_event(event_type, node_id)
    previous = seq_table.get(seq_key, 0)
    if sequence <= previous:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {previous}")

    # Reject anything that looks like cleartext, and require the
    # private_strong transport lock, before normalising.
    incoming = dict(payload or {})
    if any(field in incoming for field in ("message", "plaintext", "_local_plaintext")):
        raise ValueError("private DM ledger payload must not contain plaintext")
    transport_lock = str(incoming.get("transport_lock", "") or "").strip().lower()
    if transport_lock != "private_strong":
        raise ValueError("DM hashchain spool requires private_strong transport_lock")

    payload = normalize_payload(event_type, incoming)
    valid, why = validate_private_dm_ledger_payload(payload)
    if not valid:
        raise ValueError(why)

    # Per-sender cap is checked before the overall per-target cap.
    spool_total, spool_from_sender = self._dm_spool_active_counts(payload, sender_id=node_id)
    if spool_from_sender >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
        raise ValueError("DM hashchain sender spool full for recipient")
    if spool_total >= DM_HASHCHAIN_SPOOL_LIMIT:
        raise ValueError("DM hashchain spool full for recipient")

    encoded = json.dumps(
        payload,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    ).encode("utf-8")
    if len(encoded) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    valid, why = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not valid:
        raise ValueError(why)

    # Identity and signature checks.
    if not signature or not public_key or not public_key_algo:
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)
    signed_material = build_signature_payload(
        event_type=event_type,
        node_id=node_id,
        sequence=sequence,
        payload=payload,
    )
    signature_valid = verify_signature(
        public_key_b64=public_key,
        public_key_algo=public_key_algo,
        signature_hex=signature,
        payload=signed_material,
    )
    if not signature_valid:
        raise ValueError("Invalid signature")

    revoked, _details = self._revocation_status(public_key)
    if revoked:
        raise ValueError("public key is revoked")

    # Build the event on top of the current head and commit it.
    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    record = event.to_dict()
    self._write_wal(record)
    self.events.append(record)
    self.event_index[event.event_id] = len(self.events) - 1
    self.head_hash = event.event_id
    seq_table[seq_key] = sequence
    self._replay_filter.add(event.event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(record)
    self._save()

    # Publishing is attempted after the local commit; any failure here
    # is ignored so it cannot fail the append.
    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(record)
    except Exception:
        pass
    _notify_public_event_append_hooks(record)
    logger.info(
        f"Infonet append [dm_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return record
|
||||
|
||||
def append_private_gate_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Validate and append an encrypted ``gate_message`` event to the ledger.

    Replay tracking uses the ``"<node_id>|gate_message"`` entry of
    ``self.sequence_domains``, separate from ``self.node_sequences``.

    Checks run in a fixed order and the first failure raises:
    sequence/replay, plaintext keys, ``transport_lock``, payload schema,
    payload size, protocol fields, signature fields, key algorithm, node
    binding, public-key binding, signature, revocation. The signature is
    accepted if it verifies against any payload variant returned by
    ``_private_gate_signature_payload_variants``.

    On success the event is written to the WAL, appended, indexed, made
    the new head, recorded in the sequence table and replay filter, and
    saved. It is then offered to ``rns_bridge.publish_event`` (failures
    ignored) and to the public append hooks.

    Args:
        node_id: Author node identifier.
        payload: Encrypted gate payload; must not carry plaintext keys.
        signature: Hex signature over one of the accepted variants.
        sequence: Strictly increasing, positive sequence number.
        public_key: Author public key (base64).
        public_key_algo: Algorithm name for ``public_key``.
        protocol_version: Defaults to ``PROTOCOL_VERSION`` when empty.
        timestamp: Event time; current time is used when falsy.

    Returns:
        The appended event as a dict.

    Raises:
        ValueError: If any validation step fails.
    """
    event_type = "gate_message"

    # Replay protection within this node's gate_message sequence domain.
    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    seq_table, seq_key = self._sequence_table_for_event(event_type, node_id)
    previous = seq_table.get(seq_key, 0)
    if sequence <= previous:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {previous}")

    # Reject local/cleartext-only keys and require the private_strong
    # transport lock before normalising.
    incoming = dict(payload or {})
    if any(field in incoming for field in ("message", "_local_plaintext", "_local_reply_to")):
        raise ValueError("private gate ledger payload must not contain plaintext")
    transport_lock = str(incoming.get("transport_lock", "") or "").strip().lower()
    if transport_lock != "private_strong":
        raise ValueError("gate messages require private_strong transport_lock")

    payload = normalize_payload(event_type, incoming)
    valid, why = validate_private_gate_ledger_payload(payload)
    if not valid:
        raise ValueError(why)

    encoded = json.dumps(
        payload,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    ).encode("utf-8")
    if len(encoded) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    valid, why = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not valid:
        raise ValueError(why)

    # Identity checks.
    if not signature or not public_key or not public_key_algo:
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)

    # Try each accepted payload variant in order; stop at the first
    # one the signature verifies against.
    variants = _private_gate_signature_payload_variants(
        str(payload.get("gate", "") or ""),
        {"payload": payload},
    )
    signature_ok = any(
        verify_signature(
            public_key_b64=public_key,
            public_key_algo=public_key_algo,
            signature_hex=signature,
            payload=build_signature_payload(
                event_type=event_type,
                node_id=node_id,
                sequence=sequence,
                payload=variant,
            ),
        )
        for variant in variants
    )
    if not signature_ok:
        raise ValueError("Invalid signature")

    revoked, _details = self._revocation_status(public_key)
    if revoked:
        raise ValueError("public key is revoked")

    # Build the event on top of the current head and commit it.
    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    record = event.to_dict()
    self._write_wal(record)
    self.events.append(record)
    self.event_index[event.event_id] = len(self.events) - 1
    self.head_hash = event.event_id
    seq_table[seq_key] = sequence
    self._replay_filter.add(event.event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(record)
    self._save()

    # Publishing is attempted after the local commit; any failure here
    # is ignored so it cannot fail the append.
    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(record)
    except Exception:
        pass
    _notify_public_event_append_hooks(record)

    logger.info(
        f"Infonet append [gate_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return record
|
||||
|
||||
def append(
|
||||
self,
|
||||
event_type: str,
|
||||
@@ -2412,18 +2077,6 @@ class Infonet:
|
||||
if not event_id or not prev_hash:
|
||||
rejected.append({"index": idx, "reason": "Missing event_id or prev_hash"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != expected_prev:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2442,14 +2095,25 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "network_id mismatch"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != self.head_hash:
|
||||
rejected.append({"index": idx, "reason": "prev_hash does not match head"})
|
||||
continue
|
||||
if sequence <= 0:
|
||||
rejected.append({"index": idx, "reason": "Invalid sequence"})
|
||||
continue
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
if sequence <= last:
|
||||
rejected.append({"index": idx, "reason": "Replay detected"})
|
||||
continue
|
||||
@@ -2484,18 +2148,7 @@ class Infonet:
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if ok:
|
||||
total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=str(evt.get("node_id", "") or ""))
|
||||
if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
|
||||
ok, reason = False, "DM hashchain sender spool full for recipient"
|
||||
elif total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
|
||||
ok, reason = False, "DM hashchain spool full for recipient"
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
@@ -2571,7 +2224,7 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "public key is revoked"})
|
||||
continue
|
||||
last_seq = sequence_table.get(sequence_key, 0)
|
||||
last_seq = self.node_sequences.get(node_id, 0)
|
||||
if sequence <= last_seq:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2607,30 +2260,18 @@ class Infonet:
|
||||
rejected.append({"index": idx, "reason": bind_reason})
|
||||
continue
|
||||
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
@@ -2660,7 +2301,7 @@ class Infonet:
|
||||
self.events.append(evt)
|
||||
self.event_index[event_id] = len(self.events) - 1
|
||||
self.head_hash = event_id
|
||||
sequence_table[sequence_key] = sequence
|
||||
self.node_sequences[node_id] = sequence
|
||||
self._update_counters_for_event(evt)
|
||||
accepted += 1
|
||||
expected_prev = event_id
|
||||
@@ -2723,7 +2364,6 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
@@ -2734,41 +2374,21 @@ class Infonet:
|
||||
return False, f"public key binding conflict at index {i}"
|
||||
seen_public_keys[public_key] = node_id
|
||||
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
return False, f"Invalid signature at index {i}"
|
||||
|
||||
prev = evt_dict["event_id"]
|
||||
@@ -2833,48 +2453,27 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
if not verify_node_binding(node_id, public_key):
|
||||
return False, f"node_id mismatch at index {i}"
|
||||
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
return False, f"Invalid signature at index {i}"
|
||||
prev = evt_dict["event_id"]
|
||||
|
||||
@@ -2938,14 +2537,7 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last_seq[sequence_key] = max(last_seq.get(sequence_key, 0), sequence)
|
||||
last_seq[node_id] = max(last_seq.get(node_id, 0), sequence)
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
seen_public_keys.setdefault(public_key, node_id)
|
||||
@@ -2965,21 +2557,8 @@ class Infonet:
|
||||
existing_idx = self.event_index.get(event_id)
|
||||
if existing_idx is not None and existing_idx <= prev_index:
|
||||
return False, "duplicate event_id"
|
||||
if event_type == "gate_message":
|
||||
payload = dict(payload or {})
|
||||
elif event_type == "dm_message":
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
else:
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
ok, reason = validate_event_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
proto = evt.get("protocol_version") or PROTOCOL_VERSION
|
||||
@@ -2993,14 +2572,7 @@ class Infonet:
|
||||
revoked, _info = self._revocation_status(public_key)
|
||||
if revoked and event_type != "key_revoke":
|
||||
return False, "public key revoked"
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if event_type == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if event_type == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last = last_seq.get(sequence_key, 0)
|
||||
last = last_seq.get(node_id, 0)
|
||||
if sequence <= last:
|
||||
return False, "sequence replay"
|
||||
from services.mesh.mesh_crypto import (
|
||||
@@ -3018,35 +2590,23 @@ class Infonet:
|
||||
if existing and existing != node_id:
|
||||
return False, "public key binding conflict"
|
||||
seen_public_keys[public_key] = node_id
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
return False, "invalid signature"
|
||||
computed = ChainEvent.from_dict(evt).event_id
|
||||
if computed != event_id:
|
||||
return False, "event_id mismatch"
|
||||
last_seq[sequence_key] = sequence
|
||||
last_seq[node_id] = sequence
|
||||
|
||||
# Apply fork
|
||||
self.events = prefix + ordered
|
||||
|
||||
@@ -2,64 +2,10 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from email.utils import parsedate_to_datetime
|
||||
from datetime import timezone
|
||||
|
||||
from services.mesh.mesh_peer_store import PeerRecord
|
||||
|
||||
|
||||
class PeerSyncRateLimited(Exception):
    """Signal that an upstream peer throttled a sync request (HTTP 429).

    Attributes:
        retry_after_s: The peer's ``Retry-After`` value in whole seconds,
            never negative. ``0`` means no usable header was supplied, in
            which case the caller is expected to fall back to its own
            backoff. Intended to be forwarded to
            ``finish_sync(retry_after_s=...)``.
        status: The HTTP status code that triggered the exception
            (``429`` unless the caller supplies another truthy value).
    """

    def __init__(self, message: str, retry_after_s: int = 0, status: int = 429):
        super().__init__(message)
        # Falsy inputs (None, 0, "") collapse to 0; negatives are floored.
        wait_s = int(retry_after_s) if retry_after_s else 0
        self.retry_after_s = wait_s if wait_s > 0 else 0
        # A falsy status falls back to 429.
        self.status = int(status) if status else 429
|
||||
|
||||
|
||||
def parse_retry_after_header(header_value: str, *, now: float | None = None) -> int:
    """Parse a ``Retry-After`` HTTP header into a wait in seconds.

    Two forms are accepted:

    * Delay-seconds: a non-negative ASCII integer (``Retry-After: 120``).
    * HTTP-date: an absolute time
      (``Retry-After: Wed, 21 Oct 2026 07:28:00 GMT``).

    Args:
        header_value: Raw header value; ``None`` and blank strings are
            treated as "no header".
        now: Reference time as a Unix timestamp. Defaults to ``time.time()``.

    Returns:
        Seconds to wait from ``now``, clamped to ``[0, 3600]``. Empty or
        unparseable headers return ``0`` so the caller can fall back to its
        own backoff. This function never raises on malformed header text.
    """
    upper_bound = 3600  # never trust a peer to silence us > 1h
    value = str(header_value or "").strip()
    if not value:
        return 0

    # Form 1: pure integer seconds. ``str.isdigit`` alone also accepts
    # non-ASCII digits such as "²" that ``int()`` rejects, so insist on ASCII
    # before converting -- otherwise a hostile peer can raise ValueError here.
    if value.isascii() and value.isdigit():
        significant = value.lstrip("0")
        # Seven or more significant digits is already >= 1,000,000 s, far
        # above the clamp; short-circuiting also avoids the interpreter's
        # limit on converting very long digit strings.
        if len(significant) > 6:
            return upper_bound
        return min(int(significant or "0"), upper_bound)

    # Form 2: HTTP-date.
    try:
        target = parsedate_to_datetime(value)
        if target is None:
            return 0
        if target.tzinfo is None:
            # A date without zone information is interpreted as UTC.
            target = target.replace(tzinfo=timezone.utc)
        current = float(now if now is not None else time.time())
        delta = int(target.timestamp() - current)
    except (TypeError, ValueError, OverflowError):
        return 0
    return min(max(0, delta), upper_bound)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SyncWorkerState:
|
||||
last_sync_started_at: int = 0
|
||||
@@ -126,59 +72,6 @@ def begin_sync(
|
||||
)
|
||||
|
||||
|
||||
def _failure_backoff_seconds(
|
||||
*,
|
||||
base_backoff_s: int,
|
||||
consecutive_failures: int,
|
||||
retry_after_s: int,
|
||||
cap_s: int = 1800,
|
||||
) -> int:
|
||||
"""Compute the next-attempt delay after a failed sync.
|
||||
|
||||
Two inputs combine:
|
||||
|
||||
* ``retry_after_s`` — when an upstream peer answered HTTP 429
|
||||
with a ``Retry-After`` header, we honor it exactly. Continuing
|
||||
to hammer the upstream every 60s is the bug this fix exists to
|
||||
close: it keeps the upstream's rate-limit bucket full
|
||||
indefinitely and no sync ever lands.
|
||||
|
||||
* Exponential growth on ``consecutive_failures`` — even without an
|
||||
explicit Retry-After, repeated failures should slow us down. The
|
||||
first failure waits ``base`` (preserves pre-fix behavior for
|
||||
one-off blips). Each subsequent failure doubles the wait, capped
|
||||
to ``cap_s`` (default 30 minutes). With base=60 and cap=1800,
|
||||
the schedule is 60s → 120s → 240s → 480s → 960s → 1800s →
|
||||
1800s → … .
|
||||
|
||||
The actual delay is the MAX of the two — whichever asks for more
|
||||
patience wins. ``retry_after_s == 0`` (no header) falls back to
|
||||
pure exponential. An aggressive ``Retry-After`` (say 600s while
|
||||
we're only at 1 failure) wins over the exponential ladder.
|
||||
"""
|
||||
base = max(0, int(base_backoff_s or 0))
|
||||
failures = max(0, int(consecutive_failures or 0))
|
||||
cap = max(0, int(cap_s or 0))
|
||||
retry_after = max(0, int(retry_after_s or 0))
|
||||
# ``cap_s=0`` explicitly disables the exponential ladder entirely
|
||||
# — operators who want the pre-fix "honor Retry-After only" behavior
|
||||
# can set this. The default cap of 1800s is what saturates the
|
||||
# ladder at the 5th-6th failure for base=60.
|
||||
if cap == 0:
|
||||
return retry_after
|
||||
# 2^(failures-1) — so failure #1 = base (preserves the pre-fix
|
||||
# default for transient blips), failure #2 = 2*base, etc. Cap on
|
||||
# the exponent (16) is defense against integer overflow on a
|
||||
# hostile or very large failures counter.
|
||||
if base > 0 and failures > 0:
|
||||
exponent = min(max(0, failures - 1), 16)
|
||||
grown = base * (2 ** exponent)
|
||||
else:
|
||||
grown = 0
|
||||
exponential = min(max(0, grown), cap)
|
||||
return max(exponential, retry_after)
|
||||
|
||||
|
||||
def finish_sync(
|
||||
state: SyncWorkerState,
|
||||
*,
|
||||
@@ -190,26 +83,7 @@ def finish_sync(
|
||||
now: float | None = None,
|
||||
interval_s: int = 300,
|
||||
failure_backoff_s: int = 60,
|
||||
retry_after_s: int = 0,
|
||||
failure_backoff_cap_s: int = 1800,
|
||||
) -> SyncWorkerState:
|
||||
"""Finalise a sync attempt and compute when the next one should run.
|
||||
|
||||
New args (added for the 429 retry storm fix):
|
||||
|
||||
* ``retry_after_s`` — if the peer responded with HTTP 429 + a
|
||||
``Retry-After`` header, pass that value here. ``finish_sync``
|
||||
will use ``max(exponential, retry_after_s)`` for the delay so
|
||||
we never hammer a peer that asked us to back off.
|
||||
* ``failure_backoff_cap_s`` — upper bound on the exponential
|
||||
ladder. Default 1800 (30 min) — keeps a sync queue from going
|
||||
silent for hours while still cutting the request rate to
|
||||
something the upstream can absorb.
|
||||
|
||||
The pre-fix behavior (constant 60s on every failure) is recoverable
|
||||
by passing ``failure_backoff_cap_s`` equal to ``failure_backoff_s`` with
|
||||
``retry_after_s=0``; ``failure_backoff_cap_s=0`` disables the ladder entirely.
|
||||
"""
|
||||
timestamp = int(now if now is not None else time.time())
|
||||
if ok:
|
||||
return SyncWorkerState(
|
||||
@@ -225,25 +99,17 @@ def finish_sync(
|
||||
consecutive_failures=0,
|
||||
)
|
||||
|
||||
next_failures = state.consecutive_failures + 1
|
||||
delay_s = _failure_backoff_seconds(
|
||||
base_backoff_s=failure_backoff_s,
|
||||
consecutive_failures=next_failures,
|
||||
retry_after_s=retry_after_s,
|
||||
cap_s=failure_backoff_cap_s,
|
||||
)
|
||||
|
||||
return SyncWorkerState(
|
||||
last_sync_started_at=state.last_sync_started_at,
|
||||
last_sync_finished_at=timestamp,
|
||||
last_sync_ok_at=state.last_sync_ok_at,
|
||||
next_sync_due_at=timestamp + delay_s,
|
||||
next_sync_due_at=timestamp + max(0, int(failure_backoff_s or 0)),
|
||||
last_peer_url=peer_url or state.last_peer_url,
|
||||
last_error=str(error or "").strip(),
|
||||
last_outcome="fork" if fork_detected else "error",
|
||||
current_head=current_head or state.current_head,
|
||||
fork_detected=bool(fork_detected),
|
||||
consecutive_failures=next_failures,
|
||||
consecutive_failures=state.consecutive_failures + 1,
|
||||
)
|
||||
|
||||
|
||||
@@ -276,6 +142,5 @@ def should_run_sync(
|
||||
) -> bool:
|
||||
current_time = int(now if now is not None else time.time())
|
||||
if state.last_outcome == "running":
|
||||
started_at = int(state.last_sync_started_at or 0)
|
||||
return started_at <= 0 or current_time - started_at >= 300
|
||||
return False
|
||||
return int(state.next_sync_due_at or 0) <= current_time
|
||||
|
||||
@@ -26,11 +26,7 @@ from enum import Enum
|
||||
from typing import Any, Callable, Optional
|
||||
from collections import deque
|
||||
from urllib.parse import urlparse
|
||||
from services.mesh.mesh_crypto import (
|
||||
_derive_peer_key,
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
from services.mesh.mesh_privacy_policy import (
|
||||
TRANSPORT_TIER_ORDER as _TIER_RANK,
|
||||
@@ -707,6 +703,7 @@ class InternetTransport(_PeerPushTransportMixin):
|
||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||
except ValueError as exc:
|
||||
return TransportResult(False, self.NAME, str(exc))
|
||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
||||
|
||||
delivered = 0
|
||||
last_error = ""
|
||||
@@ -716,13 +713,10 @@ class InternetTransport(_PeerPushTransportMixin):
|
||||
try:
|
||||
normalized_peer_url = normalize_peer_url(peer_url)
|
||||
headers = {"Content-Type": "application/json"}
|
||||
# Issue #256: per-peer secret takes precedence over the
|
||||
# global MESH_PEER_PUSH_SECRET. When neither is set the
|
||||
# key is empty and we skip the HMAC header entirely so a
|
||||
# bare (unsigned) push still works on test deployments
|
||||
# that have not yet configured any secret at all.
|
||||
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||
if peer_key:
|
||||
if secret:
|
||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
||||
if not peer_key:
|
||||
raise ValueError("invalid peer URL for HMAC derivation")
|
||||
headers["X-Peer-Url"] = normalized_peer_url
|
||||
headers["X-Peer-HMAC"] = hmac.new(
|
||||
peer_key,
|
||||
@@ -804,6 +798,7 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||
except ValueError as exc:
|
||||
return TransportResult(False, self.NAME, str(exc))
|
||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
||||
|
||||
delivered = 0
|
||||
last_error = ""
|
||||
@@ -813,10 +808,10 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
||||
try:
|
||||
normalized_peer_url = normalize_peer_url(peer_url)
|
||||
headers = {"Content-Type": "application/json"}
|
||||
# Issue #256: per-peer secret takes precedence; see the
|
||||
# other transport above for the rationale.
|
||||
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||
if peer_key:
|
||||
if secret:
|
||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
||||
if not peer_key:
|
||||
raise ValueError("invalid peer URL for HMAC derivation")
|
||||
headers["X-Peer-Url"] = normalized_peer_url
|
||||
headers["X-Peer-HMAC"] = hmac.new(
|
||||
peer_key,
|
||||
|
||||
@@ -2,9 +2,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
|
||||
@@ -36,58 +33,6 @@ def _require_fields(payload: dict[str, Any], fields: tuple[str, ...]) -> tuple[b
|
||||
return True, "ok"
|
||||
|
||||
|
||||
def _decode_base64ish(value: Any) -> bytes | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw or any(ch.isspace() for ch in raw):
|
||||
return None
|
||||
padded = raw + ("=" * (-len(raw) % 4))
|
||||
for altchars in (None, b"-_"):
|
||||
try:
|
||||
return base64.b64decode(padded.encode("ascii"), altchars=altchars, validate=True)
|
||||
except (binascii.Error, UnicodeEncodeError, ValueError):
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _byte_entropy(data: bytes) -> float:
|
||||
if not data:
|
||||
return 0.0
|
||||
counts = [0] * 256
|
||||
for byte in data:
|
||||
counts[byte] += 1
|
||||
total = float(len(data))
|
||||
return -sum((count / total) * math.log2(count / total) for count in counts if count)
|
||||
|
||||
|
||||
def _validate_sealed_bytes_field(
    payload: dict[str, Any],
    field: str,
    *,
    min_bytes: int = 8,
    entropy_floor: float = 2.5,
) -> tuple[bool, str]:
    """Check that ``payload[field]`` looks like base64-encoded sealed bytes.

    Args:
        payload: Event payload to inspect.
        field: Key whose value should hold the encoded blob.
        min_bytes: Minimum decoded length.
        entropy_floor: Minimum byte entropy (bits per byte) required of
            blobs that are at least 32 bytes long.

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, reason)``.
    """
    sealed = _decode_base64ish(payload.get(field, ""))
    if sealed is None:
        return False, f"{field} must be base64-encoded sealed bytes"

    size = len(sealed)
    if size < min_bytes:
        return False, f"{field} is too short"

    # Short test vectors and compact envelopes can legitimately be low
    # entropy, so the plaintext heuristics only run with enough material.
    # NOTE(review): the nesting of the entropy check was reconstructed from a
    # whitespace-stripped source -- confirm it belongs under this size guard.
    if size < 32:
        return True, "ok"

    text_like = 0
    for octet in sealed:
        if octet in (9, 10, 13) or 32 <= octet <= 126:
            text_like += 1
    if text_like / size > 0.9:
        # Mostly printable AND valid UTF-8: almost certainly plaintext that
        # was merely base64-encoded.
        try:
            sealed.decode("utf-8")
        except UnicodeDecodeError:
            pass
        else:
            return False, f"{field} looks like encoded plaintext"

    if _byte_entropy(sealed) < entropy_floor:
        return False, f"{field} entropy is too low for sealed bytes"
    return True, "ok"
|
||||
|
||||
|
||||
def _validate_message(payload: dict[str, Any]) -> tuple[bool, str]:
|
||||
ok, reason = _require_fields(
|
||||
payload, ("message", "destination", "channel", "priority", "ephemeral")
|
||||
@@ -386,7 +331,6 @@ ACTIVE_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
||||
LEGACY_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
||||
{
|
||||
"gate_message",
|
||||
"dm_message",
|
||||
}
|
||||
)
|
||||
"""Event types that exist historically on the public chain and must remain
|
||||
@@ -481,8 +425,6 @@ def validate_event_payload(event_type: str, payload: dict[str, Any]) -> tuple[bo
|
||||
|
||||
|
||||
def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) -> tuple[bool, str]:
|
||||
if event_type == "gate_message":
|
||||
return validate_private_gate_ledger_payload(payload)
|
||||
if event_type not in PUBLIC_LEDGER_EVENT_TYPES and event_type not in _EXTENSION_VALIDATORS:
|
||||
return False, f"{event_type} is not allowed on the public ledger"
|
||||
forbidden = sorted(
|
||||
@@ -499,92 +441,6 @@ def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) ->
|
||||
return True, "ok"
|
||||
|
||||
|
||||
_PRIVATE_GATE_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
|
||||
{
|
||||
"gate",
|
||||
"ciphertext",
|
||||
"nonce",
|
||||
"sender_ref",
|
||||
"format",
|
||||
"epoch",
|
||||
"gate_envelope",
|
||||
"envelope_hash",
|
||||
"reply_to",
|
||||
"transport_lock",
|
||||
"signed_context",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def validate_private_gate_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate a ciphertext-only gate event for private Infonet replication.

    Checks, in order: the generic ``gate_message`` payload validation, the
    field allowlist, absence of plaintext keys, the ``transport_lock`` value
    (optional, but must be a private lane when present), and that
    ``ciphertext`` and ``nonce`` decode as sealed bytes.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise the first
        failing check's ``(ok, reason)`` pair.
    """
    ok, reason = validate_event_payload("gate_message", payload)
    if not ok:
        return ok, reason

    # Keys are compared after stripping and lowercasing.
    rejected = []
    for key in payload:
        if str(key or "").strip().lower() not in _PRIVATE_GATE_LEDGER_ALLOWED_FIELDS:
            rejected.append(key)
    if rejected:
        rejected.sort()
        return False, f"private gate ledger payload contains unsupported fields: {', '.join(rejected)}"

    # NOTE(review): none of these keys are in the allowlist, so the check
    # above already rejects them; this guard is defence in depth.
    if any(name in payload for name in ("message", "_local_plaintext", "_local_reply_to")):
        return False, "private gate ledger payload must not contain plaintext"

    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock not in ("", "private", "private_strong", "rns", "onion"):
        return False, "gate messages require private transport_lock"

    for sealed_field in ("ciphertext", "nonce"):
        ok, reason = _validate_sealed_bytes_field(payload, sealed_field)
        if not ok:
            return ok, reason
    return True, "ok"
|
||||
|
||||
|
||||
_PRIVATE_DM_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
|
||||
{
|
||||
"recipient_id",
|
||||
"delivery_class",
|
||||
"recipient_token",
|
||||
"ciphertext",
|
||||
"msg_id",
|
||||
"timestamp",
|
||||
"format",
|
||||
"session_welcome",
|
||||
"sender_seal",
|
||||
"relay_salt",
|
||||
"transport_lock",
|
||||
"signed_context",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def validate_private_dm_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate a ciphertext-only DM dead-drop event for private Infonet replication.

    Checks, in order: the generic ``dm_message`` payload validation, the
    field allowlist, absence of plaintext keys, that ``transport_lock`` is
    exactly ``private_strong``, and that ``ciphertext`` is non-empty and
    decodes as sealed bytes.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise the first
        failing check's ``(ok, reason)`` pair.
    """
    ok, reason = validate_event_payload("dm_message", payload)
    if not ok:
        return ok, reason

    # Keys are compared after stripping and lowercasing.
    rejected = []
    for key in payload:
        if str(key or "").strip().lower() not in _PRIVATE_DM_LEDGER_ALLOWED_FIELDS:
            rejected.append(key)
    if rejected:
        rejected.sort()
        return False, f"private DM ledger payload contains unsupported fields: {', '.join(rejected)}"

    # NOTE(review): none of these keys are in the allowlist, so the check
    # above already rejects them; this guard is defence in depth.
    if any(name in payload for name in ("message", "plaintext", "_local_plaintext")):
        return False, "private DM ledger payload must not contain plaintext"

    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock != "private_strong":
        return False, "DM hashchain spool requires private_strong transport_lock"

    sealed_text = str(payload.get("ciphertext", "") or "").strip()
    if not sealed_text:
        return False, "ciphertext cannot be empty"

    ok, reason = _validate_sealed_bytes_field(payload, "ciphertext")
    if not ok:
        return ok, reason
    return True, "ok"
|
||||
|
||||
|
||||
def validate_protocol_fields(protocol_version: str, network_id: str) -> tuple[bool, str]:
|
||||
if protocol_version != PROTOCOL_VERSION:
|
||||
return False, "Unsupported protocol_version"
|
||||
|
||||
@@ -38,11 +38,6 @@ _REVOCATION_TTL_CACHE: dict[str, dict[str, Any]] = {}
|
||||
_REVOCATION_TTL_LOCK = threading.Lock()
|
||||
_REVOCATION_REFRESH_LOCK = threading.Lock()
|
||||
_REVOCATION_REFRESH_FAIL_FAST_WINDOW_S = 5.0
|
||||
|
||||
|
||||
def _request_scope_path(request: Request) -> str:
|
||||
scope = getattr(request, "scope", {}) or {}
|
||||
return str(scope.get("path") or "")
|
||||
_REVOCATION_REFRESH_RETRY_AFTER_S = 5
|
||||
_REVOCATION_PRECHECK_UNAVAILABLE_DETAIL = "Signed event integrity preflight unavailable"
|
||||
|
||||
@@ -171,7 +166,7 @@ def _canonical_signed_write_retry_payload(
|
||||
signed_context = build_signed_context(
|
||||
event_type=prepared.event_type,
|
||||
kind=prepared.kind.value,
|
||||
endpoint=_request_scope_path(request),
|
||||
endpoint=str(request.url.path or ""),
|
||||
lane_floor=_content_private_required_transport_tier(prepared.kind),
|
||||
sequence_domain=_signed_context_sequence_domain(prepared),
|
||||
node_id=prepared.node_id,
|
||||
@@ -545,7 +540,7 @@ def _apply_signed_context_policy(prepared: "PreparedSignedWrite", request: Reque
|
||||
ok, reason = validate_signed_context(
|
||||
event_type=prepared.event_type,
|
||||
kind=prepared.kind.value,
|
||||
endpoint=_request_scope_path(request),
|
||||
endpoint=str(request.url.path or ""),
|
||||
lane_floor=_content_private_required_transport_tier(prepared.kind),
|
||||
sequence_domain=_signed_context_sequence_domain(prepared),
|
||||
node_id=prepared.node_id,
|
||||
|
||||
@@ -91,15 +91,13 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
||||
return {"ok": False, "detail": "lookup token required"}
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_crypto import (
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
from services.mesh.mesh_router import configured_relay_peer_urls
|
||||
|
||||
settings = get_settings()
|
||||
# Issue #256: secret check moved per-peer below. We still bail out
|
||||
# cleanly when there are no peers configured at all.
|
||||
secret = str(getattr(settings, "MESH_PEER_PUSH_SECRET", "") or "").strip()
|
||||
if not secret:
|
||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||
peers = configured_relay_peer_urls()
|
||||
if not peers:
|
||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||
@@ -123,8 +121,7 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
||||
or os.environ.get("SB_TEST_NODE_URL", "").strip()
|
||||
or normalized_peer_url
|
||||
)
|
||||
# Issue #256: prefer per-peer secret keyed by the sender URL.
|
||||
peer_key = resolve_peer_key_for_url(sender_peer_url)
|
||||
peer_key = _derive_peer_key(secret, sender_peer_url)
|
||||
if not peer_key:
|
||||
continue
|
||||
headers = {
|
||||
@@ -234,12 +231,12 @@ def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, A
|
||||
# Generic UA: any peer-facing crypto request should not carry a
|
||||
# fork-specific identifier — that turns prekey lookups into a
|
||||
# software-fingerprinting beacon.
|
||||
from services.network_utils import default_user_agent
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
request = urllib.request.Request(
|
||||
f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": default_user_agent(),
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
},
|
||||
method="GET",
|
||||
)
|
||||
|
||||
@@ -5,9 +5,7 @@ import subprocess
|
||||
import shutil
|
||||
import time
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
@@ -22,190 +20,15 @@ _session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
|
||||
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-operator outbound identification
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Issues #289 / #290 / #291 and the retrofit of PR #284 (#218 / #219 / #220):
|
||||
# every third-party API the backend calls used to identify itself with a
|
||||
# single "Shadowbroker" aggregate User-Agent. From the upstream's
|
||||
# perspective, that meant every Shadowbroker install in the world looked
|
||||
# like one giant entity hammering them. If one install misbehaved, the
|
||||
# upstream's only recourse was to block "Shadowbroker" as a whole — which
|
||||
# would take out every other install too.
|
||||
#
|
||||
# Fix: give each install a stable pseudonymous handle used as the entire
|
||||
# User-Agent product token (no shared "Shadowbroker" label). Upstreams see
|
||||
# ``operator-7f3a92`` (or ``OPERATOR_HANDLE``), not one monolithic app name.
|
||||
#
|
||||
# The handle:
|
||||
#
|
||||
# - Is auto-generated on first call if no `OPERATOR_HANDLE` is configured
|
||||
# (looks like "operator-7f3a92" — 6 hex chars from uuid4()).
|
||||
# - Is persisted to ``backend/data/operator_handle.json`` so it survives
|
||||
# restarts. Under Docker compose that file lives in the volume mount
|
||||
# alongside `carrier_cache.json` and the other persistent state.
|
||||
# - Can be overridden by the operator via the `OPERATOR_HANDLE` setting
|
||||
# (env var or settings UI). Operators with their own GitHub handle,
|
||||
# organization name, etc. can use that for traceability.
|
||||
# - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is
|
||||
# strictly for public third-party API attribution.
|
||||
|
||||
_OPERATOR_HANDLE_FILE = (
|
||||
Path(__file__).parent.parent / "data" / "operator_handle.json"
|
||||
# Default outbound User-Agent. Generic by design — does NOT include any
|
||||
# personal contact info or a fork-specific repo URL. Operators who run a
|
||||
# public-facing relay and want to identify themselves to upstreams (e.g.
|
||||
# for Nominatim / weather.gov usage-policy compliance) can override this
|
||||
# via the SHADOWBROKER_USER_AGENT env var.
|
||||
DEFAULT_USER_AGENT = os.environ.get(
|
||||
"SHADOWBROKER_USER_AGENT",
|
||||
"ShadowBroker-OSINT/0.9",
|
||||
)
|
||||
_OPERATOR_HANDLE_CACHE: str = ""
|
||||
_OPERATOR_HANDLE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _generate_operator_handle() -> str:
|
||||
"""Produce a stable pseudonymous handle for first-launch installs.
|
||||
|
||||
Format: ``operator-7f3a92`` (6 hex chars from a fresh uuid4()).
|
||||
Distinct per install. Carries no real-world identity by default —
|
||||
operators who want one can override via ``OPERATOR_HANDLE``.
|
||||
|
||||
Note: the prefix is deliberately neutral. Earlier drafts used
|
||||
``shadow-`` which, while accurate to the project name, looks
|
||||
exactly like the kind of pattern a third-party abuse-detection
|
||||
system would auto-block as suspicious. ``operator-`` describes
|
||||
what the value actually is and doesn't pattern-match malware.
|
||||
"""
|
||||
return f"operator-{uuid.uuid4().hex[:6]}"
|
||||
|
||||
|
||||
def _load_persisted_operator_handle() -> str:
|
||||
"""Return the previously-saved handle from disk, or empty if none.
|
||||
|
||||
Reads ``backend/data/operator_handle.json`` if it exists. Any read
|
||||
error returns empty so a fresh handle gets generated rather than
|
||||
crashing the request.
|
||||
"""
|
||||
try:
|
||||
if _OPERATOR_HANDLE_FILE.exists():
|
||||
data = json.loads(_OPERATOR_HANDLE_FILE.read_text(encoding="utf-8"))
|
||||
return str(data.get("handle", "") or "").strip()
|
||||
except (OSError, json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _persist_operator_handle(handle: str) -> None:
    """Save *handle* to ``_OPERATOR_HANDLE_FILE`` for use after a restart.

    The JSON is written to a sibling ``.tmp`` file and then moved over the
    target with ``os.replace``. Any ``OSError`` is logged at debug level and
    swallowed: the caller keeps using the in-memory handle, and a later
    process may simply generate a different one.
    """
    record = {
        "handle": handle,
        "_meta": {
            "purpose": "Per-install operator handle for outbound third-party API attribution.",
            "see": "backend/services/network_utils.py:outbound_user_agent",
        },
    }
    target = _OPERATOR_HANDLE_FILE
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        staging = target.with_suffix(target.suffix + ".tmp")
        staging.write_text(json.dumps(record, indent=2), encoding="utf-8")
        os.replace(staging, target)
    except OSError as exc:
        logger.debug("Could not persist operator_handle (continuing in-memory): %s", exc)
|
||||
|
||||
|
||||
def get_operator_handle() -> str:
    """Return the per-install operator handle used for outbound attribution.

    Resolution order (all under ``_OPERATOR_HANDLE_LOCK``):

    1. The ``OPERATOR_HANDLE`` setting, if non-empty. It is normalized on
       every call and never cached.
    2. The value cached in this process by an earlier call.
    3. The value persisted on disk by a previous run, normalized. Legacy
       ``shadow-`` prefixed handles are rewritten to ``operator-`` and saved.
    4. A newly generated handle, which is persisted.

    Results of steps 3 and 4 are stored in the process cache.
    """
    global _OPERATOR_HANDLE_CACHE
    legacy_prefix = "shadow-"
    with _OPERATOR_HANDLE_LOCK:
        # 1. A configured override always wins. Any failure while loading
        #    settings is treated as "no override".
        try:
            from services.config import get_settings

            override = str(getattr(get_settings(), "OPERATOR_HANDLE", "") or "").strip()
        except Exception:
            override = ""
        if override:
            return _normalize_handle(override)

        # 2. Fast path: already resolved in this process.
        if not _OPERATOR_HANDLE_CACHE:
            stored = _load_persisted_operator_handle()
            if stored:
                # 3. Reuse the handle from a previous run.
                handle = _normalize_handle(stored)
                # NOTE(review): nesting reconstructed from a whitespace-stripped
                # source -- the re-persist is assumed to happen only when the
                # legacy prefix is migrated.
                if handle.startswith(legacy_prefix):
                    handle = f"operator-{handle[len(legacy_prefix):]}"
                    _persist_operator_handle(handle)
            else:
                # 4. First launch: mint a handle and save it.
                handle = _generate_operator_handle()
                _persist_operator_handle(handle)
            _OPERATOR_HANDLE_CACHE = handle
        return _OPERATOR_HANDLE_CACHE
|
||||
|
||||
|
||||
def _normalize_handle(raw: str) -> str:
|
||||
"""Strip whitespace, lowercase, replace unsafe characters with dashes."""
|
||||
safe = "".join(
|
||||
ch if (ch.isalnum() or ch in "-_") else "-"
|
||||
for ch in raw.strip().lower()
|
||||
)
|
||||
# Collapse runs of dashes and trim to a reasonable length so an
|
||||
# operator can't make our outbound logs unreadable.
|
||||
while "--" in safe:
|
||||
safe = safe.replace("--", "-")
|
||||
safe = safe.strip("-")
|
||||
return safe[:48] if safe else "anonymous"
|
||||
|
||||
|
||||
def outbound_user_agent(purpose: str = "") -> str:
    """Build the User-Agent string for an outbound third-party request.

    The result is the per-install operator handle on its own, or the
    handle followed by ``(purpose: <slug>)`` when ``purpose`` is non-empty,
    e.g. ``operator-7f3a92 (purpose: wikipedia)``. The purpose is passed
    through ``_normalize_handle`` before being embedded.

    This function does not read ``SHADOWBROKER_USER_AGENT`` itself; that
    whole-string override is applied by ``default_user_agent``.
    """
    operator = get_operator_handle()
    if not purpose:
        return operator
    tag = _normalize_handle(purpose)
    return f"{operator} (purpose: {tag})"
|
||||
|
||||
|
||||
def _reset_operator_handle_cache_for_tests() -> None:
    """Test-only hook: empty the in-memory operator-handle cache.

    Resets ``_OPERATOR_HANDLE_CACHE`` to ``""`` while holding
    ``_OPERATOR_HANDLE_LOCK``, so a test that sets a new ``OPERATOR_HANDLE``
    sees it picked up on the next lookup.
    """
    global _OPERATOR_HANDLE_CACHE
    with _OPERATOR_HANDLE_LOCK:
        _OPERATOR_HANDLE_CACHE = ""
|
||||
|
||||
|
||||
def default_user_agent() -> str:
    """Return the User-Agent used by ``fetch_with_curl`` and legacy callers.

    A non-blank ``SHADOWBROKER_USER_AGENT`` environment variable (after
    trimming) replaces the whole string; otherwise the value of
    ``outbound_user_agent()`` is returned.
    """
    override = os.environ.get("SHADOWBROKER_USER_AGENT", "").strip()
    return override or outbound_user_agent()
|
||||
|
||||
|
||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
|
||||
@@ -261,7 +84,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
both Python requests and the barebones Windows system curl.
|
||||
"""
|
||||
default_headers = {
|
||||
"User-Agent": default_user_agent(),
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
}
|
||||
if headers:
|
||||
default_headers.update(headers)
|
||||
|
||||
@@ -12,8 +12,6 @@ logger = logging.getLogger(__name__)
|
||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "news_feeds.json"
|
||||
MAX_FEEDS = 50
|
||||
_FEED_URL_REPLACEMENTS = {
|
||||
"http://feeds.bbci.co.uk/news/world/rss.xml": "https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"http://www.news.cn/english/rss/worldrss.xml": "https://www.news.cn/english/rss/worldrss.xml",
|
||||
"https://www.channelnewsasia.com/rssfeed/8395986": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml",
|
||||
}
|
||||
_DEAD_FEED_URLS = {
|
||||
@@ -29,7 +27,7 @@ _DEAD_FEED_URLS = {
|
||||
|
||||
DEFAULT_FEEDS = [
|
||||
{"name": "NPR", "url": "https://feeds.npr.org/1004/rss.xml", "weight": 4},
|
||||
{"name": "BBC", "url": "https://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
|
||||
{"name": "BBC", "url": "http://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
|
||||
{"name": "AlJazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml", "weight": 2},
|
||||
{"name": "NYT", "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml", "weight": 1},
|
||||
{"name": "GDACS", "url": "https://www.gdacs.org/xml/rss.xml", "weight": 5},
|
||||
@@ -37,7 +35,7 @@ DEFAULT_FEEDS = [
|
||||
{"name": "Bellingcat", "url": "https://www.bellingcat.com/feed/", "weight": 4},
|
||||
{"name": "Guardian", "url": "https://www.theguardian.com/world/rss", "weight": 3},
|
||||
{"name": "TASS", "url": "https://tass.com/rss/v2.xml", "weight": 2},
|
||||
{"name": "Xinhua", "url": "https://www.news.cn/english/rss/worldrss.xml", "weight": 2},
|
||||
{"name": "Xinhua", "url": "http://www.news.cn/english/rss/worldrss.xml", "weight": 2},
|
||||
{"name": "CNA", "url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml", "weight": 3},
|
||||
{"name": "Mercopress", "url": "https://en.mercopress.com/rss/", "weight": 3},
|
||||
{"name": "SCMP", "url": "https://www.scmp.com/rss/91/feed", "weight": 4},
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
"""Operator opt-in for Polymarket/Kalshi outbound fetches (Global Threat Intercept)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "prediction_markets_opt_in.json"
|
||||
_OPT_IN_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
|
||||
return str(os.getenv(name, "")).strip().lower()
|
||||
|
||||
|
||||
def get_prediction_markets_ui_opt_in() -> bool:
    """Return whether the persisted UI opt-in flag is set.

    Reads ``_OPT_IN_FILE`` as UTF-8 JSON and returns the truthiness of its
    ``opted_in`` key. A missing file means "not opted in". A file that
    cannot be read, cannot be decoded, or does not hold a JSON object is
    logged as a warning and also treated as not opted in.
    """
    try:
        # Read directly instead of checking exists() first: the file may
        # disappear between the check and the read.
        payload = json.loads(_OPT_IN_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return False
    except (OSError, ValueError, TypeError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Prediction markets opt-in file unreadable: %s", exc)
        return False
    if not isinstance(payload, dict):
        # Valid JSON of the wrong shape (list, string, number, ...) has no
        # .get(); treat it like any other unreadable file.
        logger.warning(
            "Prediction markets opt-in file unreadable: %s",
            "expected a JSON object",
        )
        return False
    return bool(payload.get("opted_in"))
|
||||
|
||||
|
||||
def set_prediction_markets_ui_opt_in(opted_in: bool) -> None:
    """Persist the UI opt-in flag to ``_OPT_IN_FILE``.

    The flag is stored as ``{"opted_in": <bool>}``. The payload is written
    to a sibling ``.tmp`` file and moved into place with ``os.replace`` so
    that a concurrent reader (which takes no lock) sees either the old or
    the new file, never a truncated one. Writers are serialized by
    ``_OPT_IN_LOCK``.

    Raises:
        OSError: if the directory cannot be created or the file written.
    """
    payload = json.dumps({"opted_in": bool(opted_in)}, indent=2)
    staging = _OPT_IN_FILE.with_name(_OPT_IN_FILE.name + ".tmp")
    with _OPT_IN_LOCK:
        _OPT_IN_FILE.parent.mkdir(parents=True, exist_ok=True)
        staging.write_text(payload, encoding="utf-8")
        os.replace(staging, _OPT_IN_FILE)
|
||||
|
||||
|
||||
def prediction_markets_env_forced_on() -> bool:
    """Return True when ``PREDICTION_MARKETS_ENABLED`` is 1/true/yes/on."""
    truthy = ("1", "true", "yes", "on")
    flag = _env_flag("PREDICTION_MARKETS_ENABLED")
    return flag in truthy
|
||||
|
||||
|
||||
def prediction_markets_env_forced_off() -> bool:
    """Return True when ``PREDICTION_MARKETS_ENABLED`` is 0/false/no/off."""
    falsy = ("0", "false", "no", "off")
    flag = _env_flag("PREDICTION_MARKETS_ENABLED")
    return flag in falsy
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
    """Return True when the UI opt-in or the env flag enables market pulls.

    The persisted UI opt-in is checked first; the environment flag is only
    consulted when the UI opt-in is not set.
    """
    return get_prediction_markets_ui_opt_in() or prediction_markets_env_forced_on()
|
||||
|
||||
|
||||
def prediction_markets_status() -> dict[str, Any]:
    """Return a status snapshot of the prediction-markets opt-in.

    Keys:
        enabled: True when the UI opt-in or the env flag turns fetching on.
        ui_opted_in: the persisted UI opt-in flag.
        env_override: ``"on"``, ``"off"`` or ``None`` according to
            ``PREDICTION_MARKETS_ENABLED``.
        jitter: scheduler interval/jitter settings read from the
            environment, falling back to the built-in defaults when a
            value is unset or malformed.
    """
    ui_opted_in = get_prediction_markets_ui_opt_in()
    env_on = prediction_markets_env_forced_on()
    if env_on:
        env_override = "on"
    elif prediction_markets_env_forced_off():
        env_override = "off"
    else:
        env_override = None
    return {
        # Same rule as prediction_markets_fetch_enabled(), computed from the
        # values already read so "enabled" cannot disagree with "ui_opted_in"
        # if the opt-in file changes mid-call.
        "enabled": ui_opted_in or env_on,
        "ui_opted_in": ui_opted_in,
        "env_override": env_override,
        "jitter": {
            "scheduler_interval_minutes": _status_env_number(
                "PREDICTION_MARKETS_INTERVAL_MINUTES", "7", int
            ),
            "scheduler_jitter_seconds": _status_env_number(
                "PREDICTION_MARKETS_SCHEDULER_JITTER_S", "240", int
            ),
            "pre_fetch_jitter_seconds": _status_env_number(
                "PREDICTION_MARKETS_PRE_FETCH_JITTER_S", "90", float
            ),
        },
    }


def _status_env_number(name: str, default: str, cast):
    """Parse env var ``name`` with ``cast``; use ``default`` if it is malformed."""
    raw = os.environ.get(name, default)
    try:
        return cast(raw)
    except (TypeError, ValueError):
        # A typo in an env var should not break the status report.
        logger.warning("Invalid %s=%r; using default %s", name, raw, default)
        return cast(default)
|
||||
@@ -2,34 +2,14 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
from cachetools import cached, TTLCache
|
||||
import cloudscraper
|
||||
import reverse_geocoder as rg
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPENMHZ_AUDIO_HOSTS = {"media.openmhz.com", "media2.openmhz.com", "media3.openmhz.com"}
|
||||
|
||||
|
||||
# Round 7a / Issues #289, #290, #291 (tg12 audit):
|
||||
# We previously sent a spoofed Chrome User-Agent and (for OpenMHz) used
|
||||
# cloudscraper to bypass anti-bot challenges. Both are dishonest and ToS-
|
||||
# unfriendly. We now send the per-install Shadowbroker UA — the upstream
|
||||
# can identify us, rate-limit us per install, and contact us if needed.
|
||||
#
|
||||
# If the upstream actively blocks our honest UA, the feature degrades
|
||||
# gracefully (returns an empty list / cached results) rather than
|
||||
# escalating to deception.
|
||||
|
||||
|
||||
def _broadcastify_user_agent() -> str:
    """Return the outbound User-Agent tagged with the ``broadcastify`` purpose."""
    purpose = "broadcastify"
    return outbound_user_agent(purpose)
|
||||
|
||||
|
||||
def _openmhz_user_agent() -> str:
    """Return the outbound User-Agent tagged with the ``openmhz`` purpose."""
    purpose = "openmhz"
    return outbound_user_agent(purpose)
|
||||
|
||||
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
|
||||
radio_cache = TTLCache(maxsize=1, ttl=300)
|
||||
|
||||
@@ -42,12 +22,8 @@ def get_top_broadcastify_feeds():
|
||||
"""
|
||||
logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")
|
||||
headers = {
|
||||
# Issue #289 (tg12) + Round 7a: identify ourselves honestly as a
|
||||
# per-install Shadowbroker scraper. Broadcastify can rate-limit
|
||||
# us per install or block us; either way we stop pretending to be
|
||||
# a browser. If they block, the panel degrades gracefully.
|
||||
"User-Agent": _broadcastify_user_agent(),
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
}
|
||||
|
||||
@@ -113,32 +89,21 @@ openmhz_systems_cache = TTLCache(maxsize=1, ttl=3600)
|
||||
|
||||
@cached(openmhz_systems_cache)
|
||||
def get_openmhz_systems():
|
||||
"""Fetches the full directory of OpenMHZ systems.
|
||||
"""Fetches the full directory of OpenMHZ systems."""
|
||||
logger.info("Scraping OpenMHZ Systems (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
|
||||
Issue #290 (tg12) + Round 7a: replaced cloudscraper-based Chrome
|
||||
impersonation with an honest per-install Shadowbroker User-Agent.
|
||||
If OpenMHz's Cloudflare layer blocks honest traffic, we accept
|
||||
that degradation (return empty list) rather than spoof a browser.
|
||||
"""
|
||||
logger.info("Fetching OpenMHZ Systems (Cache Miss)")
|
||||
try:
|
||||
res = requests.get(
|
||||
"https://api.openmhz.com/systems",
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
res = scraper.get("https://api.openmhz.com/systems", timeout=15)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
# Return list of systems
|
||||
return data.get("systems", []) if isinstance(data, dict) else []
|
||||
if res.status_code in (403, 503):
|
||||
logger.warning(
|
||||
"OpenMHZ returned %s for systems directory — Cloudflare may "
|
||||
"be blocking our honest UA. Feature degrades to empty result.",
|
||||
res.status_code,
|
||||
)
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Systems Fetch Exception: {e}")
|
||||
logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -148,25 +113,21 @@ openmhz_calls_cache = TTLCache(maxsize=100, ttl=20)
|
||||
|
||||
@cached(openmhz_calls_cache)
|
||||
def get_recent_openmhz_calls(sys_name: str):
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata').
|
||||
|
||||
Issue #290 (tg12) + Round 7a: same honest-UA model as
|
||||
``get_openmhz_systems``.
|
||||
"""
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata')."""
|
||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
|
||||
try:
|
||||
url = f"https://api.openmhz.com/{sys_name}/calls"
|
||||
res = requests.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
res = scraper.get(url, timeout=15)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("calls", []) if isinstance(data, dict) else []
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Calls Fetch Exception ({sys_name}): {e}")
|
||||
logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -202,11 +163,9 @@ def openmhz_audio_response(target_url: str):
|
||||
timeout=(5, 20),
|
||||
allow_redirects=False,
|
||||
headers={
|
||||
# Issue #291 (tg12) + Round 7a: drop spoofed Mozilla
|
||||
# UA and the fake first-party Referer. Identify as
|
||||
# the per-install Shadowbroker proxy honestly.
|
||||
"User-Agent": _openmhz_user_agent(),
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
||||
"Referer": "https://openmhz.com/",
|
||||
},
|
||||
)
|
||||
if upstream.is_redirect or upstream.status_code in (301, 302, 303, 307, 308):
|
||||
|
||||
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,31 +15,6 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issues #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path.
|
||||
#
|
||||
# Round 7a: the original fix in PR #284 used a single project-wide
|
||||
# identifier, which from Wikimedia's perspective made every Shadowbroker
|
||||
# install in the world look like one giant scraper. If one install
|
||||
# misbehaved, their only recourse was to block "Shadowbroker" as a
|
||||
# whole. We now build the headers from ``outbound_user_agent('wikimedia')``
|
||||
# which embeds the per-install operator handle (auto-generated or
|
||||
# operator-chosen), so Wikimedia can rate-limit / contact the specific
|
||||
# install instead of the project.
|
||||
|
||||
|
||||
def _wikimedia_request_headers() -> dict[str, str]:
    """Return the identification headers sent with Wikimedia requests.

    Both ``User-Agent`` and ``Api-User-Agent`` carry the same value,
    ``outbound_user_agent("wikimedia")``, so the per-install handle is
    present whichever header the upstream reads.
    """
    identity = outbound_user_agent("wikimedia")
    return dict.fromkeys(("User-Agent", "Api-User-Agent"), identity)
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||
@@ -70,7 +45,9 @@ def _reverse_geocode(lat: float, lng: float) -> dict:
|
||||
f"https://nominatim.openstreetmap.org/reverse?"
|
||||
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
|
||||
)
|
||||
headers = {"User-Agent": outbound_user_agent("nominatim")}
|
||||
headers = {
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard; contact@shadowbroker.app)"
|
||||
}
|
||||
|
||||
for attempt in range(2):
|
||||
# Enforce Nominatim's 1 req/sec policy
|
||||
@@ -144,13 +121,7 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
"""
|
||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||
try:
|
||||
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||
# sends the project default, and we also add the Wikimedia-
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_wikimedia_request_headers())
|
||||
res = fetch_with_curl(url, timeout=6)
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -176,9 +147,7 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
slug = quote(name.replace(" ", "_"))
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||
try:
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_wikimedia_request_headers())
|
||||
res = fetch_with_curl(url, timeout=5)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
@@ -301,36 +270,3 @@ def get_region_dossier(lat: float, lng: float) -> dict:
|
||||
|
||||
dossier_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
def fetch_wikipedia_page_summary(title: str) -> dict | None:
    """Return a Wikipedia REST summary for ``title`` (backend-proxied for #360).

    Returns ``None`` when ``title`` is blank, or when the lookup via
    ``_fetch_local_wiki_summary`` yields neither an extract nor a
    description. Otherwise returns a dict with ``title``, ``description``,
    ``extract``, ``thumbnail`` and a fixed ``type`` of ``"standard"``.
    """
    page = (title or "").strip()
    if not page:
        return None

    summary = _fetch_local_wiki_summary(page, "")
    has_text = summary.get("extract") or summary.get("description")
    if not has_text:
        return None

    result = {"title": page}
    for field in ("description", "extract", "thumbnail"):
        result[field] = summary.get(field, "")
    result["type"] = "standard"
    return result
|
||||
|
||||
|
||||
def fetch_wikidata_sparql_bindings(sparql: str) -> list:
    """Run a Wikidata SPARQL query and return its ``results.bindings`` list.

    Returns an empty list when ``sparql`` is blank, the response status is
    not 200, the request or JSON decoding fails (logged as a warning), or
    the decoded payload does not have the expected
    ``{"results": {"bindings": [...]}}`` shape.
    """
    query = (sparql or "").strip()
    if not query:
        return []
    url = f"https://query.wikidata.org/sparql?query={quote(query)}&format=json"
    try:
        res = fetch_with_curl(url, timeout=8, headers=_wikimedia_request_headers())
        if res.status_code != 200:
            return []
        payload = res.json()
    except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
        logger.warning("Wikidata SPARQL failed: %s", e)
        return []
    # Check each level before calling .get() on it: a JSON array or scalar
    # at either level would otherwise raise AttributeError.
    results = payload.get("results") if isinstance(payload, dict) else None
    bindings = results.get("bindings") if isinstance(results, dict) else None
    return bindings if isinstance(bindings, list) else []
|
||||
|
||||
@@ -34,11 +34,6 @@ from services.sar.sar_config import (
|
||||
copernicus_token,
|
||||
earthdata_token,
|
||||
)
|
||||
|
||||
|
||||
def _sar_user_agent() -> str:
    """Return the outbound User-Agent tagged with the ``sar-products`` purpose.

    ``outbound_user_agent`` is imported at call time, not at module import.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("sar-products")
|
||||
from services.sar.sar_normalize import (
|
||||
SarAnomaly,
|
||||
evidence_hash_for_payload,
|
||||
@@ -447,7 +442,7 @@ def _fetch_unosat_packages() -> list[dict[str, Any]]:
|
||||
# HDX CKAN returns 406 without explicit Accept + a browser-ish UA.
|
||||
hdx_headers = {
|
||||
"Accept": "application/json",
|
||||
"User-Agent": _sar_user_agent(),
|
||||
"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker-SAR/1.0)",
|
||||
}
|
||||
try:
|
||||
resp = fetch_with_curl(url, timeout=20, headers=hdx_headers)
|
||||
|
||||
@@ -11,61 +11,12 @@ import requests
|
||||
from datetime import datetime, timedelta
|
||||
from cachetools import TTLCache
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
|
||||
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
|
||||
|
||||
|
||||
def _planetary_user_agent() -> str:
    """Return the User-Agent used for Microsoft Planetary Computer requests.

    Round 7a: the value embeds the per-install handle, tagged with the
    ``sentinel2-planetary-computer`` purpose, so requests are attributable
    to the specific operator and not to the Shadowbroker user base as a whole.
    """
    purpose = "sentinel2-planetary-computer"
    return outbound_user_agent(purpose)
|
||||
|
||||
|
||||
def _sign_planetary_href(href: str) -> str:
|
||||
"""Sign a Planetary Computer blob URL with a short-lived SAS token."""
|
||||
if not href or "blob.core.windows.net" not in href:
|
||||
return href
|
||||
try:
|
||||
account = href.split(".blob.core.windows.net")[0].split("//")[-1]
|
||||
token_resp = requests.get(
|
||||
f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{account}",
|
||||
timeout=5,
|
||||
headers={"User-Agent": _planetary_user_agent()},
|
||||
)
|
||||
token_resp.raise_for_status()
|
||||
token = token_resp.json().get("token", "")
|
||||
if not token:
|
||||
return href
|
||||
sep = "&" if "?" in href else "?"
|
||||
return f"{href}{sep}{token}"
|
||||
except (requests.RequestException, ValueError, KeyError):
|
||||
return href
|
||||
|
||||
|
||||
def _scene_from_stac_feature(item: dict) -> dict:
    """Convert one STAC feature into the scene dict returned to callers.

    The thumbnail URL prefers the ``thumbnail`` asset and falls back to
    ``rendered_preview``; the full-resolution URL prefers them in the
    opposite order. Both are passed through ``_sign_planetary_href``, and
    an empty result is reported as ``None``.
    """
    props = item.get("properties", {}) or {}
    assets = item.get("assets", {}) or {}
    preview_href = (assets.get("rendered_preview") or {}).get("href")
    thumbnail_href = (assets.get("thumbnail") or {}).get("href")

    signed_thumb = _sign_planetary_href(thumbnail_href or preview_href or "")
    signed_full = _sign_planetary_href(preview_href or thumbnail_href or "")

    bbox = item.get("bbox")
    scene = {
        "found": True,
        "scene_id": item.get("id"),
        "datetime": props.get("datetime"),
        "cloud_cover": props.get("eo:cloud_cover"),
        "thumbnail_url": signed_thumb or None,
        "fullres_url": signed_full or None,
        "bbox": list(bbox) if bbox else None,
        "platform": props.get("platform", "Sentinel-2"),
    }
    return scene
|
||||
|
||||
|
||||
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
lat_span = 0.18
|
||||
lng_span = 0.24
|
||||
@@ -93,14 +44,14 @@ def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
|
||||
|
||||
def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
"""Search for up to 3 recent Sentinel-2 L2A scenes covering a point."""
|
||||
"""Search for the latest Sentinel-2 L2A scene covering a point."""
|
||||
cache_key = f"{round(lat, 2)}_{round(lng, 2)}"
|
||||
if cache_key in _sentinel_cache:
|
||||
return _sentinel_cache[cache_key]
|
||||
|
||||
try:
|
||||
end = datetime.utcnow()
|
||||
start = end - timedelta(days=60)
|
||||
start = end - timedelta(days=30)
|
||||
search_payload = {
|
||||
"collections": ["sentinel-2-l2a"],
|
||||
"intersects": {"type": "Point", "coordinates": [lng, lat]},
|
||||
@@ -113,7 +64,7 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=8,
|
||||
headers={"User-Agent": _planetary_user_agent()},
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard)"},
|
||||
)
|
||||
search_res.raise_for_status()
|
||||
data = search_res.json()
|
||||
@@ -123,8 +74,26 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
_sentinel_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
scenes = [_scene_from_stac_feature(item) for item in features[:3]]
|
||||
result = {**scenes[0], "scenes": scenes}
|
||||
item = features[0]
|
||||
assets = item.get("assets", {}) or {}
|
||||
rendered = assets.get("rendered_preview") or {}
|
||||
thumbnail = assets.get("thumbnail") or {}
|
||||
|
||||
# Full-res image URL — what opens when user clicks
|
||||
fullres_url = rendered.get("href") or thumbnail.get("href")
|
||||
# Thumbnail URL — what shows in the popup card
|
||||
thumb_url = thumbnail.get("href") or rendered.get("href")
|
||||
|
||||
result = {
|
||||
"found": True,
|
||||
"scene_id": item.get("id"),
|
||||
"datetime": item.get("properties", {}).get("datetime"),
|
||||
"cloud_cover": item.get("properties", {}).get("eo:cloud_cover"),
|
||||
"thumbnail_url": thumb_url,
|
||||
"fullres_url": fullres_url,
|
||||
"bbox": list(item.get("bbox", [])) if item.get("bbox") else None,
|
||||
"platform": item.get("properties", {}).get("platform", "Sentinel-2"),
|
||||
}
|
||||
_sentinel_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
@@ -20,11 +20,7 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SHODAN_BASE = "https://api.shodan.io"
|
||||
# Round 7a: per-install attribution. Shodan already has the operator API
|
||||
# key for billing, but the UA still identifies the install.
|
||||
def _shodan_user_agent():
    """Return the outbound User-Agent tagged with the ``shodan`` purpose.

    ``outbound_user_agent`` is imported at call time, not at module import.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("shodan")
|
||||
_USER_AGENT = "ShadowBroker/0.9.79 local Shodan connector"
|
||||
_REQUEST_TIMEOUT = 15
|
||||
_MIN_INTERVAL_SECONDS = 1.05 # Shodan docs say API plans are rate limited to ~1 req/sec.
|
||||
_DEFAULT_SEARCH_PAGES = 1
|
||||
@@ -183,7 +179,7 @@ def _request(path: str, *, params: dict[str, Any], cache: TTLCache[str, dict[str
|
||||
f"{_SHODAN_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _shodan_user_agent(), "Accept": "application/json"},
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
@@ -58,7 +58,7 @@ SLO_REGISTRY: Dict[str, SLO] = {
|
||||
"uap_sightings": SLO(
|
||||
max_age_s=26 * _HOUR,
|
||||
min_rows=50,
|
||||
description="NUFORC rolling 60-day window (weekly refresh)",
|
||||
description="NUFORC rolling 60-day window (daily refresh)",
|
||||
),
|
||||
"wastewater": SLO(
|
||||
max_age_s=30 * _HOUR,
|
||||
|
||||
@@ -19,13 +19,6 @@ from pathlib import Path
|
||||
import requests
|
||||
from sgp4.api import Satrec, WGS72, jday
|
||||
|
||||
|
||||
|
||||
def _tinygs_user_agent(purpose: str) -> str:
    """Round 7a: per-install User-Agent for CelesTrak / TinyGS attribution.

    ``purpose`` is prefixed with ``tinygs-`` and handed to
    ``outbound_user_agent``, which is imported at call time.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    tagged_purpose = "tinygs-{}".format(purpose)
    return _build_user_agent(tagged_purpose)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -120,7 +113,7 @@ def _fetch_celestrak_tles() -> list[dict]:
|
||||
params={"GROUP": group, "FORMAT": "json"},
|
||||
timeout=20,
|
||||
headers={
|
||||
"User-Agent": _tinygs_user_agent("celestrak"),
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (CelesTrak fair-use)",
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
@@ -266,7 +259,7 @@ def _fetch_tinygs_telemetry() -> None:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": _tinygs_user_agent("tinygs"),
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
@@ -173,94 +173,6 @@ def _verify_tor_bundle(archive_path: Path, bundle_url: str) -> tuple[bool, str]:
|
||||
return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)"
|
||||
|
||||
|
||||
def _extract_tor_bundle_safely(archive_path: Path, install_dir: Path) -> bool:
|
||||
"""Extract a Tor Expert Bundle tar.gz safely.
|
||||
|
||||
Issue #251: the previous extractor checked tarinfo.name against path
|
||||
traversal but never inspected tarinfo.linkname for symlink/hardlink
|
||||
members. Python 3.11's tarfile honors symlinks during extractall(),
|
||||
so a malicious archive could ship a member like::
|
||||
|
||||
name = "innocent.txt" # passes the path check
|
||||
type = SYMTYPE
|
||||
linkname = "C:\\Windows\\System32\\config\\system"
|
||||
|
||||
and extractall() would then create that symlink. Subsequent reads
|
||||
of innocent.txt deference to a sensitive system file; subsequent
|
||||
writes corrupt one. Tor bundles never legitimately contain symlinks
|
||||
or hardlinks, so we refuse all link members categorically rather
|
||||
than trying to validate linkname targets (which has its own pitfalls
|
||||
around relative path resolution).
|
||||
|
||||
Also refuses non-regular-non-directory members (devices, FIFOs,
|
||||
character/block special files) for completeness — none of those
|
||||
belong in a Tor Expert Bundle and accepting them is a category of
|
||||
bug we don't need to debug later.
|
||||
|
||||
Returns True on success, False on rejection (and logs the reason).
|
||||
The caller is responsible for cleaning up the archive file.
|
||||
"""
|
||||
import tarfile
|
||||
|
||||
install_resolved = install_dir.resolve()
|
||||
|
||||
try:
|
||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||
for member in tar.getmembers():
|
||||
# Reject anything that isn't a regular file or directory.
|
||||
# Symlinks (SYMTYPE) and hardlinks (LNKTYPE) are the
|
||||
# path-traversal vectors; the others (CHRTYPE, BLKTYPE,
|
||||
# FIFOTYPE, CONTTYPE) have no legitimate use in a Tor
|
||||
# Expert Bundle.
|
||||
if member.issym() or member.islnk():
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: link member %s -> %s "
|
||||
"(symlinks/hardlinks are not allowed in Tor bundles; "
|
||||
"this archive is malformed or hostile)",
|
||||
member.name,
|
||||
member.linkname,
|
||||
)
|
||||
return False
|
||||
if not (member.isfile() or member.isdir()):
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: unexpected member type "
|
||||
"for %s (only regular files and directories are allowed)",
|
||||
member.name,
|
||||
)
|
||||
return False
|
||||
|
||||
# Path traversal check (preserves the original guard).
|
||||
try:
|
||||
member_path = (install_dir / member.name).resolve()
|
||||
except OSError as exc:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: cannot resolve member "
|
||||
"path %s: %s",
|
||||
member.name,
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
try:
|
||||
member_path.relative_to(install_resolved)
|
||||
except ValueError:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: path traversal on %s "
|
||||
"(resolves to %s, outside install dir %s)",
|
||||
member.name,
|
||||
member_path,
|
||||
install_resolved,
|
||||
)
|
||||
return False
|
||||
|
||||
# All members validated — extract.
|
||||
tar.extractall(path=str(install_dir))
|
||||
except tarfile.TarError as exc:
|
||||
logger.error("Tor bundle extraction failed: malformed tar (%s)", exc)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _auto_install_tor() -> str | None:
|
||||
"""Install or download Tor when it is safe to do so."""
|
||||
if os.name != "nt":
|
||||
@@ -291,9 +203,14 @@ def _auto_install_tor() -> str | None:
|
||||
logger.info("Download complete, extracting...")
|
||||
import tarfile
|
||||
|
||||
if not _extract_tor_bundle_safely(archive_path, TOR_INSTALL_DIR):
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||
for member in tar.getmembers():
|
||||
member_path = (TOR_INSTALL_DIR / member.name).resolve()
|
||||
if not str(member_path).startswith(str(TOR_INSTALL_DIR.resolve())):
|
||||
logger.error("Tar path traversal blocked: %s", member.name)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
tar.extractall(path=str(TOR_INSTALL_DIR))
|
||||
|
||||
archive_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
@@ -24,9 +24,7 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_FINNHUB_BASE = "https://finnhub.io/api/v1"
|
||||
def _finnhub_user_agent():
    """Return the outbound User-Agent tagged with the ``finnhub`` purpose.

    ``outbound_user_agent`` is imported at call time, not at module import.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("finnhub")
|
||||
_USER_AGENT = "ShadowBroker/0.9.79 Finnhub connector"
|
||||
_REQUEST_TIMEOUT = 12
|
||||
_MIN_INTERVAL_SECONDS = 0.35 # Stay well under 60 calls/min
|
||||
|
||||
@@ -91,7 +89,7 @@ def _request(path: str, params: dict[str, Any] | None = None) -> Any:
|
||||
f"{_FINNHUB_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _finnhub_user_agent(), "Accept": "application/json"},
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
+14
-232
@@ -6,11 +6,9 @@ Public API:
|
||||
schedule_restart(project_root) (spawn detached start script, then exit)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
@@ -31,19 +29,6 @@ DOCKER_UPDATE_COMMANDS = (
|
||||
"docker compose pull && docker compose up -d"
|
||||
)
|
||||
|
||||
# Issue #231: baked-in release digests. Loaded lazily, used as a fallback
|
||||
# verification source when the release's SHA256SUMS.txt asset can't be
|
||||
# fetched (e.g. transient network failure during update).
|
||||
_RELEASE_DIGESTS_FILE = (
|
||||
Path(__file__).resolve().parent.parent / "data" / "release_digests.json"
|
||||
)
|
||||
# Pattern for the maintainer's signed source-archive release asset. This
|
||||
# is the file we prefer over the auto-generated ``zipball_url`` because
|
||||
# the maintainer's build process publishes it with a matching entry in
|
||||
# SHA256SUMS.txt — the zipball does not have a signed digest.
|
||||
_SOURCE_ASSET_PATTERN = re.compile(r"^ShadowBroker_v\d", re.IGNORECASE)
|
||||
_SHA256SUMS_ASSET_NAME = "SHA256SUMS.txt"
|
||||
|
||||
|
||||
def _is_docker() -> bool:
|
||||
"""Detect if we're running inside a Docker container."""
|
||||
@@ -55,6 +40,7 @@ def _is_docker() -> bool:
|
||||
except (FileNotFoundError, PermissionError):
|
||||
pass
|
||||
return os.environ.get("container") == "docker"
|
||||
_EXPECTED_SHA256 = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
|
||||
_ALLOWED_UPDATE_HOSTS = {
|
||||
"api.github.com",
|
||||
"codeload.github.com",
|
||||
@@ -133,16 +119,7 @@ def _validate_update_url(url: str, *, allow_release_page: bool = False) -> str:
|
||||
# ---------------------------------------------------------------------------
|
||||
def _download_release(temp_dir: str) -> tuple:
|
||||
"""Fetch latest release info and download the source zip archive.
|
||||
|
||||
Issue #231: prefer the maintainer's signed release asset (matching
|
||||
``ShadowBroker_v*.zip``) over the auto-generated ``zipball_url``,
|
||||
because the maintainer's release process publishes a matching entry
|
||||
in SHA256SUMS.txt for the named asset but NOT for the zipball.
|
||||
|
||||
Returns (zip_path, version_tag, download_url, release_url, asset_name,
|
||||
sha256sums_url) — the last two are empty strings when the release
|
||||
doesn't publish a signed asset, falling back to the legacy zipball
|
||||
path.
|
||||
Returns (zip_path, version_tag, download_url, release_url).
|
||||
"""
|
||||
logger.info("Fetching latest release info from GitHub...")
|
||||
_validate_update_url(GITHUB_RELEASES_URL)
|
||||
@@ -154,42 +131,9 @@ def _download_release(temp_dir: str) -> tuple:
|
||||
tag = release.get("tag_name", "unknown")
|
||||
release_url = str(release.get("html_url") or GITHUB_RELEASES_PAGE_URL).strip()
|
||||
_validate_update_url(release_url, allow_release_page=True)
|
||||
|
||||
# Prefer the maintainer-signed release asset. Fall back to the
|
||||
# auto-generated zipball if the release doesn't publish one.
|
||||
assets = release.get("assets") or []
|
||||
asset_name = ""
|
||||
asset_url = ""
|
||||
sha256sums_url = ""
|
||||
for a in assets:
|
||||
name = str(a.get("name") or "").strip()
|
||||
download = str(a.get("browser_download_url") or "").strip()
|
||||
if not name or not download:
|
||||
continue
|
||||
if _SOURCE_ASSET_PATTERN.match(name) and name.lower().endswith(".zip"):
|
||||
asset_name = name
|
||||
asset_url = download
|
||||
elif name == _SHA256SUMS_ASSET_NAME:
|
||||
sha256sums_url = download
|
||||
|
||||
if asset_url:
|
||||
zip_url = asset_url
|
||||
logger.info(
|
||||
"Using signed release asset %s (sha256sums=%s)",
|
||||
asset_name,
|
||||
"yes" if sha256sums_url else "no",
|
||||
)
|
||||
else:
|
||||
zip_url = str(release.get("zipball_url") or "").strip()
|
||||
if not zip_url:
|
||||
raise RuntimeError("Latest release is missing a source archive URL")
|
||||
logger.warning(
|
||||
"Release does not publish a signed ShadowBroker_v*.zip asset — "
|
||||
"falling back to auto-generated zipball_url. Integrity will be "
|
||||
"verified against the baked-in release_digests.json (if present) "
|
||||
"or HTTPS-only otherwise."
|
||||
)
|
||||
|
||||
zip_url = str(release.get("zipball_url") or "").strip()
|
||||
if not zip_url:
|
||||
raise RuntimeError("Latest release is missing a source archive URL")
|
||||
_validate_update_url(zip_url)
|
||||
|
||||
logger.info(f"Downloading {zip_url} ...")
|
||||
@@ -206,174 +150,19 @@ def _download_release(temp_dir: str) -> tuple:
|
||||
|
||||
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
|
||||
logger.info(f"Downloaded {size_mb:.1f} MB — ZIP validated OK")
|
||||
return zip_path, tag, zip_url, release_url, asset_name, sha256sums_url
|
||||
return zip_path, tag, zip_url, release_url
|
||||
|
||||
|
||||
def _compute_sha256(zip_path: str) -> str:
|
||||
"""Return the hex SHA-256 of the file at ``zip_path`` (lowercase)."""
|
||||
def _validate_zip_hash(zip_path: str) -> None:
|
||||
if not _EXPECTED_SHA256:
|
||||
return
|
||||
h = hashlib.sha256()
|
||||
with open(zip_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(1024 * 128), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest().lower()
|
||||
|
||||
|
||||
def _load_baked_in_release_digests() -> dict:
|
||||
"""Return the ``release_digests.json`` mapping, or an empty dict.
|
||||
|
||||
Schema (issue #231):
|
||||
{
|
||||
"<release_tag>": {
|
||||
"<asset_filename>": "<sha256_hex>",
|
||||
...
|
||||
},
|
||||
...
|
||||
}
|
||||
"""
|
||||
try:
|
||||
raw = _RELEASE_DIGESTS_FILE.read_text(encoding="utf-8")
|
||||
parsed = json.loads(raw)
|
||||
except (OSError, ValueError) as exc:
|
||||
logger.debug("Release digest file unreadable: %s", exc)
|
||||
return {}
|
||||
if not isinstance(parsed, dict):
|
||||
return {}
|
||||
cleaned: dict[str, dict[str, str]] = {}
|
||||
for k, v in parsed.items():
|
||||
if not isinstance(k, str) or k.startswith("_"):
|
||||
continue
|
||||
if isinstance(v, dict):
|
||||
entries = {
|
||||
fname: digest.strip().lower()
|
||||
for fname, digest in v.items()
|
||||
if isinstance(fname, str) and isinstance(digest, str)
|
||||
}
|
||||
if entries:
|
||||
cleaned[k] = entries
|
||||
return cleaned
|
||||
|
||||
|
||||
def _fetch_sha256sums(sha256sums_url: str) -> dict[str, str]:
|
||||
"""Download a SHA256SUMS.txt and return {filename: digest_hex_lower}.
|
||||
|
||||
Standard ``sha256sum`` format: ``<digest> <filename>`` per line. The
|
||||
leading ``*`` binary-mode marker (e.g. ``<digest> *<filename>``) is
|
||||
handled.
|
||||
"""
|
||||
try:
|
||||
_validate_update_url(sha256sums_url)
|
||||
except RuntimeError as exc:
|
||||
logger.warning("SHA256SUMS URL rejected: %s", exc)
|
||||
return {}
|
||||
try:
|
||||
resp = requests.get(sha256sums_url, timeout=15)
|
||||
resp.raise_for_status()
|
||||
except requests.RequestException as exc:
|
||||
logger.info("SHA256SUMS fetch failed: %s", exc)
|
||||
return {}
|
||||
out: dict[str, str] = {}
|
||||
for line in resp.text.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
# Tolerant split: handle both `<digest> <name>` and `<digest> *<name>`.
|
||||
parts = line.split(None, 1)
|
||||
if len(parts) != 2:
|
||||
continue
|
||||
digest, fname = parts
|
||||
fname = fname.lstrip("*").strip()
|
||||
digest = digest.strip().lower()
|
||||
if len(digest) == 64 and all(c in "0123456789abcdef" for c in digest) and fname:
|
||||
out[fname] = digest
|
||||
return out
|
||||
|
||||
|
||||
def _validate_zip_hash(
|
||||
zip_path: str,
|
||||
*,
|
||||
asset_name: str = "",
|
||||
sha256sums_url: str = "",
|
||||
release_tag: str = "",
|
||||
) -> str:
|
||||
"""Verify the downloaded archive against trusted digest sources.
|
||||
|
||||
Issue #231: previously this returned silently when ``MESH_UPDATE_SHA256``
|
||||
was unset, which made the auto-updater a supply-chain RCE vector on any
|
||||
compromise of the GitHub release pipeline. The chain now is:
|
||||
|
||||
1. ``MESH_UPDATE_SHA256`` env var (operator override — preserved for
|
||||
power-users who want to pin an exact digest manually)
|
||||
2. ``SHA256SUMS.txt`` release asset (primary — the maintainer's
|
||||
release process already publishes this)
|
||||
3. Baked-in ``backend/data/release_digests.json`` (second line of
|
||||
defense for releases that lack the SHA256SUMS asset, or when the
|
||||
asset can't be fetched at update time)
|
||||
4. HTTPS-only fallback with a loud warning (preserves the auto-update
|
||||
flow during transient outages — but never silently)
|
||||
|
||||
A mismatch from a source that DID respond is fatal: the update is
|
||||
refused and the existing install keeps running. Only the "no source
|
||||
reachable at all" case falls back to HTTPS-only.
|
||||
|
||||
Returns a short human-readable description of which source verified
|
||||
the archive (used in the update-success message).
|
||||
"""
|
||||
actual = _compute_sha256(zip_path)
|
||||
|
||||
# Source 1: explicit operator override.
|
||||
override = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
|
||||
if override:
|
||||
if actual == override:
|
||||
return f"verified via MESH_UPDATE_SHA256 ({actual[:16]}...)"
|
||||
raise RuntimeError(
|
||||
f"Update SHA-256 mismatch vs MESH_UPDATE_SHA256: archive={actual[:16]}..., "
|
||||
f"expected={override[:16]}..."
|
||||
)
|
||||
|
||||
# Source 2: SHA256SUMS.txt asset from the release.
|
||||
sums_map: dict[str, str] = {}
|
||||
if sha256sums_url and asset_name:
|
||||
sums_map = _fetch_sha256sums(sha256sums_url)
|
||||
|
||||
sums_expected = sums_map.get(asset_name) if asset_name else None
|
||||
if sums_expected:
|
||||
if actual == sums_expected:
|
||||
return f"verified via release SHA256SUMS.txt ({actual[:16]}...)"
|
||||
raise RuntimeError(
|
||||
f"Update SHA-256 mismatch vs release SHA256SUMS.txt: "
|
||||
f"archive={actual[:16]}..., expected={sums_expected[:16]}..."
|
||||
)
|
||||
|
||||
# Source 3: baked-in digest list.
|
||||
baked = _load_baked_in_release_digests()
|
||||
baked_expected = ""
|
||||
if release_tag and asset_name:
|
||||
baked_expected = baked.get(release_tag, {}).get(asset_name, "")
|
||||
if baked_expected:
|
||||
if actual == baked_expected:
|
||||
return f"verified via baked-in digest list ({actual[:16]}...)"
|
||||
raise RuntimeError(
|
||||
f"Update SHA-256 mismatch vs baked-in digest list: "
|
||||
f"archive={actual[:16]}..., expected={baked_expected[:16]}..."
|
||||
)
|
||||
|
||||
# Source 4: HTTPS-only fallback. We keep onboarding/auto-update working
|
||||
# during transient outages (no SHA256SUMS reachable AND no baked-in
|
||||
# entry for this release), but surface the degraded posture loudly so
|
||||
# the operator can see it in logs and the maintainer can populate the
|
||||
# digest list on the next release bump.
|
||||
logger.warning(
|
||||
"Update integrity check fell back to HTTPS-only trust "
|
||||
"(no SHA256SUMS.txt response and no baked-in digest for "
|
||||
"release=%s asset=%s). The archive SHA-256 is %s. Once the "
|
||||
"release ships a SHA256SUMS.txt asset OR backend/data/"
|
||||
"release_digests.json is updated with this release, the secure "
|
||||
"path will activate automatically.",
|
||||
release_tag or "unknown",
|
||||
asset_name or "unknown",
|
||||
actual,
|
||||
)
|
||||
return f"https-only (no digest source reachable, archive={actual[:16]}...)"
|
||||
digest = h.hexdigest().lower()
|
||||
if digest != _EXPECTED_SHA256:
|
||||
raise RuntimeError("Update SHA-256 mismatch")
|
||||
|
||||
|
||||
def _is_source_checkout(project_root: str) -> bool:
|
||||
@@ -545,7 +334,7 @@ def perform_update(project_root: str) -> dict:
|
||||
temp_dir = tempfile.mkdtemp(prefix="sb_update_")
|
||||
manual_url = GITHUB_RELEASES_PAGE_URL
|
||||
try:
|
||||
zip_path, version, url, release_url, asset_name, sha256sums_url = _download_release(temp_dir)
|
||||
zip_path, version, url, release_url = _download_release(temp_dir)
|
||||
manual_url = release_url or manual_url
|
||||
|
||||
if in_docker:
|
||||
@@ -577,13 +366,7 @@ def perform_update(project_root: str) -> dict:
|
||||
),
|
||||
}
|
||||
|
||||
verification_note = _validate_zip_hash(
|
||||
zip_path,
|
||||
asset_name=asset_name,
|
||||
sha256sums_url=sha256sums_url,
|
||||
release_tag=version,
|
||||
)
|
||||
logger.info("Update archive %s", verification_note)
|
||||
_validate_zip_hash(zip_path)
|
||||
backup_path = _backup_current(project_root, temp_dir)
|
||||
copied = _extract_and_copy(zip_path, project_root, temp_dir)
|
||||
|
||||
@@ -595,7 +378,6 @@ def perform_update(project_root: str) -> dict:
|
||||
"manual_url": manual_url,
|
||||
"release_url": release_url,
|
||||
"download_url": url,
|
||||
"integrity": verification_note,
|
||||
"message": f"Updated to {version} — {copied} files replaced. Restarting...",
|
||||
}
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,677 +0,0 @@
|
||||
{
|
||||
"_meta": {
|
||||
"issue": "#239",
|
||||
"note": "Snapshot of currently-tolerated duplicate route registrations. The test in test_no_new_duplicate_routes.py fails if any NEW (method, path) duplicate appears outside this list. Removing entries (by actually deduping) is fine and the test stays green. New entries here require explicit, reviewed updates.",
|
||||
"generated_with": "python -c 'see tests/test_no_new_duplicate_routes.py'"
|
||||
},
|
||||
"duplicates": {
|
||||
"DELETE /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/contact/{peer_id}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/invite/handles/{handle}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/cctv/media": [
|
||||
"main",
|
||||
"routers.cctv"
|
||||
],
|
||||
"GET /api/debug-latest": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/geocode/reverse": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/geocode/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/health": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/live-data": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/fast": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/slow": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/mesh/channels": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/prekey-bundle": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/pubkey": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/gate/list": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/event/{event_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/events": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/locator": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/merkle": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages/wait": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/node/{node_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/log": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/metrics": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/oracle/consensus": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets/more": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/predictions": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/profile": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/search": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/stakes/{message_id}": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/all": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/batch": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/rns/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/signals": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/trust/vouches": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/oracle/region-intel": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/radio/nearest": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/nearest-list": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/audio": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/calls/{sys_name}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/systems": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/top": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/refresh": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/region-dossier": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/route/{callsign}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/sentinel2/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/settings/api-keys": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/api-keys/meta": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole-status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/sigint/nearest-sdr": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/thermal/verify": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/tools/shodan/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/tools/uw/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/wormhole/dm/contacts": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite/handles": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/personas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/health": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PATCH /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/ais/feed": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/layers": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/mesh/dm/block": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/register": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/send": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/gate/create": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-pull": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/{gate_id}/message": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/revoke": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/rotate": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/ingest": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/oracle/predict": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve-stakes": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/stake": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/report": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/send": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/trust/vouch": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/vote": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/sentinel/tile": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/sentinel/token": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/settings/news-feeds/reset": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/sigint/transmit": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"POST /api/system/update": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/tools/shodan/count": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/host": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/congress": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/darkpool": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/flow": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/viewport": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/wormhole/connect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/disconnect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/build-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-tokens": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/invite/import": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/open-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/prekey/register": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/register-key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/reset": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sender-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/enter": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/grant": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/sign-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/messages/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/activate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/clear": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/create": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/retire": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/proof": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/state/export": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/identity/bootstrap": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/join": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/restart": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign-raw": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/envelope_policy": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/legacy_envelope_fallback": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/wormhole/dm/contact": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -87,28 +87,11 @@ def _run_gate_release_once(monkeypatch, *, transport_tier="private_strong"):
|
||||
def _patch_for_successful_post(monkeypatch, module):
|
||||
"""Apply standard monkeypatches so a gate_message post succeeds."""
|
||||
import main
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_setup_gate_outbox(monkeypatch)
|
||||
monkeypatch.setattr(main, "_verify_gate_message_signed_write", lambda **kw: (True, "ok", kw.get("reply_to", "")))
|
||||
monkeypatch.setattr(main, "_resolve_envelope_policy", lambda _gate_id: "envelope_disabled")
|
||||
|
||||
def _fake_private_gate_append(**kwargs):
|
||||
return {
|
||||
"event_id": f"ledger-ev-{kwargs.get('sequence', 0)}",
|
||||
"event_type": "gate_message",
|
||||
"node_id": kwargs["node_id"],
|
||||
"payload": dict(kwargs["payload"]),
|
||||
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||
"sequence": kwargs["sequence"],
|
||||
"signature": kwargs["signature"],
|
||||
"public_key": kwargs["public_key"],
|
||||
"public_key_algo": kwargs["public_key_algo"],
|
||||
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||
}
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", _fake_private_gate_append)
|
||||
|
||||
from services.mesh.mesh_reputation import gate_manager, reputation_ledger
|
||||
|
||||
monkeypatch.setattr(gate_manager, "can_enter", lambda *a, **kw: (True, "ok"))
|
||||
@@ -272,30 +255,19 @@ def test_gate_post_preserves_gate_envelope_in_store(monkeypatch):
|
||||
|
||||
|
||||
def test_gate_post_advances_sequence(monkeypatch):
|
||||
"""append_private_gate_message must receive the gate sequence."""
|
||||
"""validate_and_set_sequence must be called to advance the counter."""
|
||||
import main
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_patch_for_successful_post(monkeypatch, main)
|
||||
|
||||
append_calls = []
|
||||
seq_calls = []
|
||||
|
||||
def track_private_append(**kwargs):
|
||||
append_calls.append(kwargs)
|
||||
return {
|
||||
"event_id": "ev-seq",
|
||||
"event_type": "gate_message",
|
||||
"node_id": kwargs["node_id"],
|
||||
"payload": dict(kwargs["payload"]),
|
||||
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||
"sequence": kwargs["sequence"],
|
||||
"signature": kwargs["signature"],
|
||||
"public_key": kwargs["public_key"],
|
||||
"public_key_algo": kwargs["public_key_algo"],
|
||||
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||
}
|
||||
def track_seq(node_id, seq, *, domain=""):
|
||||
seq_calls.append((node_id, seq, domain))
|
||||
return (True, "ok")
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", track_private_append)
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "validate_and_set_sequence", track_seq)
|
||||
monkeypatch.setattr(
|
||||
mesh_hashchain.gate_store,
|
||||
"append",
|
||||
@@ -308,9 +280,8 @@ def test_gate_post_advances_sequence(monkeypatch):
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["queued"] is True
|
||||
assert len(append_calls) == 1
|
||||
assert append_calls[0]["node_id"] == "!sb_test1234567890"
|
||||
assert append_calls[0]["sequence"] == 42
|
||||
assert len(seq_calls) == 1
|
||||
assert seq_calls[0] == ("!sb_test1234567890", 42, "gate_message")
|
||||
|
||||
|
||||
def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
||||
@@ -319,11 +290,11 @@ def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_patch_for_successful_post(monkeypatch, main)
|
||||
|
||||
def reject_private_append(**_kwargs):
|
||||
raise ValueError("Replay detected: sequence 1 <= last 1")
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", reject_private_append)
|
||||
monkeypatch.setattr(
|
||||
mesh_hashchain.infonet,
|
||||
"validate_and_set_sequence",
|
||||
lambda node_id, seq: (False, "Replay detected: sequence 1 <= last 1"),
|
||||
)
|
||||
|
||||
gate_id = "infonet"
|
||||
body = _build_gate_message_body(gate_id, sequence=1)
|
||||
|
||||
@@ -1,261 +0,0 @@
|
||||
"""Infonet sync respects upstream HTTP 429 + applies exponential backoff.
|
||||
|
||||
Background
|
||||
----------
|
||||
Before this fix, ``finish_sync`` used a constant 60s ``failure_backoff_s``
|
||||
regardless of how many consecutive failures preceded. When an upstream
|
||||
peer (e.g. the seed onion) returned HTTP 429 "Too Many Requests", the
|
||||
sync worker would:
|
||||
|
||||
1. Receive 429
|
||||
2. Stringify the status into a generic ``ValueError``
|
||||
3. Call ``finish_sync(error=str(exc))`` -- losing the status code
|
||||
4. Schedule next attempt for ``now + 60s``
|
||||
5. Retry. Upstream's rate-limit bucket is still full. 429 again. Loop.
|
||||
|
||||
Net effect: a node with one transient 429 would hammer the upstream
|
||||
every 60s forever, keeping the bucket full and never recovering. This
|
||||
is what kept the user's Infonet node from reaching the seed peer.
|
||||
|
||||
What the fix does
|
||||
-----------------
|
||||
* New typed exception ``PeerSyncRateLimited`` carries the parsed
|
||||
``Retry-After`` value out of the HTTP layer.
|
||||
* ``_sync_from_peer`` returns ``(ok, error, forked, retry_after_s)``
|
||||
instead of the old 3-tuple.
|
||||
* ``finish_sync`` honors ``retry_after_s`` AND applies exponential
|
||||
backoff: ``delay = max(retry_after_s, min(base * 2^(failures - 1), cap))`` with ``cap = 1800`` by default.
|
||||
* ``parse_retry_after_header`` handles both RFC 7231 forms (delay
|
||||
seconds, and HTTP-date).
|
||||
|
||||
These tests pin every part of the new contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_retry_after_header — both RFC 7231 forms + edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParseRetryAfter:
    """Pins the parsing contract of ``parse_retry_after_header``."""

    def test_integer_seconds(self):
        """Plain delay-seconds values come back as ints; padding is ignored."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        for raw, expected in (("120", 120), (" 30 ", 30), ("0", 0)):
            assert parse_retry_after_header(raw) == expected

    def test_http_date(self):
        """An HTTP-date value is converted to seconds relative to ``now``.

        RFC 7231 §7.1.3 permits ``Retry-After: <HTTP-date>`` in addition to
        a delay in seconds, so callers get one numeric field either way.
        """
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        # Fixed reference clock (2023-11-14T22:13:20Z) keeps the test deterministic.
        pinned_now = 1_700_000_000.0
        # Exactly 300 seconds after the pinned clock, in RFC 7231 format.
        header_value = "Tue, 14 Nov 2023 22:18:20 GMT"
        result = parse_retry_after_header(header_value, now=pinned_now)
        assert 295 <= result <= 305, f"expected ~300s, got {result}"

    def test_http_date_in_past_returns_zero(self):
        """A date that already elapsed relative to ``now`` yields 0."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        pinned_now = 1_700_000_000.0
        elapsed = "Mon, 13 Nov 2023 00:00:00 GMT"
        assert parse_retry_after_header(elapsed, now=pinned_now) == 0

    def test_empty_and_whitespace_return_zero(self):
        """Empty or blank header values yield 0."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        for blank in ("", " "):
            assert parse_retry_after_header(blank) == 0

    def test_malformed_returns_zero(self):
        """Values that are neither a number nor an HTTP-date yield 0."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        for garbage in ("not a header", "xyz"):
            assert parse_retry_after_header(garbage) == 0

    def test_clamps_to_one_hour(self):
        """Oversized delays are clamped to 3600 seconds.

        This bounds how long a hostile peer can keep us quiet with an
        inflated ``Retry-After``.
        """
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        one_hour = 3600
        assert parse_retry_after_header("86400") == one_hour  # 24h -> 1h
        assert parse_retry_after_header("99999999") == one_hour

    def test_negative_returns_zero(self):
        """A negative delay is not a valid ``Retry-After`` and yields 0."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        assert parse_retry_after_header("-10") == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _failure_backoff_seconds — exponential growth, retry-after override, cap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFailureBackoffSeconds:
    """Pins the delay computed by ``_failure_backoff_seconds``."""

    def test_exponential_growth(self):
        """The delay doubles per consecutive failure until it hits the cap.

        With base=60 and cap=1800 the expected series for failures 1..8 is
        60, 120, 240, 480, 960, 1800, 1800, 1800 -- the first failure uses
        the bare base.
        """
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        delays = []
        for failure_count in range(1, 9):
            delays.append(
                _failure_backoff_seconds(
                    base_backoff_s=60,
                    consecutive_failures=failure_count,
                    retry_after_s=0,
                    cap_s=1800,
                )
            )
        assert delays == [60, 120, 240, 480, 960, 1800, 1800, 1800], delays

    def test_retry_after_wins_when_larger(self):
        """An upstream ``Retry-After`` of 600 beats the 60s exponential delay."""
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        delay = _failure_backoff_seconds(
            base_backoff_s=60,
            consecutive_failures=1,
            retry_after_s=600,
            cap_s=1800,
        )
        assert delay == 600

    def test_exponential_wins_when_larger(self):
        """After 7 failures the capped 1800s beats a 30s ``Retry-After``.

        The upstream hint reflects a single moment; the exponential term
        reflects sustained failure.
        """
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        result = _failure_backoff_seconds(
            base_backoff_s=60,
            consecutive_failures=7,
            retry_after_s=30,
            cap_s=1800,
        )
        assert result == 1800

    def test_cap_zero_disables_exponential(self):
        """With ``cap_s=0`` only the upstream ``Retry-After`` is returned."""
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        delay = _failure_backoff_seconds(
            base_backoff_s=60,
            consecutive_failures=10,
            retry_after_s=120,
            cap_s=0,
        )
        assert delay == 120

    def test_zero_inputs_return_zero(self):
        """All-zero inputs (with the default cap) produce a zero delay."""
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        delay = _failure_backoff_seconds(
            base_backoff_s=0,
            consecutive_failures=0,
            retry_after_s=0,
        )
        assert delay == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# finish_sync end-to-end — failure path with retry-after + growing counter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFinishSyncBackoff:
    """End-to-end checks of ``finish_sync`` scheduling on failure and success."""

    def _state(self, **overrides):
        """Build a ``SyncWorkerState`` with idle defaults, overridden per test."""
        from services.mesh.mesh_infonet_sync_support import SyncWorkerState

        defaults = {
            "last_sync_started_at": 0,
            "last_sync_finished_at": 0,
            "last_sync_ok_at": 0,
            "next_sync_due_at": 0,
            "last_peer_url": "",
            "last_error": "",
            "last_outcome": "idle",
            "current_head": "",
            "fork_detected": False,
            "consecutive_failures": 0,
        }
        return SyncWorkerState(**{**defaults, **overrides})

    def test_first_failure_uses_base_unchanged(self):
        """A first failure schedules the retry exactly one base interval out.

        The counter becomes 1 and the delay stays at the 60s base, so a
        single transient blip is not penalised with a longer wait.
        """
        from services.mesh.mesh_infonet_sync_support import finish_sync

        outcome = finish_sync(
            self._state(),
            ok=False,
            error="some upstream blip",
            now=1000.0,
            failure_backoff_s=60,
        )
        assert outcome.consecutive_failures == 1
        assert outcome.next_sync_due_at == 1000 + 60
        assert outcome.last_error == "some upstream blip"
        assert outcome.last_outcome == "error"

    def test_consecutive_failures_grow_the_delay(self):
        """The sixth consecutive failure is scheduled at the 1800s cap.

        Uncapped, 60 * 2^5 would be 1920s; the expected delay is 1800s.
        """
        from services.mesh.mesh_infonet_sync_support import finish_sync

        outcome = finish_sync(
            self._state(consecutive_failures=5),
            ok=False,
            error="HTTP 429",
            now=2000.0,
            failure_backoff_s=60,
        )
        assert outcome.consecutive_failures == 6
        assert outcome.next_sync_due_at == 2000 + 1800

    def test_retry_after_honored_at_low_failure_count(self):
        """An upstream ``Retry-After`` of 900s overrides a shorter backoff.

        The fourth failure is scheduled 900s out, not at the exponential
        delay the failure count alone would give.
        """
        from services.mesh.mesh_infonet_sync_support import finish_sync

        outcome = finish_sync(
            self._state(consecutive_failures=3),
            ok=False,
            error="HTTP 429",
            now=5000.0,
            failure_backoff_s=60,
            retry_after_s=900,
        )
        assert outcome.consecutive_failures == 4
        assert outcome.next_sync_due_at == 5000 + 900

    def test_success_resets_consecutive_failures(self):
        """A successful sync zeroes the counter and uses the normal interval."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        outcome = finish_sync(
            self._state(consecutive_failures=4),
            ok=True,
            now=7000.0,
            interval_s=300,
        )
        assert outcome.consecutive_failures == 0
        assert outcome.next_sync_due_at == 7000 + 300
        assert outcome.last_outcome == "ok"

    def test_last_error_carries_status_string(self):
        """The caller-supplied error text is preserved in ``last_error``."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        outcome = finish_sync(
            self._state(),
            ok=False,
            error="HTTP 429 from peer (retry_after=120s): rate-limited",
            now=1000.0,
            failure_backoff_s=60,
            retry_after_s=120,
        )
        for fragment in ("HTTP 429", "retry_after=120s"):
            assert fragment in outcome.last_error
|
||||
@@ -117,11 +117,3 @@ def test_finish_solo_sync_marks_first_node_ready_without_peer_failure():
|
||||
assert finished.next_sync_due_at == 500
|
||||
assert should_run_sync(finished, now=499) is False
|
||||
assert should_run_sync(finished, now=500) is True
|
||||
|
||||
|
||||
def test_should_run_sync_recovers_stale_running_state():
    """A "running" state started at t=100 becomes runnable again at t=400.

    The same state is not runnable one second earlier (t=399).
    """
    started_at = 100
    still_fresh = SyncWorkerState(last_sync_started_at=started_at, last_outcome="running")
    gone_stale = SyncWorkerState(last_sync_started_at=started_at, last_outcome="running")

    assert should_run_sync(still_fresh, now=399) is False
    assert should_run_sync(gone_stale, now=400) is True
|
||||
|
||||
@@ -8,53 +8,6 @@ from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
|
||||
def test_onion_peer_requests_use_arti_socks_proxy(monkeypatch):
    """Onion peers are routed through the local Arti SOCKS port."""
    import main
    from services import wormhole_supervisor

    def fake_settings():
        # Arti enabled on a non-default port so the expected URL is unambiguous.
        return SimpleNamespace(MESH_ARTI_ENABLED=True, MESH_ARTI_SOCKS_PORT=19050)

    monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)
    monkeypatch.setattr(main, "get_settings", fake_settings)
    monkeypatch.setattr(wormhole_supervisor, "_check_arti_ready", lambda: True)

    proxies = main._infonet_peer_requests_proxies("http://exampleabcd.onion:8000")

    socks_url = "socks5h://127.0.0.1:19050"
    assert proxies == {"http": socks_url, "https": socks_url}
|
||||
|
||||
|
||||
def test_private_peer_requests_reject_clearnet(monkeypatch):
    """A clearnet peer URL raises RuntimeError when private transport is required."""
    import main

    monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)

    rejection = None
    try:
        main._infonet_peer_requests_proxies("https://seed.example")
    except RuntimeError as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("clearnet peer was allowed while private transport is required")
    assert "private Infonet requires onion/RNS transport" in str(rejection)
|
||||
|
||||
|
||||
def test_local_peer_url_prefers_configured_public_peer_url(monkeypatch):
    """The configured public peer URL is returned lower-cased, without the trailing slash."""
    import main

    def fake_settings():
        return SimpleNamespace(MESH_PUBLIC_PEER_URL="HTTP://LOCALPEEREXAMPLE.onion:8000/")

    monkeypatch.setattr(main, "get_settings", fake_settings)

    resolved = main._local_infonet_peer_url()
    assert resolved == "http://localpeerexample.onion:8000"
|
||||
|
||||
|
||||
def _write_signed_manifest(path, *, private_key):
|
||||
from services.mesh.mesh_bootstrap_manifest import BOOTSTRAP_MANIFEST_VERSION
|
||||
from services.mesh.mesh_crypto import canonical_json
|
||||
@@ -189,134 +142,6 @@ def test_refresh_node_peer_store_suppresses_clearnet_seed_by_default(tmp_path, m
|
||||
assert store.records_for_bucket("sync") == []
|
||||
|
||||
|
||||
def test_refresh_node_peer_store_prunes_persisted_clearnet_records_in_private_mode(tmp_path, monkeypatch):
    """Persisted clearnet records are pruned on refresh when clearnet sync is not allowed.

    Three clearnet records (bootstrap, sync, push) are seeded on disk; after
    the refresh only the onion seed remains, in the bootstrap and sync buckets.
    """
    import main
    from services.config import get_settings
    from services.mesh import mesh_peer_store as peer_store_mod

    peer_store_path = tmp_path / "peer_store.json"
    monkeypatch.setattr(peer_store_mod, "DEFAULT_PEER_STORE_PATH", peer_store_path)

    clearnet_url = "https://node.shadowbroker.info"
    seeded_at = 1_749_999_900
    store = peer_store_mod.PeerStore(peer_store_path)
    bootstrap_record = peer_store_mod.make_bootstrap_peer_record(
        peer_url=clearnet_url,
        transport="clearnet",
        role="seed",
        signer_id="shadowbroker-default",
        now=seeded_at,
    )
    store.upsert(bootstrap_record)
    sync_record = peer_store_mod.make_sync_peer_record(
        peer_url=clearnet_url,
        transport="clearnet",
        role="seed",
        source="bundle",
        now=seeded_at,
    )
    store.upsert(sync_record)
    push_record = peer_store_mod.make_push_peer_record(
        peer_url=clearnet_url,
        transport="clearnet",
        role="relay",
        now=seeded_at,
    )
    store.upsert(push_record)
    store.save()

    onion_seed = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
    monkeypatch.setenv("MESH_RELAY_PEERS", "")
    monkeypatch.setenv("MESH_BOOTSTRAP_SEED_PEERS", onion_seed)
    monkeypatch.setenv("MESH_DEFAULT_SYNC_PEERS", "")
    monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
    monkeypatch.setenv("MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY", "")
    # Drop cached settings so the environment changes above are picked up.
    get_settings.cache_clear()

    try:
        snapshot = main._refresh_node_peer_store(now=1_750_000_000)
        store = peer_store_mod.PeerStore(peer_store_path)
        store.load()
    finally:
        get_settings.cache_clear()

    assert snapshot["private_transport_required"] is True
    assert snapshot["pruned_clearnet_peer_count"] == 3
    assert [record.peer_url for record in store.records()] == [onion_seed, onion_seed]
    assert {record.bucket for record in store.records()} == {"bootstrap", "sync"}
    assert all(record.transport == "onion" for record in store.records())
|
||||
|
||||
|
||||
def test_infonet_peer_url_filter_excludes_clearnet_in_private_mode(monkeypatch):
    """With clearnet sync not enabled, the filter keeps only the onion URL."""
    import main
    from services.config import get_settings

    monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
    # Drop cached settings so the environment change above is picked up.
    get_settings.cache_clear()

    clearnet_url = "https://node.shadowbroker.info"
    onion_url = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
    try:
        kept = main._filter_infonet_peer_urls([clearnet_url, onion_url])
        assert kept == [onion_url]
    finally:
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_public_sync_cycle_backs_off_on_429_retry_after(tmp_path, monkeypatch):
    """A 429 with ``retry_after_s=180`` delays both the next sync and the peer cooldown.

    The peer sync is stubbed to raise ``PeerSyncHTTPError``; the resulting
    state and the persisted peer record must both be pushed out by at
    least 180 seconds.
    """
    import time

    import main
    from services.config import get_settings
    from services.mesh import mesh_peer_store as peer_store_mod

    peer_store_path = tmp_path / "peer_store.json"
    monkeypatch.setattr(peer_store_mod, "DEFAULT_PEER_STORE_PATH", peer_store_path)
    onion_seed = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
    store = peer_store_mod.PeerStore(peer_store_path)
    seed_record = peer_store_mod.make_sync_peer_record(
        peer_url=onion_seed,
        transport="onion",
        role="seed",
        source="bundle",
        now=1_750_000_000,
    )
    store.upsert(seed_record)
    store.save()

    def rate_limited_sync(peer_url):
        # Every sync attempt is answered with HTTP 429 and a 180s Retry-After.
        raise main.PeerSyncHTTPError(429, "rate limited", retry_after_s=180)

    monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
    monkeypatch.setenv("MESH_SYNC_FAILURE_BACKOFF_S", "60")
    monkeypatch.setenv("MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S", "15")
    get_settings.cache_clear()
    monkeypatch.setattr(main, "_participant_node_enabled", lambda: True)
    monkeypatch.setattr(main, "_ensure_infonet_private_transport_ready", lambda reason="": True)
    monkeypatch.setattr(main, "_sync_from_peer", rate_limited_sync)
    main.set_sync_state(main.SyncWorkerState())

    try:
        before = int(time.time())
        state = main._run_public_sync_cycle()
        store = peer_store_mod.PeerStore(peer_store_path)
        store.load()
    finally:
        # Restore global state even if the cycle raised.
        get_settings.cache_clear()
        main.set_sync_state(main.SyncWorkerState())

    earliest_retry = before + 180
    record = store.records_for_bucket("sync")[0]
    assert state.last_error == "HTTP 429: rate limited"
    assert state.next_sync_due_at >= earliest_retry
    assert record.cooldown_until >= earliest_retry
|
||||
|
||||
|
||||
def test_verify_peer_push_hmac_requires_allowlisted_peer(monkeypatch):
|
||||
import hashlib
|
||||
import hmac
|
||||
@@ -400,29 +225,3 @@ def test_public_sync_cycle_allows_first_node_without_peers(tmp_path, monkeypatch
|
||||
assert result.last_error == ""
|
||||
assert result.last_peer_url == ""
|
||||
assert result.consecutive_failures == 0
|
||||
|
||||
|
||||
def test_headless_mesh_node_runtime_is_explicit(monkeypatch):
    """In mesh-only mode the node runtime request follows the headless flag exactly."""
    import main

    monkeypatch.setattr(main, "_MESH_ONLY", True)

    for headless_flag in (False, True):
        monkeypatch.setattr(main, "_HEADLESS_MESH_NODE_RUNTIME", headless_flag)
        assert main._infonet_node_runtime_requested() is headless_flag
|
||||
|
||||
|
||||
def test_meshnode_scripts_enable_private_hashchain_runtime():
    """Both meshnode launcher scripts contain the private-runtime settings and onion seed."""
    from pathlib import Path

    root = Path(__file__).resolve().parents[3]
    scripts = [
        (root / script_name).read_text(encoding="utf-8")
        for script_name in ("meshnode.bat", "meshnode.sh")
    ]
    required_markers = (
        "SHADOWBROKER_MESH_NODE_RUNTIME=true",
        "MESH_INFONET_ALLOW_CLEARNET_SYNC=false",
        "MESH_ARTI_ENABLED=true",
        "MESH_DM_HASHCHAIN_SPOOL_LIMIT=2",
        "gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000",
    )

    for script in scripts:
        for marker in required_markers:
            assert marker in script
|
||||
|
||||
@@ -1,213 +0,0 @@
|
||||
import base64
|
||||
import time
|
||||
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||
|
||||
from services.config import get_settings
|
||||
from services.mesh import mesh_crypto, mesh_dm_relay, mesh_hashchain, mesh_protocol, mesh_secure_storage
|
||||
|
||||
|
||||
def _keypair():
    """Generate a fresh Ed25519 identity.

    Returns:
        ``(private_key, public_key, node_id)`` where ``public_key`` is the
        base64 text of the raw public key bytes and ``node_id`` is derived
        from it via ``mesh_crypto.derive_node_id``.
    """
    signing_key = ed25519.Ed25519PrivateKey.generate()
    raw_public = signing_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    encoded_public = base64.b64encode(raw_public).decode("utf-8")
    return signing_key, encoded_public, mesh_crypto.derive_node_id(encoded_public)
|
||||
|
||||
|
||||
def _payload(recipient_id: str = "recipient-a", msg_id: str = "dm-1") -> dict:
    """Build a normalized ``dm_message`` payload for the given recipient and message id.

    The ciphertext is the base64 of ``cipher-<msg_id>`` and the timestamp is
    the current wall-clock second.
    """
    sealed = base64.b64encode(f"cipher-{msg_id}".encode("utf-8")).decode("ascii")
    raw_fields = {
        "recipient_id": recipient_id,
        "delivery_class": "request",
        "recipient_token": "",
        "ciphertext": sealed,
        "msg_id": msg_id,
        "timestamp": int(time.time()),
        "format": "mls1",
        "transport_lock": "private_strong",
    }
    return mesh_protocol.normalize_payload("dm_message", raw_fields)
|
||||
|
||||
|
||||
def _signature(private_key, node_id: str, sequence: int, payload: dict) -> str:
    """Sign a ``dm_message`` event and return the signature as a hex string."""
    to_sign = mesh_crypto.build_signature_payload(
        event_type="dm_message",
        node_id=node_id,
        sequence=sequence,
        payload=payload,
    )
    raw_signature = private_key.sign(to_sign.encode("utf-8"))
    return raw_signature.hex()
|
||||
|
||||
|
||||
def _fresh_infonet(tmp_path, monkeypatch) -> mesh_hashchain.Infonet:
    """Return a new ``Infonet`` whose data dir, chain file and WAL live under ``tmp_path``."""
    redirected = {
        "DATA_DIR": tmp_path,
        "CHAIN_FILE": tmp_path / "infonet.json",
        "WAL_FILE": tmp_path / "infonet.wal",
    }
    for attr_name, location in redirected.items():
        monkeypatch.setattr(mesh_hashchain, attr_name, location)
    return mesh_hashchain.Infonet()
|
||||
|
||||
|
||||
def _fresh_relay(tmp_path, monkeypatch) -> mesh_dm_relay.DMRelay:
    """Return a new ``DMRelay`` with relay and secure-storage paths under ``tmp_path``."""
    relay_file = tmp_path / "dm_relay.json"
    master_key_file = tmp_path / "wormhole_secure_store.key"

    monkeypatch.setattr(mesh_dm_relay, "DATA_DIR", tmp_path)
    monkeypatch.setattr(mesh_dm_relay, "RELAY_FILE", relay_file)
    monkeypatch.setattr(mesh_secure_storage, "DATA_DIR", tmp_path)
    monkeypatch.setattr(mesh_secure_storage, "MASTER_KEY_FILE", master_key_file)
    # Clear cached settings before the relay is constructed.
    get_settings.cache_clear()
    return mesh_dm_relay.DMRelay()
|
||||
|
||||
|
||||
def test_private_dm_hashchain_spools_two_ciphertexts_per_recipient_from_distinct_senders(tmp_path, monkeypatch):
    """Two distinct senders may spool a DM for one recipient; a third is rejected.

    Each accepted sender ends with ``dm_message`` sequence 1 and the chain
    still validates with signature verification enabled.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    senders = [_keypair(), _keypair()]

    def submit(identity, body):
        # Every sender posts its first (sequence=1) DM with a matching signature.
        signing_key, encoded_public, sender_id = identity
        return inf.append_private_dm_message(
            node_id=sender_id,
            payload=body,
            signature=_signature(signing_key, sender_id, 1, body),
            sequence=1,
            public_key=encoded_public,
            public_key_algo="Ed25519",
            protocol_version=mesh_protocol.PROTOCOL_VERSION,
            timestamp=float(body["timestamp"]),
        )

    for idx, identity in enumerate(senders, start=1):
        event = submit(identity, _payload(msg_id=f"dm-{idx}"))
        assert event["event_type"] == "dm_message"

    overflow_identity = _keypair()
    third = _payload(msg_id="dm-3")
    rejection = None
    try:
        submit(overflow_identity, third)
    except ValueError as exc:
        rejection = exc
    if rejection is None:
        raise AssertionError("third DM spool event was accepted")
    assert "spool full" in str(rejection)

    for _signing_key, _encoded_public, sender_node_id in senders:
        assert inf.sequence_domains[f"{sender_node_id}|dm_message"] == 1
    assert inf.validate_chain(verify_signatures=True)[0] is True
|
||||
|
||||
|
||||
def test_private_dm_hashchain_limits_one_active_spool_per_sender_recipient_pair(tmp_path, monkeypatch):
    """A second DM from the same sender to the same recipient is rejected."""
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()

    def submit(body, sequence):
        # Same sender identity each time; only the payload and sequence vary.
        return inf.append_private_dm_message(
            node_id=node_id,
            payload=body,
            signature=_signature(private_key, node_id, sequence, body),
            sequence=sequence,
            public_key=public_key,
            public_key_algo="Ed25519",
            protocol_version=mesh_protocol.PROTOCOL_VERSION,
            timestamp=float(body["timestamp"]),
        )

    first = _payload(msg_id="dm-1")
    submit(first, 1)

    second = _payload(msg_id="dm-2")
    rejection = None
    try:
        submit(second, 2)
    except ValueError as exc:
        rejection = exc
    if rejection is None:
        raise AssertionError("second DM from same sender to same recipient was accepted")
    assert "sender spool full" in str(rejection)
|
||||
|
||||
|
||||
def test_private_dm_hashchain_rejects_plaintext(tmp_path, monkeypatch):
    """A DM payload carrying a plaintext ``message`` field is rejected."""
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()
    tainted = _payload()
    tainted["message"] = "plaintext"

    append_kwargs = dict(
        node_id=node_id,
        payload=tainted,
        # The signature covers a clean payload, not the tainted one.
        signature=_signature(private_key, node_id, 1, _payload()),
        sequence=1,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
    )
    rejection = None
    try:
        inf.append_private_dm_message(**append_kwargs)
    except ValueError as exc:
        rejection = exc
    if rejection is None:
        raise AssertionError("private DM append accepted plaintext")
    assert "plaintext" in str(rejection)
|
||||
|
||||
|
||||
def test_private_dm_hashchain_rejects_non_sealed_ciphertext_shape(tmp_path, monkeypatch):
    """A ``ciphertext`` that is not sealed bytes is rejected."""
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()
    malformed = _payload()
    malformed["ciphertext"] = "not sealed plaintext"

    append_kwargs = dict(
        node_id=node_id,
        payload=malformed,
        signature=_signature(private_key, node_id, 1, malformed),
        sequence=1,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
    )
    rejection = None
    try:
        inf.append_private_dm_message(**append_kwargs)
    except ValueError as exc:
        rejection = exc
    if rejection is None:
        raise AssertionError("private DM append accepted non-base64 ciphertext")
    assert "sealed bytes" in str(rejection)
|
||||
|
||||
|
||||
def test_hydrate_dm_relay_from_chain_delivers_to_poll_claim(tmp_path, monkeypatch):
    """A DM appended to the chain is hydrated into the relay and returned by a claim."""
    inf = _fresh_infonet(tmp_path / "chain", monkeypatch)
    relay = _fresh_relay(tmp_path / "relay", monkeypatch)
    # Install the fresh instances as the module-level singletons.
    monkeypatch.setattr(mesh_hashchain, "infonet", inf)
    monkeypatch.setattr(mesh_dm_relay, "dm_relay", relay)

    private_key, public_key, node_id = _keypair()
    payload = _payload(recipient_id="recipient-a", msg_id="dm-chain-1")
    signature = _signature(private_key, node_id, 1, payload)
    event = inf.append_private_dm_message(
        node_id=node_id,
        payload=payload,
        signature=signature,
        sequence=1,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
        timestamp=float(payload["timestamp"]),
    )

    from main import _hydrate_dm_relay_from_chain

    hydrated_count = _hydrate_dm_relay_from_chain([event])
    assert hydrated_count == 1

    claims = [{"type": "requests", "token": "recipient-request-token"}]
    messages, more = relay.collect_claims("recipient-a", claims, limit=8)

    assert more is False
    assert [message["msg_id"] for message in messages] == ["dm-chain-1"]
    assert messages[0]["ciphertext"] == payload["ciphertext"]
|
||||
@@ -1,269 +0,0 @@
|
||||
import base64
|
||||
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||
|
||||
from services.mesh import mesh_crypto, mesh_hashchain, mesh_protocol
|
||||
|
||||
|
||||
def _keypair():
    """Generate a fresh Ed25519 identity.

    Returns:
        ``(private_key, public_key, node_id)`` where ``public_key`` is the
        base64 text of the raw public key bytes and ``node_id`` is derived
        from it via ``mesh_crypto.derive_node_id``.
    """
    signing_key = ed25519.Ed25519PrivateKey.generate()
    raw_public = signing_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    encoded_public = base64.b64encode(raw_public).decode("utf-8")
    return signing_key, encoded_public, mesh_crypto.derive_node_id(encoded_public)
|
||||
|
||||
|
||||
def _sign(private_key, *, event_type: str, node_id: str, sequence: int, payload: dict) -> str:
    """Sign an event of the given type and return the signature as a hex string."""
    to_sign = mesh_crypto.build_signature_payload(
        event_type=event_type,
        node_id=node_id,
        sequence=sequence,
        payload=payload,
    )
    raw_signature = private_key.sign(to_sign.encode("utf-8"))
    return raw_signature.hex()
|
||||
|
||||
|
||||
def _message_payload(text: str) -> dict:
    """Build a normalized public broadcast ``message`` payload carrying ``text``."""
    raw_fields = {
        "message": text,
        "destination": "broadcast",
        "channel": "LongFast",
        "priority": "normal",
        "ephemeral": False,
    }
    return mesh_protocol.normalize_payload("message", raw_fields)
|
||||
|
||||
|
||||
def _gate_payload(gate_id: str = "ops-gate", *, epoch: int = 2, plaintext: bool = False) -> dict:
|
||||
payload = {
|
||||
"gate": gate_id,
|
||||
"ciphertext": base64.b64encode(b"encrypted-gate-ciphertext").decode("ascii"),
|
||||
"nonce": base64.b64encode(b"nonce-value-1234").decode("ascii"),
|
||||
"sender_ref": "sender-ref-1",
|
||||
"format": "mls1",
|
||||
"transport_lock": "private_strong",
|
||||
}
|
||||
if epoch > 0:
|
||||
payload["epoch"] = epoch
|
||||
if plaintext:
|
||||
payload["message"] = "this must never land on the chain"
|
||||
return mesh_protocol.normalize_payload("gate_message", payload) if not plaintext else payload
|
||||
|
||||
|
||||
def _gate_event(
    private_key,
    public_key: str,
    node_id: str,
    *,
    sequence: int,
    prev_hash: str,
    payload: dict,
    signature_payload: dict | None = None,
) -> dict:
    """Build a signed ``gate_message`` chain event as a plain dict.

    Args:
        private_key: Key used to sign the event.
        public_key: Public key text recorded on the event.
        node_id: Author node id.
        sequence: Event sequence; also offsets the timestamp (``1234.0 + sequence``).
        prev_hash: Hash of the event this one links to.
        payload: Payload stored on the event.
        signature_payload: Payload to sign instead of ``payload`` when truthy.

    Returns:
        The ``ChainEvent`` serialized via ``to_dict()``.
    """
    signed_body = signature_payload or payload
    signature = _sign(
        private_key,
        event_type="gate_message",
        node_id=node_id,
        sequence=sequence,
        payload=signed_body,
    )
    chain_event = mesh_hashchain.ChainEvent(
        prev_hash=prev_hash,
        event_type="gate_message",
        node_id=node_id,
        payload=payload,
        timestamp=1234.0 + sequence,
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
        network_id=mesh_protocol.NETWORK_ID,
    )
    return chain_event.to_dict()
|
||||
|
||||
|
||||
def _fresh_infonet(tmp_path, monkeypatch) -> mesh_hashchain.Infonet:
    """Return a new ``Infonet`` whose data dir, chain file and WAL live under ``tmp_path``."""
    redirected = {
        "DATA_DIR": tmp_path,
        "CHAIN_FILE": tmp_path / "infonet.json",
        "WAL_FILE": tmp_path / "infonet.wal",
    }
    for attr_name, location in redirected.items():
        monkeypatch.setattr(mesh_hashchain, attr_name, location)
    return mesh_hashchain.Infonet()
|
||||
|
||||
|
||||
def test_private_gate_fork_uses_gate_sequence_domain_and_signature_variants(tmp_path, monkeypatch):
    """Accept a gate_message fork that reuses sequence 1 and is signed without ``epoch``.

    A public ``message`` event is appended at sequence 1, then a fork holding
    one ``gate_message`` event (also sequence 1, signed over the payload with
    ``epoch`` removed) is applied on top of it.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()

    # Public prefix the fork chains onto.
    public_payload = _message_payload("public prefix")
    public_signature = _sign(
        private_key,
        event_type="message",
        node_id=node_id,
        sequence=1,
        payload=public_payload,
    )
    public_event = inf.append(
        event_type="message",
        node_id=node_id,
        payload=public_payload,
        sequence=1,
        signature=public_signature,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
    )

    gate_payload = _gate_payload(epoch=3)
    # Signature variant: the same payload with the epoch field left out.
    epochless_payload = {key: value for key, value in gate_payload.items() if key != "epoch"}
    gate_event = _gate_event(
        private_key,
        public_key,
        node_id,
        sequence=1,
        prev_hash=public_event["event_id"],
        payload=gate_payload,
        signature_payload=epochless_payload,
    )

    fork_head = gate_event["event_id"]
    ok, reason = inf.apply_fork([gate_event], fork_head, proof_count=2, quorum=2)

    assert ok is True, reason
    newest = inf.events[-1]
    assert newest["event_type"] == "gate_message"
    assert inf.node_sequences[node_id] == 1
    gate_domain = f"{node_id}|gate_message"
    assert inf.sequence_domains[gate_domain] == 1
    chain_ok = inf.validate_chain(verify_signatures=True)[0]
    assert chain_ok is True
|
||||
|
||||
|
||||
def test_private_gate_fork_rejects_plaintext_payload(tmp_path, monkeypatch):
    """A fork whose gate_message payload carries ``message`` must be refused.

    After the rejection the chain still holds only the public prefix event and
    reports no ``gate_message`` event type.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()

    # Public prefix the fork would chain onto.
    public_payload = _message_payload("public prefix")
    public_signature = _sign(
        private_key,
        event_type="message",
        node_id=node_id,
        sequence=1,
        payload=public_payload,
    )
    public_event = inf.append(
        event_type="message",
        node_id=node_id,
        payload=public_payload,
        sequence=1,
        signature=public_signature,
        public_key=public_key,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
    )

    plaintext_payload = _gate_payload(plaintext=True)
    gate_event = _gate_event(
        private_key,
        public_key,
        node_id,
        sequence=1,
        prev_hash=public_event["event_id"],
        payload=plaintext_payload,
    )

    fork_head = gate_event["event_id"]
    ok, reason = inf.apply_fork([gate_event], fork_head, proof_count=2, quorum=2)

    assert ok is False
    assert any(marker in reason for marker in ("normalized", "plaintext"))
    assert len(inf.events) == 1
    known_types = inf.get_info()["event_types"]
    assert "gate_message" not in known_types
|
||||
|
||||
|
||||
def test_append_private_gate_message_rejects_plaintext_before_normalizing(tmp_path, monkeypatch):
    """``append_private_gate_message`` must raise ValueError for a payload with ``message``.

    The signature is computed over a clean gate payload, so the submitted
    payload differs only by the extra plaintext field. Nothing may be stored.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()
    tainted = _gate_payload()
    tainted["message"] = "plaintext should not be silently dropped"

    rejection = None
    try:
        inf.append_private_gate_message(
            node_id=node_id,
            payload=tainted,
            sequence=1,
            signature=_sign(
                private_key,
                event_type="gate_message",
                node_id=node_id,
                sequence=1,
                payload=_gate_payload(),
            ),
            public_key=public_key,
            public_key_algo="Ed25519",
            protocol_version=mesh_protocol.PROTOCOL_VERSION,
        )
    except ValueError as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("private gate append accepted plaintext")
    assert "plaintext" in str(rejection)

    assert inf.events == []
|
||||
|
||||
|
||||
def test_append_private_gate_message_requires_private_strong_transport_lock(tmp_path, monkeypatch):
    """A gate payload without ``transport_lock`` must be rejected with ValueError.

    The error text is expected to mention ``private_strong`` and the chain must
    stay empty.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()
    unlocked = _gate_payload()
    unlocked.pop("transport_lock", None)

    rejection = None
    try:
        inf.append_private_gate_message(
            node_id=node_id,
            payload=unlocked,
            sequence=1,
            # Signed over a complete payload; only the submitted one lacks the lock.
            signature=_sign(
                private_key,
                event_type="gate_message",
                node_id=node_id,
                sequence=1,
                payload=_gate_payload(),
            ),
            public_key=public_key,
            public_key_algo="Ed25519",
            protocol_version=mesh_protocol.PROTOCOL_VERSION,
        )
    except ValueError as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("private gate append accepted missing transport_lock")
    assert "private_strong" in str(rejection)

    assert inf.events == []
|
||||
|
||||
|
||||
def test_append_private_gate_message_rejects_non_sealed_ciphertext_shape(tmp_path, monkeypatch):
    """A ``ciphertext`` that is plain text rather than base64 must raise ValueError.

    The error text is expected to contain ``sealed bytes`` and the chain must
    stay empty.
    """
    inf = _fresh_infonet(tmp_path, monkeypatch)
    private_key, public_key, node_id = _keypair()
    malformed = _gate_payload()
    malformed["ciphertext"] = "not sealed plaintext"

    rejection = None
    try:
        inf.append_private_gate_message(
            node_id=node_id,
            payload=malformed,
            sequence=1,
            # Here the signature covers the malformed payload itself.
            signature=_sign(
                private_key,
                event_type="gate_message",
                node_id=node_id,
                sequence=1,
                payload=malformed,
            ),
            public_key=public_key,
            public_key_algo="Ed25519",
            protocol_version=mesh_protocol.PROTOCOL_VERSION,
        )
    except ValueError as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("private gate append accepted non-base64 ciphertext")
    assert "sealed bytes" in str(rejection)

    assert inf.events == []
|
||||
@@ -1,12 +1,14 @@
|
||||
"""S14B private sync gate event policy.
|
||||
"""S14B Public Sync Gate Event Filter.
|
||||
|
||||
Private Infonet sync carries encrypted gate_message ledger events. If a node
|
||||
is configured to allow clearnet-compatible sync, those gate events are filtered
|
||||
out of the sync response.
|
||||
Tests:
|
||||
- GET /api/mesh/infonet/sync excludes gate_message when local infonet contains legacy gate_message plus public events
|
||||
- POST /api/mesh/infonet/sync excludes gate_message under the same condition
|
||||
- Both main app and router-served paths are covered
|
||||
- Non-gate public redactions still hold (vote gate label stripped, key_rotate identity stripped)
|
||||
- Do not overclaim that gate_message is removed from historical infonet storage or ingest
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
|
||||
from starlette.requests import Request
|
||||
@@ -15,6 +17,9 @@ import main
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _message_event() -> dict:
|
||||
return {
|
||||
"event_id": "msg-1",
|
||||
@@ -78,7 +83,6 @@ def _gate_message_event() -> dict:
|
||||
"nonce": "nonce-1",
|
||||
"sender_ref": "sender-ref-1",
|
||||
"format": "mls1",
|
||||
"transport_lock": "private_strong",
|
||||
},
|
||||
"timestamp": 103.0,
|
||||
"sequence": 4,
|
||||
@@ -89,31 +93,9 @@ def _gate_message_event() -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _dm_message_event() -> dict:
|
||||
return {
|
||||
"event_id": "dm-1",
|
||||
"event_type": "dm_message",
|
||||
"node_id": "!node-5",
|
||||
"payload": {
|
||||
"recipient_id": "recipient-a",
|
||||
"delivery_class": "request",
|
||||
"recipient_token": "",
|
||||
"ciphertext": base64.b64encode(b"sealed-dm-ciphertext").decode("ascii"),
|
||||
"msg_id": "dm-1",
|
||||
"timestamp": 104,
|
||||
"format": "mls1",
|
||||
"transport_lock": "private_strong",
|
||||
},
|
||||
"timestamp": 104.0,
|
||||
"sequence": 5,
|
||||
"signature": "sig",
|
||||
"public_key": "pub",
|
||||
"public_key_algo": "Ed25519",
|
||||
"protocol_version": "infonet/2",
|
||||
}
|
||||
|
||||
|
||||
class _FakeInfonet:
|
||||
"""Minimal fake infonet with a gate_message among public events."""
|
||||
|
||||
def __init__(self):
|
||||
self.head_hash = "head-1"
|
||||
self.events = [
|
||||
@@ -131,10 +113,12 @@ class _FakeInfonet:
|
||||
return int(getattr(limit, "default", 100) or 100)
|
||||
|
||||
def get_events_after(self, after_hash: str, limit=100):
|
||||
return [dict(e) for e in self.events[: self._limit_value(limit)]]
|
||||
resolved = self._limit_value(limit)
|
||||
return [dict(e) for e in self.events[:resolved]]
|
||||
|
||||
def get_events_after_locator(self, locator: list[str], limit=100):
|
||||
return self.head_hash, 0, [dict(e) for e in self.events[: self._limit_value(limit)]]
|
||||
resolved = self._limit_value(limit)
|
||||
return self.head_hash, 0, [dict(e) for e in self.events[:resolved]]
|
||||
|
||||
def get_merkle_proofs(self, start_index: int, count: int):
|
||||
return {"root": "merkle-root", "total": len(self.events), "start": start_index, "proofs": []}
|
||||
@@ -143,7 +127,7 @@ class _FakeInfonet:
|
||||
return "merkle-root"
|
||||
|
||||
|
||||
def _json_request(path: str, body: dict, *, client_host: str = "127.0.0.1", headers: dict[str, str] | None = None) -> Request:
|
||||
def _json_request(path: str, body: dict) -> Request:
|
||||
payload = json.dumps(body).encode("utf-8")
|
||||
sent = {"value": False}
|
||||
|
||||
@@ -153,14 +137,11 @@ def _json_request(path: str, body: dict, *, client_host: str = "127.0.0.1", head
|
||||
sent["value"] = True
|
||||
return {"type": "http.request", "body": payload, "more_body": False}
|
||||
|
||||
raw_headers = [(b"content-type", b"application/json")]
|
||||
for key, value in dict(headers or {}).items():
|
||||
raw_headers.append((key.lower().encode("ascii"), str(value).encode("ascii")))
|
||||
return Request(
|
||||
{
|
||||
"type": "http",
|
||||
"headers": raw_headers,
|
||||
"client": (client_host, 12345),
|
||||
"headers": [(b"content-type", b"application/json")],
|
||||
"client": ("test", 12345),
|
||||
"method": "POST",
|
||||
"path": path,
|
||||
},
|
||||
@@ -168,15 +149,20 @@ def _json_request(path: str, body: dict, *, client_host: str = "127.0.0.1", head
|
||||
)
|
||||
|
||||
|
||||
def _get_request(path: str, *, client_host: str = "127.0.0.1", headers: dict[str, str] | None = None) -> Request:
|
||||
def _get_request(path: str) -> Request:
|
||||
sent = {"value": False}
|
||||
|
||||
async def receive():
|
||||
if sent["value"]:
|
||||
return {"type": "http.request", "body": b"", "more_body": False}
|
||||
sent["value"] = True
|
||||
return {"type": "http.request", "body": b"", "more_body": False}
|
||||
|
||||
return Request(
|
||||
{
|
||||
"type": "http",
|
||||
"headers": [(key.lower().encode("ascii"), str(value).encode("ascii")) for key, value in dict(headers or {}).items()],
|
||||
"client": (client_host, 12345),
|
||||
"headers": [],
|
||||
"client": ("test", 12345),
|
||||
"method": "GET",
|
||||
"path": path,
|
||||
},
|
||||
@@ -184,166 +170,120 @@ def _get_request(path: str, *, client_host: str = "127.0.0.1", headers: dict[str
|
||||
)
|
||||
|
||||
|
||||
def _force_private_sync(monkeypatch):
    """Patch ``main`` so private transport is required and every request looks private."""

    def _always_required():
        return True

    def _always_private(request):
        return True

    monkeypatch.setattr(main, "_infonet_private_transport_required", _always_required)
    monkeypatch.setattr(main, "_request_appears_private_infonet_transport", _always_private)
|
||||
# ── GET sync excludes gate_message (main app) ──────────────────────────
|
||||
|
||||
|
||||
def _force_private_policy_only(monkeypatch):
    """Patch ``main`` so private transport is required; request detection is left untouched."""

    def _always_required():
        return True

    monkeypatch.setattr(main, "_infonet_private_transport_required", _always_required)
|
||||
|
||||
|
||||
def _force_clearnet_sync(monkeypatch):
    """Patch ``main`` so private transport is reported as not required."""

    def _never_required():
        return False

    monkeypatch.setattr(main, "_infonet_private_transport_required", _never_required)
|
||||
|
||||
|
||||
def _event_types(events: list[dict]) -> list[str]:
|
||||
return [str(e.get("event_type", "")) for e in events]
|
||||
|
||||
|
||||
def test_private_sync_redacts_private_events_from_exposed_clearnet_request(monkeypatch):
    """With private policy on, a request from 203.0.113.10 only gets the public message."""
    _force_private_policy_only(monkeypatch)
    request = _get_request("/api/mesh/infonet/sync", client_host="203.0.113.10")
    ledger = [_message_event(), _gate_message_event(), _dm_message_event()]

    events = main._infonet_sync_response_events(ledger, request=request)

    returned_types = _event_types(events)
    assert returned_types == ["message"]
|
||||
|
||||
|
||||
def test_private_sync_includes_private_events_for_loopback_request(monkeypatch):
    """With private policy on, a 127.0.0.1 request gets message, gate and DM events."""
    _force_private_policy_only(monkeypatch)
    request = _get_request("/api/mesh/infonet/sync", client_host="127.0.0.1")
    ledger = [_message_event(), _gate_message_event(), _dm_message_event()]

    events = main._infonet_sync_response_events(ledger, request=request)

    returned_types = _event_types(events)
    assert returned_types == ["message", "gate_message", "dm_message"]
|
||||
|
||||
|
||||
def test_private_sync_redacts_private_events_when_forwarded_for_is_clearnet(monkeypatch):
    """A loopback client with ``x-forwarded-for: 198.51.100.44`` only gets the public message."""
    _force_private_policy_only(monkeypatch)
    forwarded_headers = {"x-forwarded-for": "198.51.100.44"}
    request = _get_request(
        "/api/mesh/infonet/sync",
        client_host="127.0.0.1",
        headers=forwarded_headers,
    )
    ledger = [_message_event(), _gate_message_event(), _dm_message_event()]

    events = main._infonet_sync_response_events(ledger, request=request)

    returned_types = _event_types(events)
    assert returned_types == ["message"]
|
||||
|
||||
|
||||
def test_get_sync_includes_gate_message_on_private_transport(client, monkeypatch):
|
||||
_force_private_sync(monkeypatch)
|
||||
def test_get_sync_excludes_gate_message(client, monkeypatch):
|
||||
"""GET /api/mesh/infonet/sync must not return gate_message events."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
data = client.get("/api/mesh/infonet/sync").json()
|
||||
|
||||
assert "gate_message" in _event_types(data["events"])
|
||||
assert data["count"] == 4
|
||||
resp = client.get("/api/mesh/infonet/sync")
|
||||
data = resp.json()
|
||||
event_types = [e["event_type"] for e in data["events"]]
|
||||
assert "gate_message" not in event_types
|
||||
assert "message" in event_types
|
||||
assert "vote" in event_types
|
||||
assert "key_rotate" in event_types
|
||||
|
||||
|
||||
def test_post_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||
_force_private_sync(monkeypatch)
|
||||
def test_get_sync_count_excludes_gate_message(client, monkeypatch):
|
||||
"""GET sync count field must reflect filtered events (gate_message excluded)."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
resp = client.get("/api/mesh/infonet/sync")
|
||||
data = resp.json()
|
||||
assert data["count"] == 3 # message, vote, key_rotate — not gate_message
|
||||
|
||||
|
||||
# ── POST sync excludes gate_message (main app) ─────────────────────────
|
||||
|
||||
|
||||
def test_post_sync_excludes_gate_message(monkeypatch):
|
||||
"""POST /api/mesh/infonet/sync must not return gate_message events."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
result = asyncio.run(
|
||||
main.infonet_sync_post(
|
||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||
)
|
||||
)
|
||||
|
||||
assert "gate_message" in _event_types(result["events"])
|
||||
assert result["count"] == 4
|
||||
event_types = [e["event_type"] for e in result["events"]]
|
||||
assert "gate_message" not in event_types
|
||||
assert "message" in event_types
|
||||
assert "vote" in event_types
|
||||
assert "key_rotate" in event_types
|
||||
|
||||
|
||||
def test_router_get_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||
def test_post_sync_count_excludes_gate_message(monkeypatch):
|
||||
"""POST sync count field must reflect filtered events."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
result = asyncio.run(
|
||||
main.infonet_sync_post(
|
||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||
)
|
||||
)
|
||||
assert result["count"] == 3
|
||||
|
||||
|
||||
# ── Router-served paths ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_router_get_sync_excludes_gate_message(monkeypatch):
|
||||
"""Router GET /api/mesh/infonet/sync must not return gate_message."""
|
||||
from routers.mesh_public import infonet_sync
|
||||
|
||||
_force_private_sync(monkeypatch)
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
result = asyncio.run(infonet_sync(_get_request("/api/mesh/infonet/sync")))
|
||||
|
||||
assert "gate_message" in _event_types(result["events"])
|
||||
assert result["count"] == len(result["events"])
|
||||
event_types = [e["event_type"] for e in result["events"]]
|
||||
assert "gate_message" not in event_types
|
||||
assert "message" in event_types
|
||||
assert data_count_matches(result)
|
||||
|
||||
|
||||
def test_router_post_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||
def test_router_post_sync_excludes_gate_message(monkeypatch):
|
||||
"""Router POST /api/mesh/infonet/sync must not return gate_message."""
|
||||
from routers.mesh_public import infonet_sync_post
|
||||
|
||||
_force_private_sync(monkeypatch)
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
result = asyncio.run(
|
||||
infonet_sync_post(
|
||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||
)
|
||||
)
|
||||
|
||||
assert "gate_message" in _event_types(result["events"])
|
||||
assert result["count"] == len(result["events"])
|
||||
event_types = [e["event_type"] for e in result["events"]]
|
||||
assert "gate_message" not in event_types
|
||||
assert "message" in event_types
|
||||
assert data_count_matches(result)
|
||||
|
||||
|
||||
def test_get_sync_excludes_gate_message_when_clearnet_sync_allowed(client, monkeypatch):
    """GET sync with private transport not required omits gate_message and reports count 3."""
    _force_clearnet_sync(monkeypatch)
    fake_ledger = _FakeInfonet()
    monkeypatch.setattr(mesh_hashchain, "infonet", fake_ledger, raising=False)

    response = client.get("/api/mesh/infonet/sync")
    data = response.json()

    returned_types = _event_types(data["events"])
    assert "gate_message" not in returned_types
    assert data["count"] == 3
|
||||
def data_count_matches(result: dict) -> bool:
    """Return whether ``result["count"]`` equals the number of ``result["events"]``."""
    reported = result["count"]
    actual = len(result["events"])
    return reported == actual
|
||||
|
||||
|
||||
def test_post_sync_excludes_gate_message_when_clearnet_sync_allowed(monkeypatch):
    """POST sync with private transport not required omits gate_message and reports count 3."""
    _force_clearnet_sync(monkeypatch)
    fake_ledger = _FakeInfonet()
    monkeypatch.setattr(mesh_hashchain, "infonet", fake_ledger, raising=False)

    request = _json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
    result = asyncio.run(main.infonet_sync_post(request))

    returned_types = _event_types(result["events"])
    assert "gate_message" not in returned_types
    assert result["count"] == 3
|
||||
# ── Non-gate redactions still hold ─────────────────────────────────────
|
||||
|
||||
|
||||
def test_get_sync_still_redacts_vote_gate_label(client, monkeypatch):
|
||||
_force_private_sync(monkeypatch)
|
||||
"""Public sync must still strip gate label from vote payload."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
events = client.get("/api/mesh/infonet/sync").json()["events"]
|
||||
resp = client.get("/api/mesh/infonet/sync")
|
||||
events = resp.json()["events"]
|
||||
vote = next(e for e in events if e["event_type"] == "vote")
|
||||
|
||||
assert "gate" not in vote.get("payload", {})
|
||||
|
||||
|
||||
def test_get_sync_still_redacts_key_rotate_identity(client, monkeypatch):
|
||||
_force_private_sync(monkeypatch)
|
||||
"""Public sync must still strip old identity fields from key_rotate payload."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
events = client.get("/api/mesh/infonet/sync").json()["events"]
|
||||
resp = client.get("/api/mesh/infonet/sync")
|
||||
events = resp.json()["events"]
|
||||
rotate = next(e for e in events if e["event_type"] == "key_rotate")
|
||||
payload = rotate.get("payload", {})
|
||||
|
||||
assert "old_node_id" not in payload
|
||||
assert "old_public_key" not in payload
|
||||
assert "old_signature" not in payload
|
||||
|
||||
|
||||
def test_post_sync_still_redacts_vote_and_rotate(monkeypatch):
|
||||
_force_private_sync(monkeypatch)
|
||||
"""POST sync must still apply standard public redactions to non-gate events."""
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||
|
||||
result = asyncio.run(
|
||||
main.infonet_sync_post(
|
||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||
@@ -351,17 +291,24 @@ def test_post_sync_still_redacts_vote_and_rotate(monkeypatch):
|
||||
)
|
||||
vote = next(e for e in result["events"] if e["event_type"] == "vote")
|
||||
rotate = next(e for e in result["events"] if e["event_type"] == "key_rotate")
|
||||
|
||||
assert "gate" not in vote.get("payload", {})
|
||||
assert "old_node_id" not in rotate.get("payload", {})
|
||||
|
||||
|
||||
# ── No overclaim ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_gate_message_still_in_fake_infonet_storage():
|
||||
"""The filter does NOT remove gate_message from underlying storage.
|
||||
This test documents that the infonet still holds gate_message events;
|
||||
only the public sync response surface filters them out."""
|
||||
fake = _FakeInfonet()
|
||||
assert "gate_message" in _event_types(fake.events)
|
||||
all_types = [e["event_type"] for e in fake.events]
|
||||
assert "gate_message" in all_types
|
||||
|
||||
|
||||
def test_private_sync_with_only_gate_messages_returns_gate_events(client, monkeypatch):
|
||||
def test_sync_with_only_gate_messages_returns_empty(client, monkeypatch):
|
||||
"""If infonet contains only gate_message events, sync returns empty list."""
|
||||
class _GateOnlyInfonet:
|
||||
head_hash = "head-1"
|
||||
events = [_gate_message_event()]
|
||||
@@ -378,10 +325,8 @@ def test_private_sync_with_only_gate_messages_returns_gate_events(client, monkey
|
||||
def get_merkle_root(self):
|
||||
return "r"
|
||||
|
||||
_force_private_sync(monkeypatch)
|
||||
monkeypatch.setattr(mesh_hashchain, "infonet", _GateOnlyInfonet(), raising=False)
|
||||
|
||||
data = client.get("/api/mesh/infonet/sync").json()
|
||||
|
||||
assert _event_types(data["events"]) == ["gate_message"]
|
||||
assert data["count"] == 1
|
||||
resp = client.get("/api/mesh/infonet/sync")
|
||||
data = resp.json()
|
||||
assert data["events"] == []
|
||||
assert data["count"] == 0
|
||||
|
||||
@@ -66,20 +66,6 @@ def _make_gate_message_event(priv, pub_b64, node_id, sequence, prev_hash, gate_i
|
||||
return evt.to_dict()
|
||||
|
||||
|
||||
def _make_gate_payload(gate_id="test-gate") -> dict:
    """Return a normalized ``gate_message`` payload for ``gate_id``.

    The raw fields are passed through ``mesh_protocol.normalize_payload``.
    """
    raw_fields = {
        "gate": gate_id,
        "ciphertext": base64.b64encode(b"encrypted-data").decode(),
        "nonce": base64.b64encode(b"nonce-value-1234").decode(),
        "sender_ref": "sender-abc",
        "format": "mls1",
        "transport_lock": "private_strong",
    }
    return mesh_protocol.normalize_payload("gate_message", raw_fields)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def fresh_env(tmp_path, monkeypatch):
|
||||
"""Set up isolated infonet + gate_store, return (infonet, gate_store)."""
|
||||
@@ -103,74 +89,6 @@ def fresh_env(tmp_path, monkeypatch):
|
||||
# ── Rejected gate_message must NOT hydrate gate_store ─────────────────────
|
||||
|
||||
|
||||
def test_append_private_gate_message_uses_hashchain_gate_sequence(fresh_env):
    """A locally appended gate post becomes the chain head in its own sequence domain.

    The ``{node_id}|gate_message`` domain advances to the event's sequence
    while the node's entry in ``node_sequences`` stays at zero.
    """
    inf, _gs = fresh_env
    priv, pub_b64, node_id = _make_keypair()
    sequence = 1
    payload = _make_gate_payload("test-gate")

    canonical = mesh_crypto.build_signature_payload(
        event_type="gate_message",
        node_id=node_id,
        sequence=sequence,
        payload=payload,
    )
    raw_signature = priv.sign(canonical.encode("utf-8"))
    signature = raw_signature.hex()

    event = inf.append_private_gate_message(
        node_id=node_id,
        payload=payload,
        signature=signature,
        sequence=sequence,
        public_key=pub_b64,
        public_key_algo="Ed25519",
        protocol_version=mesh_protocol.PROTOCOL_VERSION,
        timestamp=123.0,
    )

    assert event["event_type"] == "gate_message"
    assert inf.head_hash == event["event_id"]
    gate_domain = f"{node_id}|gate_message"
    assert inf.sequence_domains[gate_domain] == sequence
    assert inf.node_sequences.get(node_id, 0) == 0
    stored_payload = event["payload"]
    assert stored_payload["transport_lock"] == "private_strong"
|
||||
|
||||
|
||||
def test_ingest_accepts_new_suffix_after_duplicate_prefix(fresh_env):
    """A batch of two already-ingested events plus one new event accepts only the new one.

    Events 1 and 2 are ingested individually first; the final batch replays
    both ahead of event 3, which must become the chain head.
    """
    inf, _gs = fresh_env
    priv, pub_b64, node_id = _make_keypair()

    chain = []
    parent_hash = mesh_hashchain.GENESIS_HASH
    for seq in (1, 2, 3):
        evt = _make_gate_message_event(
            priv,
            pub_b64,
            node_id,
            sequence=seq,
            prev_hash=parent_hash,
        )
        chain.append(evt)
        parent_hash = evt["event_id"]
        if seq < 3:
            # Only the first two are pre-ingested; the third arrives with the batch.
            assert inf.ingest_events([evt])["accepted"] == 1

    result = inf.ingest_events(chain)

    assert result["duplicates"] == 2
    assert result["accepted"] == 1
    assert result["rejected"] == []
    assert inf.head_hash == chain[-1]["event_id"]
|
||||
|
||||
|
||||
def test_rejected_event_does_not_hydrate_gate_store(fresh_env):
|
||||
"""A gate_message rejected by ingest must not appear in gate_store."""
|
||||
inf, gs = fresh_env
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
"""AIS upstream-connectivity telemetry.
|
||||
|
||||
Background
|
||||
----------
|
||||
On 2026-05-23, stream.aisstream.io went fully offline (TCP timeouts on port
|
||||
443). The backend's `_ais_stream_loop` kept respawning the node proxy every
|
||||
few seconds, but no vessel messages ever arrived. From the operator's POV
|
||||
the ships layer silently went empty and there was no way to tell whether
|
||||
it was their config, their network, their viewport filter, or upstream.
|
||||
|
||||
The fix surfaces three signals from ``ais_proxy_status()``:
|
||||
|
||||
* ``connected`` — bool, true when we received a vessel message in the
|
||||
last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
|
||||
* ``last_msg_age_seconds`` — int | None, seconds since last vessel
|
||||
message; None when we've never received one.
|
||||
* ``proxy_spawn_count`` — int, how many times we've spawned the node
|
||||
proxy. Sustained increase without ``connected`` means upstream is dead.
|
||||
|
||||
Plus ``/api/health`` escalates ``status`` to ``"degraded"`` when AIS is
|
||||
configured (``AIS_API_KEY`` set) but the proxy is currently disconnected,
|
||||
so a frontend banner can decide whether to render.
|
||||
|
||||
These tests pin every signal.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
import pytest
|
||||
|
||||
|
||||
def _reset_ais_module():
    """Restore ``services.ais_stream`` module state to its zero values.

    Under ``_vessels_lock`` this clears ``_proxy_status`` and zeroes
    ``_last_msg_at`` and ``_proxy_spawn_count`` so tests do not leak state.
    """
    from services import ais_stream as ais

    with ais._vessels_lock:
        ais._proxy_spawn_count = 0
        ais._last_msg_at = 0.0
        ais._proxy_status.clear()
|
||||
|
||||
|
||||
class TestAisProxyStatusShape:
    """Pin the fields returned by ``ais_proxy_status()``."""

    def test_fresh_module_reports_disconnected(self):
        """After a reset the status is disconnected, has no message age and no spawns."""
        _reset_ais_module()
        from services.ais_stream import ais_proxy_status

        status = ais_proxy_status()

        assert status["connected"] is False
        assert status["last_msg_age_seconds"] is None
        assert status["proxy_spawn_count"] == 0

    def test_recent_message_reports_connected(self):
        """A message stamped five seconds ago yields ``connected: true`` and a small age."""
        _reset_ais_module()
        from services import ais_stream as ais

        five_seconds_ago = time.time() - 5
        with ais._vessels_lock:
            ais._last_msg_at = five_seconds_ago
        status = ais.ais_proxy_status()

        assert status["connected"] is True
        age = status["last_msg_age_seconds"]
        assert age is not None
        assert 4 <= age <= 7

    def test_stale_message_reports_disconnected(self):
        """A message stamped five minutes ago yields ``connected: false`` with its age kept."""
        _reset_ais_module()
        from services import ais_stream as ais

        # 300 s back; the original test describes this as well past a 60 s window.
        five_minutes_ago = time.time() - 300
        with ais._vessels_lock:
            ais._last_msg_at = five_minutes_ago
        status = ais.ais_proxy_status()

        assert status["connected"] is False
        age = status["last_msg_age_seconds"]
        assert age is not None
        assert age >= 299

    def test_spawn_count_surfaced(self):
        """``_proxy_spawn_count`` is reported verbatim as ``proxy_spawn_count``."""
        _reset_ais_module()
        from services import ais_stream as ais

        with ais._vessels_lock:
            ais._proxy_spawn_count = 42
        status = ais.ais_proxy_status()

        assert status["proxy_spawn_count"] == 42

    def test_degraded_tls_preserved(self):
        """A ``degraded_tls`` flag stored in ``_proxy_status`` still appears in the status."""
        _reset_ais_module()
        from services import ais_stream as ais

        with ais._vessels_lock:
            ais._proxy_status["degraded_tls"] = True
        status = ais.ais_proxy_status()

        assert status.get("degraded_tls") is True
|
||||
|
||||
|
||||
class TestHealthEndpointEscalation:
    """Check how ``/api/health`` reports AIS proxy state with and without ``AIS_API_KEY``."""

    def test_disconnected_with_api_key_escalates_to_degraded(
        self, client, monkeypatch
    ):
        """With ``AIS_API_KEY`` set and no recent message, status must not be ``"ok"``."""
        _reset_ais_module()
        monkeypatch.setenv("AIS_API_KEY", "test-key")

        from services import ais_stream as ais

        # Simulate an offline upstream: the proxy has been spawned, but the
        # last message is ten minutes old.
        ten_minutes_ago = time.time() - 600
        with ais._vessels_lock:
            ais._proxy_spawn_count = 5
            ais._last_msg_at = ten_minutes_ago

        res = client.get("/api/health")
        assert res.status_code == 200
        body = res.json()
        proxy = body["ais_proxy"]
        assert proxy["connected"] is False
        assert proxy["proxy_spawn_count"] == 5
        # "error" is tolerated because another SLO may be failing as well;
        # the only forbidden value here is "ok".
        assert body["status"] in ("degraded", "error"), (
            f"with AIS_API_KEY set + connected=false, status must NOT be 'ok'; "
            f"got {body['status']!r}"
        )

    def test_no_api_key_does_not_escalate(self, client, monkeypatch):
        """Without ``AIS_API_KEY`` the endpoint still answers and reports AIS disconnected."""
        _reset_ais_module()
        monkeypatch.delenv("AIS_API_KEY", raising=False)

        from services import ais_stream as ais

        # Proxy never spawned and no message ever seen.
        with ais._vessels_lock:
            ais._proxy_spawn_count = 0
            ais._last_msg_at = 0.0

        res = client.get("/api/health")
        assert res.status_code == 200
        body = res.json()
        # The overall status is deliberately not asserted: unrelated SLOs may
        # have tripped during the session, so this test only proves the path
        # runs and that the AIS section reports disconnected.
        assert body["ais_proxy"]["connected"] is False
|
||||
@@ -1,432 +0,0 @@
|
||||
"""AISHub REST fallback for ship tracking.
|
||||
|
||||
Background
|
||||
----------
|
||||
When ``stream.aisstream.io`` (the WebSocket primary) is unreachable, the
|
||||
ships layer goes empty. ``aishub_fallback.py`` polls ``data.aishub.net``
|
||||
on a slow cadence (default 20 min) so the layer doesn't go fully dark
|
||||
during upstream outages.
|
||||
|
||||
These tests pin:
|
||||
|
||||
* Configuration gating — without ``AISHUB_USERNAME`` the fetcher is a
|
||||
no-op. The username's presence is the opt-in.
|
||||
* Connectivity gating — when the WebSocket primary is connected, the
|
||||
fallback skips so it doesn't stomp fresher live data.
|
||||
* Response parsing — successful, error, and empty AISHub payloads.
|
||||
* Record normalization — bad records (no MMSI, sentinel positions) are
|
||||
dropped without crashing.
|
||||
* Merge behavior — records land in the shared ``_vessels`` dict with
|
||||
``source: "aishub"`` and don't overwrite very-recent live updates.
|
||||
* Poll interval clamping — env var overrides honored within [1, 360].
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration / gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGating:
    """Opt-in (``AISHUB_USERNAME``) and connectivity gates of the fallback."""

    def test_no_username_means_disabled(self, monkeypatch):
        """Without ``AISHUB_USERNAME`` the fallback reports disabled and the
        fetch entry point returns 0."""
        from services.fetchers.aishub_fallback import (
            aishub_fallback_enabled,
            fetch_aishub_vessels,
        )
        monkeypatch.delenv("AISHUB_USERNAME", raising=False)

        assert aishub_fallback_enabled() is False
        # The full fetch path should early-return 0 without making any
        # network call — verified only indirectly here, by it not crashing
        # on the missing username.
        assert fetch_aishub_vessels() == 0

    def test_username_set_means_enabled(self, monkeypatch):
        """Setting ``AISHUB_USERNAME`` is the opt-in."""
        from services.fetchers.aishub_fallback import aishub_fallback_enabled
        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")

        assert aishub_fallback_enabled() is True

    def test_skips_when_websocket_primary_is_connected(self, monkeypatch):
        """If the AISStream WebSocket is currently delivering messages,
        the fallback should skip — fresher live data is already flowing."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")

        # Force "connected" state in the ais_stream module.
        with ais._vessels_lock:
            ais._last_msg_at = time.time() - 5  # 5s ago — well inside 60s
            ais._proxy_spawn_count = 1
        # Sanity check the gate:
        assert ais.ais_proxy_status()["connected"] is True

        def _forbidden_fetch(*args, **kwargs):
            # Any HTTP attempt while the primary is connected is a failure.
            raise AssertionError(
                "network call must not happen when primary is connected"
            )

        monkeypatch.setattr(aishub_fallback, "fetch_with_curl", _forbidden_fetch)

        # And confirm the fallback skips:
        assert aishub_fallback.fetch_aishub_vessels() == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResponseParsing:
    """``_parse_aishub_response`` on good, error, empty and malformed bodies."""

    def test_successful_response_parsed(self):
        """A header plus a records list yields the records in order."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        header = {"ERROR": False, "USERNAME": "test", "FORMAT": "1", "RECORDS": 2}
        records = [
            {"MMSI": 123, "LATITUDE": 40.0, "LONGITUDE": -73.0},
            {"MMSI": 456, "LATITUDE": 51.5, "LONGITUDE": -0.1},
        ]

        parsed = _parse_aishub_response(json.dumps([header, records]))

        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first["MMSI"] == 123
        assert second["MMSI"] == 456

    def test_error_response_returns_empty(self):
        """A header carrying ``ERROR=True`` is treated as "no data"."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        error_header = {"ERROR": True, "ERROR_MESSAGE": "Invalid username"}
        body = json.dumps([error_header])

        assert _parse_aishub_response(body) == []

    def test_empty_payload_returns_empty(self):
        """Silent rate-limit drops return 200 with an empty body (observed
        in practice when testing with a bogus username)."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        for blank_body in ("", " "):
            assert _parse_aishub_response(blank_body) == []

    def test_malformed_json_returns_empty(self):
        """Unparseable JSON yields an empty list rather than raising."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        broken = "not json {"
        assert _parse_aishub_response(broken) == []

    def test_unexpected_shape_returns_empty(self):
        """Defensive: valid JSON whose shape is not what AISHub documents."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        odd_shapes = (
            {"unexpected": "object"},
            [],
            # Header-only with no records list:
            [{"ERROR": False, "RECORDS": 0}],
        )
        for shape in odd_shapes:
            assert _parse_aishub_response(json.dumps(shape)) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Record normalization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormalize:
    """``_normalize_record``: field mapping, sentinel handling, rejection."""

    def test_full_record_normalized(self):
        """A fully populated AISHub row maps onto the internal field names."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {
            "MMSI": 366998410,
            "LATITUDE": 37.8,
            "LONGITUDE": -122.4,
            "COG": 280,
            "SOG": 12.5,
            "HEADING": 285,
            "NAME": "MV TESTSHIP",
            "CALLSIGN": "WDH7100",
            "DEST": "OAKLAND",
            "TYPE": 70,
            "IMO": 9111111,
        }
        normalized = _normalize_record(raw)

        assert normalized is not None
        expected = {
            "mmsi": 366998410,
            "lat": 37.8,
            "lng": -122.4,
            "sog": 12.5,
            "heading": 285,
            "name": "MV TESTSHIP",
            "destination": "OAKLAND",
            "ais_type_code": 70,
        }
        for field, value in expected.items():
            assert normalized[field] == value

    def test_speed_sentinel_sanitized(self):
        """SOG raw 102.3+ kn means "speed not available" in the AIS spec;
        it is sanitized to 0 so it doesn't look like a 200-knot ship."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {
            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
            "SOG": 102.3, "COG": 0,
        }
        normalized = _normalize_record(raw)
        assert normalized["sog"] == 0.0

    def test_heading_sentinel_falls_back_to_cog(self):
        """511 means "heading not available" in the AIS spec; COG is used."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {
            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
            "HEADING": 511, "COG": 280,
        }
        normalized = _normalize_record(raw)
        assert normalized["heading"] == 280

    def test_missing_mmsi_rejected(self):
        """An absent MMSI or an MMSI of 0 makes the record unusable."""
        from services.fetchers.aishub_fallback import _normalize_record

        without_mmsi = {"LATITUDE": 0.5, "LONGITUDE": 0.5}
        zero_mmsi = {"MMSI": 0, "LATITUDE": 0.5, "LONGITUDE": 0.5}
        for raw in (without_mmsi, zero_mmsi):
            assert _normalize_record(raw) is None

    def test_no_position_rejected(self):
        """Both latitude and longitude are required."""
        from services.fetchers.aishub_fallback import _normalize_record

        incomplete = (
            {"MMSI": 1},
            {"MMSI": 1, "LATITUDE": 0.5},
            {"MMSI": 1, "LONGITUDE": 0.5},
        )
        for raw in incomplete:
            assert _normalize_record(raw) is None

    def test_position_sentinels_rejected(self):
        """AIS spec uses 91/181 as "no position available"."""
        from services.fetchers.aishub_fallback import _normalize_record

        lat_sentinel = {"MMSI": 1, "LATITUDE": 91.0, "LONGITUDE": 0.0}
        lng_sentinel = {"MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 181.0}
        assert _normalize_record(lat_sentinel) is None
        assert _normalize_record(lng_sentinel) is None

    def test_out_of_range_rejected(self):
        """Coordinates beyond the valid lat/lng range are dropped."""
        from services.fetchers.aishub_fallback import _normalize_record

        bad_lat = {"MMSI": 1, "LATITUDE": 95.0, "LONGITUDE": 0.0}
        bad_lng = {"MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 200.0}
        assert _normalize_record(bad_lat) is None
        assert _normalize_record(bad_lng) is None

    def test_destination_at_sign_stripped(self):
        """AIS pads short DESTINATION strings with @ characters per the
        protocol. They are stripped so the UI doesn't render "OAKLAND@@@@@"."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {
            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
            "DEST": "OAKLAND@@@",
        }
        normalized = _normalize_record(raw)
        assert normalized["destination"] == "OAKLAND"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Poll interval clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPollInterval:
    """``aishub_poll_interval_minutes``: default, override and clamping."""

    def test_default_is_twenty_minutes(self, monkeypatch):
        """With the env var unset the interval is 20 minutes."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        monkeypatch.delenv("AISHUB_POLL_INTERVAL_MINUTES", raising=False)
        interval = aishub_poll_interval_minutes()
        assert interval == 20

    def test_env_override_honored(self, monkeypatch):
        """An in-range env value is used as-is."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "45")
        interval = aishub_poll_interval_minutes()
        assert interval == 45

    def test_clamp_lower_bound(self, monkeypatch):
        """A 0 or negative env var would hammer the upstream — clamp."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        for too_small in ("0", "-5"):
            monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", too_small)
            assert aishub_poll_interval_minutes() == 1

    def test_clamp_upper_bound(self, monkeypatch):
        """A 99999 env var would silence the fallback effectively forever."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "99999")
        interval = aishub_poll_interval_minutes()
        assert interval == 360

    def test_malformed_env_defaults(self, monkeypatch):
        """A non-numeric env value falls back to the 20-minute default."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "twenty")
        interval = aishub_poll_interval_minutes()
        assert interval == 20
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end fetch + merge into _vessels store
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFetchAndMerge:
    """End-to-end fetch with a stubbed ``fetch_with_curl``, merged into the
    shared ``ais._vessels`` store."""

    def _force_primary_disconnected(self):
        """Set ais_stream module state so the gate allows the fallback."""
        from services import ais_stream as ais
        with ais._vessels_lock:
            # Far in the past → connected = false; spawn_count > 0 → primary
            # has at least tried so the gate engages.
            ais._last_msg_at = time.time() - 3600
            ais._proxy_spawn_count = 5
            ais._vessels.clear()

    def test_vessels_merged_with_source_tag(self, monkeypatch):
        """Happy path: AISHub returns 2 ships, both land in ``_vessels``
        with ``source: 'aishub'``."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        payload = json.dumps([
            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 2},
            [
                {
                    "MMSI": 111111111,
                    "LATITUDE": 40.0,
                    "LONGITUDE": -73.0,
                    "SOG": 12.0,
                    "COG": 270,
                    "HEADING": 275,
                    "NAME": "SHIP A",
                    "TYPE": 70,
                },
                {
                    "MMSI": 222222222,
                    "LATITUDE": 51.5,
                    "LONGITUDE": -0.1,
                    "SOG": 8.0,
                    "COG": 90,
                    "HEADING": 92,
                    "NAME": "SHIP B",
                    "TYPE": 60,
                },
            ],
        ])

        class FakeResp:
            status_code = 200
            text = payload

        monkeypatch.setattr(
            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FakeResp()
        )

        count = aishub_fallback.fetch_aishub_vessels()

        assert count == 2
        with ais._vessels_lock:
            v1 = ais._vessels.get(111111111)
            v2 = ais._vessels.get(222222222)
        assert v1 is not None
        assert v1["source"] == "aishub"
        assert v1["lat"] == 40.0
        assert v1["name"] == "SHIP A"
        assert v2 is not None
        assert v2["source"] == "aishub"
        assert v2["type"] == "passenger"  # AIS type 60 → passenger

    def test_does_not_overwrite_fresh_live_data(self, monkeypatch):
        """If the WebSocket pushed an update for an MMSI moments ago and the
        AISHub poll completes in that window, we should NOT clobber the
        fresher live data."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        # Pre-seed _vessels with a "very fresh" live record.
        fresh_ts = time.time()
        with ais._vessels_lock:
            ais._vessels[111111111] = {
                "mmsi": 111111111,
                "lat": 12.34,
                "lng": 56.78,
                "source": "aisstream",
                "_updated": fresh_ts,
            }

        # Use an in-range position that differs from the live one. An
        # out-of-range value such as 99.0/99.0 would be dropped during
        # record normalization and never reach the merge step under test.
        payload = json.dumps([
            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 1},
            [
                {
                    "MMSI": 111111111,
                    "LATITUDE": 0.0,  # different from the live 12.34
                    "LONGITUDE": 0.0,
                    "NAME": "STALE",
                    "SOG": 0,
                    "COG": 0,
                    "TYPE": 0,
                },
            ],
        ])

        class FakeResp:
            status_code = 200
            text = payload

        monkeypatch.setattr(
            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FakeResp()
        )

        aishub_fallback.fetch_aishub_vessels()

        with ais._vessels_lock:
            v = ais._vessels.get(111111111)
        # Fail with a clear assertion (not a TypeError) if the record vanished.
        assert v is not None
        # Live data wins — position should still be 12.34 / 56.78.
        assert v["lat"] == 12.34
        assert v["lng"] == 56.78
        assert v["source"] == "aisstream"

    def test_http_failure_returns_zero(self, monkeypatch):
        """A non-200 upstream response results in zero merged vessels."""
        from services.fetchers import aishub_fallback

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        class FailResp:
            status_code = 503
            text = ""

        monkeypatch.setattr(
            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FailResp()
        )

        assert aishub_fallback.fetch_aishub_vessels() == 0
|
||||
@@ -22,11 +22,9 @@ class TestHealthEndpoint:
|
||||
|
||||
|
||||
class TestLiveDataEndpoints:
|
||||
def test_live_data_returns_200_or_304(self, client):
|
||||
def test_live_data_returns_200(self, client):
|
||||
r = client.get("/api/live-data")
|
||||
assert r.status_code in (200, 304)
|
||||
if r.status_code == 200:
|
||||
assert r.headers.get("etag")
|
||||
assert r.status_code == 200
|
||||
|
||||
def test_live_data_fast_returns_200_or_304(self, client):
|
||||
r = client.get("/api/live-data/fast")
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
from starlette.requests import Request
|
||||
|
||||
import auth
|
||||
|
||||
|
||||
async def _empty_receive():
|
||||
return {"type": "http.request", "body": b"", "more_body": False}
|
||||
|
||||
|
||||
def _request(path: str, *, host: str = "example.com/health?x=", client_host: str = "203.0.113.10") -> Request:
    """Build a GET ``Request`` from a hand-written ASGI scope.

    ``path`` becomes both ``path`` and ``raw_path``. ``host`` is sent
    verbatim as the only header (``Host``), and its default deliberately
    contains a path and query fragment. ``client_host`` is the peer
    address, paired with a fixed port.
    """
    host_header = (b"host", host.encode("ascii"))
    scope = {
        "type": "http",
        "method": "GET",
        "scheme": "http",
        "server": ("127.0.0.1", 8000),
        "client": (client_host, 12345),
        "path": path,
        "raw_path": path.encode("ascii"),
        "query_string": b"",
        "headers": [host_header],
    }
    return Request(scope, receive=_empty_receive)
|
||||
|
||||
|
||||
def test_scope_auth_uses_asgi_path_not_host_derived_url_path():
    """Scope resolution must use the ASGI ``path``, even though the default
    ``Host`` header from ``_request`` carries its own path/query text."""
    asgi_path = "/api/mesh/gate/alpha/message"
    request = _request(asgi_path)

    resolved_path = auth._request_scope_path(request)
    assert resolved_path == "/api/mesh/gate/alpha/message"

    required_scope = auth._required_scope_for_request(request)
    assert required_scope == "mesh"
|
||||
|
||||
|
||||
def test_debug_test_request_does_not_trust_host_header(monkeypatch):
    """Even with debug mode forced on, a ``Host`` header starting with
    ``test`` must not make the request count as a debug test request."""
    monkeypatch.setattr(auth, "_debug_mode_enabled", lambda: True)

    spoofed_host = "test/api/public?x="
    request = _request("/api/admin", host=spoofed_host)

    is_debug_test = auth._is_debug_test_request(request)
    assert is_debug_test is False
|
||||
|
||||
|
||||
def test_peer_hmac_identity_requires_explicit_peer_url_header():
    """The peer URL comes from an explicit ``x-peer-url`` header only; a
    URL-looking ``Host`` header yields an empty identity."""
    push_path = "/api/mesh/infonet/push"

    # URL smuggled through Host, no x-peer-url header: nothing is derived.
    host_only = _request(push_path, host="https://peer.example/api/public?x=")
    assert auth._peer_hmac_url_from_request(host_only) == ""

    # Explicit header present: returned without the trailing slash.
    with_header = _request(push_path)
    with_header.scope["headers"].append((b"x-peer-url", b"https://peer.example/"))
    assert auth._peer_hmac_url_from_request(with_header) == "https://peer.example"
|
||||
@@ -1,389 +0,0 @@
|
||||
"""Issues #244, #245, #246 (tg12 external audit): carrier tracker
|
||||
quality + provenance + freshness.
|
||||
|
||||
These tests pin the post-fix contract:
|
||||
|
||||
- **#244**: dated editorial snapshot positions no longer live in the
|
||||
registry. They live in a one-shot seed file that is consumed once
|
||||
on first-ever startup. After that, the runtime cache reflects only
|
||||
what THIS install has actually observed.
|
||||
|
||||
- **#245**: headline-derived positions (centroid of a region keyword)
|
||||
are stamped ``position_confidence = "approximate"`` so the UI can
|
||||
render them with appropriate uncertainty.
|
||||
|
||||
- **#246**: freshness is a *labelling* decision, not an eviction
|
||||
decision. Positions older than the configurable freshness window
|
||||
flip from ``"recent"`` to ``"stale"`` but are NEVER replaced with
|
||||
the registry default — that would teleport the carrier. The user
|
||||
always sees the last position the system actually observed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_tracker(tmp_path, monkeypatch):
    """Isolated carrier_tracker with seed/cache paths redirected to tmp.

    ``SEED_FILE`` and ``CACHE_FILE`` are pointed at paths under ``tmp_path``
    (neither file is created here) and the freshness-window env var is
    removed. Yields the module so tests can call its functions. Module-level
    mutable state is reset both before and after the test so position caches
    and the GDELT fetch timestamp don't leak across cases.
    """
    from services import carrier_tracker

    def _reset_module_state():
        # One helper for setup and teardown keeps the two in step.
        carrier_tracker._carrier_positions.clear()
        carrier_tracker._cached_gdelt_articles.clear()
        carrier_tracker._last_gdelt_fetch_at = 0.0

    seed_path = tmp_path / "data" / "carrier_seed.json"
    cache_path = tmp_path / "carrier_cache.json"
    seed_path.parent.mkdir(parents=True, exist_ok=True)

    monkeypatch.setattr(carrier_tracker, "SEED_FILE", seed_path)
    monkeypatch.setattr(carrier_tracker, "CACHE_FILE", cache_path)
    monkeypatch.delenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", raising=False)

    _reset_module_state()

    yield carrier_tracker

    # Clean up so subsequent tests start fresh.
    _reset_module_state()
|
||||
|
||||
|
||||
def _write_seed(path: Path, hull: str = "CVN-78", **overrides) -> None:
|
||||
payload = {
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/...",
|
||||
"note": "test",
|
||||
},
|
||||
"carriers": {
|
||||
hull: {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed",
|
||||
**overrides,
|
||||
}
|
||||
},
|
||||
}
|
||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #244 — first-run seed bootstrap, never re-seeds after that
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSeedBootstrap:
    """#244: the seed file is consulted only when no cache exists yet."""

    def test_first_ever_startup_bootstraps_from_seed(self, fresh_tracker, tmp_path):
        """No cache on disk: the seed populates positions and a cache file
        is written."""
        tracker = fresh_tracker
        _write_seed(tracker.SEED_FILE)
        # Precondition: no cache exists yet.
        assert not tracker.CACHE_FILE.exists()

        positions = tracker._bootstrap_cache_if_missing()

        # The seed entry made it into the returned positions.
        assert "CVN-78" in positions
        ford = positions["CVN-78"]
        assert ford["lat"] == 18.0
        assert ford["position_confidence"] == "seed"
        # The cache file is now on disk so subsequent runs skip the seed.
        assert tracker.CACHE_FILE.exists()

    def test_subsequent_startup_ignores_seed(self, fresh_tracker, tmp_path):
        """An existing cache wins over a seed file that is also present."""
        tracker = fresh_tracker
        # The cache holds a Persian Gulf position; the seed says Red Sea.
        cached_entry = {
            "lat": 25.0,
            "lng": 55.0,
            "heading": 0,
            "desc": "Persian Gulf — operator-observed",
            "source": "Operator log",
            "source_url": "",
            "position_source_at": "2026-04-15T12:00:00Z",
            "position_confidence": "recent",
        }
        tracker.CACHE_FILE.write_text(json.dumps({"CVN-78": cached_entry}))
        _write_seed(tracker.SEED_FILE)  # seed is present but should NOT be used

        positions = tracker._bootstrap_cache_if_missing()

        ford = positions["CVN-78"]
        assert ford["lat"] == 25.0
        assert ford["desc"] == "Persian Gulf — operator-observed"

    def test_no_seed_no_cache_falls_back_to_homeport(self, fresh_tracker):
        """Neither seed nor cache: every registered hull falls back to its
        homeport coordinates (a carrier never disappears)."""
        tracker = fresh_tracker
        assert not tracker.SEED_FILE.exists()
        assert not tracker.CACHE_FILE.exists()

        positions = tracker._bootstrap_cache_if_missing()
        registry = tracker.CARRIER_REGISTRY

        # Every registered carrier has SOMETHING.
        assert set(positions.keys()) == set(registry.keys())
        # All entries are labelled as homeport defaults at registry coords.
        for hull, entry in positions.items():
            assert entry["position_confidence"] == "homeport_default"
            homeport = registry[hull]
            assert entry["lat"] == homeport["homeport_lat"]
            assert entry["lng"] == homeport["homeport_lng"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #244 — no editorial fallbacks live in the registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegistryShape:
    """#244: the registry carries homeport data, not dated editorial fields."""

    def test_registry_has_no_dated_fallback_fields(self, fresh_tracker):
        """The Mar 9 editorial coordinates are gone from the registry.
        They live only in the seed file."""
        forbidden = {"fallback_lat", "fallback_lng", "fallback_heading", "fallback_desc"}
        registry = fresh_tracker.CARRIER_REGISTRY
        for hull in registry:
            entry = registry[hull]
            offending = forbidden & set(entry)
            assert not offending, f"{hull} still has dated registry fields: {offending}"

    def test_registry_keeps_homeport_for_every_hull(self, fresh_tracker):
        """Each hull keeps homeport coordinates plus ``name`` and ``wiki``."""
        registry = fresh_tracker.CARRIER_REGISTRY
        for hull in registry:
            entry = registry[hull]
            assert "homeport_lat" in entry, f"{hull} missing homeport_lat"
            assert "homeport_lng" in entry, f"{hull} missing homeport_lng"
            assert "name" in entry
            assert "wiki" in entry
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #246 — freshness labelling, NOT eviction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFreshnessLabelling:
    """#246: ageing changes the confidence label, never the stored position."""

    def test_recent_observation_labels_recent(self, fresh_tracker):
        """A 3-day-old observation is labelled ``recent``."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        observed_at = now - timedelta(days=3)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": observed_at.isoformat(),
        }
        label = fresh_tracker._compute_position_confidence(entry, now=now)
        assert label == "recent"

    def test_aged_observation_flips_to_stale(self, fresh_tracker):
        """A 30-day-old observation is labelled ``stale``."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        observed_at = now - timedelta(days=30)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": observed_at.isoformat(),
        }
        label = fresh_tracker._compute_position_confidence(entry, now=now)
        assert label == "stale"

    def test_seed_label_is_preserved_explicitly(self, fresh_tracker):
        """An explicit ``seed`` label survives however old the source is."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 18.0,
            "lng": 39.5,
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        # The source is months old, yet the explicit "seed" label wins so
        # the UI can render the seed-specific badge, not a generic "stale".
        label = fresh_tracker._compute_position_confidence(entry, now=now)
        assert label == "seed"

    def test_homeport_default_label_is_preserved(self, fresh_tracker):
        """An explicit ``homeport_default`` label is kept as-is."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 36.95,
            "lng": -76.32,
            "position_source_at": now.isoformat(),
            "position_confidence": "homeport_default",
        }
        label = fresh_tracker._compute_position_confidence(entry, now=now)
        assert label == "homeport_default"

    def test_freshness_window_is_env_configurable(self, fresh_tracker, monkeypatch):
        """``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` widens the window."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        observed_at = now - timedelta(days=20)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": observed_at.isoformat(),
        }
        compute = fresh_tracker._compute_position_confidence

        # Default window = 14 days → 20-day-old entry is stale.
        assert compute(entry, now=now) == "stale"
        # Stretch to 30 days → same entry is now "recent".
        monkeypatch.setenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "30")
        assert compute(entry, now=now) == "recent"

    def test_aged_cache_entry_keeps_its_position_never_reverts(self, fresh_tracker):
        """Core regression: a year-old cache entry must NOT be replaced by
        the seed or the homeport. The position shown is the last one
        observed; only the freshness label changes."""
        tracker = fresh_tracker
        a_year_ago = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
        old_entry = {
            "lat": 25.0,
            "lng": 55.0,
            "heading": 0,
            "desc": "Persian Gulf",
            "source": "GDELT News API",
            "source_url": "https://news.example/...",
            "position_source_at": a_year_ago,
            "position_confidence": "recent",  # was recent when written
        }
        tracker.CACHE_FILE.write_text(json.dumps({"CVN-78": old_entry}))

        positions = tracker._bootstrap_cache_if_missing()
        enriched = tracker._enrich_for_rendering("CVN-78", positions["CVN-78"])

        # The position is preserved exactly.
        assert enriched["lat"] == 25.0
        assert enriched["lng"] == 55.0
        # But the live label has flipped to stale.
        assert enriched["position_confidence"] == "stale"
        assert enriched["is_fallback"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #245 — approximate confidence for region-centroid positions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApproximateConfidenceForNewsDerivedPositions:
    """#245: headline-derived positions are marked ``approximate``."""

    def test_news_parsing_stamps_approximate_confidence(self, fresh_tracker):
        """A headline naming a carrier and a region yields an update with
        ``approximate`` confidence, dated by the article's ``seendate``."""
        headline = {
            "title": "USS Ford carrier deployed in Mediterranean for joint exercise",
            "url": "https://news.example/ford-mediterranean",
            "seendate": "20260415120000",
        }

        updates = fresh_tracker._parse_carrier_positions_from_news([headline])

        assert "CVN-78" in updates
        ford = updates["CVN-78"]
        assert ford["position_confidence"] == "approximate"
        # The source timestamp is the article's seen date, not now().
        assert ford["position_source_at"].startswith("2026-04-15")

    def test_gdelt_seendate_parser_handles_well_formed_input(self, fresh_tracker):
        """A 14-digit ``YYYYMMDDHHMMSS`` stamp becomes an ISO timestamp."""
        converted = fresh_tracker._gdelt_seendate_to_iso("20260415120000")
        assert converted is not None
        assert converted.startswith("2026-04-15T12:00:00")

    def test_gdelt_seendate_parser_returns_none_on_garbage(self, fresh_tracker):
        """Empty, non-date and truncated inputs all yield ``None``."""
        to_iso = fresh_tracker._gdelt_seendate_to_iso
        for garbage in ("", "not-a-date", "2026"):
            assert to_iso(garbage) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full enrichment → public API shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEnrichForRendering:
    """Shape of the dict ``_enrich_for_rendering`` returns for the public API."""

    def test_seed_entry_produces_expected_public_fields(self, fresh_tracker):
        """A seed entry keeps its UI fields and is flagged as fallback data."""
        seeded_at = "2026-03-09T00:00:00Z"
        seed_entry = dict(
            lat=18.0,
            lng=39.5,
            heading=0,
            desc="Red Sea (USNI Mar 9)",
            source="USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
            source_url="https://news.usni.org/category/fleet-tracker",
            position_source_at=seeded_at,
            position_confidence="seed",
        )
        result = fresh_tracker._enrich_for_rendering("CVN-78", seed_entry)

        # Fields the existing UI already reads.
        assert result["lat"] == 18.0
        assert result["lng"] == 39.5
        assert result["source"].startswith("USNI")
        assert result["last_osint_update"] == seeded_at

        # Fields added for the audit.
        assert result["position_confidence"] == "seed"
        assert result["position_source_at"] == seeded_at
        assert result["is_fallback"] is True

    def test_recent_observation_is_not_fallback(self, fresh_tracker):
        """A two-day-old approximate fix stays approximate and is not fallback."""
        now = datetime.now(timezone.utc)
        observed_at = now - timedelta(days=2)
        recent_entry = dict(
            lat=25.0,
            lng=55.0,
            heading=0,
            desc="Persian Gulf",
            source="GDELT News API",
            source_url="https://news.example/...",
            position_source_at=observed_at.isoformat(),
            position_confidence="approximate",
        )
        result = fresh_tracker._enrich_for_rendering("CVN-78", recent_entry, now=now)
        assert result["position_confidence"] == "approximate"
        # "approximate" is an honest precision label on a recent headline; the
        # UI still treats it as live data. Only the explicit fallback
        # categories (seed / stale / homeport_default) set is_fallback.
        assert result["is_fallback"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression: existing frontend fields are preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPublicResponseShapeBackwardCompat:
    """Regression guard for the carrier payload consumed by the frontend.

    The ShipPopup component reads `estimated`, `source`, `source_url` and
    `last_osint_update`. The audit fields are purely additive, and the legacy
    fields keep their meaning, so the UI keeps working without changes."""

    def test_get_carrier_positions_preserves_existing_keys(self, fresh_tracker):
        """One seeded carrier exposes every legacy key plus the new audit keys."""
        _write_seed(fresh_tracker.SEED_FILE)
        fresh_tracker._bootstrap_cache_if_missing()

        seeded_position = {
            "lat": 18.0,
            "lng": 39.5,
            "heading": 0,
            "desc": "Red Sea (seed)",
            "source": "Seed",
            "source_url": "",
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        with fresh_tracker._positions_lock:
            fresh_tracker._carrier_positions.update({"CVN-78": seeded_position})

        carriers = fresh_tracker.get_carrier_positions()
        assert len(carriers) == 1
        c = carriers[0]

        # Keys the frontend has always relied on.
        legacy_keys = (
            "name",
            "type",
            "lat",
            "lng",
            "country",
            "desc",
            "wiki",
            "estimated",
            "source",
            "source_url",
            "last_osint_update",
        )
        for key in legacy_keys:
            assert key in c, f"missing legacy field {key!r}"

        # Keys introduced by the audit.
        audit_keys = ("position_confidence", "position_source_at", "is_fallback")
        for key in audit_keys:
            assert key in c, f"missing audit-required field {key!r}"

        assert c["type"] == "carrier"
        assert c["estimated"] is True
|
||||
@@ -89,34 +89,6 @@ import pytest
|
||||
# relay through the backend. 60/minute rate limit is not enough on
|
||||
# a streaming endpoint.
|
||||
("get", "/api/radio/openmhz/audio?url=https%3A%2F%2Fmedia.openmhz.com%2Faudio%2Fabc.mp3", None),
|
||||
# Issue #299 (tg12): /api/sentinel/token relays Copernicus CDSE
|
||||
# OAuth token requests for caller-supplied client_id/secret.
|
||||
# Anonymous access turns the backend into a free OAuth-mint relay.
|
||||
(
|
||||
"post",
|
||||
"/api/sentinel/token",
|
||||
None, # body sent via raw form-encoded data — None lets the
|
||||
# remote_client wrapper send an empty body; the auth
|
||||
# check fires before the form parser runs.
|
||||
),
|
||||
# Issue #300 (tg12): /api/sentinel/tile relays Sentinel Hub Process
|
||||
# API tile fetches. Anonymous access is a bandwidth/quota relay
|
||||
# for any caller's Copernicus account.
|
||||
(
|
||||
"post",
|
||||
"/api/sentinel/tile",
|
||||
{
|
||||
"client_id": "ignored",
|
||||
"client_secret": "ignored",
|
||||
"preset": "TRUE-COLOR",
|
||||
"date": "2026-01-01",
|
||||
"z": 6, "x": 30, "y": 20,
|
||||
},
|
||||
),
|
||||
# Issue #301 (tg12): /api/sentinel2/search hits Planetary Computer
|
||||
# STAC + Esri fallback. Anonymous access is a free external-search
|
||||
# relay even though no caller credentials are involved.
|
||||
("get", "/api/sentinel2/search?lat=0&lng=0", None),
|
||||
],
|
||||
)
|
||||
def test_remote_control_surface_rejects_without_local_operator_or_admin(
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
"""DeepState GitHub mirror pinning (#362)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import services.geopolitics as gp
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_defaults(monkeypatch):
    """With neither override set, the mirror resolves to the default repo on ``main``."""
    for var in ("DEEPSTATE_MIRROR_COMMIT", "DEEPSTATE_MIRROR_REPO"):
        monkeypatch.delenv(var, raising=False)
    resolved = gp._deepstate_mirror_ref()
    repo, ref = resolved
    assert repo == "cyterat/deepstate-map-data"
    assert ref == "main"
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_pinned_commit(monkeypatch):
    """A configured commit SHA is returned as the ref in place of ``main``."""
    overrides = {
        "DEEPSTATE_MIRROR_COMMIT": "abc123def456",
        "DEEPSTATE_MIRROR_REPO": "cyterat/deepstate-map-data",
    }
    for var, value in overrides.items():
        monkeypatch.setenv(var, value)
    repo, ref = gp._deepstate_mirror_ref()
    assert repo == "cyterat/deepstate-map-data"
    assert ref == "abc123def456"
|
||||
|
||||
|
||||
def test_fetch_ukraine_frontlines_uses_pinned_tree_url(monkeypatch):
    """Both GitHub requests made by ``fetch_ukraine_frontlines`` use the pinned commit.

    The first mocked ``requests.get`` call is the git-tree listing and the
    second is the raw GeoJSON download; each URL must embed the commit from
    ``DEEPSTATE_MIRROR_COMMIT``.
    """
    monkeypatch.setenv("DEEPSTATE_MIRROR_COMMIT", "deadbeef")
    # The raw-URL assertion below hard-codes the default repo, so make sure an
    # ambient DEEPSTATE_MIRROR_REPO cannot redirect the fetch elsewhere.
    monkeypatch.delenv("DEEPSTATE_MIRROR_REPO", raising=False)
    gp.frontline_cache.clear()

    tree_resp = MagicMock(status_code=200)
    tree_resp.json.return_value = {
        "tree": [{"path": "data/deepstatemap_data_20260101.geojson"}]
    }
    geo_resp = MagicMock(status_code=200)
    geo_resp.json.return_value = {"features": []}

    try:
        with patch(
            "services.geopolitics.requests.get",
            side_effect=[tree_resp, geo_resp],
        ) as get:
            result = gp.fetch_ukraine_frontlines()

        assert result == {"features": []}
        tree_call = get.call_args_list[0][0][0]
        raw_call = get.call_args_list[1][0][0]
        assert "/git/trees/deadbeef" in tree_call
        assert "raw.githubusercontent.com/cyterat/deepstate-map-data/deadbeef/" in raw_call
    finally:
        # Always drop the mocked payload, even when an assertion above fails,
        # so it cannot be served from the cache to a later test.
        gp.frontline_cache.clear()
|
||||
@@ -1,270 +0,0 @@
|
||||
"""Per-(sender, recipient) anti-spam cap on the DM relay.
|
||||
|
||||
The user-stated rule: a single sender can have at most N UNACKED messages
|
||||
parked in a single recipient's mailbox at any one time (N=2 by default).
|
||||
Once the recipient pulls a message, the sender's quota for that pair
|
||||
frees up.
|
||||
|
||||
Network rule, not local rule
|
||||
-----------------------------
|
||||
The cap is enforced TWICE:
|
||||
|
||||
1. ``DMRelay.deposit(...)`` -- local check on the sender's own node.
|
||||
Refuses to spool the (N+1)th message before it can be replicated.
|
||||
|
||||
2. ``DMRelay.accept_replica(...)`` -- replication-acceptance check on
|
||||
every receiving peer. Refuses to accept an inbound replica that
|
||||
would put the local mailbox over the cap, even if the originating
|
||||
peer claims it had cap room.
|
||||
|
||||
The double enforcement matters because cap (1) is client-side -- a
|
||||
hostile relay could patch it out and continue to spool extras locally.
|
||||
Cap (2) means those extras can't propagate: every honest peer rejects
|
||||
them on the way in. A recipient who polls from honest peers therefore
|
||||
never sees more than N pending from any one sender, regardless of how
|
||||
many spam attempts the sender's own relay accepted.
|
||||
|
||||
These tests pin both halves of the rule.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def relay():
    """Provide a brand-new ``DMRelay`` with empty mailboxes, blocks and stats."""
    from services.mesh.mesh_dm_relay import DMRelay

    fresh = DMRelay()
    # Empty both stores in place so each test starts from a known-empty relay.
    for store in (fresh._mailboxes, fresh._blocks):
        store.clear()
    fresh._stats = {"messages_in_memory": 0}
    return fresh
|
||||
|
||||
|
||||
def _deposit(
    relay,
    *,
    sender: str = "alice",
    recipient_token: str = "bob_mailbox_token_abc",
    ciphertext: str = "ciphertext-blob",
    msg_id: str = "",
):
    """Deposit one ``shared``-class message addressed to ``bob`` and return
    whatever ``relay.deposit`` returns.

    ``sender`` is passed as both ``sender_id`` and ``raw_sender_id``.
    """
    deposit_kwargs = {
        "sender_id": sender,
        "raw_sender_id": sender,
        "recipient_id": "bob",
        "ciphertext": ciphertext,
        "msg_id": msg_id,
        "delivery_class": "shared",
        "recipient_token": recipient_token,
    }
    return relay.deposit(**deposit_kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Local cap on ``deposit``
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDepositCap:
    """Sender-side half of the rule: ``deposit`` refuses a message once the
    sender already has the maximum number pending in that mailbox."""

    def test_two_deposits_from_same_sender_succeed(self, relay):
        """Two messages fit under the default cap and get distinct ids."""
        first = _deposit(relay)
        second = _deposit(relay)
        assert first["ok"] is True
        assert second["ok"] is True
        assert first["msg_id"] != second["msg_id"]

    def test_third_deposit_from_same_sender_rejected(self, relay):
        """The third pending message is refused with a 'read your messages' style hint."""
        for _ in range(2):
            _deposit(relay)
        rejected = _deposit(relay)
        assert rejected["ok"] is False
        detail = rejected["detail"].lower()
        assert any(hint in detail for hint in ("unread", "read your messages"))

    def test_different_senders_have_independent_quotas(self, relay):
        """Alice filling her quota leaves Carol's untouched; Carol's own third fails."""
        for who in ("alice", "carol"):
            for _ in range(2):
                assert _deposit(relay, sender=who)["ok"] is True
        assert _deposit(relay, sender="carol")["ok"] is False

    def test_different_recipients_have_independent_quotas(self, relay):
        """The cap is per mailbox: the same sender may fill two mailboxes."""
        for token in ("bob_token", "dave_token"):
            for _ in range(2):
                outcome = _deposit(relay, sender="alice", recipient_token=token)
                assert outcome["ok"] is True

    def test_ack_frees_quota(self, relay):
        """Removing a delivered message from the mailbox re-opens a slot."""
        first = _deposit(relay)
        _deposit(relay)
        assert _deposit(relay)["ok"] is False

        # Emulate an ack by dropping the first message straight from the
        # relay's internal mailbox and recomputing the in-memory counter.
        acked_id = first["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        remaining = [
            msg for msg in relay._mailboxes[mailbox_key] if msg.msg_id != acked_id
        ]
        relay._mailboxes[mailbox_key] = remaining
        relay._stats["messages_in_memory"] = sum(
            map(len, relay._mailboxes.values())
        )

        retry = _deposit(relay)
        assert retry["ok"] is True, f"expected quota free after ack, got: {retry}"

    def test_cap_is_env_tunable(self, relay, monkeypatch):
        """With the limit hook patched to return 1, the second deposit is refused."""
        import services.mesh.mesh_dm_relay as mdr

        def _limit_of_one(self):
            return 1

        monkeypatch.setattr(mdr.DMRelay, "_per_sender_pending_limit", _limit_of_one)

        assert _deposit(relay)["ok"] is True
        assert _deposit(relay)["ok"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replication-acceptance cap (the half that makes this a network rule)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAcceptReplicaCap:
    """Receiver-side half of the rule: ``accept_replica`` re-checks the cap on
    every inbound replica."""

    def _envelope(self, *, msg_id: str, sender_block_ref: str, mailbox_key: str):
        """Build a replication envelope dict from ``alice`` with the given ids."""
        return dict(
            msg_id=msg_id,
            mailbox_key=mailbox_key,
            sender_block_ref=sender_block_ref,
            sender_id="alice",
            sender_seal="",
            ciphertext=f"ciphertext-{msg_id}",
            timestamp=time.time(),
            delivery_class="shared",
            relay_salt="",
            payload_format="dm1",
            session_welcome="",
        )

    def test_replica_accepted_under_cap(self, relay):
        """A first replica into an empty mailbox is accepted."""
        envelope = self._envelope(
            msg_id="dm_replica_1",
            sender_block_ref="alice_block_ref",
            mailbox_key="mailbox_xyz",
        )
        outcome = relay.accept_replica(envelope=envelope)
        assert outcome["ok"] is True

    def test_replica_idempotent_on_duplicate_msg_id(self, relay):
        """Replaying the same envelope reports a duplicate and stores it once."""
        mailbox_key = "mailbox_xyz"
        envelope = self._envelope(
            msg_id="dm_dup_1",
            sender_block_ref="alice_block_ref",
            mailbox_key=mailbox_key,
        )
        first = relay.accept_replica(envelope=envelope)
        replay = relay.accept_replica(envelope=envelope)
        assert first["ok"] is True
        assert replay["ok"] is True
        assert replay.get("duplicate") is True
        assert len(relay._mailboxes[mailbox_key]) == 1

    def test_replica_rejected_when_local_count_already_at_cap(self, relay):
        """A third replica from the same sender is refused and not stored."""
        mailbox_key = "mailbox_xyz"
        for n in (1, 2):
            seeded = self._envelope(
                msg_id=f"dm_seeded_{n}",
                sender_block_ref="alice_block_ref",
                mailbox_key=mailbox_key,
            )
            relay.accept_replica(envelope=seeded)

        over_cap = self._envelope(
            msg_id="dm_overcap_3",
            sender_block_ref="alice_block_ref",
            mailbox_key=mailbox_key,
        )
        verdict = relay.accept_replica(envelope=over_cap)
        assert verdict["ok"] is False
        assert verdict.get("cap_violation") is True
        assert verdict.get("pending") == 2
        assert verdict.get("limit") == 2
        assert len(relay._mailboxes[mailbox_key]) == 2

    def test_replica_from_different_sender_passes_when_one_is_at_cap(self, relay):
        """One sender being at the cap does not block another sender's replica."""
        mailbox_key = "mailbox_xyz"
        for n in (1, 2):
            seeded = self._envelope(
                msg_id=f"dm_alice_{n}",
                sender_block_ref="alice_block_ref",
                mailbox_key=mailbox_key,
            )
            relay.accept_replica(envelope=seeded)

        alice_extra = self._envelope(
            msg_id="dm_alice_3",
            sender_block_ref="alice_block_ref",
            mailbox_key=mailbox_key,
        )
        assert relay.accept_replica(envelope=alice_extra)["ok"] is False

        carol_first = self._envelope(
            msg_id="dm_carol_1",
            sender_block_ref="carol_block_ref",
            mailbox_key=mailbox_key,
        )
        assert relay.accept_replica(envelope=carol_first)["ok"] is True

    def test_replica_rejects_malformed_envelopes(self, relay):
        """Envelopes missing required fields, or not dicts at all, are refused."""
        malformed = (
            {},
            {"msg_id": "x"},
            {"msg_id": "x", "mailbox_key": "y"},
            "not an object at all",
        )
        for bad in malformed:
            outcome = relay.accept_replica(envelope=bad)
            assert outcome["ok"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ``envelope_for_replication`` -- helper for the outbound replication path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEnvelopeForReplication:
    """``envelope_for_replication`` exports a stored message in the shape
    ``accept_replica`` consumes."""

    def test_returns_envelope_for_stored_message(self, relay):
        """A deposited message can be exported with all required fields set."""
        deposited = _deposit(relay, ciphertext="hello-ciphertext")
        msg_id = deposited["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")

        env = relay.envelope_for_replication(mailbox_key=mailbox_key, msg_id=msg_id)
        assert env is not None
        expected = {
            "msg_id": msg_id,
            "mailbox_key": mailbox_key,
            "ciphertext": "hello-ciphertext",
            "delivery_class": "shared",
        }
        for field, value in expected.items():
            assert env[field] == value
        required = ("msg_id", "mailbox_key", "sender_block_ref", "ciphertext")
        for k in required:
            assert env.get(k), f"envelope missing required field {k!r}"

    def test_returns_none_for_unknown_message(self, relay):
        """Looking up ids that were never stored yields ``None``."""
        missing = relay.envelope_for_replication(
            mailbox_key="never_existed",
            msg_id="never_existed",
        )
        assert missing is None

    def test_envelope_round_trips_through_accept_replica(self, relay):
        """An envelope exported by one relay is stored intact by a second relay."""
        from services.mesh.mesh_dm_relay import DMRelay

        # Second, independent relay standing in for the receiving peer.
        receiver_relay = DMRelay()
        receiver_relay._mailboxes.clear()
        receiver_relay._stats = {"messages_in_memory": 0}

        msg_id = _deposit(relay)["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        env = relay.envelope_for_replication(mailbox_key=mailbox_key, msg_id=msg_id)
        assert env is not None

        outcome = receiver_relay.accept_replica(envelope=env)
        assert outcome["ok"] is True
        stored = receiver_relay._mailboxes.get(mailbox_key, [])
        assert len(stored) == 1
        only = stored[0]
        assert only.msg_id == msg_id
        assert only.ciphertext == "ciphertext-blob"
|
||||
@@ -1,150 +0,0 @@
|
||||
"""POST /api/mesh/dm/replicate-envelope — receiving side of cross-node DM
|
||||
mailbox replication.
|
||||
|
||||
This is the endpoint that peer relays call when they want to hand off an
|
||||
encrypted DM envelope to us (so the recipient can log into our node and
|
||||
find their messages). It re-enforces the per-(sender, recipient) anti-spam
|
||||
cap so hostile sender relays can't widen the cap by skipping the local
|
||||
check on their own deposit path.
|
||||
|
||||
The endpoint:
|
||||
|
||||
* authenticates the caller via the existing per-peer HMAC pattern
|
||||
(same one /api/mesh/infonet/peer-push and /api/mesh/gate/peer-push
|
||||
use, introduced in #256 — ``X-Peer-Url`` + ``X-Peer-HMAC`` headers
|
||||
keyed off ``resolve_peer_key_for_url``)
|
||||
* rejects bodies > 64 KB (DM envelope size is bounded by
|
||||
``MESH_DM_MAX_MSG_BYTES`` — 64KB ceiling has generous headroom)
|
||||
* rejects requests without a valid peer HMAC with 403
|
||||
* passes the envelope to ``DMRelay.accept_replica`` which enforces
|
||||
the cap
|
||||
|
||||
This file pins the endpoint contract. The cap enforcement itself is
|
||||
tested in ``test_dm_relay_per_sender_cap.py`` against the relay's
|
||||
``accept_replica`` method directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
|
||||
@pytest.fixture
def remote_client():
    """Yield a synchronous wrapper around an ASGI client whose peer address is
    1.2.3.4, an address that is never on the local-operator allowlist.

    Used to show the endpoint is not reachable by arbitrary remote callers
    that lack a peer HMAC.
    """
    from main import app

    class _RemoteClient:
        """Runs async ``httpx`` POSTs to completion on a private event loop."""

        def __init__(self):
            self._loop = asyncio.new_event_loop()
            self._transport = ASGITransport(app=app, client=("1.2.3.4", 12345))
            self._base = "http://1.2.3.4:8000"

        async def _post_async(self, url, **kw):
            # A fresh AsyncClient is opened for each request.
            async with AsyncClient(transport=self._transport, base_url=self._base) as ac:
                return await ac.post(url, **kw)

        def post(self, url, **kw):
            return self._loop.run_until_complete(self._post_async(url, **kw))

        def close(self):
            self._loop.close()

    client = _RemoteClient()
    yield client
    client.close()
|
||||
|
||||
|
||||
class TestReplicateEndpointAuth:
    """Authentication and size gates on POST /api/mesh/dm/replicate-envelope."""

    def test_rejects_request_without_peer_hmac(self, remote_client):
        """A peer push that does NOT carry X-Peer-Url + X-Peer-HMAC
        must be rejected with 403 before the envelope is ever passed
        to the relay. Same gate the existing infonet/gate peer-push
        endpoints enforce."""
        payload = {
            "envelope": {
                "msg_id": "dm_unauth_1",
                "mailbox_key": "mb",
                "sender_block_ref": "sender",
                "ciphertext": "x",
            },
        }
        r = remote_client.post(
            "/api/mesh/dm/replicate-envelope",
            json=payload,
        )
        assert r.status_code == 403
        # Case-insensitive match; this also covers the exact "peer HMAC" spelling.
        assert "peer hmac" in r.text.lower()

    def test_rejects_wrong_peer_hmac(self, remote_client, monkeypatch):
        """A request with a peer HMAC header keyed off the WRONG secret
        is rejected. Confirms the HMAC is actually verified — a tampered
        body or a key-substitution attack doesn't sneak through."""
        from services.config import get_settings

        # Plant a known peer secret. The request will sign with a
        # DIFFERENT key, so verification must fail.
        monkeypatch.setenv("MESH_PEER_PUSH_SECRET", "real-secret-32-chars-min-padding-padding")
        get_settings.cache_clear()
        try:
            body = json.dumps({
                "envelope": {
                    "msg_id": "dm_wronghmac",
                    "mailbox_key": "mb",
                    "sender_block_ref": "sender",
                    "ciphertext": "x",
                },
            }).encode("utf-8")
            wrong_hmac = hmac.new(b"wrong-key", body, hashlib.sha256).hexdigest()
            r = remote_client.post(
                "/api/mesh/dm/replicate-envelope",
                content=body,
                headers={
                    "Content-Type": "application/json",
                    "X-Peer-Url": "http://example-peer.onion:8000",
                    "X-Peer-HMAC": wrong_hmac,
                },
            )
            assert r.status_code == 403
        finally:
            # Settings cached while the planted secret was in the environment
            # would otherwise outlive monkeypatch's env restore and leak into
            # later tests; clear them so the next reader rebuilds from the
            # restored environment.
            get_settings.cache_clear()

    def test_rejects_oversize_body(self, remote_client):
        """64 KB ceiling — anything bigger doesn't even get parsed.
        Defends against memory amplification via giant ciphertexts."""
        # 100 KB body is well over the 64 KB cap.
        big = b"{" + b"x" * 100_000 + b"}"
        r = remote_client.post(
            "/api/mesh/dm/replicate-envelope",
            content=big,
            headers={
                "Content-Type": "application/json",
                "Content-Length": str(len(big)),
            },
        )
        assert r.status_code in (400, 413), (
            f"oversize body should be rejected with 400/413, got {r.status_code}"
        )
|
||||
|
||||
|
||||
class TestReplicateEndpointRegistered:
    """Wiring check: the replication route exists on the application object."""

    def test_route_present_in_app(self):
        """Static check that the route is registered, guarding against a
        refactor that drops the router include or removes the endpoint."""
        from main import app

        # Routes without a ``methods`` attribute contribute nothing.
        registered = {
            (method, getattr(route, "path", None))
            for route in app.routes
            for method in (getattr(route, "methods", set()) or set())
        }

        assert ("POST", "/api/mesh/dm/replicate-envelope") in registered, (
            "POST /api/mesh/dm/replicate-envelope is not registered on the app"
        )
|
||||
@@ -1,196 +0,0 @@
|
||||
"""Issue #250 (tg12): Docker bridge local-operator trust must be bound to
|
||||
the frontend container's hostname, not the entire 172.16.0.0/12 range.
|
||||
|
||||
Previous behavior trusted ANY private-RFC1918 source IP on the bridge
|
||||
when ``SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR=1``. On a shared
|
||||
Docker host this granted local-operator privileges to any other
|
||||
container that could route to the backend's bridge — far broader than
|
||||
intended.
|
||||
|
||||
The fix narrows trust to source IPs that forward-resolve from one of the
|
||||
configured frontend container hostnames (default: the compose service
|
||||
name ``frontend`` plus the explicit ``container_name``
|
||||
``shadowbroker-frontend``). Operators with renamed containers can list
|
||||
the new names in ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS``.
|
||||
|
||||
These tests exercise the resolution helpers directly so that we don't
|
||||
need a live Docker daemon to validate the contract.
|
||||
"""
|
||||
import socket
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _trusted_bridge_frontend_hostnames — env parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTrustedHostnameParsing:
    """Parsing of ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS`` by
    ``auth._trusted_bridge_frontend_hostnames``."""

    def _fn(self):
        """Import lazily and return the function under test."""
        from auth import _trusted_bridge_frontend_hostnames
        return _trusted_bridge_frontend_hostnames

    def test_default_covers_compose_service_and_container_name(self):
        """With the variable unset, both bundled frontend hostnames are returned."""
        import os

        # patch.dict snapshots os.environ and restores it on exit, so the
        # pop below cannot leak out of this test.
        with patch.dict("os.environ", {}, clear=False):
            os.environ.pop("SHADOWBROKER_TRUSTED_FRONTEND_HOSTS", None)
            assert self._fn()() == ["frontend", "shadowbroker-frontend"]

    def test_custom_list_via_env(self):
        """A comma-separated value replaces the defaults, in order."""
        with patch.dict(
            "os.environ",
            {"SHADOWBROKER_TRUSTED_FRONTEND_HOSTS": "my-ui,alt-frontend"},
        ):
            assert self._fn()() == ["my-ui", "alt-frontend"]

    def test_whitespace_trimmed(self):
        """Whitespace around each entry is stripped."""
        with patch.dict(
            "os.environ",
            {"SHADOWBROKER_TRUSTED_FRONTEND_HOSTS": "  my-ui , alt-frontend  "},
        ):
            assert self._fn()() == ["my-ui", "alt-frontend"]

    def test_empty_env_falls_back_to_default(self):
        """Pin what an explicitly empty variable parses to.

        Despite this test's name, the pinned contract is that an empty string
        does NOT fall back to the bundled defaults: it parses to ``[]``. The
        original note here says the caller fails closed from an empty list;
        that caller behavior is not exercised by this test.
        """
        with patch.dict(
            "os.environ",
            {"SHADOWBROKER_TRUSTED_FRONTEND_HOSTS": ""},
        ):
            assert self._fn()() == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resolve_trusted_bridge_ips — DNS resolution with cache + fail-closed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResolveTrustedBridgeIps:
    """DNS resolution, caching and fail-closed behavior of
    ``auth._resolve_trusted_bridge_ips``."""

    def setup_method(self):
        """Empty the module-level trust cache so tests cannot affect each other."""
        from auth import _DOCKER_BRIDGE_TRUST_CACHE as cache
        cache["ips"] = frozenset()
        cache["expires"] = 0.0

    def test_resolves_configured_hostnames(self):
        """IPs from every resolvable hostname are merged into one set."""
        from auth import _resolve_trusted_bridge_ips

        known_hosts = {
            "frontend": ("frontend", [], ["172.18.0.3"]),
            "shadowbroker-frontend": ("shadowbroker-frontend", [], ["172.18.0.3", "172.18.0.4"]),
        }

        def fake_gethostbyname_ex(host):
            if host in known_hosts:
                return known_hosts[host]
            raise socket.gaierror("no such host")

        with patch("socket.gethostbyname_ex", side_effect=fake_gethostbyname_ex):
            resolved = _resolve_trusted_bridge_ips()
        assert resolved == frozenset({"172.18.0.3", "172.18.0.4"})

    def test_fail_closed_when_dns_returns_nothing(self):
        """When every lookup fails, the trusted set is empty."""
        from auth import _resolve_trusted_bridge_ips

        # An exception instance as side_effect is raised on every call.
        with patch("socket.gethostbyname_ex", side_effect=socket.gaierror("no resolver")):
            resolved = _resolve_trusted_bridge_ips()
        assert resolved == frozenset()

    def test_partial_resolution_is_kept(self):
        """If one hostname resolves and another fails, we keep the
        successful one rather than discarding the whole set."""
        from auth import _resolve_trusted_bridge_ips

        def only_frontend_resolves(host):
            if host != "frontend":
                raise socket.gaierror("missing")
            return ("frontend", [], ["172.18.0.3"])

        with patch("socket.gethostbyname_ex", side_effect=only_frontend_resolves):
            resolved = _resolve_trusted_bridge_ips()
        assert resolved == frozenset({"172.18.0.3"})

    def test_cache_short_circuits_repeated_dns_calls(self):
        """After the first resolution, further calls do not touch DNS."""
        from auth import _resolve_trusted_bridge_ips

        canned = ("frontend", [], ["172.18.0.3"])
        with patch("socket.gethostbyname_ex", return_value=canned) as dns_stub:
            _resolve_trusted_bridge_ips()
            lookups_after_first = dns_stub.call_count
            _resolve_trusted_bridge_ips()
            _resolve_trusted_bridge_ips()
            # The second and third calls are served from the cache.
            assert dns_stub.call_count == lookups_after_first

    def test_cache_expires(self):
        """Once the expiry is forced into the past, DNS is consulted again."""
        from auth import _resolve_trusted_bridge_ips, _DOCKER_BRIDGE_TRUST_CACHE

        old_answer = ("frontend", [], ["172.18.0.3"])
        new_answer = ("frontend", [], ["172.18.0.9"])

        with patch("socket.gethostbyname_ex", return_value=old_answer):
            _resolve_trusted_bridge_ips()
        # Force expiry.
        _DOCKER_BRIDGE_TRUST_CACHE["expires"] = 0.0
        with patch("socket.gethostbyname_ex", return_value=new_answer) as stub:
            refreshed = _resolve_trusted_bridge_ips()
            assert stub.called
            assert "172.18.0.9" in refreshed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_docker_bridge_host — composite of the helpers above
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestIsDockerBridgeHost:
    """``auth._is_docker_bridge_host`` trusts only IPs in the resolved frontend set."""

    def setup_method(self):
        """Empty the module-level trust cache before every test."""
        from auth import _DOCKER_BRIDGE_TRUST_CACHE as cache
        cache["ips"] = frozenset()
        cache["expires"] = 0.0

    def test_trusts_resolved_frontend_ip(self):
        """An IP present in the resolved set is trusted."""
        from auth import _is_docker_bridge_host

        trusted = frozenset({"172.18.0.3"})
        with patch("auth._resolve_trusted_bridge_ips", return_value=trusted):
            assert _is_docker_bridge_host("172.18.0.3") is True

    def test_rejects_arbitrary_bridge_ip(self):
        """A rogue container on the same bridge but at a different IP
        must NOT be trusted, even though it falls in 172.16.0.0/12."""
        from auth import _is_docker_bridge_host

        trusted = frozenset({"172.18.0.3"})
        with patch("auth._resolve_trusted_bridge_ips", return_value=trusted):
            assert _is_docker_bridge_host("172.18.0.99") is False

    def test_rejects_public_ip_without_dns_work(self):
        """Public IPs skip DNS resolution entirely (perf + safety)."""
        from auth import _is_docker_bridge_host

        with patch("auth._resolve_trusted_bridge_ips") as resolver:
            assert _is_docker_bridge_host("8.8.8.8") is False
            resolver.assert_not_called()

    def test_rejects_non_ip_input(self):
        """Empty strings and hostnames are not IPs and are never trusted."""
        from auth import _is_docker_bridge_host

        for candidate in ("", "not-an-ip", "frontend"):
            assert _is_docker_bridge_host(candidate) is False

    def test_fails_closed_when_dns_returns_empty(self):
        """If Docker DNS can't resolve any frontend hostname, the bridge
        is not trusted — even for IPs that would have been trusted under
        the old 172.16.0.0/12 blanket policy."""
        from auth import _is_docker_bridge_host

        nothing_resolved = frozenset()
        with patch("auth._resolve_trusted_bridge_ips", return_value=nothing_resolved):
            assert _is_docker_bridge_host("172.18.0.3") is False
|
||||
@@ -1,258 +0,0 @@
|
||||
"""Cumulative fuel/CO2 tracking via per-aircraft observation timestamps.
|
||||
|
||||
Background
|
||||
----------
|
||||
Users want the running total of fuel burned per aircraft — not just the
|
||||
rate. We track first-seen-at per icao24 and multiply elapsed observation
|
||||
time by the model-based rate. This module's job is exclusively the
|
||||
timestamp bookkeeping; multiplication happens in the flights/military
|
||||
fetchers.
|
||||
|
||||
These tests pin:
|
||||
|
||||
* First sighting returns 0 (no airtime yet).
|
||||
* Repeated sightings within ``REOPEN_GAP_S`` accumulate elapsed time.
|
||||
* Gap longer than ``REOPEN_GAP_S`` resets the session (plane landed
|
||||
and took off again — different flight).
|
||||
* ``MAX_SESSION_SECONDS`` clamp protects against clock skew bugs.
|
||||
* ``prune()`` drops stale entries.
|
||||
* ``get_session_seconds`` reads without bumping last_seen.
|
||||
* Empty / None icao input is a defensive no-op.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_observations():
    """Call the tracker's ``_reset_for_tests()`` before and after every test."""
    import services.fetchers.flight_observations as tracker

    tracker._reset_for_tests()
    yield
    tracker._reset_for_tests()
|
||||
|
||||
|
||||
class TestRecordObservation:
    """Pins the elapsed-seconds value ``record_observation`` returns per call."""

    def test_first_sighting_returns_zero(self):
        """The first call for a hex reports zero elapsed seconds."""
        from services.fetchers.flight_observations import record_observation

        elapsed = record_observation("a12345", now=1000.0)
        assert elapsed == 0

    def test_repeated_sightings_accumulate(self):
        """Sightings spaced inside ``REOPEN_GAP_S`` (15 min) keep accumulating.

        Every call below is at most five minutes after the previous one, so
        the session is never reopened and the returned value is always
        ``now - 1000``.
        """
        from services.fetchers.flight_observations import record_observation

        record_observation("a12345", now=1000.0)
        # One minute after the first sighting.
        assert record_observation("a12345", now=1060.0) == 60
        # Eleven refreshes spaced 300 s apart: 1360, 1660, ..., 4360.
        for stamp in range(1360, 4361, 300):
            assert record_observation("a12345", now=float(stamp)) == stamp - 1000
        # Exactly one hour after the first sighting, 240 s after the last one.
        assert record_observation("a12345", now=4600.0) == 3600

    def test_gap_longer_than_reopen_resets_session(self):
        """A silence longer than ``REOPEN_GAP_S`` starts a new session at zero."""
        from services.fetchers.flight_observations import record_observation

        record_observation("a12345", now=1000.0)
        record_observation("a12345", now=1500.0)  # 500 s later, inside the gap

        # 1200 s of silence exceeds the 900 s threshold, so the session resets.
        after_silence = record_observation("a12345", now=2700.0)
        assert after_silence == 0

        # The follow-up sighting is measured from the new start at 2700.
        follow_up = record_observation("a12345", now=2760.0)
        assert follow_up == 60

    def test_session_clamp(self):
        """Elapsed time is capped at ``MAX_SESSION_SECONDS``.

        The tracker state is written directly rather than driven through the
        public API, because reaching the cap would need a long chain of calls
        that each stay inside ``REOPEN_GAP_S``.
        """
        from services.fetchers import flight_observations as obs

        # last_seen_at is 60 s before ``big_now`` so the reopen branch does
        # not fire, while first_seen_at is far enough back that the raw
        # elapsed value exceeds the cap.
        big_now = float(obs.MAX_SESSION_SECONDS + 1_000_000)
        seeded_entry = {
            "first_seen_at": 0.0,
            "last_seen_at": big_now - 60,
        }
        with obs._lock:
            obs._observations["a12345"] = seeded_entry

        elapsed = obs.record_observation("a12345", now=big_now)
        assert elapsed == obs.MAX_SESSION_SECONDS, (
            f"elapsed must be clamped to MAX_SESSION_SECONDS; got {elapsed}"
        )

    def test_empty_input_returns_zero(self):
        """Empty, ``None`` and whitespace-only identifiers all return zero."""
        from services.fetchers.flight_observations import record_observation

        for blank in ("", None, " "):
            assert record_observation(blank) == 0  # type: ignore[arg-type]

    def test_case_insensitive_key(self):
        """Upper- and lower-case spellings of a hex share one entry."""
        from services.fetchers.flight_observations import record_observation

        record_observation("A12345", now=1000.0)
        # The lower-case spelling must continue the session started above.
        lower_case_elapsed = record_observation("a12345", now=1060.0)
        assert lower_case_elapsed == 60
|
||||
|
||||
|
||||
class TestGetSessionSeconds:
    """Pins ``get_session_seconds`` as a read that leaves tracker state alone."""

    def test_read_only_does_not_bump(self):
        """A read reports elapsed time without refreshing ``last_seen``."""
        from services.fetchers.flight_observations import (
            get_session_seconds,
            record_observation,
        )

        record_observation("a12345", now=1000.0)
        record_observation("a12345", now=1060.0)  # bumps last_seen to 1060

        # At t=2000 the last recorded sighting is 940 s old (> 900 s), so a
        # recording call here would reset the session. The read must not
        # reset it and reports the time since first sighting instead.
        seconds_at_2000 = get_session_seconds("a12345", now=2000.0)
        assert seconds_at_2000 == 1000, (
            f"read should return 2000-1000=1000s; got {seconds_at_2000}"
        )

        # A recording at t=2001 is 941 s after the last *recorded* sighting
        # (t=1060), so it resets the session. If the read had bumped
        # last_seen to 2000 the gap would be 1 s and no reset would happen.
        assert record_observation("a12345", now=2001.0) == 0  # session reset

    def test_unknown_hex_returns_zero(self):
        """A hex that was never recorded reads as zero seconds."""
        from services.fetchers.flight_observations import get_session_seconds

        assert get_session_seconds("nonexistent") == 0
|
||||
|
||||
|
||||
class TestPrune:
    """Pins the count returned by ``prune`` and which entries survive it."""

    def test_drops_stale_entries(self):
        """One old entry is dropped; the entry seen at prune time is kept."""
        from services.fetchers.flight_observations import (
            get_session_seconds,
            prune,
            record_observation,
        )

        record_observation("active", now=10_000.0)
        record_observation("stale", now=1.0)

        removed = prune(now=10_000.0)
        assert removed == 1

        # The active entry still has its original start time.
        assert get_session_seconds("active", now=10_001.0) == 1
        # The stale entry is gone, so its next sighting starts from zero.
        assert record_observation("stale", now=10_002.0) == 0

    def test_no_op_when_nothing_stale(self):
        """Entries seen 500 s before the prune are all kept."""
        from services.fetchers.flight_observations import prune, record_observation

        for hex_code in ("hex1", "hex2"):
            record_observation(hex_code, now=1000.0)

        removed = prune(now=1500.0)
        assert removed == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: emissions enrichment in _classify_and_publish honors the
|
||||
# cumulative tracker.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEmissionsCumulativeIntegration:
    """Checks the ``emissions`` dict that ``_classify_and_publish`` attaches.

    Both tests publish the same synthetic C172 record and differ only in the
    hex code and in whether the observation tracker was seeded beforehand.
    """

    def _reset_store(self):
        """Empty every flight list in the shared store."""
        from services.fetchers._store import latest_data, _data_lock

        with _data_lock:
            for key in (
                "flights", "commercial_flights", "private_flights",
                "private_jets", "military_flights", "tracked_flights",
            ):
                latest_data[key] = []

    def _publish_c172(self, monkeypatch, icao_hex):
        """Publish one synthetic C172 record and return its ``emissions`` dict.

        Resets the store, stubs the route and aircraft-type lookups on the
        flights module, runs ``_classify_and_publish`` and asserts that
        exactly one flight with a non-``None`` ``emissions`` entry came out.
        """
        from services.fetchers import flights as flights_module
        from services.fetchers._store import latest_data, _data_lock

        self._reset_store()
        monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
        monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")

        flights_module._classify_and_publish([
            {
                "hex": icao_hex,
                "flight": "JBU711",
                "r": "N1",
                "t": "C172",  # Cessna 172, 9 GPH
                "lat": 40.0,
                "lon": -100.0,
                "alt_baro": 3000,
                "gs": 100,
            }
        ])

        with _data_lock:
            published = list(latest_data.get("flights", []))
        assert len(published) == 1
        emi = published[0].get("emissions")
        # Guard here so a missing dict fails as an assertion in both tests
        # rather than as a TypeError on the first subscript.
        assert emi is not None
        return emi

    def test_first_publish_zero_cumulative(self, monkeypatch):
        """On the first observation the cumulative values are 0, while the
        rate field and ``observed_seconds`` are still present in the dict."""
        emi = self._publish_c172(monkeypatch, "test001")

        assert emi["fuel_gph"] == 9
        assert emi["observed_seconds"] == 0
        assert emi["fuel_gallons_burned"] == 0.0
        assert emi["co2_kg_emitted"] == 0.0

    def test_second_publish_accumulates(self, monkeypatch):
        """Publishing a hex that was first seen an hour ago yields non-zero
        cumulative values of roughly one hour's burn."""
        import time as _time_real
        from services.fetchers import flight_observations as obs

        # Seed an observation that started one hour ago and was last seen
        # 60 s ago. A single clock reading keeps the two stamps exactly
        # 3540 s apart.
        seeded_at = _time_real.time()
        with obs._lock:
            obs._observations["test002"] = {
                "first_seen_at": seeded_at - 3600,
                "last_seen_at": seeded_at - 60,
            }

        emi = self._publish_c172(monkeypatch, "test002")

        # Roughly 1 hour observed → 9 gal burned.
        assert 3500 <= emi["observed_seconds"] <= 3700
        assert 8.7 <= emi["fuel_gallons_burned"] <= 9.3
        # CO2 = 9 gph * 9.57 kg/gal = 86.1 kg/hr.
        assert 84 <= emi["co2_kg_emitted"] <= 88
|
||||
@@ -1,354 +0,0 @@
|
||||
"""Per-flight source attribution.
|
||||
|
||||
Background
|
||||
----------
|
||||
Pre-fix, adsb.lol records (the primary source for most flights) carried
|
||||
no source marker. OpenSky records got ``is_opensky: True`` and
|
||||
supplementals got ``supplemental_source``, so any UI that wanted to show
|
||||
which provider a flight came from saw OpenSky/airplanes.live records as
|
||||
explicitly tagged and adsb.lol records as "unlabeled" — making it look
|
||||
like adsb.lol wasn't even being used.
|
||||
|
||||
This caused user confusion ("only military planes have adsb.lol
|
||||
telemetry") that was diagnostic noise, not a real bug. The actual fix:
|
||||
stamp ``source`` at every fetch site so the downstream consumer can
|
||||
attribute the provider with no guesswork.
|
||||
|
||||
These tests pin:
|
||||
|
||||
* adsb.lol regional records get ``source: "adsb.lol"`` at fetch time
|
||||
(synthesized via the published flight dict).
|
||||
* OpenSky records get ``source: "OpenSky"`` (alongside the existing
|
||||
``is_opensky: True`` for backwards compat).
|
||||
* Supplementals (airplanes.live, adsb.fi) flow through with their
|
||||
``supplemental_source`` honored.
|
||||
* The military fetcher tags ``source`` on military_flights and uavs.
|
||||
* The published flight dict carries ``source`` so downstream code
|
||||
can render attribution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _classify_and_publish — source field flows into published flight dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClassifyAndPublishSource:
    """Checks the ``source`` field on flights published by ``_classify_and_publish``.

    Each test feeds one raw record through ``_publish_one`` and inspects the
    single published flight dict.
    """

    def _reset_store(self):
        """Clear store before each test so we get deterministic state."""
        from services.fetchers._store import latest_data, _data_lock

        with _data_lock:
            for key in (
                "flights", "commercial_flights", "private_flights",
                "private_jets", "military_flights", "tracked_flights",
            ):
                latest_data[key] = []
        return latest_data

    def _publish_one(self, monkeypatch, record):
        """Publish a single raw record and return the one published flight.

        Resets the store, stubs the route and aircraft-type lookups on the
        flights module, runs ``_classify_and_publish([record])`` and asserts
        that exactly one flight was published. The length check means an
        empty result fails as an assertion rather than an ``IndexError``.
        """
        from services.fetchers import flights as flights_module
        from services.fetchers._store import latest_data, _data_lock

        self._reset_store()

        # Patch route + type lookups so they don't try to hit the network.
        monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
        monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")

        flights_module._classify_and_publish([record])

        with _data_lock:
            published = list(latest_data.get("flights", []))
        assert len(published) == 1
        return published[0]

    def test_adsb_lol_record_tagged_in_published_flight(self, monkeypatch):
        """A raw record carrying ``source: 'adsb.lol'`` keeps that value in
        the published flight dict."""
        flight = self._publish_one(
            monkeypatch,
            {
                "hex": "ad7701",
                "flight": "JBU711",
                "r": "N967JT",
                "t": "A321",
                "lat": 40.0,
                "lon": -100.0,
                "alt_baro": 36000,
                "gs": 401.6,
                "nac_p": 9,
                "source": "adsb.lol",  # stamped at fetch site
            },
        )

        assert flight["source"] == "adsb.lol"
        # nac_p still flows through too — sanity check that adding source
        # didn't break the existing GPS jamming signal.
        assert flight["nac_p"] == 9

    def test_opensky_record_tagged_in_published_flight(self, monkeypatch):
        """A record tagged ``source: 'OpenSky'`` (with the legacy
        ``is_opensky: True`` also set) publishes with that source."""
        flight = self._publish_one(
            monkeypatch,
            {
                "hex": "a12345",
                "flight": "UAL100",
                "r": "N100UA",
                "t": "Unknown",
                "lat": 41.0,
                "lon": -87.0,
                "alt_baro": 35000,
                "gs": 450,
                # No nac_p — OpenSky doesn't carry it.
                "is_opensky": True,
                "source": "OpenSky",
            },
        )

        assert flight["source"] == "OpenSky"

    def test_supplemental_source_propagates(self, monkeypatch):
        """A record with only the legacy ``supplemental_source`` field has
        that value promoted to ``source`` in the published dict, so
        consumers don't have to inspect two different keys."""
        flight = self._publish_one(
            monkeypatch,
            {
                "hex": "b22222",
                "flight": "DAL200",
                "r": "N200DL",
                "t": "B738",
                "lat": 42.0,
                "lon": -90.0,
                "alt_baro": 32000,
                "gs": 420,
                "supplemental_source": "airplanes.live",
                # No explicit "source" — should fall through to
                # supplemental_source.
            },
        )

        assert flight["source"] == "airplanes.live"

    def test_explicit_source_wins_over_supplemental_source(self, monkeypatch):
        """If both fields are present, explicit ``source`` wins (it's the
        newer canonical tag)."""
        flight = self._publish_one(
            monkeypatch,
            {
                "hex": "c33333",
                "flight": "AAL300",
                "r": "N300AA",
                "t": "A321",
                "lat": 33.0,
                "lon": -97.0,
                "alt_baro": 34000,
                "gs": 430,
                "source": "adsb.lol",
                "supplemental_source": "adsb.fi",
            },
        )

        assert flight["source"] == "adsb.lol"

    def test_untagged_record_defaults_to_adsb_lol(self, monkeypatch):
        """A record with neither ``source`` nor ``supplemental_source``
        (e.g. synthesized by a test, or a fetcher that hasn't been
        migrated yet) defaults to ``"adsb.lol"``. Defensive default —
        better than empty string."""
        flight = self._publish_one(
            monkeypatch,
            {
                "hex": "d44444",
                "flight": "SWA400",
                "r": "N400SW",
                "t": "B737",
                "lat": 32.0,
                "lon": -110.0,
                "alt_baro": 30000,
                "gs": 410,
            },
        )

        assert flight["source"] == "adsb.lol"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# adsb.lol regional fetcher tags at fetch time
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAdsbLolRegionalTagging:
    """Checks what ``_fetch_adsb_lol_regions`` returns for canned responses."""

    def test_fetch_region_stamps_source_on_each_aircraft(self, monkeypatch):
        """Every record returned by the regional fetcher carries
        ``source: 'adsb.lol'`` even though the canned payload has no
        ``source`` key."""
        from services.fetchers import flights as flights_module

        class _CannedOk:
            """HTTP 200 stand-in whose payload lists three untagged aircraft."""

            status_code = 200

            def json(self):
                # A fresh payload is built on every call.
                aircraft = [
                    {"hex": "a1", "lat": 40.0, "lon": -100.0, "nac_p": 8},
                    {"hex": "a2", "lat": 40.1, "lon": -100.1, "nac_p": 9},
                    {"hex": "a3", "lat": 40.2, "lon": -100.2, "nac_p": 10},
                ]
                return {"ac": aircraft}

        def _stub_fetch(*args, **kwargs):
            return _CannedOk()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _stub_fetch)

        results = flights_module._fetch_adsb_lol_regions()

        assert len(results) >= 3
        # Collect the distinct tags; only "adsb.lol" may appear.
        sources = {aircraft.get("source") for aircraft in results}
        assert sources == {"adsb.lol"}, (
            f"adsb.lol regional fetcher must stamp source on every record; "
            f"got: {sources}"
        )

    def test_fetch_region_failure_returns_empty_without_crashing(self, monkeypatch):
        """An HTTP 500 from every request yields an empty list, not an error."""
        from services.fetchers import flights as flights_module

        class _CannedError:
            """HTTP 500 stand-in with an empty JSON body."""

            status_code = 500

            def json(self):
                return {}

        def _stub_fetch(*args, **kwargs):
            return _CannedError()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _stub_fetch)

        results = flights_module._fetch_adsb_lol_regions()

        assert results == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Military fetcher tags source on output dicts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMilitarySourceTagging:
    """Checks the ``source`` field on entries published to ``military_flights``."""

    def test_military_output_carries_source_field(self, monkeypatch):
        """One synthetic aircraft served from an adsb.lol URL is published
        to ``military_flights`` with ``source == "adsb.lol"``."""
        from services.fetchers import _store as store_module
        from services.fetchers import military as mil_module
        from services.fetchers._store import latest_data, _data_lock

        # Start from empty lists for every key this test touches.
        with _data_lock:
            for key in ("military_flights", "uavs", "tracked_flights"):
                latest_data[key] = []

        # is_any_active is patched on the _store module itself: the original
        # test notes that the military module imports it inline at call
        # time, so patching it on the military module would have no effect.
        monkeypatch.setattr(store_module, "is_any_active", lambda *_: True)

        class _RespMil:
            """HTTP 200 stand-in carrying a single aircraft record."""

            status_code = 200

            def json(self):
                aircraft = {
                    "hex": "ae6c1d",
                    "flight": "CRUSH52",
                    "r": "170281",
                    "t": "C30J",
                    "lat": 47.594,
                    "lon": -124.879,
                    "alt_baro": 9025,
                    "gs": 162.8,
                    "track": 334.5,
                    "nac_p": 10,
                }
                return {"ac": [aircraft]}

        class _RespEmpty:
            """HTTP 200 stand-in with no aircraft."""

            status_code = 200

            def json(self):
                return {"ac": []}

        def _fake_fetch(url, *a, **kw):
            # Only adsb.lol URLs yield the aircraft; every other URL is empty.
            return _RespMil() if "adsb.lol" in url else _RespEmpty()

        # The fetch stub plus stand-ins for the downstream enrichment
        # helpers, so none of them touch external state.
        replacements = {
            "fetch_with_curl": _fake_fetch,
            "enrich_with_plane_alert": lambda mf: None,
            "_enrich_country": lambda hex_, flag: ("US", "USAF"),
            "_classify_military_type": lambda t: "transport",
            "_classify_uav": lambda m, c: (False, "", ""),
            "get_emissions_info": lambda model: None,
            "_mark_fresh": lambda *keys: None,
        }
        for attr_name, stand_in in replacements.items():
            monkeypatch.setattr(mil_module, attr_name, stand_in)

        mil_module.fetch_military_flights()

        with _data_lock:
            mil_published = list(latest_data.get("military_flights", []))

        assert len(mil_published) == 1
        assert mil_published[0]["source"] == "adsb.lol"
|
||||
@@ -1,83 +0,0 @@
|
||||
"""GDELT's ``data.gdeltproject.org`` is a CNAME to a Google Cloud Storage
|
||||
bucket. GCS responds with the wildcard ``*.storage.googleapis.com``
|
||||
certificate, which legitimately does NOT cover the GDELT custom
|
||||
domain, so Python's TLS verification refuses the connection. Some
|
||||
networks happen to route through a path where this works; many
|
||||
(notably Docker Desktop's outbound NAT on local installs) do not.
|
||||
|
||||
The fix in ``services.geopolitics._gcs_direct_gdelt_url`` rewrites any
|
||||
URL pointing at ``data.gdeltproject.org`` to its GCS-direct equivalent
|
||||
(``storage.googleapis.com/data.gdeltproject.org/...``), where the
|
||||
standard GCS certificate is genuinely valid. ``api.gdeltproject.org``
|
||||
and every other host are left untouched.
|
||||
|
||||
These tests pin that behavior so a future refactor that drops the
|
||||
helper or accidentally rewrites the wrong host gets a loud failure.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_rewrites_data_gdeltproject_https():
    """An https URL on ``data.gdeltproject.org`` becomes its GCS-direct form."""
    from services.geopolitics import _gcs_direct_gdelt_url

    original = "https://data.gdeltproject.org/gdeltv2/lastupdate.txt"
    expected = "https://storage.googleapis.com/data.gdeltproject.org/gdeltv2/lastupdate.txt"
    assert _gcs_direct_gdelt_url(original) == expected
|
||||
|
||||
|
||||
def test_rewrites_data_gdeltproject_http():
    """A plain-http URL on the GDELT data host is rewritten as well, and the
    scheme of the result stays ``http``."""
    from services.geopolitics import _gcs_direct_gdelt_url

    original = "http://data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip"
    expected = "http://storage.googleapis.com/data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip"
    assert _gcs_direct_gdelt_url(original) == expected
|
||||
|
||||
|
||||
def test_rewrites_preserve_query_string_and_path():
    """The path and the percent-encoded query string survive the rewrite unchanged."""
    from services.geopolitics import _gcs_direct_gdelt_url

    expected = (
        "https://storage.googleapis.com/data.gdeltproject.org"
        "/some/deep/path?a=1&b=2&c=hello%20world"
    )
    actual = _gcs_direct_gdelt_url(
        "https://data.gdeltproject.org/some/deep/path?a=1&b=2&c=hello%20world"
    )
    assert actual == expected
|
||||
|
||||
|
||||
def test_does_not_touch_api_gdeltproject_org():
    """URLs on ``api.gdeltproject.org`` are returned unchanged; per the
    original test note, that host is not a CNAME to GCS and rewriting it
    would break the GDELT API endpoint."""
    from services.geopolitics import _gcs_direct_gdelt_url

    api_url = "https://api.gdeltproject.org/api/v2/doc/doc?query=carrier"
    result = _gcs_direct_gdelt_url(api_url)
    assert result == api_url
|
||||
|
||||
|
||||
def test_does_not_touch_other_hosts():
    """URLs on unrelated hosts, including one already on
    ``storage.googleapis.com``, are returned unchanged."""
    from services.geopolitics import _gcs_direct_gdelt_url

    unrelated_urls = (
        "https://en.wikipedia.org/wiki/Boeing_747",
        "https://query.wikidata.org/sparql",
        "https://storage.googleapis.com/already-correct/path",
        "https://nominatim.openstreetmap.org/search",
    )
    for candidate in unrelated_urls:
        result = _gcs_direct_gdelt_url(candidate)
        assert result == candidate
|
||||
|
||||
|
||||
def test_does_not_partially_match_strings():
    """``data.gdeltproject.org`` is matched exactly; URLs that merely
    contain that substring elsewhere are left alone.

    Two look-alikes are checked: a different host whose name ends with the
    GDELT host name, and a URL that mentions the GDELT host only inside a
    query parameter.
    """
    from services.geopolitics import _gcs_direct_gdelt_url

    lookalikes = (
        # The match requires ``://`` immediately before the host, so this
        # is treated as a different host and left alone.
        "https://example-data.gdeltproject.org/path",
        # The GDELT host appears only in the query string, with no ``://``
        # in front of it, so the URL must not be rewritten.
        "https://example.com/?ref=data.gdeltproject.org/x",
    )
    for url in lookalikes:
        assert _gcs_direct_gdelt_url(url) == url, f"unexpected rewrite of {url}"
|
||||
@@ -1,333 +0,0 @@
|
||||
"""GPS jamming detection — nac_p=0 counted, lowered thresholds.
|
||||
|
||||
Background
|
||||
----------
|
||||
Pre-fix, the detector had three stacked filters that together meant the
|
||||
``gps_jamming`` layer almost never lit up:
|
||||
|
||||
1. ``nac_p == 0`` aircraft were dropped on the theory that "0 = old
|
||||
transponder." But modern Mode-S Enhanced Surveillance transponders
|
||||
also fall back to ``nac_p == 0`` when they lose GPS lock entirely —
|
||||
which is *exactly* the jamming signature we want to catch.
|
||||
2. ``GPS_JAMMING_MIN_AIRCRAFT = 5`` per 1°x1° cell.
|
||||
3. ``GPS_JAMMING_MIN_RATIO = 0.30`` adjusted ratio.
|
||||
|
||||
Combined with the existing ``-1`` noise cushion (``adjusted = degraded - 1``)
|
||||
the bar to clear required dense, busy airspace — but jamming hotspots
|
||||
(eastern Med, eastern Ukraine, Iran/Iraq) tend to have sparser traffic
|
||||
precisely because pilots avoid them.
|
||||
|
||||
These tests pin the new behavior:
|
||||
|
||||
* ``nac_p == 0`` is now counted as degraded.
|
||||
* ``nac_p == None`` (no field — typical for OpenSky records) is still
|
||||
skipped — absence isn't evidence.
|
||||
* Thresholds lowered to 3 aircraft / 0.20 ratio.
|
||||
* Public function signature accepts overrides so callers / future
|
||||
operators can re-tune without code edits.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# nac_p == 0 inclusion (the headline fix)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNacpZeroCounted:
    """Checks that records reporting ``nac_p == 0`` count as degraded."""

    def test_cell_dominated_by_nacp_zero_now_fires(self):
        """Three ``nac_p=0`` aircraft plus two with valid GPS in one cell
        produce a single zone with degraded=3, total=5 and ratio 0.40."""
        from services.fetchers.flights import detect_gps_jamming_zones

        # Every record falls in the 1°x1° cell at int(lat)=40, int(lng)=-100.
        no_fix = [
            {"hex": "a1", "lat": 40.1, "lng": -100.1, "nac_p": 0},
            {"hex": "a2", "lat": 40.5, "lng": -100.5, "nac_p": 0},
            {"hex": "a3", "lat": 40.9, "lng": -100.9, "nac_p": 0},
        ]
        good_fix = [
            {"hex": "b1", "lat": 40.2, "lng": -100.3, "nac_p": 9},
            {"hex": "b2", "lat": 40.7, "lng": -100.7, "nac_p": 11},
        ]

        zones = detect_gps_jamming_zones(no_fix + good_fix)

        # total=5, degraded=3, adjusted=2, ratio=0.40 > 0.20, so one zone fires.
        assert len(zones) == 1
        zone = zones[0]
        assert zone["degraded"] == 3
        assert zone["total"] == 5
        assert zone["ratio"] == 0.40
        # The zone is reported at the centre of the grid cell.
        assert zone["lat"] == 40.5
        assert zone["lng"] == -99.5

    def test_nacp_zero_alone_clears_min_aircraft(self):
        """A cell holding exactly three ``nac_p=0`` aircraft fires with
        severity ``"medium"``."""
        from services.fetchers.flights import detect_gps_jamming_zones

        positions = ((50.1, 30.1), (50.5, 30.5), (50.9, 30.9))
        feed = [
            {"hex": f"a{index}", "lat": lat, "lng": lng, "nac_p": 0}
            for index, (lat, lng) in enumerate(positions, start=1)
        ]

        zones = detect_gps_jamming_zones(feed)

        # total=3, degraded=3, adjusted=2, ratio=0.667 > 0.20, so it fires.
        # The ratio falls in the 0.5 to 0.75 band that maps to "medium".
        assert len(zones) == 1
        assert zones[0]["severity"] == "medium"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# nac_p == None is still skipped (preserve OpenSky behavior)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoneStillSkipped:
    """Checks that records without a usable ``nac_p`` are left out of the grid."""

    def test_none_records_dont_add_to_grid(self):
        """Records with no ``nac_p`` key do not count toward a cell's
        total: three jammed plus four key-less records give total=3."""
        from services.fetchers.flights import detect_gps_jamming_zones

        jammed = [
            {"hex": "a1", "lat": 40.1, "lng": -100.1, "nac_p": 0},
            {"hex": "a2", "lat": 40.2, "lng": -100.2, "nac_p": 0},
            {"hex": "a3", "lat": 40.3, "lng": -100.3, "nac_p": 0},
        ]
        # OpenSky-style records: the nac_p key is absent altogether.
        without_nac_p = [
            {"hex": "o1", "lat": 40.4, "lng": -100.4},
            {"hex": "o2", "lat": 40.5, "lng": -100.5},
            {"hex": "o3", "lat": 40.6, "lng": -100.6},
            {"hex": "o4", "lat": 40.7, "lng": -100.7},
        ]

        zones = detect_gps_jamming_zones(jammed + without_nac_p)

        # Only the three nac_p=0 records reach the grid: total is 3, not 7.
        assert len(zones) == 1
        zone = zones[0]
        assert zone["total"] == 3
        assert zone["degraded"] == 3

    def test_explicit_none_skipped(self):
        """Records whose ``nac_p`` key is present but ``None`` are skipped
        too, so a feed made only of them yields no zones."""
        from services.fetchers.flights import detect_gps_jamming_zones

        coordinates = (0.1, 0.2, 0.3)
        feed = [
            {"hex": f"a{index}", "lat": value, "lng": value, "nac_p": None}
            for index, value in enumerate(coordinates, start=1)
        ]

        zones = detect_gps_jamming_zones(feed)

        # No record was counted, so no zone can be reported.
        assert zones == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lowered MIN_AIRCRAFT (5 → 3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMinAircraftLowered:
    """The per-cell aircraft minimum was lowered from 5 to 3."""

    def test_three_aircraft_cell_now_qualifies(self):
        """A cell holding exactly three degraded aircraft must produce a
        zone. The old minimum of 5 rejected such sparse cells outright."""
        from services.fetchers.flights import detect_gps_jamming_zones

        rows = (
            ("a1", 33.1, 44.1, 3),
            ("a2", 33.2, 44.2, 5),
            ("a3", 33.3, 44.3, 7),
        )
        feed = [
            {"hex": ident, "lat": lat, "lng": lng, "nac_p": nac_p}
            for ident, lat, lng, nac_p in rows
        ]

        result = detect_gps_jamming_zones(feed)

        # total=3, degraded=3, adjusted=2, ratio=0.667: passes the new
        # rules, whereas MIN_AIRCRAFT=5 would have blocked it.
        assert len(result) == 1

    def test_two_aircraft_cell_still_blocked(self):
        """The minimum was not dropped to 2 (too much single-transponder
        noise), so a two-aircraft cell must still yield nothing."""
        from services.fetchers.flights import detect_gps_jamming_zones

        rows = (("a1", 33.1, 44.1), ("a2", 33.2, 44.2))
        feed = [
            {"hex": ident, "lat": lat, "lng": lng, "nac_p": 3}
            for ident, lat, lng in rows
        ]

        result = detect_gps_jamming_zones(feed)

        assert result == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lowered MIN_RATIO (0.30 → 0.20)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMinRatioLowered:
    """The degraded-ratio threshold was lowered from 0.30 to 0.20."""

    def test_ratio_between_old_and_new_threshold_fires(self):
        """A cell whose ratio is exactly 0.30 clears the new strict
        ``> 0.20`` bar but would not have cleared the old strict
        ``> 0.30`` bar."""
        from services.fetchers.flights import detect_gps_jamming_zones

        # 4 degraded out of 10 aircraft -> adjusted=3, ratio=3/10=0.30.
        feed = []
        feed.extend(
            {"hex": f"d{i}", "lat": 40.1, "lng": -100.1, "nac_p": 3}
            for i in range(4)
        )
        feed.extend(
            {"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9}
            for i in range(6)
        )

        result = detect_gps_jamming_zones(feed)

        assert len(result) == 1
        only_zone = result[0]
        assert only_zone["degraded"] == 4
        assert only_zone["total"] == 10
        assert only_zone["ratio"] == 0.30

    def test_ratio_at_or_below_new_threshold_does_not_fire(self):
        """A ratio of exactly 0.20 must not fire: the comparison is a
        strict ``>``."""
        from services.fetchers.flights import detect_gps_jamming_zones

        # 4 degraded out of 15 aircraft -> adjusted=3, ratio=3/15=0.20,
        # which is not strictly greater than 0.20.
        feed = []
        feed.extend(
            {"hex": f"d{i}", "lat": 40.1, "lng": -100.1, "nac_p": 3}
            for i in range(4)
        )
        feed.extend(
            {"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9}
            for i in range(11)
        )

        result = detect_gps_jamming_zones(feed)

        assert result == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-existing noise cushion (-1) preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoiseCushionPreserved:
    """The detector's ``-1`` noise adjustment survives the relaxed thresholds."""

    def test_single_quirky_transponder_doesnt_fire(self):
        """A lone degraded aircraft among healthy ones must not create a
        zone, even with the lower thresholds; that is what the ``-1``
        adjustment is for."""
        from services.fetchers.flights import detect_gps_jamming_zones

        quirky = {"hex": "d1", "lat": 40.1, "lng": -100.1, "nac_p": 3}
        healthy = [
            {"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9}
            for i in range(10)
        ]
        feed = [quirky, *healthy]

        result = detect_gps_jamming_zones(feed)

        # total=11, degraded=1, adjusted=0 -> the cell short-circuits.
        assert result == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants pinned (catches accidental rollback)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConstantsPinned:
    """Pin the tuned constants so an accidental rollback fails loudly."""

    def test_min_aircraft_is_three(self):
        """``GPS_JAMMING_MIN_AIRCRAFT`` must stay at 3."""
        from services.constants import GPS_JAMMING_MIN_AIRCRAFT

        failure_hint = (
            "MIN_AIRCRAFT must be 3; raising it back to 5 brings back the "
            "'jamming never shows' bug."
        )
        assert GPS_JAMMING_MIN_AIRCRAFT == 3, failure_hint

    def test_min_ratio_is_0_20(self):
        """``GPS_JAMMING_MIN_RATIO`` must stay at 0.20."""
        from services.constants import GPS_JAMMING_MIN_RATIO

        failure_hint = (
            "MIN_RATIO must be 0.20; raising it back to 0.30 brings back "
            "the 'jamming never shows' bug."
        )
        assert GPS_JAMMING_MIN_RATIO == 0.20, failure_hint
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Overrides honored
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOverridesHonored:
    """Call-site overrides take precedence over the module constants."""

    def test_overrides_supersede_constants(self):
        """Passing ``min_aircraft`` explicitly lets a caller re-tune the
        detector without editing the constants: the same two-aircraft
        feed is rejected by default and accepted with ``min_aircraft=2``."""
        from services.fetchers.flights import detect_gps_jamming_zones

        rows = (("a1", 40.1, -100.1), ("a2", 40.2, -100.2))
        feed = [
            {"hex": ident, "lat": lat, "lng": lng, "nac_p": 3}
            for ident, lat, lng in rows
        ]

        # Default minimum of 3 aircraft: blocked.
        with_defaults = detect_gps_jamming_zones(feed)
        assert with_defaults == []

        # Minimum overridden to 2: the cell now fires.
        with_override = detect_gps_jamming_zones(feed, min_aircraft=2)
        assert len(with_override) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# lon vs lng compatibility
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLonLngCompat:
    """Longitude may arrive under either the ``lon`` or the ``lng`` key."""

    def test_lon_key_accepted(self):
        """Raw adsb.lol records spell the key ``lon`` while merged records
        use ``lng``; a feed that only carries ``lon`` must still be
        detected."""
        from services.fetchers.flights import detect_gps_jamming_zones

        rows = (("a1", 40.1, -100.1), ("a2", 40.2, -100.2), ("a3", 40.3, -100.3))
        feed = [
            {"hex": ident, "lat": lat, "lon": lon, "nac_p": 0}
            for ident, lat, lon in rows
        ]

        result = detect_gps_jamming_zones(feed)

        assert len(result) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Empty / malformed inputs don't crash
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRobustness:
    """Empty or malformed feeds yield an empty result instead of crashing."""

    def test_empty_feed(self):
        """An empty list produces no zones."""
        from services.fetchers.flights import detect_gps_jamming_zones

        result = detect_gps_jamming_zones([])
        assert result == []

    def test_none_feed(self):
        """``None`` is accepted in place of a feed; the call site passes
        ``raw_flights_snapshot``, which could in principle be None during
        a startup race."""
        from services.fetchers.flights import detect_gps_jamming_zones

        result = detect_gps_jamming_zones(None)
        assert result == []

    def test_records_missing_position_skipped(self):
        """Records lacking latitude, longitude, or both are skipped."""
        from services.fetchers.flights import detect_gps_jamming_zones

        no_position = {"hex": "noloc", "nac_p": 0}
        no_latitude = {"hex": "nolat", "lng": -100.0, "nac_p": 0}
        no_longitude = {"hex": "nolng", "lat": 40.0, "nac_p": 0}

        result = detect_gps_jamming_zones([no_position, no_latitude, no_longitude])
        assert result == []
|
||||
@@ -1,67 +0,0 @@
|
||||
"""Regression tests for GitHub #375 production-readiness fixes."""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestDevBindHost:
    """``_dev_uvicorn_bind_host`` follows ``SHADOWBROKER_DEV_BIND_ALL``."""

    def test_defaults_to_loopback(self, monkeypatch):
        """With the variable unset the dev server binds to loopback."""
        monkeypatch.delenv("SHADOWBROKER_DEV_BIND_ALL", raising=False)
        from main import _dev_uvicorn_bind_host

        host = _dev_uvicorn_bind_host()
        assert host == "127.0.0.1"

    @pytest.mark.parametrize("value", ["1", "true", "yes", "on", "TRUE"])
    def test_bind_all_opt_in(self, monkeypatch, value):
        """Each accepted truthy spelling switches the bind host to all
        interfaces."""
        monkeypatch.setenv("SHADOWBROKER_DEV_BIND_ALL", value)
        from main import _dev_uvicorn_bind_host

        host = _dev_uvicorn_bind_host()
        assert host == "0.0.0.0"
|
||||
|
||||
|
||||
class TestDataStoreSnapshots:
    """Snapshots handed out by the data store must not alias its contents.

    Both tests overwrite the shared ``latest_data["news"]`` entry. Each one
    restores the previous entry when it finishes so the fixture data does
    not leak into whichever test runs next.
    """

    # Sentinel meaning "there was no 'news' entry before the test ran".
    _ABSENT = object()

    @classmethod
    def _seed_news(cls, store, value):
        """Store *value* under ``"news"`` and return the entry it replaced."""
        with store._data_lock:
            # assumes latest_data is a dict-like mapping — TODO confirm
            prior = store.latest_data.get("news", cls._ABSENT)
            store.latest_data["news"] = value
        return prior

    @classmethod
    def _restore_news(cls, store, prior):
        """Put back the ``"news"`` entry captured by ``_seed_news``."""
        with store._data_lock:
            if prior is cls._ABSENT:
                store.latest_data.pop("news", None)
            else:
                store.latest_data["news"] = prior

    def test_deepcopy_snapshot_isolated_from_store(self):
        """Mutating a deep-copy snapshot leaves the stored record untouched."""
        from services.fetchers import _store

        original = [{"title": "baseline"}]
        prior = self._seed_news(_store, list(original))
        try:
            # Taken outside the lock, as in the original test, in case the
            # lock is not reentrant.
            snap = _store.get_latest_data_deepcopy_snapshot()
            snap["news"][0]["title"] = "mutated"
            with _store._data_lock:
                assert _store.latest_data["news"][0]["title"] == "baseline"
        finally:
            self._restore_news(_store, prior)

    def test_subset_deepcopy_isolated(self):
        """Mutating a subset snapshot leaves the stored record untouched."""
        from services.fetchers import _store

        prior = self._seed_news(_store, [{"title": "subset"}])
        try:
            snap = _store.get_latest_data_subset("news")
            snap["news"][0]["title"] = "changed"
            with _store._data_lock:
                assert _store.latest_data["news"][0]["title"] == "subset"
        finally:
            self._restore_news(_store, prior)
|
||||
|
||||
|
||||
class TestHeavyFetchExecutorRouting:
    """Task labels are routed to the expected executor."""

    def test_slow_tier_uses_slow_executor(self):
        """Slow-tier and startup-heavy labels map to the slow executor;
        the fast-tier label maps to the shared one."""
        from services.data_fetcher import (
            _SLOW_EXECUTOR,
            _SHARED_EXECUTOR,
            _executor_for_task_label,
        )

        expected_routing = (
            ("slow-tier-refresh", _SLOW_EXECUTOR),
            ("startup-heavy-warm", _SLOW_EXECUTOR),
            ("fast-tier-refresh", _SHARED_EXECUTOR),
        )
        for label, executor in expected_routing:
            assert _executor_for_task_label(label) is executor
|
||||
|
||||
|
||||
class TestLiveDataFullEndpoint:
    """``/api/live-data`` honours conditional requests."""

    def test_live_data_supports_etag_304(self, client):
        """The first response carries an ETag; replaying it in
        ``If-None-Match`` yields a 304 with the same ETag."""
        initial = client.get("/api/live-data")
        assert initial.status_code == 200

        etag = initial.headers.get("etag")
        assert etag

        revalidated = client.get("/api/live-data", headers={"If-None-Match": etag})
        assert revalidated.status_code == 304
        assert revalidated.headers.get("etag") == etag
|
||||
@@ -1,29 +0,0 @@
|
||||
"""KiwiSDR mirror prefers HTTPS (#364)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from services.kiwisdr_fetcher import (
|
||||
_SOURCE_URL_HTTP,
|
||||
_SOURCE_URL_HTTPS,
|
||||
_fetch_mirror_payload_text,
|
||||
)
|
||||
|
||||
|
||||
def test_fetch_mirror_tries_https_before_http():
    """The mirror fetch tries the HTTPS URL first and falls back to HTTP
    when the HTTPS attempt raises."""
    requested: list[str] = []

    def fake_fetch(url, **kwargs):
        # Record every attempt so the ordering can be asserted afterwards.
        requested.append(url)
        if url == _SOURCE_URL_HTTPS:
            raise ConnectionError("tls not available")
        return MagicMock(status_code=200, text="var kiwisdr_com = [];")

    with patch("services.network_utils.fetch_with_curl", side_effect=fake_fetch):
        payload = _fetch_mirror_payload_text()

    assert payload == "var kiwisdr_com = [];"
    assert requested == [_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user