mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-13 09:37:53 +02:00
Compare commits
65 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c8fdfbdceb | |||
| bb7d7f6f83 | |||
| a1af9c3595 | |||
| c8a8fc56f8 | |||
| e6aba86ce1 | |||
| d5609ac02f | |||
| 1d7fa5185a | |||
| fb97042c01 | |||
| 2616a6c9e3 | |||
| a930497e14 | |||
| 2dc1fcc778 | |||
| 896d1ae938 | |||
| 8dfa6a7199 | |||
| ef6b8ec181 | |||
| dcea325fba | |||
| 03b8053617 | |||
| 20807a2d62 | |||
| 79fbf9741b | |||
| a2f5d62926 | |||
| 5e0b2c037e | |||
| 69ef231e5a | |||
| 7a5f47ca9e | |||
| 5cd49542bf | |||
| f14d4feb6d | |||
| 19a8560a80 | |||
| 0d0e009867 | |||
| febcce9125 | |||
| 31ebcb5cd9 | |||
| b3fca3dc18 | |||
| 401f114e4f | |||
| 79b39e8985 | |||
| c3e38621fc | |||
| 9ef02dd06f | |||
| ba39d3b9aa | |||
| f91ddcf38b | |||
| 49151d8b9f | |||
| 767a2f6c00 | |||
| 2da739c9e8 | |||
| eca7f24e2c | |||
| 7bfaad17f0 | |||
| e3efcfd476 | |||
| 32b8421a1c | |||
| bc70cc3527 | |||
| 44e9b38ac2 | |||
| b01a69c172 | |||
| b041b5e97c | |||
| c54ea7fd9f | |||
| a3aa7b4dec | |||
| 19fb7f0b1e | |||
| 35cd4e4c71 | |||
| 31f79fd8e2 | |||
| fd7d6fa401 | |||
| 49621824b1 | |||
| 76750caa92 | |||
| c3ef9f4b9e | |||
| 5e6bb8511a | |||
| 0fee36e8f7 | |||
| e125467721 | |||
| 2b03b808ac | |||
| 2e14e75a0e | |||
| 084e563412 | |||
| 9ef6213284 | |||
| fb11e0881f | |||
| 7f96151e56 | |||
| d0299fc0a0 |
@@ -7,6 +7,28 @@ on:
|
|||||||
branches: [main]
|
branches: [main]
|
||||||
workflow_call:
|
workflow_call:
|
||||||
|
|
||||||
|
# CI flake mitigation:
|
||||||
|
# ci.yml is triggered TWICE per PR on the same commit — once directly via
|
||||||
|
# the `pull_request` trigger above ("Frontend Tests & Build" check) and once
|
||||||
|
# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests &
|
||||||
|
# Build" check). Both jobs land on the same Actions runner pool at the same
|
||||||
|
# time and fight for CPU/RAM. Under contention, React's reconciliation in
|
||||||
|
# `messagesViewFirstContact.test.tsx > removes an approved contact …`
|
||||||
|
# overruns its 5s waitFor timeout — that's the single failure mode we've
|
||||||
|
# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the
|
||||||
|
# fd7d6fa push. Backend tests and every other frontend test pass under
|
||||||
|
# the same conditions, which is what made this look random.
|
||||||
|
#
|
||||||
|
# Pinning a concurrency group on the SHA (PR head, or the pushed commit
|
||||||
|
# for main) serializes the two invocations so neither starves the other.
|
||||||
|
# We use cancel-in-progress: false so the second one queues instead of
|
||||||
|
# cancelling — cancelling could leave the PR check stuck "Expected" if
|
||||||
|
# only one of the two ever finishes. Total CI time grows by ~2 min in
|
||||||
|
# exchange for deterministic outcomes.
|
||||||
|
concurrency:
|
||||||
|
group: ci-${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
frontend:
|
frontend:
|
||||||
name: Frontend Tests & Build
|
name: Frontend Tests & Build
|
||||||
|
|||||||
+37
@@ -101,6 +101,14 @@ backend/data/*
|
|||||||
# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream
|
# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream
|
||||||
# Let's Encrypt renewal failures without disabling TLS validation entirely.
|
# Let's Encrypt renewal failures without disabling TLS validation entirely.
|
||||||
!backend/data/aisstream_spki_pins.json
|
!backend/data/aisstream_spki_pins.json
|
||||||
|
# Issue #231: pinned SHA-256 digests for known release archives. Used by
|
||||||
|
# the self-updater as a second-line integrity check when the release's
|
||||||
|
# SHA256SUMS.txt asset can't be fetched.
|
||||||
|
!backend/data/release_digests.json
|
||||||
|
# Issue #244/#245/#246: one-shot carrier-position seed shipped with each
|
||||||
|
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||||
|
# after that the cache reflects this install's own GDELT observations.
|
||||||
|
!backend/data/carrier_seed.json
|
||||||
|
|
||||||
# OS generated files
|
# OS generated files
|
||||||
.DS_Store
|
.DS_Store
|
||||||
@@ -253,3 +261,32 @@ backend/data/wormhole_stdout.log
|
|||||||
|
|
||||||
# Compressed snapshot archives (can be 100 MB+)
|
# Compressed snapshot archives (can be 100 MB+)
|
||||||
*.json.gz
|
*.json.gz
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# AI assistant / coding-agent scratch
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# Per-tool config + scratch directories. These are private to whichever
|
||||||
|
# coding agent the operator happens to be using and have no business in
|
||||||
|
# the repo. If a tool's instructions need to be canonical for the project,
|
||||||
|
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||||
|
# repo root.
|
||||||
|
|
||||||
|
# OpenAI Codex CLI
|
||||||
|
.codex/
|
||||||
|
.codex-app-schema/
|
||||||
|
.codex-app-ts/
|
||||||
|
|
||||||
|
# Per-agent instruction files dropped at repo root by various tools.
|
||||||
|
# These are operator-side preferences, not part of the project contract.
|
||||||
|
AGENTS.md
|
||||||
|
GEMINI.md
|
||||||
|
CLAUDE.md
|
||||||
|
.github/copilot-instructions.md
|
||||||
|
|
||||||
|
# Stale AI-generated test file that referenced fields that don't exist in
|
||||||
|
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||||
|
# ignored so it doesn't accidentally get committed if it shows up again
|
||||||
|
# from a tool that's working off an out-of-date understanding of the
|
||||||
|
# module. If a real test for that function is needed, write it under a
|
||||||
|
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||||
|
backend/tests/test_carrier_tracker_region_centers.py
|
||||||
|
|||||||
+42
-12
@@ -13,13 +13,22 @@
|
|||||||
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
||||||
# on GitLab) so the two sources stay in sync.
|
# on GitLab) so the two sources stay in sync.
|
||||||
#
|
#
|
||||||
|
# Pipelines on this repo were instant-failing for free-tier accounts until
|
||||||
|
# identity verification was added — the May 2026 bump in this comment is
|
||||||
|
# the marker commit that confirms runner allocation after verification.
|
||||||
|
#
|
||||||
# Auth notes:
|
# Auth notes:
|
||||||
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
||||||
# automatically. No credentials need to be configured.
|
# automatically. No credentials need to be configured.
|
||||||
# - The reverse mirror requires a GitHub personal access token stored
|
# - The reverse mirror authenticates to GitHub via a per-repo SSH
|
||||||
# as the GitLab CI/CD variable GITHUB_MIRROR_TOKEN (Protected + Masked).
|
# deploy key. The private half is stored as the File-type GitLab
|
||||||
# Scope: public_repo (or repo for private). If the variable isn't
|
# CI/CD variable GITHUB_MIRROR_SSH_KEY (Protected). The matching
|
||||||
# set the mirror job is skipped — image builds still run.
|
# public key is added to github.com/BigBodyCobain/Shadowbroker/
|
||||||
|
# settings/keys with write access. This is a tighter-scoped
|
||||||
|
# replacement for a personal access token: it can ONLY push to
|
||||||
|
# Shadowbroker, never expires, and rotating it is a one-click
|
||||||
|
# delete on GitHub's deploy-keys page. If the variable isn't set,
|
||||||
|
# the mirror job is skipped — image builds still run.
|
||||||
|
|
||||||
stages:
|
stages:
|
||||||
- build
|
- build
|
||||||
@@ -48,7 +57,11 @@ variables:
|
|||||||
- docker info
|
- docker info
|
||||||
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
||||||
- docker run --privileged --rm tonistiigi/binfmt --install all
|
- docker run --privileged --rm tonistiigi/binfmt --install all
|
||||||
- docker buildx create --use --name multiarch --driver docker-container
|
# buildx --driver docker-container can't read TLS from the env vars
|
||||||
|
# the GitLab dind service exports. Wrap them in a docker context and
|
||||||
|
# bind buildx to it. See https://docs.gitlab.com/ee/ci/docker/using_docker_build.html#use-docker-buildx
|
||||||
|
- docker context create tls-env
|
||||||
|
- docker buildx create --use --name multiarch --driver docker-container tls-env
|
||||||
|
|
||||||
# ── Backend image ────────────────────────────────────────────────────────
|
# ── Backend image ────────────────────────────────────────────────────────
|
||||||
build-backend:
|
build-backend:
|
||||||
@@ -93,18 +106,35 @@ build-frontend:
|
|||||||
- .gitlab-ci.yml
|
- .gitlab-ci.yml
|
||||||
|
|
||||||
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
||||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker.
|
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker via SSH
|
||||||
# Fast-forward-only — if GitLab main and GitHub main have diverged, this
|
# using a per-repo deploy key. Fast-forward-only by default — if GitLab
|
||||||
# fails loudly rather than silently overwriting either side.
|
# main and GitHub main have diverged, the push fails loudly rather than
|
||||||
|
# silently overwriting either side.
|
||||||
#
|
#
|
||||||
# Only runs if GITHUB_MIRROR_TOKEN is set as a CI/CD variable. See the
|
# Only runs if GITHUB_MIRROR_SSH_KEY is set as a File-type CI/CD variable.
|
||||||
# header comment of this file for setup instructions.
|
# See the header comment of this file for setup instructions.
|
||||||
mirror-to-github:
|
mirror-to-github:
|
||||||
stage: mirror
|
stage: mirror
|
||||||
image: alpine:3.20
|
image: alpine:3.20
|
||||||
needs: []
|
needs: []
|
||||||
before_script:
|
before_script:
|
||||||
- apk add --no-cache git openssh-client ca-certificates
|
- apk add --no-cache git openssh-client ca-certificates
|
||||||
|
- mkdir -p ~/.ssh
|
||||||
|
- chmod 700 ~/.ssh
|
||||||
|
# Install the deploy key. File-type CI variable exposes the path; copy
|
||||||
|
# to ~/.ssh/id_ed25519 with restrictive perms so ssh accepts it.
|
||||||
|
- cp "$GITHUB_MIRROR_SSH_KEY" ~/.ssh/id_ed25519
|
||||||
|
- chmod 600 ~/.ssh/id_ed25519
|
||||||
|
# Pin github.com's current host keys so we never trust a man-in-the-
|
||||||
|
# middle. Sourced from https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
|
||||||
|
# (rotated 2023-03-24 after the previous RSA key leak).
|
||||||
|
- |
|
||||||
|
cat > ~/.ssh/known_hosts <<'EOF'
|
||||||
|
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
|
||||||
|
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
|
||||||
|
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
|
||||||
|
EOF
|
||||||
|
- chmod 644 ~/.ssh/known_hosts
|
||||||
script:
|
script:
|
||||||
- git config --global user.email "ci-mirror@gitlab.com"
|
- git config --global user.email "ci-mirror@gitlab.com"
|
||||||
- git config --global user.name "GitLab CI Mirror"
|
- git config --global user.name "GitLab CI Mirror"
|
||||||
@@ -115,7 +145,7 @@ mirror-to-github:
|
|||||||
- cd repo
|
- cd repo
|
||||||
- >
|
- >
|
||||||
git push
|
git push
|
||||||
"https://x-access-token:${GITHUB_MIRROR_TOKEN}@github.com/BigBodyCobain/Shadowbroker.git"
|
"git@github.com:BigBodyCobain/Shadowbroker.git"
|
||||||
"${CI_COMMIT_SHA}:refs/heads/main"
|
"${CI_COMMIT_SHA}:refs/heads/main"
|
||||||
rules:
|
rules:
|
||||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_TOKEN
|
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_SSH_KEY
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ ShadowBroker v0.9.7 ships **InfoNet** (decentralized intelligence mesh + Soverei
|
|||||||
| Channel | Privacy Status | Details |
|
| Channel | Privacy Status | Details |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| **Meshtastic / APRS** | **PUBLIC** | RF radio transmissions are public and interceptable by design. |
|
| **Meshtastic / APRS** | **PUBLIC** | RF radio transmissions are public and interceptable by design. |
|
||||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden. |
|
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not yet hidden, even though the transport is designed to route through Tor and Reticulum (work in progress). |
|
||||||
| **Dead Drop DMs** | **STRONGEST CURRENT LANE** | Token-based epoch mailbox with SAS word verification. Strongest lane in this build, but not yet confidently private. |
|
| **Dead Drop DMs** | **STRONGEST CURRENT LANE** | Token-based epoch mailbox with SAS word verification. Strongest lane in this build, but not yet confidently private. |
|
||||||
| **Sovereign Shell governance** | **PUBLIC LEDGER** | Petitions, votes, upgrade hashes, and dispute stakes are signed events on a public hashchain. Pseudonymous via gate persona, but governance actions are intentionally observable. |
|
| **Sovereign Shell governance** | **PUBLIC LEDGER** | Petitions, votes, upgrade hashes, and dispute stakes are signed events on a public hashchain. Pseudonymous via gate persona, but governance actions are intentionally observable. |
|
||||||
| **Privacy primitives (RingCT / stealth / DEX)** | **NOT YET WIRED** | Locked Protocol contracts are in place, but the cryptographic scheme has not been chosen. The privacy-core Rust crate is the integration target for a future sprint. |
|
| **Privacy primitives (RingCT / stealth / DEX)** | **NOT YET WIRED** | Locked Protocol contracts are in place, but the cryptographic scheme has not been chosen. The privacy-core Rust crate is the integration target for a future sprint. |
|
||||||
@@ -199,7 +199,7 @@ The first decentralized intelligence communication and governance layer built di
|
|||||||
|
|
||||||
**Communication layer (since v0.9.6):**
|
**Communication layer (since v0.9.6):**
|
||||||
|
|
||||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
* **InfoNet Experimental Testnet** — A global, obfuscated message relay using Tor and Reticulum. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||||
* **Mesh Chat Panel** — Three-tab interface: **INFONET** (gate chat with obfuscated transport), **MESH** (Meshtastic radio integration), **DEAD DROP** (peer-to-peer message exchange with token-based epoch mailboxes — strongest current lane).
|
* **Mesh Chat Panel** — Three-tab interface: **INFONET** (gate chat with obfuscated transport), **MESH** (Meshtastic radio integration), **DEAD DROP** (peer-to-peer message exchange with token-based epoch mailboxes — strongest current lane).
|
||||||
* **Gate Persona System** — Pseudonymous identities with Ed25519 signing keys, prekey bundles, SAS word contact verification, and abuse reporting.
|
* **Gate Persona System** — Pseudonymous identities with Ed25519 signing keys, prekey bundles, SAS word contact verification, and abuse reporting.
|
||||||
* **Mesh Terminal** — Built-in CLI: `send`, `dm`, market commands, gate state inspection. Draggable panel, minimizes to the top bar. Type `help` to see all commands.
|
* **Mesh Terminal** — Built-in CLI: `send`, `dm`, market commands, gate state inspection. Draggable panel, minimizes to the top bar. Type `help` to see all commands.
|
||||||
|
|||||||
+28
-7
@@ -11,6 +11,13 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
|||||||
|
|
||||||
# ── Optional ───────────────────────────────────────────────────
|
# ── Optional ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# AISHub REST fallback. Used when stream.aisstream.io is unreachable
|
||||||
|
# (e.g. their cert expires or server goes offline). Free tier requires
|
||||||
|
# registration at https://www.aishub.net/api. Poll cadence defaults to
|
||||||
|
# 20 min to stay courteous; tunable via AISHUB_POLL_INTERVAL_MINUTES.
|
||||||
|
# AISHUB_USERNAME=
|
||||||
|
# AISHUB_POLL_INTERVAL_MINUTES=20
|
||||||
|
|
||||||
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
||||||
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
||||||
|
|
||||||
@@ -24,14 +31,28 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
|||||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||||
# ALLOW_INSECURE_ADMIN=false
|
# ALLOW_INSECURE_ADMIN=false
|
||||||
|
|
||||||
# Default outbound User-Agent for all third-party HTTP fetchers.
|
# Per-install operator handle. Round 7a: every outbound third-party API
|
||||||
# Project-generic by default — does NOT include any personal contact info or
|
# call (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||||
# operator-specific identifier. Override only if you run a public relay and
|
# weather.gov, NUFORC, etc.) includes this handle in the User-Agent so
|
||||||
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
|
# upstreams can rate-limit / contact the specific install instead of
|
||||||
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
|
# treating every Shadowbroker user as one entity.
|
||||||
|
#
|
||||||
|
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||||
|
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||||
|
# Operators who want a meaningful handle (real name, org, GitHub login) can
|
||||||
|
# set it here. Special characters are sanitized to dashes.
|
||||||
|
# OPERATOR_HANDLE=
|
||||||
|
|
||||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
# Default outbound User-Agent for all third-party HTTP fetchers. Operators
|
||||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
|
# who run a public relay and want a completely custom UA can set this; it
|
||||||
|
# bypasses the per-operator helper entirely. Most installs should leave it
|
||||||
|
# unset and use OPERATOR_HANDLE instead.
|
||||||
|
# SHADOWBROKER_USER_AGENT=
|
||||||
|
|
||||||
|
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||||
|
# use the per-install handle (default) — set only if you have a registered
|
||||||
|
# Nominatim relay identity.
|
||||||
|
# NOMINATIM_USER_AGENT=
|
||||||
|
|
||||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||||
# These data sources phone home to politically/commercially sensitive
|
# These data sources phone home to politically/commercially sensitive
|
||||||
|
|||||||
+89
-9
@@ -45,6 +45,7 @@ from services.mesh.mesh_compatibility import (
|
|||||||
from services.mesh.mesh_crypto import (
|
from services.mesh.mesh_crypto import (
|
||||||
_derive_peer_key,
|
_derive_peer_key,
|
||||||
normalize_peer_url,
|
normalize_peer_url,
|
||||||
|
resolve_peer_key_for_url,
|
||||||
verify_signature,
|
verify_signature,
|
||||||
verify_node_binding,
|
verify_node_binding,
|
||||||
parse_public_key_algo,
|
parse_public_key_algo,
|
||||||
@@ -245,15 +246,90 @@ def _docker_bridge_local_operator_enabled() -> bool:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Issue #250 (tg12): the previous implementation returned True for any IP
|
||||||
|
# in the entire 172.16.0.0/12 range. Anyone with `docker run` access on
|
||||||
|
# the same daemon could spin up a container that automatically passed
|
||||||
|
# local-operator auth. The fix narrows trust to ONLY connections whose
|
||||||
|
# source IP matches the configured frontend container's hostname.
|
||||||
|
#
|
||||||
|
# Docker DNS resolves both the compose service name (``frontend``) and
|
||||||
|
# the explicit ``container_name`` (``shadowbroker-frontend``) to the
|
||||||
|
# frontend container's bridge IP. We forward-resolve both, cache the
|
||||||
|
# result for 30s, and only trust connections from those exact IPs.
|
||||||
|
#
|
||||||
|
# Operators on shared Docker hosts get the benefit of the narrower
|
||||||
|
# surface. Operators on single-user installs see no behavior change —
|
||||||
|
# their frontend container still resolves and is still trusted.
|
||||||
|
_DOCKER_BRIDGE_TRUST_CACHE: dict = {"ips": frozenset(), "expires": 0.0}
|
||||||
|
_DOCKER_BRIDGE_TRUST_TTL = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
def _trusted_bridge_frontend_hostnames() -> list[str]:
|
||||||
|
"""Container hostnames whose IPs we treat as local-operator on the bridge.
|
||||||
|
|
||||||
|
Default covers both Docker Compose service name (``frontend``) and the
|
||||||
|
explicit ``container_name`` from the shipped docker-compose.yml
|
||||||
|
(``shadowbroker-frontend``). Operators with non-default names can
|
||||||
|
override via the ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS`` env var
|
||||||
|
(comma-separated, no spaces).
|
||||||
|
"""
|
||||||
|
raw = str(
|
||||||
|
os.environ.get(
|
||||||
|
"SHADOWBROKER_TRUSTED_FRONTEND_HOSTS",
|
||||||
|
"frontend,shadowbroker-frontend",
|
||||||
|
)
|
||||||
|
).strip()
|
||||||
|
return [h.strip() for h in raw.split(",") if h.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_trusted_bridge_ips() -> frozenset[str]:
    """Resolve trusted frontend hostnames to a set of IPs, with caching.

    The result is cached in ``_DOCKER_BRIDGE_TRUST_CACHE`` for
    ``_DOCKER_BRIDGE_TRUST_TTL`` seconds so we don't hit DNS on every
    request. The cache is process-local — a frontend container IP change
    during the backend's lifetime is picked up once the TTL lapses.

    The TTL is measured on the monotonic clock, so a wall-clock step
    (NTP correction, VM/container resume) can neither extend nor shorten
    the window in which a stale IP stays trusted.

    Returns:
        A frozenset of address strings as returned by
        ``socket.gethostbyname_ex`` (IPv4 only). Empty if none of the
        configured hostnames resolve (fail-closed — when in doubt,
        refuse to trust the bridge). An empty result is cached for the
        same TTL as a non-empty one.
    """
    import socket
    import time as _time

    now = _time.monotonic()
    cache = _DOCKER_BRIDGE_TRUST_CACHE
    if cache["expires"] > now:
        return cache["ips"]

    ips: set[str] = set()
    for hostname in _trusted_bridge_frontend_hostnames():
        try:
            _, _, addrs = socket.gethostbyname_ex(hostname)
        except OSError:
            # socket.gaierror / socket.herror are OSError subclasses; an
            # unresolvable name simply contributes no trusted addresses.
            continue
        ips.update(addrs)

    resolved = frozenset(ips)
    cache["ips"] = resolved
    cache["expires"] = now + _DOCKER_BRIDGE_TRUST_TTL
    return resolved
|
||||||
|
|
||||||
|
|
||||||
def _is_docker_bridge_host(host: str) -> bool:
|
def _is_docker_bridge_host(host: str) -> bool:
|
||||||
|
"""Return True only when the source IP matches our trusted frontend
|
||||||
|
container hostname(s).
|
||||||
|
|
||||||
|
Previously trusted any 172.16.0.0/12 IP unconditionally. See the
|
||||||
|
block comment above for the security rationale.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
ip = ipaddress.ip_address(host)
|
ip = ipaddress.ip_address(host)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
# Docker Desktop and the default compose bridge normally sit inside
|
# Public IPs are never our frontend container — skip DNS work for them.
|
||||||
# 172.16.0.0/12. Keep this narrower than "any private IP" so a user who
|
if not ip.is_private:
|
||||||
# intentionally binds the backend to LAN does not silently trust LAN clients.
|
return False
|
||||||
return ip in ipaddress.ip_network("172.16.0.0/12")
|
return host in _resolve_trusted_bridge_ips()
|
||||||
|
|
||||||
|
|
||||||
def _is_trusted_local_runtime_host(host: str) -> bool:
|
def _is_trusted_local_runtime_host(host: str) -> bool:
|
||||||
@@ -1328,11 +1404,15 @@ def _peer_hmac_url_from_request(request: Request) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||||
"""Verify HMAC-SHA256 peer authentication on push requests."""
|
"""Verify HMAC-SHA256 peer authentication on push requests.
|
||||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
|
||||||
if not secret:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
Issue #256: ``resolve_peer_key_for_url`` looks up a per-peer secret
|
||||||
|
in ``MESH_PEER_SECRETS`` first, then falls back to the global
|
||||||
|
``MESH_PEER_PUSH_SECRET``. When a peer URL is listed in the per-peer
|
||||||
|
map, only the listed secret is accepted for it — the global secret
|
||||||
|
is ignored, so any peer that knows only the global secret cannot
|
||||||
|
forge a request claiming to be that peer.
|
||||||
|
"""
|
||||||
provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
|
provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
|
||||||
if not provided:
|
if not provided:
|
||||||
return False
|
return False
|
||||||
@@ -1341,7 +1421,7 @@ def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
|||||||
allowed_peers = set(authenticated_push_peer_urls())
|
allowed_peers = set(authenticated_push_peer_urls())
|
||||||
if not peer_url or peer_url not in allowed_peers:
|
if not peer_url or peer_url not in allowed_peers:
|
||||||
return False
|
return False
|
||||||
peer_key = _derive_peer_key(secret, peer_url)
|
peer_key = resolve_peer_key_for_url(peer_url)
|
||||||
if not peer_key:
|
if not peer_key:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,120 @@
|
|||||||
|
{
|
||||||
|
"_meta": {
|
||||||
|
"as_of": "2026-03-09",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker",
|
||||||
|
"source_url": "https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026",
|
||||||
|
"note": "One-shot bootstrap for first-run carrier positions. Once carrier_cache.json exists in the runtime data volume, this seed file is never read again. All subsequent updates come from GDELT (and any future sources) and are written to carrier_cache.json. A year from now, your runtime cache reflects whatever your install has observed since first launch — not these snapshot positions."
|
||||||
|
},
|
||||||
|
"carriers": {
|
||||||
|
"CVN-68": {
|
||||||
|
"lat": 47.5535,
|
||||||
|
"lng": -122.6400,
|
||||||
|
"heading": 90,
|
||||||
|
"desc": "Bremerton, WA (Maintenance)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-76": {
|
||||||
|
"lat": 47.5580,
|
||||||
|
"lng": -122.6360,
|
||||||
|
"heading": 90,
|
||||||
|
"desc": "Bremerton, WA (Decommissioning)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-69": {
|
||||||
|
"lat": 36.9465,
|
||||||
|
"lng": -76.3265,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-78": {
|
||||||
|
"lat": 18.0,
|
||||||
|
"lng": 39.5,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-74": {
|
||||||
|
"lat": 36.98,
|
||||||
|
"lng": -76.43,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-75": {
|
||||||
|
"lat": 36.0,
|
||||||
|
"lng": 15.0,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-77": {
|
||||||
|
"lat": 36.5,
|
||||||
|
"lng": -74.0,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-70": {
|
||||||
|
"lat": 32.6840,
|
||||||
|
"lng": -117.1290,
|
||||||
|
"heading": 180,
|
||||||
|
"desc": "San Diego, CA (Homeport)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-71": {
|
||||||
|
"lat": 32.6885,
|
||||||
|
"lng": -117.1280,
|
||||||
|
"heading": 180,
|
||||||
|
"desc": "San Diego, CA (Maintenance)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-72": {
|
||||||
|
"lat": 20.0,
|
||||||
|
"lng": 64.0,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
},
|
||||||
|
"CVN-73": {
|
||||||
|
"lat": 35.2830,
|
||||||
|
"lng": 139.6700,
|
||||||
|
"heading": 180,
|
||||||
|
"desc": "Yokosuka, Japan (Forward deployed)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
{
|
||||||
|
"_comment": [
|
||||||
|
"Baked-in SHA-256 digests for known Shadowbroker release archives.",
|
||||||
|
"",
|
||||||
|
"Issue #231: the self-updater previously skipped integrity verification",
|
||||||
|
"entirely whenever the MESH_UPDATE_SHA256 env var was unset (which is the",
|
||||||
|
"default — nothing in the install docs tells operators to set it). That",
|
||||||
|
"made the auto-update a supply-chain RCE on any compromise of the GitHub",
|
||||||
|
"release pipeline.",
|
||||||
|
"",
|
||||||
|
"The fix uses a multi-source verification chain mirroring the Tor bundle",
|
||||||
|
"digest approach in #201:",
|
||||||
|
"",
|
||||||
|
" 1. MESH_UPDATE_SHA256 env var (operator override, preserved)",
|
||||||
|
" 2. SHA256SUMS.txt asset published alongside each release (primary —",
|
||||||
|
" the maintainer's release process already publishes this)",
|
||||||
|
" 3. This baked-in digest list (second line of defense for releases",
|
||||||
|
" missing a SHA256SUMS asset, or when the asset can't be fetched)",
|
||||||
|
" 4. HTTPS-only fallback with a loud warning (preserves auto-update",
|
||||||
|
" flow during transient outages so users don't get stuck)",
|
||||||
|
"",
|
||||||
|
"Mismatch from a source that DID respond is fatal — the update is",
|
||||||
|
"refused and the existing install keeps running. Only the 'no source",
|
||||||
|
"reachable at all' case falls back to HTTPS-only.",
|
||||||
|
"",
|
||||||
|
"Format: each entry is keyed by release tag and maps asset filenames",
|
||||||
|
"to their canonical SHA-256 digest (hex, lowercase). The updater",
|
||||||
|
"compares the locally-computed digest of the downloaded asset against",
|
||||||
|
"the value here.",
|
||||||
|
"",
|
||||||
|
"When the maintainer ships a new release, add its digests here BEFORE",
|
||||||
|
"removing the old ones so operators on the old code still validate",
|
||||||
|
"against the previous entries during the transition."
|
||||||
|
],
|
||||||
|
"v0.9.79": {
|
||||||
|
"ShadowBroker_v0.9.79.zip": "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47",
|
||||||
|
"ShadowBroker_0.9.79_x64-setup.exe": "f7b676ada45cac7da05868b0a353678c9ee700e3abcf456a7c0c038c36da446f",
|
||||||
|
"ShadowBroker_0.9.79_x64_en-US.msi": "e0713c3cdda184cfbea750bfac0d62a35678fec00847e6476f2cac8e7e42046e"
|
||||||
|
},
|
||||||
|
"v0.9.8": {
|
||||||
|
"ShadowBroker_v0.9.8.zip": "183bb5cd62b9b9349d95df5ef7696cb6ca810ab4b991fa9dab6f898af4c7a175",
|
||||||
|
"ShadowBroker_0.9.8_x64-setup.exe": "94a0309862e9c81c92cdcbfea8eec9dbb97eef19ded82b26217b397defbc810c",
|
||||||
|
"ShadowBroker_0.9.8_x64_en-US.msi": "fe22f9d51e4360d74c18a7250c2fbb9ed4fa4c7a884b3ac0d04a21115466386b"
|
||||||
|
},
|
||||||
|
"v0.9.81": {
|
||||||
|
"ShadowBroker_v0.9.81.zip": "f81f454bdc88e9a32c351df38212b8cfa624704d65764b971bb091eef62259c6",
|
||||||
|
"ShadowBroker_0.9.81_x64-setup.exe": "25e9a95d0d8ce959a7d08fe8e7406772ae24b596652793e81d1de5d02510a5a6",
|
||||||
|
"ShadowBroker_0.9.81_x64_en-US.msi": "34e655fc0c0f195ee4ac978f228a4b2b9d5565253b8771aca9ef4693409e9e70"
|
||||||
|
}
|
||||||
|
}
|
||||||
+105
-1
@@ -1,4 +1,108 @@
|
|||||||
|
"""Rate-limit key function for slowapi.
|
||||||
|
|
||||||
|
Issue #287 (tg12): the previous implementation used
|
||||||
|
``slowapi.util.get_remote_address`` which only ever returns
|
||||||
|
``request.client.host``. Behind the bundled Next.js proxy (or any other
|
||||||
|
reverse proxy), every connected operator's ``client.host`` is the
|
||||||
|
frontend container's bridge IP. ``@limiter.limit("120/minute")`` then
|
||||||
|
collapses into one shared bucket for everybody on the same backend —
|
||||||
|
one heavy tab can starve every other operator on the node.
|
||||||
|
|
||||||
|
This module replaces that key function with one that:
|
||||||
|
|
||||||
|
* Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted
|
||||||
|
frontend container (same allowlist used by the Docker bridge
|
||||||
|
local-operator trust path — see ``backend/auth.py`` ``#250``).
|
||||||
|
* Picks the FIRST entry in the XFF chain. That's the client end of
|
||||||
|
the proxy chain, which is the operator we want to bucket on.
|
||||||
|
* Falls back to ``request.client.host`` for any peer that isn't on
|
||||||
|
the trusted-frontend allowlist. Direct hits, unrelated containers,
|
||||||
|
and unknown hosts are bucketed exactly like before — there is no
|
||||||
|
way for an untrusted caller to spoof XFF and steal another
|
||||||
|
operator's rate-limit bucket.
|
||||||
|
|
||||||
|
Single-operator nodes are unaffected: the frontend resolves to one IP,
|
||||||
|
that IP is on the trust list, the XFF header is read, and you get one
|
||||||
|
bucket per operator (i.e. you).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from slowapi import Limiter
|
from slowapi import Limiter
|
||||||
from slowapi.util import get_remote_address
|
from slowapi.util import get_remote_address
|
||||||
|
|
||||||
limiter = Limiter(key_func=get_remote_address)
|
|
||||||
|
def _client_host(request: Any) -> str:
|
||||||
|
"""Return the immediate peer's IP, normalised to a lowercase string."""
|
||||||
|
client = getattr(request, "client", None)
|
||||||
|
if client is None:
|
||||||
|
return ""
|
||||||
|
host = getattr(client, "host", "") or ""
|
||||||
|
return host.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _first_forwarded_for(value: str) -> str:
|
||||||
|
"""Return the first non-empty entry from an ``X-Forwarded-For`` header.
|
||||||
|
|
||||||
|
RFC 7239 / de-facto XFF format is ``client, proxy1, proxy2, …``. The
|
||||||
|
client end is what we want to bucket on. Empty parts (which appear
|
||||||
|
in some malformed headers) are skipped so we don't end up keying on
|
||||||
|
an empty string.
|
||||||
|
"""
|
||||||
|
for raw in value.split(","):
|
||||||
|
candidate = raw.strip()
|
||||||
|
if candidate:
|
||||||
|
return candidate.lower()
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _is_trusted_frontend_peer(host: str) -> bool:
|
||||||
|
"""True iff ``host`` is one of the resolved trusted-frontend IPs.
|
||||||
|
|
||||||
|
Imported lazily so this module stays usable in unit tests that
|
||||||
|
don't want to pull the whole auth module into scope.
|
||||||
|
"""
|
||||||
|
if not host:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
from auth import _resolve_trusted_bridge_ips
|
||||||
|
except Exception: # pragma: no cover - defensive
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
trusted_ips = _resolve_trusted_bridge_ips()
|
||||||
|
except Exception: # pragma: no cover - defensive
|
||||||
|
return False
|
||||||
|
return host in trusted_ips
|
||||||
|
|
||||||
|
|
||||||
|
def shadowbroker_rate_limit_key(request: Any) -> str:
    """slowapi ``key_func`` that honours ``X-Forwarded-For`` for trusted peers only.

    Outcomes:

    * Immediate peer is not a trusted frontend -> ``get_remote_address(request)``,
      i.e. slowapi's stock behaviour.
    * Peer is trusted and an ``X-Forwarded-For`` header yields a non-empty
      first entry -> that entry is the bucket key.
    * Peer is trusted but the request has no headers, no XFF header, or an
      XFF header with only blank entries -> ``get_remote_address(request)``.

    NOTE(review): the leftmost XFF entry is client-supplied if the trusted
    proxy appends to an incoming header instead of overwriting it — confirm
    the frontend proxy's behaviour before relying on this as spoof-proof.
    """
    if not _is_trusted_frontend_peer(_client_host(request)):
        return get_remote_address(request)

    header_map = getattr(request, "headers", None)
    if header_map is None:
        return get_remote_address(request)

    forwarded = header_map.get("x-forwarded-for") or header_map.get("X-Forwarded-For")
    operator_key = _first_forwarded_for(forwarded) if forwarded else ""
    # Empty key means "no usable XFF": fall back to the stock slowapi key.
    return operator_key or get_remote_address(request)
|
||||||
|
|
||||||
|
|
||||||
|
limiter = Limiter(key_func=shadowbroker_rate_limit_key)
|
||||||
|
|||||||
+511
-122
File diff suppressed because it is too large
Load Diff
@@ -7,13 +7,12 @@ py-modules = []
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "backend"
|
name = "backend"
|
||||||
version = "0.9.79"
|
version = "0.9.81"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"apscheduler==3.10.3",
|
"apscheduler==3.10.3",
|
||||||
"beautifulsoup4>=4.9.0",
|
"beautifulsoup4>=4.9.0",
|
||||||
"cachetools==5.5.2",
|
"cachetools==5.5.2",
|
||||||
"cloudscraper==1.2.71",
|
|
||||||
"cryptography>=41.0.0",
|
"cryptography>=41.0.0",
|
||||||
"defusedxml>=0.7.1",
|
"defusedxml>=0.7.1",
|
||||||
"fastapi==0.115.12",
|
"fastapi==0.115.12",
|
||||||
@@ -44,7 +43,7 @@ dev = ["pytest>=8.3.4", "pytest-asyncio==0.25.0", "ruff>=0.9.0", "black>=24.0.0"
|
|||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
# The current backend carries historical style debt in large legacy modules.
|
# The current backend carries historical style debt in large legacy modules.
|
||||||
# Keep CI focused on actionable correctness checks for the v0.9.79 release.
|
# Keep CI focused on actionable correctness checks for the v0.9.81 release.
|
||||||
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
||||||
|
|
||||||
[tool.black]
|
[tool.black]
|
||||||
|
|||||||
@@ -82,9 +82,40 @@ async def api_get_keys_meta(request: Request):
|
|||||||
return get_env_path_info()
|
return get_env_path_info()
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/settings/news-feeds")
|
@router.get(
|
||||||
|
"/api/settings/operator-handle",
|
||||||
|
dependencies=[Depends(require_local_operator)],
|
||||||
|
)
|
||||||
|
@limiter.limit("60/minute")
|
||||||
|
async def api_get_operator_handle(request: Request):
|
||||||
|
"""Round 7a: return the per-install operator handle so the frontend
|
||||||
|
can include it in browser-direct third-party API calls (Wikipedia /
|
||||||
|
Wikidata via lib/wikimediaClient). The handle is auto-generated on
|
||||||
|
first use; operators can override it via the OPERATOR_HANDLE setting
|
||||||
|
or the env var of the same name.
|
||||||
|
|
||||||
|
Gated on local-operator: legitimate browser usage goes through the
|
||||||
|
Next.js proxy which auto-attaches the admin key; remote scanners get
|
||||||
|
403. The handle itself isn't a secret (it's sent to every third-party
|
||||||
|
API the operator touches), but admin-gating it matches the rest of
|
||||||
|
the settings endpoints and follows least-privilege.
|
||||||
|
"""
|
||||||
|
from services.network_utils import get_operator_handle
|
||||||
|
return {"handle": get_operator_handle()}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/api/settings/news-feeds",
|
||||||
|
dependencies=[Depends(require_local_operator)],
|
||||||
|
)
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def api_get_news_feeds(request: Request):
|
async def api_get_news_feeds(request: Request):
|
||||||
|
"""Issue #252 (tg12): the curated feed inventory is configuration
|
||||||
|
state, not a public data feed. Gated on local-operator so the
|
||||||
|
Tauri shell, the Docker bridge frontend, and any caller with an
|
||||||
|
admin key all see the full list; anonymous LAN/internet callers
|
||||||
|
can no longer enumerate operator source URLs.
|
||||||
|
"""
|
||||||
from services.news_feed_config import get_feeds
|
from services.news_feed_config import get_feeds
|
||||||
return get_feeds()
|
return get_feeds()
|
||||||
|
|
||||||
@@ -118,9 +149,18 @@ async def api_reset_news_feeds(request: Request):
|
|||||||
@router.get("/api/settings/node")
|
@router.get("/api/settings/node")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def api_get_node_settings(request: Request):
|
async def api_get_node_settings(request: Request):
|
||||||
|
"""Issue #243 (tg12): node_mode and node_enabled are operational
|
||||||
|
posture. Anonymous callers receive an empty stub; authenticated
|
||||||
|
callers (local-operator or admin/scoped token) see the full
|
||||||
|
state. See the canonical handler in backend/main.py for the full
|
||||||
|
rationale.
|
||||||
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from auth import _scoped_view_authenticated
|
||||||
from services.node_settings import read_node_settings
|
from services.node_settings import read_node_settings
|
||||||
data = await asyncio.to_thread(read_node_settings)
|
data = await asyncio.to_thread(read_node_settings)
|
||||||
|
if not _scoped_view_authenticated(request, "node"):
|
||||||
|
return {}
|
||||||
return {
|
return {
|
||||||
**data,
|
**data,
|
||||||
"node_mode": _current_node_mode(),
|
"node_mode": _current_node_mode(),
|
||||||
@@ -210,9 +250,19 @@ async def api_set_meshtastic_mqtt_settings(request: Request, body: MeshtasticMqt
|
|||||||
return _meshtastic_runtime_snapshot()
|
return _meshtastic_runtime_snapshot()
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/settings/timemachine")
|
@router.get(
|
||||||
|
"/api/settings/timemachine",
|
||||||
|
dependencies=[Depends(require_local_operator)],
|
||||||
|
)
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def api_get_timemachine_settings(request: Request):
|
async def api_get_timemachine_settings(request: Request):
|
||||||
|
"""Issue #253 (tg12): archival-capture posture is operationally
|
||||||
|
sensitive — it tells a remote caller whether this deployment is
|
||||||
|
retaining replayable historical surveillance data. Gated on
|
||||||
|
local-operator so the Tauri shell and Docker bridge frontend
|
||||||
|
still see the toggle state, but anonymous LAN/internet callers
|
||||||
|
can no longer fingerprint Time Machine state.
|
||||||
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
from services.node_settings import read_node_settings
|
from services.node_settings import read_node_settings
|
||||||
data = await asyncio.to_thread(read_node_settings)
|
data = await asyncio.to_thread(read_node_settings)
|
||||||
|
|||||||
+202
-42
@@ -18,6 +18,12 @@ from auth import require_local_operator, require_openclaw_or_local
|
|||||||
from limiter import limiter
|
from limiter import limiter
|
||||||
from services.fetchers._store import latest_data as _latest_data
|
from services.fetchers._store import latest_data as _latest_data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _ai_intel_user_agent() -> str:
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("ai-intel")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -447,7 +453,7 @@ async def ai_satellite_images(
|
|||||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||||
json=search_payload,
|
json=search_payload,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (ai-intel)"},
|
headers={"User-Agent": _ai_intel_user_agent()},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
features = resp.json().get("features", [])
|
features = resp.json().get("features", [])
|
||||||
@@ -1584,7 +1590,7 @@ async def agent_tool_manifest(request: Request):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"version": "0.9.79",
|
"version": "0.9.81",
|
||||||
"access_tier": access_tier,
|
"access_tier": access_tier,
|
||||||
"available_commands": available_commands,
|
"available_commands": available_commands,
|
||||||
"transport": {
|
"transport": {
|
||||||
@@ -2220,7 +2226,7 @@ async def api_capabilities(request: Request):
|
|||||||
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"version": "0.9.79",
|
"version": "0.9.81",
|
||||||
"auth": {
|
"auth": {
|
||||||
"method": "HMAC-SHA256",
|
"method": "HMAC-SHA256",
|
||||||
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
||||||
@@ -2515,45 +2521,85 @@ async def api_capabilities(request: Request):
|
|||||||
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
||||||
# These endpoints manage the HMAC secret itself, so they MUST require
|
# These endpoints manage the HMAC secret itself, so they MUST require
|
||||||
# local operator access to prevent privilege escalation.
|
# local operator access to prevent privilege escalation.
|
||||||
|
#
|
||||||
|
# Issue #302 (tg12): pre-fix, GET /api/ai/connect-info had two problems:
|
||||||
|
#
|
||||||
|
# 1. ``?reveal=true`` made the full secret travel through every operator
|
||||||
|
# page-load that opened the Connect modal. Even gated to
|
||||||
|
# ``require_local_operator``, that put the secret into browser
|
||||||
|
# history, dev-tools network panels, browser disk caches, HAR
|
||||||
|
# exports, and screen captures. Every time the modal opened.
|
||||||
|
#
|
||||||
|
# 2. The same GET endpoint auto-bootstrapped (generated + persisted)
|
||||||
|
# the secret on first read. Side effects on a GET are a footgun:
|
||||||
|
# browser prefetchers, mirror tools, and casual curl-from-history
|
||||||
|
# would all silently mint+persist a fresh secret. (Gated, but
|
||||||
|
# still surprising — and noisy in the audit log.)
|
||||||
|
#
|
||||||
|
# Resolution:
|
||||||
|
#
|
||||||
|
# GET /api/ai/connect-info — always returns the MASKED
|
||||||
|
# secret. No ?reveal param.
|
||||||
|
# No auto-bootstrap; if the
|
||||||
|
# secret is missing,
|
||||||
|
# ``hmac_secret_set: false``
|
||||||
|
# tells the frontend to call
|
||||||
|
# /bootstrap.
|
||||||
|
#
|
||||||
|
# POST /api/ai/connect-info/bootstrap — NEW. Generates + persists the
|
||||||
|
# secret if missing. Idempotent.
|
||||||
|
# Returns metadata only, never
|
||||||
|
# the full secret.
|
||||||
|
#
|
||||||
|
# POST /api/ai/connect-info/reveal — NEW. Returns the full secret in
|
||||||
|
# the body with strict
|
||||||
|
# ``Cache-Control: no-store,
|
||||||
|
# no-cache, must-revalidate``
|
||||||
|
# + ``Pragma: no-cache`` so
|
||||||
|
# it does not land in browser
|
||||||
|
# caches. POST means it does
|
||||||
|
# not land in URL history.
|
||||||
|
#
|
||||||
|
# POST /api/ai/connect-info/regenerate — keeps existing one-time-reveal
|
||||||
|
# behavior (regenerate IS a
|
||||||
|
# deliberate destructive action
|
||||||
|
# the operator triggered, so
|
||||||
|
# displaying the new secret
|
||||||
|
# once is the only path that
|
||||||
|
# makes the operation useful).
|
||||||
|
# Same no-store headers added.
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
# Cache-Control headers that should accompany every response carrying the
|
||||||
@limiter.limit("30/minute")
|
# full HMAC secret. Reused across the reveal + regenerate endpoints so a
|
||||||
async def get_connect_info(request: Request, reveal: bool = False):
|
# future refactor that splits or renames them can't forget the headers.
|
||||||
"""Return connection details for the OpenClaw Connect modal.
|
_NO_STORE_HEADERS = {
|
||||||
|
"Cache-Control": "no-store, no-cache, must-revalidate, private",
|
||||||
|
"Pragma": "no-cache",
|
||||||
|
"Expires": "0",
|
||||||
|
}
|
||||||
|
|
||||||
The HMAC secret is masked by default. Pass ?reveal=true to see the full key.
|
|
||||||
Private keys are NEVER returned.
|
def _mask_hmac_secret(secret: str) -> str:
|
||||||
|
"""Return a fingerprint-style mask (first6 + bullets + last4) suitable
|
||||||
|
for display in the UI before the operator clicks Reveal."""
|
||||||
|
if not secret:
|
||||||
|
return ""
|
||||||
|
if len(secret) > 10:
|
||||||
|
return secret[:6] + "••••••••" + secret[-4:]
|
||||||
|
return "••••••••"
|
||||||
|
|
||||||
|
|
||||||
|
def _connect_info_metadata(settings) -> dict:
|
||||||
|
"""Return everything the Connect modal needs EXCEPT the secret itself.
|
||||||
|
|
||||||
|
Shared between GET /api/ai/connect-info (where the full secret is
|
||||||
|
masked) and POST /api/ai/connect-info/bootstrap (where the operator
|
||||||
|
just generated a secret but we don't return it inline — they have to
|
||||||
|
call /reveal to see it).
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
import secrets
|
|
||||||
from services.config import get_settings
|
|
||||||
|
|
||||||
settings = get_settings()
|
|
||||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
|
||||||
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||||
|
|
||||||
# Auto-generate if not set
|
|
||||||
if not hmac_secret:
|
|
||||||
hmac_secret = secrets.token_hex(24) # 48 chars
|
|
||||||
_write_env_value("OPENCLAW_HMAC_SECRET", hmac_secret)
|
|
||||||
# Clear settings cache so next read picks up the new value
|
|
||||||
get_settings.cache_clear()
|
|
||||||
|
|
||||||
masked = hmac_secret[:6] + "••••••••" + hmac_secret[-4:] if len(hmac_secret) > 10 else "••••••••"
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
|
||||||
"hmac_secret": hmac_secret if reveal else masked,
|
|
||||||
"hmac_secret_set": bool(hmac_secret),
|
|
||||||
"bootstrap_behavior": {
|
|
||||||
"auto_generates_when_missing": True,
|
|
||||||
"auto_generated_this_call": not bool(settings.OPENCLAW_HMAC_SECRET or ""),
|
|
||||||
"notes": [
|
|
||||||
"If no HMAC secret exists yet, this endpoint bootstraps one and persists it to .env.",
|
|
||||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"access_tier": access_tier,
|
"access_tier": access_tier,
|
||||||
"trust_model": {
|
"trust_model": {
|
||||||
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
||||||
@@ -2607,24 +2653,138 @@ async def get_connect_info(request: Request, reveal: bool = False):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("30/minute")
|
||||||
async def regenerate_hmac_secret(request: Request):
|
async def get_connect_info(request: Request):
|
||||||
"""Generate a new HMAC secret. Old secret immediately stops working."""
|
"""Return connection details for the OpenClaw Connect modal.
|
||||||
|
|
||||||
|
The HMAC secret is always returned as a fingerprint mask
|
||||||
|
(``first6 + bullets + last4``); the full value is only ever served by
|
||||||
|
``POST /api/ai/connect-info/reveal`` (see #302). When the secret has
|
||||||
|
not been bootstrapped yet, ``hmac_secret_set`` is false and the
|
||||||
|
frontend should call ``POST /api/ai/connect-info/bootstrap``.
|
||||||
|
|
||||||
|
Private keys are NEVER returned.
|
||||||
|
"""
|
||||||
|
from services.config import get_settings
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"ok": True,
|
||||||
|
"masked_hmac_secret": _mask_hmac_secret(hmac_secret),
|
||||||
|
"hmac_secret_set": bool(hmac_secret),
|
||||||
|
"bootstrap_behavior": {
|
||||||
|
"auto_generates_when_missing": False,
|
||||||
|
"notes": [
|
||||||
|
"Call POST /api/ai/connect-info/bootstrap to mint a secret on first use.",
|
||||||
|
"Call POST /api/ai/connect-info/reveal to see the full secret (no-store).",
|
||||||
|
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
**_connect_info_metadata(settings),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/ai/connect-info/bootstrap", dependencies=[Depends(require_local_operator)])
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
async def bootstrap_hmac_secret(request: Request):
|
||||||
|
"""Mint and persist the OpenClaw HMAC secret if it isn't already set.
|
||||||
|
|
||||||
|
Idempotent: if a secret already exists, returns ``generated: false``
|
||||||
|
and leaves the existing secret untouched. Never returns the secret
|
||||||
|
value in the response body — the operator calls
|
||||||
|
``POST /api/ai/connect-info/reveal`` to see it.
|
||||||
|
"""
|
||||||
import secrets
|
import secrets
|
||||||
from services.config import get_settings
|
from services.config import get_settings
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
existing = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||||
|
if existing:
|
||||||
|
return {
|
||||||
|
"ok": True,
|
||||||
|
"generated": False,
|
||||||
|
"hmac_secret_set": True,
|
||||||
|
"masked_hmac_secret": _mask_hmac_secret(existing),
|
||||||
|
"detail": "HMAC secret already configured. Use /reveal to see it.",
|
||||||
|
}
|
||||||
|
|
||||||
new_secret = secrets.token_hex(24) # 48 chars
|
new_secret = secrets.token_hex(24) # 48 chars
|
||||||
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
||||||
get_settings.cache_clear()
|
get_settings.cache_clear()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"hmac_secret": new_secret,
|
"generated": True,
|
||||||
"detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
|
"hmac_secret_set": True,
|
||||||
|
"masked_hmac_secret": _mask_hmac_secret(new_secret),
|
||||||
|
"detail": "HMAC secret generated. Call /reveal to copy it into your OpenClaw config.",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/ai/connect-info/reveal", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def reveal_hmac_secret(request: Request):
    """Return the full OpenClaw HMAC secret in the response body.

    Served over POST so the secret is not requested via a GET URL, and sent
    with ``_NO_STORE_HEADERS`` so caches are told not to keep the response.
    Responds 404 when no secret is configured; the frontend is expected to
    call ``POST /api/ai/connect-info/bootstrap`` first in that case.
    """
    from services.config import get_settings

    configured_secret = str(get_settings().OPENCLAW_HMAC_SECRET or "").strip()
    if configured_secret:
        body = {
            "ok": True,
            "hmac_secret": configured_secret,
            "masked_hmac_secret": _mask_hmac_secret(configured_secret),
        }
        return JSONResponse(content=body, headers=_NO_STORE_HEADERS)

    raise HTTPException(
        404,
        "No HMAC secret configured. Call POST /api/ai/connect-info/bootstrap first.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
@limiter.limit("5/minute")
async def regenerate_hmac_secret(request: Request):
    """Mint a replacement HMAC secret; the old secret immediately stops working.

    The new value is persisted via ``_write_env_value`` and returned once in
    the response body so the operator can update their OpenClaw
    configuration. Apart from ``POST /api/ai/connect-info/reveal``, this is
    the only endpoint that returns the full secret. The response carries
    ``_NO_STORE_HEADERS`` so caches are told not to keep it.
    """
    import secrets
    from services.config import get_settings

    fresh_secret = secrets.token_hex(24)  # 24 random bytes -> 48 hex chars
    _write_env_value("OPENCLAW_HMAC_SECRET", fresh_secret)
    # Drop the cached settings so later reads pick up the new value.
    get_settings.cache_clear()

    payload = {
        "ok": True,
        "hmac_secret": fresh_secret,
        "masked_hmac_secret": _mask_hmac_secret(fresh_secret),
        "detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
    }
    return JSONResponse(content=payload, headers=_NO_STORE_HEADERS)
|
||||||
|
|
||||||
|
|
||||||
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
async def set_access_tier(request: Request, body: dict):
|
async def set_access_tier(request: Request, body: dict):
|
||||||
|
|||||||
@@ -165,7 +165,13 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
|||||||
|
|
||||||
|
|
||||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
||||||
headers = {"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)", **profile.headers}
|
# Round 7a: per-install operator handle. Mozilla/5.0 prefix retained
|
||||||
|
# because many CCTV endpoints sniff for a browser-like prefix.
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
headers = {
|
||||||
|
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||||
|
**profile.headers,
|
||||||
|
}
|
||||||
range_header = request.headers.get("range")
|
range_header = request.headers.get("range")
|
||||||
if range_header:
|
if range_header:
|
||||||
headers["Range"] = range_header
|
headers["Range"] = range_header
|
||||||
|
|||||||
+105
-10
@@ -98,6 +98,88 @@ def _current_etag(prefix: str = "") -> str:
|
|||||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||||
|
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||||
|
# filtering. Light reference layers (datacenters, military_bases,
|
||||||
|
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||||
|
# in these sets — they ship world-scale even when bounds are supplied so
|
||||||
|
# panning never reveals an "empty world" of static infrastructure.
|
||||||
|
#
|
||||||
|
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||||
|
# is byte-for-byte identical to the pre-#288 behavior.
|
||||||
|
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||||
|
"commercial_flights",
|
||||||
|
"military_flights",
|
||||||
|
"private_flights",
|
||||||
|
"private_jets",
|
||||||
|
"tracked_flights",
|
||||||
|
"ships",
|
||||||
|
"cctv",
|
||||||
|
"uavs",
|
||||||
|
"liveuamap",
|
||||||
|
"gps_jamming",
|
||||||
|
"sigint",
|
||||||
|
"trains",
|
||||||
|
)
|
||||||
|
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||||
|
"gdelt",
|
||||||
|
"firms_fires",
|
||||||
|
"kiwisdr",
|
||||||
|
"scanners",
|
||||||
|
"psk_reporter",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_full_bbox(s, w, n, e) -> bool:
|
||||||
|
return None not in (s, w, n, e)
|
||||||
|
|
||||||
|
|
||||||
|
def _bbox_etag_suffix(s, w, n, e) -> str:
    """Return the ETag fragment for a viewport, quantized to whole degrees.

    South/west are floored and north/east are ceiled, so the quantized box
    always contains the requested one. Per the original design note, the 20%
    padding inside ``_bbox_filter`` already absorbs sub-degree pans, so
    quantizing here keeps small mouse drags from invalidating the client's
    ETag cache.

    Returns:
        ``""`` when any bound is missing, when a bound cannot be converted
        to a finite number, or when the window is world-scale (longitude
        span >= 300 or latitude span >= 120 as reported by ``_bbox_spans``).
        World-scale requests therefore share the same ETag as requests with
        no bounds. Otherwise ``"|bbox=S,W,N,E"`` with integer degrees.
    """
    if not _has_full_bbox(s, w, n, e):
        return ""
    try:
        ss = math.floor(float(s))
        ww = math.floor(float(w))
        nn = math.ceil(float(n))
        ee = math.ceil(float(e))
    except (TypeError, ValueError, OverflowError):
        # TypeError/ValueError: non-numeric or NaN input.
        # OverflowError: math.floor/ceil cannot convert +/-infinity to int.
        return ""
    # If the requested window covers basically the whole world, treat it as
    # "no bbox" for caching purposes so world-zoomed clients all hit the
    # same ETag and benefit from the existing 304 path.
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    if lng_span >= 300 or lat_span >= 120:
        return ""
    return f"|bbox={ss},{ww},{nn},{ee}"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
                           s: float, w: float, n: float, e: float) -> dict:
    """Clip the *heavy_keys* collections of *payload* to a viewport, in place.

    Only values that are non-empty lists are touched; anything else stored
    under a heavy key (missing, ``None``, empty list, non-list) is left
    exactly as it was. The actual clipping is delegated to ``_bbox_filter``,
    which per the original note applies a 20% pad, handles antimeridian
    crossings, and passes through items that carry no lat/lng.

    Returns the same *payload* object that was passed in.
    """
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    # World-scale request: skip filtering entirely. Saves the CPU and keeps
    # the response identical in shape to the no-params response.
    world_scale = lng_span >= 300 or lat_span >= 120
    if not world_scale:
        for layer in heavy_keys:
            rows = payload.get(layer)
            if isinstance(rows, list) and rows:
                payload[layer] = _bbox_filter(rows, s, w, n, e)
    return payload
|
||||||
|
|
||||||
|
|
||||||
def _json_safe(value):
|
def _json_safe(value):
|
||||||
if isinstance(value, float):
|
if isinstance(value, float):
|
||||||
return value if math.isfinite(value) else None
|
return value if math.isfinite(value) else None
|
||||||
@@ -479,13 +561,14 @@ async def bootstrap_critical(request: Request):
|
|||||||
@limiter.limit("120/minute")
|
@limiter.limit("120/minute")
|
||||||
async def live_data_fast(
|
async def live_data_fast(
|
||||||
request: Request,
|
request: Request,
|
||||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||||
):
|
):
|
||||||
etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
|
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||||
|
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||||
if request.headers.get("if-none-match") == etag:
|
if request.headers.get("if-none-match") == etag:
|
||||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||||
@@ -525,6 +608,11 @@ async def live_data_fast(
|
|||||||
payload = _cap_fast_startup_payload(payload)
|
payload = _cap_fast_startup_payload(payload)
|
||||||
else:
|
else:
|
||||||
payload = _cap_fast_dashboard_payload(payload)
|
payload = _cap_fast_dashboard_payload(payload)
|
||||||
|
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||||
|
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||||
|
# to the pre-#288 implementation.
|
||||||
|
if _has_full_bbox(s, w, n, e):
|
||||||
|
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||||
|
|
||||||
@@ -533,12 +621,13 @@ async def live_data_fast(
|
|||||||
@limiter.limit("60/minute")
|
@limiter.limit("60/minute")
|
||||||
async def live_data_slow(
|
async def live_data_slow(
|
||||||
request: Request,
|
request: Request,
|
||||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||||
):
|
):
|
||||||
etag = _current_etag(prefix="slow|full|")
|
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||||
|
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||||
if request.headers.get("if-none-match") == etag:
|
if request.headers.get("if-none-match") == etag:
|
||||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||||
@@ -592,6 +681,12 @@ async def live_data_slow(
|
|||||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||||
"freshness": freshness,
|
"freshness": freshness,
|
||||||
}
|
}
|
||||||
|
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||||
|
# are supplied. Static reference layers (datacenters, military bases,
|
||||||
|
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||||
|
# hides the infrastructure overlay the operator already has on screen.
|
||||||
|
if _has_full_bbox(s, w, n, e):
|
||||||
|
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||||
return Response(
|
return Response(
|
||||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||||
media_type="application/json",
|
media_type="application/json",
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from services.data_fetcher import get_latest_data
|
|||||||
from services.schemas import HealthResponse
|
from services.schemas import HealthResponse
|
||||||
import os
|
import os
|
||||||
|
|
||||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.79")
|
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.81")
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -59,6 +59,12 @@ async def health_check(request: Request):
|
|||||||
# when the SPKI-pinned fallback is in effect. The data plane keeps
|
# when the SPKI-pinned fallback is in effect. The data plane keeps
|
||||||
# flowing (this is by design — see ais_proxy.js comments) but observers
|
# flowing (this is by design — see ais_proxy.js comments) but observers
|
||||||
# who care about MITM-protection posture deserve a visible signal.
|
# who care about MITM-protection posture deserve a visible signal.
|
||||||
|
#
|
||||||
|
# Plus connectivity health (added 2026-05-23 when stream.aisstream.io
|
||||||
|
# went fully offline): ``connected`` tells the frontend whether ship
|
||||||
|
# data is actually flowing. When false, a banner explains that ships
|
||||||
|
# are unavailable due to an upstream outage — better than the user
|
||||||
|
# silently seeing an empty ocean and assuming we broke something.
|
||||||
ais_status: dict = {}
|
ais_status: dict = {}
|
||||||
try:
|
try:
|
||||||
from services.ais_stream import ais_proxy_status
|
from services.ais_stream import ais_proxy_status
|
||||||
@@ -69,6 +75,15 @@ async def health_check(request: Request):
|
|||||||
# Don't override a worse top-level status if SLOs already failed,
|
# Don't override a worse top-level status if SLOs already failed,
|
||||||
# but escalate ok -> degraded so the field surfaces in dashboards.
|
# but escalate ok -> degraded so the field surfaces in dashboards.
|
||||||
top_status = "degraded"
|
top_status = "degraded"
|
||||||
|
# AIS_API_KEY not configured is "feature off", not "system broken" —
|
||||||
|
# so we only escalate when the operator opted into AIS (key set) AND
|
||||||
|
# the stream is currently offline.
|
||||||
|
if (
|
||||||
|
os.environ.get("AIS_API_KEY")
|
||||||
|
and ais_status.get("connected") is False
|
||||||
|
and top_status == "ok"
|
||||||
|
):
|
||||||
|
top_status = "degraded"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": top_status,
|
"status": top_status,
|
||||||
|
|||||||
@@ -223,11 +223,21 @@ async def oracle_markets_more(request: Request, category: str = "NEWS", offset:
|
|||||||
"has_more": offset + limit < len(cat_markets), "total": len(cat_markets)}
|
"has_more": offset + limit < len(cat_markets), "total": len(cat_markets)}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/mesh/oracle/resolve")
|
@router.post(
|
||||||
|
"/api/mesh/oracle/resolve",
|
||||||
|
dependencies=[Depends(require_admin)],
|
||||||
|
)
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("5/minute")
|
||||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||||
async def oracle_resolve(request: Request):
|
async def oracle_resolve(request: Request):
|
||||||
"""Resolve a prediction market."""
|
"""Resolve a prediction market.
|
||||||
|
|
||||||
|
Issue #240 (tg12): requires admin authentication. The
|
||||||
|
``mesh_write_exempt`` decorator below is **metadata only** — it tags
|
||||||
|
the route as not requiring a mesh signed-write envelope, it does
|
||||||
|
NOT itself enforce caller authorization. The ``Depends(require_admin)``
|
||||||
|
on the route decorator is what actually gates access.
|
||||||
|
"""
|
||||||
from services.mesh.mesh_oracle import oracle_ledger
|
from services.mesh.mesh_oracle import oracle_ledger
|
||||||
body = await request.json()
|
body = await request.json()
|
||||||
market_title = body.get("market_title", "")
|
market_title = body.get("market_title", "")
|
||||||
@@ -327,11 +337,18 @@ async def oracle_predictions(request: Request, node_id: str = ""):
|
|||||||
active_predictions, authenticated=_scoped_view_authenticated(request, "mesh.audit"))
|
active_predictions, authenticated=_scoped_view_authenticated(request, "mesh.audit"))
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/mesh/oracle/resolve-stakes")
|
@router.post(
|
||||||
|
"/api/mesh/oracle/resolve-stakes",
|
||||||
|
dependencies=[Depends(require_admin)],
|
||||||
|
)
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("5/minute")
|
||||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||||
async def oracle_resolve_stakes(request: Request):
|
async def oracle_resolve_stakes(request: Request):
|
||||||
"""Resolve all expired stake contests."""
|
"""Resolve all expired stake contests.
|
||||||
|
|
||||||
|
Issue #241 (tg12): requires admin authentication. See the note on
|
||||||
|
``oracle_resolve`` above — ``mesh_write_exempt`` is metadata only.
|
||||||
|
"""
|
||||||
from services.mesh.mesh_oracle import oracle_ledger
|
from services.mesh.mesh_oracle import oracle_ledger
|
||||||
resolutions = oracle_ledger.resolve_expired_stakes()
|
resolutions = oracle_ledger.resolve_expired_stakes()
|
||||||
return {"ok": True, "resolutions": resolutions, "count": len(resolutions)}
|
return {"ok": True, "resolutions": resolutions, "count": len(resolutions)}
|
||||||
|
|||||||
@@ -55,6 +55,12 @@ def _hydrate_gate_store_from_chain(events: list) -> int:
|
|||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def _hydrate_dm_relay_from_chain(events: list) -> int:
    """Delegate to ``main._hydrate_dm_relay_from_chain`` and return an int.

    ``main`` is imported at call time, not at module import time
    (presumably to avoid a circular import — confirm against ``main``).
    """
    import main as main_module

    hydrated = main_module._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/mesh/infonet/peer-push")
|
@router.post("/api/mesh/infonet/peer-push")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def infonet_peer_push(request: Request):
|
async def infonet_peer_push(request: Request):
|
||||||
@@ -82,9 +88,68 @@ async def infonet_peer_push(request: Request):
|
|||||||
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": []}
|
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": []}
|
||||||
result = infonet.ingest_events(events)
|
result = infonet.ingest_events(events)
|
||||||
_hydrate_gate_store_from_chain(events)
|
_hydrate_gate_store_from_chain(events)
|
||||||
|
_hydrate_dm_relay_from_chain(events)
|
||||||
return {"ok": True, **result}
|
return {"ok": True, **result}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/mesh/dm/replicate-envelope")
@limiter.limit("60/minute")
async def dm_replicate_envelope(request: Request):
    """Accept a DM envelope replicated from a peer relay (cross-node mailbox).

    Companion endpoint to ``DMRelay.replicate_to_peers`` (outbound, in
    ``mesh_dm_relay.py``). The sender's relay POSTs an encrypted DM
    envelope here after a successful local ``deposit``; this endpoint
    hands the envelope to ``dm_relay.accept_replica``, which re-enforces
    the per-(sender, recipient) anti-spam cap and stores the envelope in
    the local mailbox if accepted.

    The cap is the network rule: a hostile sender's relay can spool
    extras locally, but every honest peer enforces the cap on inbound
    replication. Recipient polling from any honest peer therefore
    never sees more than ``MESH_DM_PENDING_PER_SENDER_LIMIT`` pending
    from any one sender, no matter how many spam attempts were tried.

    Same HMAC auth pattern as ``infonet_peer_push`` and ``gate_peer_push``.

    Responses:
        413 — declared or actual body size exceeds 64 KB.
        403 — peer HMAC missing or invalid.
        400 — body is not valid JSON.
        200 ``{"ok": False, ...}`` — body is not a JSON object, or its
            ``envelope`` field is not an object.
        Otherwise whatever ``dm_relay.accept_replica`` returns.
    """
    # DM envelopes are bounded by MESH_DM_MAX_MSG_BYTES + envelope
    # overhead; 64 KB is a generous ceiling.
    max_body_bytes = 65_536

    def _too_large() -> Response:
        return Response(
            content='{"ok":false,"detail":"Request body too large (max 64KB)"}',
            status_code=413, media_type="application/json",
        )

    # Cheap early rejection based on the declared size.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            if int(content_length) > max_body_bytes:
                return _too_large()
        except (ValueError, TypeError):
            # Unparsable header: deliberately ignored, the real size is
            # checked right after the body is read.
            pass
    body_bytes = await request.body()
    # The header is optional (e.g. chunked transfer), so enforce the cap
    # on the bytes actually received as well.
    if len(body_bytes) > max_body_bytes:
        return _too_large()
    if not _verify_peer_push_hmac(request, body_bytes):
        return Response(
            content='{"ok":false,"detail":"Invalid or missing peer HMAC"}',
            status_code=403, media_type="application/json",
        )
    try:
        body = json_mod.loads(body_bytes or b"{}")
    except (ValueError, TypeError):
        return Response(
            content='{"ok":false,"detail":"Invalid JSON body"}',
            status_code=400, media_type="application/json",
        )
    # Valid JSON is not necessarily an object (e.g. ``[]`` or ``"x"``);
    # treat those like a missing envelope instead of crashing on ``.get``.
    envelope = body.get("envelope") if isinstance(body, dict) else None
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}

    originating_peer = _peer_hmac_url_from_request(request) or ""

    from services.mesh.mesh_dm_relay import dm_relay
    result = dm_relay.accept_replica(
        envelope=envelope,
        originating_peer_url=originating_peer,
    )
    return result
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/mesh/gate/peer-push")
|
@router.post("/api/mesh/gate/peer-push")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def gate_peer_push(request: Request):
|
async def gate_peer_push(request: Request):
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ from services.mesh.mesh_signed_events import (
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
_INFONET_SYNC_RATE_LIMIT = "600/minute"
|
||||||
|
|
||||||
|
|
||||||
def _signed_body(request: Request) -> dict[str, Any]:
|
def _signed_body(request: Request) -> dict[str, Any]:
|
||||||
@@ -263,6 +264,19 @@ def _redact_public_event(event: dict) -> dict:
|
|||||||
return _redact_vote_gate(_redact_key_rotate_payload(_redact_gate_metadata(event)))
|
return _redact_vote_gate(_redact_key_rotate_payload(_redact_gate_metadata(event)))
|
||||||
|
|
||||||
|
|
||||||
|
def _infonet_private_transport_required() -> bool:
    """Return ``main._infonet_private_transport_required()`` coerced to bool.

    ``main`` is imported at call time rather than at module import time.
    """
    import main as main_module

    required = main_module._infonet_private_transport_required()
    return bool(required)
|
||||||
|
|
||||||
|
|
||||||
|
def _infonet_sync_response_events(events: list[dict], request=None) -> list[dict]:
    """Build the sync event surface for the current transport policy.

    Thin delegate: the work is done by ``main._infonet_sync_response_events``,
    which receives *events* positionally and *request* as a keyword. ``main``
    is imported at call time rather than at module import time.
    """
    import main as main_module

    build_events = main_module._infonet_sync_response_events
    return build_events(events, request=request)
|
||||||
|
|
||||||
|
|
||||||
def _trusted_gate_reply_to(event: dict) -> str:
|
def _trusted_gate_reply_to(event: dict) -> str:
|
||||||
if not isinstance(event, dict):
|
if not isinstance(event, dict):
|
||||||
return ""
|
return ""
|
||||||
@@ -574,6 +588,12 @@ def _hydrate_gate_store_from_chain(events: list[dict]) -> int:
|
|||||||
pass
|
pass
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def _hydrate_dm_relay_from_chain(events: list[dict]) -> int:
    """Delegate to ``main._hydrate_dm_relay_from_chain`` and return an int.

    ``main`` is imported at call time, not at module import time
    (presumably to avoid a circular import — confirm against ``main``).
    """
    import main as main_module

    hydrated = main_module._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||||
|
|
||||||
# --- Safe type helpers ---
|
# --- Safe type helpers ---
|
||||||
|
|
||||||
def _safe_int(val, default=0):
|
def _safe_int(val, default=0):
|
||||||
@@ -1531,7 +1551,7 @@ async def infonet_locator(request: Request, limit: int = Query(32, ge=4, le=128)
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/api/mesh/infonet/sync")
|
@router.post("/api/mesh/infonet/sync")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||||
@mesh_write_exempt(MeshWriteExemption.PEER_GOSSIP)
|
@mesh_write_exempt(MeshWriteExemption.PEER_GOSSIP)
|
||||||
async def infonet_sync_post(
|
async def infonet_sync_post(
|
||||||
request: Request,
|
request: Request,
|
||||||
@@ -1584,8 +1604,7 @@ async def infonet_sync_post(
|
|||||||
elif matched_hash == GENESIS_HASH and len(locator) > 1:
|
elif matched_hash == GENESIS_HASH and len(locator) > 1:
|
||||||
forked = True
|
forked = True
|
||||||
|
|
||||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
events = _infonet_sync_response_events(events, request=request)
|
||||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
|
||||||
|
|
||||||
response = {
|
response = {
|
||||||
"events": events,
|
"events": events,
|
||||||
@@ -1646,7 +1665,7 @@ async def mesh_rns_status(request: Request):
|
|||||||
|
|
||||||
|
|
||||||
@router.get("/api/mesh/infonet/sync")
|
@router.get("/api/mesh/infonet/sync")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||||
async def infonet_sync(
|
async def infonet_sync(
|
||||||
request: Request,
|
request: Request,
|
||||||
after_hash: str = "",
|
after_hash: str = "",
|
||||||
@@ -1684,8 +1703,7 @@ async def infonet_sync(
|
|||||||
)
|
)
|
||||||
base = after_hash or GENESIS_HASH
|
base = after_hash or GENESIS_HASH
|
||||||
events = infonet.get_events_after(base, limit=limit)
|
events = infonet.get_events_after(base, limit=limit)
|
||||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
events = _infonet_sync_response_events(events, request=request)
|
||||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
|
||||||
return {
|
return {
|
||||||
"events": events,
|
"events": events,
|
||||||
"after_hash": base,
|
"after_hash": base,
|
||||||
@@ -1724,6 +1742,7 @@ async def infonet_ingest(request: Request):
|
|||||||
|
|
||||||
result = infonet.ingest_events(events)
|
result = infonet.ingest_events(events)
|
||||||
_hydrate_gate_store_from_chain(events)
|
_hydrate_gate_store_from_chain(events)
|
||||||
|
_hydrate_dm_relay_from_chain(events)
|
||||||
return {"ok": True, **result}
|
return {"ok": True, **result}
|
||||||
|
|
||||||
|
|
||||||
@@ -2279,6 +2298,12 @@ async def infonet_event(request: Request, event_id: str):
|
|||||||
)
|
)
|
||||||
return _strip_gate_for_access(evt, access)
|
return _strip_gate_for_access(evt, access)
|
||||||
return {"ok": False, "detail": "Event not found"}
|
return {"ok": False, "detail": "Event not found"}
|
||||||
|
if evt.get("event_type") == "dm_message":
|
||||||
|
return await _private_plane_refusal_response(
|
||||||
|
request,
|
||||||
|
status_code=403,
|
||||||
|
payload=_private_plane_access_denied_payload(),
|
||||||
|
)
|
||||||
if evt.get("event_type") == "gate_message":
|
if evt.get("event_type") == "gate_message":
|
||||||
gate_id = str(evt.get("payload", {}).get("gate", "") or evt.get("gate", "") or "").strip()
|
gate_id = str(evt.get("payload", {}).get("gate", "") or evt.get("gate", "") or "").strip()
|
||||||
access = _verify_gate_access(request, gate_id) if gate_id else ""
|
access = _verify_gate_access(request, gate_id) if gate_id else ""
|
||||||
@@ -2303,7 +2328,7 @@ async def infonet_node_events(
|
|||||||
from services.mesh.mesh_hashchain import infonet
|
from services.mesh.mesh_hashchain import infonet
|
||||||
|
|
||||||
events = infonet.get_events_by_node(node_id, limit=limit)
|
events = infonet.get_events_by_node(node_id, limit=limit)
|
||||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||||
events = _redact_public_node_history(
|
events = _redact_public_node_history(
|
||||||
events,
|
events,
|
||||||
@@ -2328,7 +2353,7 @@ async def infonet_events_by_type(
|
|||||||
else:
|
else:
|
||||||
events = list(reversed(infonet.events))
|
events = list(reversed(infonet.events))
|
||||||
events = events[offset : offset + limit]
|
events = events[offset : offset + limit]
|
||||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||||
return {
|
return {
|
||||||
"events": events,
|
"events": events,
|
||||||
|
|||||||
+87
-10
@@ -85,7 +85,30 @@ async def api_geocode_reverse(
|
|||||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/sentinel2/search")
|
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||||
|
# These three endpoints relay external Sentinel / Planetary Computer
|
||||||
|
# requests through the backend to avoid browser CORS blocks. They are
|
||||||
|
# operator-only helpers — they MUST NOT be callable by anonymous remote
|
||||||
|
# users, because:
|
||||||
|
#
|
||||||
|
# * /api/sentinel/token — caller supplies their own Sentinel client_id +
|
||||||
|
# client_secret. Without operator gating, the backend becomes a free
|
||||||
|
# anonymous OAuth-mint relay for any Copernicus account.
|
||||||
|
# * /api/sentinel/tile — same shape as the token route but for tile
|
||||||
|
# imagery. Without gating, the backend acts as an anonymous quota and
|
||||||
|
# bandwidth relay for Sentinel Hub Process API calls.
|
||||||
|
# * /api/sentinel2/search — hits the Planetary Computer STAC search API
|
||||||
|
# and falls back to Esri imagery. No caller credentials are involved,
|
||||||
|
# but the route is still an anonymous external-search relay. We gate
|
||||||
|
# it the same way for consistency with the rest of the operator-only
|
||||||
|
# helper surface.
|
||||||
|
#
|
||||||
|
# Gating is via require_local_operator (loopback / bridge / admin key),
|
||||||
|
# matching the same allowlist already used by /api/region-dossier and
|
||||||
|
# the other operator helpers further up this file. Single-operator nodes
|
||||||
|
# see no behavior change — their dashboard already lives on loopback or
|
||||||
|
# the trusted Docker bridge, so it still resolves.
|
||||||
|
@router.get("/api/sentinel2/search", dependencies=[Depends(require_local_operator)])
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
def api_sentinel2_search(
|
def api_sentinel2_search(
|
||||||
request: Request,
|
request: Request,
|
||||||
@@ -97,18 +120,60 @@ def api_sentinel2_search(
|
|||||||
return search_sentinel2_scene(lat, lng)
|
return search_sentinel2_scene(lat, lng)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/sentinel/token")
|
# Issue #298 (tg12): Sentinel credentials moved server-side
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Previously the frontend kept Copernicus CDSE client_id + client_secret in
|
||||||
|
# browser localStorage / sessionStorage and forwarded them on every tile
|
||||||
|
# request through this proxy. That exposed real third-party credentials to
|
||||||
|
# any same-origin script (XSS, malicious browser extension, dev-tools HAR
|
||||||
|
# export).
|
||||||
|
#
|
||||||
|
# Resolution order (first match wins):
|
||||||
|
# 1. Request body — kept for back-compat. A small number of legacy
|
||||||
|
# operator setups may still post credentials; we don't break them.
|
||||||
|
# 2. Backend .env — SENTINEL_CLIENT_ID / SENTINEL_CLIENT_SECRET, managed
|
||||||
|
# through the existing /api/settings/api-keys flow (admin-gated).
|
||||||
|
#
|
||||||
|
# The frontend in ``sentinelHub.ts`` no longer reads browser storage and no
|
||||||
|
# longer forwards credentials — every dashboard request now lands in (2).
|
||||||
|
# The require_local_operator gate (added in #303/PR #303) stays — both layers
|
||||||
|
# are independent: the gate blocks anonymous callers, the env fallback lets
|
||||||
|
# legitimate (gated) callers omit credentials from the body.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def _resolve_sentinel_credentials(body_id: str, body_secret: str) -> tuple[str, str]:
|
||||||
|
"""Return (client_id, client_secret) using body values when present,
|
||||||
|
otherwise falling back to backend .env. Empty strings if neither is set."""
|
||||||
|
import os as _os
|
||||||
|
cid = (body_id or "").strip() or (_os.environ.get("SENTINEL_CLIENT_ID", "") or "").strip()
|
||||||
|
csec = (body_secret or "").strip() or (_os.environ.get("SENTINEL_CLIENT_SECRET", "") or "").strip()
|
||||||
|
return cid, csec
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/sentinel/token", dependencies=[Depends(require_local_operator)])
|
||||||
@limiter.limit("60/minute")
|
@limiter.limit("60/minute")
|
||||||
async def api_sentinel_token(request: Request):
|
async def api_sentinel_token(request: Request):
|
||||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block)."""
|
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block).
|
||||||
|
|
||||||
|
Credentials are resolved by ``_resolve_sentinel_credentials`` — body
|
||||||
|
fields are honored for back-compat, otherwise the backend .env values
|
||||||
|
populated through ``/api/settings/api-keys`` are used.
|
||||||
|
"""
|
||||||
import requests as req
|
import requests as req
|
||||||
body = await request.body()
|
body = await request.body()
|
||||||
from urllib.parse import parse_qs
|
from urllib.parse import parse_qs
|
||||||
params = parse_qs(body.decode("utf-8"))
|
params = parse_qs(body.decode("utf-8"))
|
||||||
client_id = params.get("client_id", [""])[0]
|
body_id = params.get("client_id", [""])[0]
|
||||||
client_secret = params.get("client_secret", [""])[0]
|
body_secret = params.get("client_secret", [""])[0]
|
||||||
|
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||||
if not client_id or not client_secret:
|
if not client_id or not client_secret:
|
||||||
raise HTTPException(400, "client_id and client_secret required")
|
# Friendly, non-hostile error — points the operator at the place
|
||||||
|
# they configure other API keys instead of just saying "required".
|
||||||
|
raise HTTPException(
|
||||||
|
400,
|
||||||
|
"Sentinel client_id/client_secret are not configured. "
|
||||||
|
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||||
|
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||||
|
)
|
||||||
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
||||||
try:
|
try:
|
||||||
resp = await asyncio.to_thread(req.post, token_url,
|
resp = await asyncio.to_thread(req.post, token_url,
|
||||||
@@ -152,7 +217,7 @@ import os as _os
|
|||||||
_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/sentinel/tile")
|
@router.post("/api/sentinel/tile", dependencies=[Depends(require_local_operator)])
|
||||||
@limiter.limit("300/minute")
|
@limiter.limit("300/minute")
|
||||||
async def api_sentinel_tile(request: Request):
|
async def api_sentinel_tile(request: Request):
|
||||||
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
||||||
@@ -163,8 +228,11 @@ async def api_sentinel_tile(request: Request):
|
|||||||
except Exception:
|
except Exception:
|
||||||
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
||||||
|
|
||||||
client_id = body.get("client_id", "")
|
# Issue #298: same resolution order as /api/sentinel/token — body
|
||||||
client_secret = body.get("client_secret", "")
|
# values for back-compat, otherwise backend .env.
|
||||||
|
body_id = body.get("client_id", "")
|
||||||
|
body_secret = body.get("client_secret", "")
|
||||||
|
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||||
preset = body.get("preset", "TRUE-COLOR")
|
preset = body.get("preset", "TRUE-COLOR")
|
||||||
date_str = body.get("date", "")
|
date_str = body.get("date", "")
|
||||||
z = body.get("z", 0)
|
z = body.get("z", 0)
|
||||||
@@ -172,7 +240,16 @@ async def api_sentinel_tile(request: Request):
|
|||||||
y = body.get("y", 0)
|
y = body.get("y", 0)
|
||||||
|
|
||||||
if not client_id or not client_secret or not date_str:
|
if not client_id or not client_secret or not date_str:
|
||||||
raise HTTPException(400, "client_id, client_secret, and date required")
|
# Distinguish "no creds" from "no date" so the operator knows
|
||||||
|
# what to fix. Same friendly pointer as the /token route.
|
||||||
|
if not client_id or not client_secret:
|
||||||
|
raise HTTPException(
|
||||||
|
400,
|
||||||
|
"Sentinel client_id/client_secret are not configured. "
|
||||||
|
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||||
|
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||||
|
)
|
||||||
|
raise HTTPException(400, "date required")
|
||||||
|
|
||||||
now = _time.time()
|
now = _time.time()
|
||||||
credential_fp = _credential_fingerprint(client_id, client_secret)
|
credential_fp = _credential_fingerprint(client_id, client_secret)
|
||||||
|
|||||||
@@ -160,8 +160,13 @@ router = APIRouter()
|
|||||||
|
|
||||||
# --- Constants ---
|
# --- Constants ---
|
||||||
|
|
||||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
# "is this on?" boolean. Transport choice, anonymous-mode state, and
|
||||||
|
# the named privacy profile were all leaking actionable recon to
|
||||||
|
# unauthenticated callers and are now gated behind authenticated reads.
|
||||||
|
# See the matching block in backend/main.py for the full rationale.
|
||||||
|
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||||
|
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||||
|
|||||||
@@ -20,7 +20,17 @@ OUT_PATH = Path(__file__).parent.parent / "data" / "power_plants.json"
|
|||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
||||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
# Round 7a: release-time data refresher. Uses the per-operator UA if
|
||||||
|
# available, otherwise a release-script-specific identifier. This
|
||||||
|
# script is run by the maintainer at release time, NOT at runtime,
|
||||||
|
# so an aggregate UA is acceptable; we still use the helper so the
|
||||||
|
# behavior matches the rest of the project.
|
||||||
|
try:
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
ua = outbound_user_agent("release-script-power-plants")
|
||||||
|
except Exception:
|
||||||
|
ua = "Shadowbroker/0.9 (release-script-power-plants; +https://github.com/BigBodyCobain/Shadowbroker/issues)"
|
||||||
|
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||||
raw = resp.read().decode("utf-8")
|
raw = resp.read().decode("utf-8")
|
||||||
|
|
||||||
|
|||||||
@@ -92,18 +92,37 @@ SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token
|
|||||||
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
||||||
|
|
||||||
if [[ -n "$TEXT_FILES" ]]; then
|
if [[ -n "$TEXT_FILES" ]]; then
|
||||||
|
# Known-public exclusions: lines matching `<host-or-ip> ssh-<algo> <key>`
|
||||||
|
# are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
|
||||||
|
# by definition safe to commit (the whole point of pinning known_hosts
|
||||||
|
# is to publish the fingerprint widely so MITM is detectable). Filter
|
||||||
|
# these out before flagging the file.
|
||||||
|
KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
|
||||||
|
|
||||||
# Use grep with file list, skip missing/binary, limit output
|
# Use grep with file list, skip missing/binary, limit output
|
||||||
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
||||||
if [[ -n "$CONTENT_HITS" ]]; then
|
if [[ -n "$CONTENT_HITS" ]]; then
|
||||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
REAL_HITS=""
|
||||||
echo "$CONTENT_HITS" | while read -r f; do
|
REAL_REPORT=""
|
||||||
echo -e " ${RED}$f${NC}"
|
while IFS= read -r f; do
|
||||||
# Show first matching line for context
|
[[ -z "$f" ]] && continue
|
||||||
grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | head -2 | while read -r line; do
|
# Re-grep this file, but filter out known_hosts-style lines.
|
||||||
echo -e " ${YELLOW}$line${NC}"
|
FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "$KNOWN_HOSTS_LINE" || true)
|
||||||
done
|
if [[ -n "$FILE_HITS" ]]; then
|
||||||
done
|
REAL_HITS+="$f"$'\n'
|
||||||
FOUND=1
|
REAL_REPORT+=" ${RED}$f${NC}"$'\n'
|
||||||
|
# Show first 2 matching lines for context
|
||||||
|
while IFS= read -r line; do
|
||||||
|
[[ -z "$line" ]] && continue
|
||||||
|
REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
|
||||||
|
done < <(echo "$FILE_HITS" | head -2)
|
||||||
|
fi
|
||||||
|
done <<< "$CONTENT_HITS"
|
||||||
|
if [[ -n "$REAL_HITS" ]]; then
|
||||||
|
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||||
|
echo -en "$REAL_REPORT"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@@ -350,19 +350,58 @@ _proxy_process = None
|
|||||||
# path during an upstream cert outage. Surfaced via ais_proxy_status() for
|
# path during an upstream cert outage. Surfaced via ais_proxy_status() for
|
||||||
# /api/health.
|
# /api/health.
|
||||||
_proxy_status: dict = {}
|
_proxy_status: dict = {}
|
||||||
|
# Upstream-connectivity telemetry (added when stream.aisstream.io went fully
|
||||||
|
# offline on 2026-05-23). ``_last_msg_at`` is the unix timestamp of the most
|
||||||
|
# recent vessel message received from the proxy. ``_proxy_spawn_count`` is
|
||||||
|
# how many times we've started the node proxy; combined with no recent
|
||||||
|
# messages it tells us the proxy is respawning in a tight loop because the
|
||||||
|
# upstream is unreachable. Surfaced via ais_proxy_status() so the operator
|
||||||
|
# can see "AIS is dead" instead of guessing whether it's their map filter,
|
||||||
|
# their api key, or upstream.
|
||||||
|
_last_msg_at: float = 0.0
|
||||||
|
_proxy_spawn_count: int = 0
|
||||||
_VESSEL_TRAIL_INTERVAL_S = 120
|
_VESSEL_TRAIL_INTERVAL_S = 120
|
||||||
_VESSEL_TRAIL_MAX_POINTS = 240
|
_VESSEL_TRAIL_MAX_POINTS = 240
|
||||||
|
|
||||||
|
|
||||||
def ais_proxy_status() -> dict:
|
# How stale "last vessel message" can be before we consider the stream
|
||||||
"""Return a copy of the latest ais_proxy.js status (issue #258).
|
# disconnected. AISStream typically pushes multiple messages/sec, so a 60s
|
||||||
|
# gap means something's wrong upstream or in transit.
|
||||||
|
_AIS_CONNECTED_FRESHNESS_S = 60
|
||||||
|
|
||||||
Currently surfaces ``degraded_tls`` (bool) which is true when the
|
|
||||||
proxy is using SPKI-pinned fallback because AISStream's cert expired.
|
def ais_proxy_status() -> dict:
|
||||||
Returns an empty dict when no status has been received yet.
|
"""Return a copy of the latest ais_proxy.js status + connectivity health.
|
||||||
|
|
||||||
|
Fields:
|
||||||
|
* ``degraded_tls`` (bool, issue #258) — true when the proxy is using
|
||||||
|
SPKI-pinned fallback because AISStream's cert expired.
|
||||||
|
* ``connected`` (bool) — true when we received a vessel message in
|
||||||
|
the last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
|
||||||
|
* ``last_msg_age_seconds`` (int | None) — seconds since the last
|
||||||
|
vessel message; None if we've never received one.
|
||||||
|
* ``proxy_spawn_count`` (int) — how many times we've spawned the
|
||||||
|
node proxy. Sustained increases here without ``connected`` means
|
||||||
|
we're respawning in a tight loop because upstream is dead.
|
||||||
|
|
||||||
|
Returns an empty dict when called before the AIS subsystem starts
|
||||||
|
(e.g. during tests or when no API key is set).
|
||||||
"""
|
"""
|
||||||
with _vessels_lock:
|
with _vessels_lock:
|
||||||
return dict(_proxy_status)
|
status = dict(_proxy_status)
|
||||||
|
last = _last_msg_at
|
||||||
|
spawns = _proxy_spawn_count
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
if last > 0:
|
||||||
|
last_age = int(now - last)
|
||||||
|
status["last_msg_age_seconds"] = last_age
|
||||||
|
status["connected"] = last_age <= _AIS_CONNECTED_FRESHNESS_S
|
||||||
|
else:
|
||||||
|
status["last_msg_age_seconds"] = None
|
||||||
|
status["connected"] = False
|
||||||
|
status["proxy_spawn_count"] = spawns
|
||||||
|
return status
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@@ -588,8 +627,10 @@ def _ais_stream_loop():
|
|||||||
env=proxy_env,
|
env=proxy_env,
|
||||||
**popen_kwargs,
|
**popen_kwargs,
|
||||||
)
|
)
|
||||||
|
global _proxy_spawn_count
|
||||||
with _vessels_lock:
|
with _vessels_lock:
|
||||||
_proxy_process = process
|
_proxy_process = process
|
||||||
|
_proxy_spawn_count += 1
|
||||||
|
|
||||||
# Drain stderr in a background thread to prevent deadlock
|
# Drain stderr in a background thread to prevent deadlock
|
||||||
import threading
|
import threading
|
||||||
@@ -645,9 +686,15 @@ def _ais_stream_loop():
|
|||||||
if not mmsi:
|
if not mmsi:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Telemetry: stamp the timestamp of the most recent real
|
||||||
|
# vessel message. ais_proxy_status() reads this to decide
|
||||||
|
# whether the stream is currently "connected" — i.e. has
|
||||||
|
# any data flowed in the last 60s.
|
||||||
|
global _last_msg_at
|
||||||
with _vessels_lock:
|
with _vessels_lock:
|
||||||
|
_last_msg_at = time.time()
|
||||||
if mmsi not in _vessels:
|
if mmsi not in _vessels:
|
||||||
_vessels[mmsi] = {"_updated": time.time()}
|
_vessels[mmsi] = {"_updated": _last_msg_at}
|
||||||
vessel = _vessels[mmsi]
|
vessel = _vessels[mmsi]
|
||||||
|
|
||||||
# Update position from PositionReport or StandardClassBPositionReport
|
# Update position from PositionReport or StandardClassBPositionReport
|
||||||
|
|||||||
@@ -150,6 +150,31 @@ API_REGISTRY = [
|
|||||||
"url": "https://finnhub.io/register",
|
"url": "https://finnhub.io/register",
|
||||||
"required": False,
|
"required": False,
|
||||||
},
|
},
|
||||||
|
# Issue #298 (tg12): Sentinel Hub / Copernicus Data Space Ecosystem
|
||||||
|
# credentials were previously held in browser localStorage / sessionStorage
|
||||||
|
# by the Settings panel. Moved server-side to the same .env-backed
|
||||||
|
# store every other third-party API key lives in. The Sentinel proxy
|
||||||
|
# routes (POST /api/sentinel/token, /tile) now fall back to these
|
||||||
|
# env values when the request body omits credentials — see
|
||||||
|
# backend/routers/tools.py for the resolution order.
|
||||||
|
{
|
||||||
|
"id": "sentinel_client_id",
|
||||||
|
"env_key": "SENTINEL_CLIENT_ID",
|
||||||
|
"name": "Sentinel Hub / Copernicus — Client ID",
|
||||||
|
"description": "OAuth2 client ID for Copernicus Data Space Ecosystem (CDSE). Required for the Sentinel-2 imagery overlay and the right-click Sentinel-2 Intel Card. Sign in at dataspace.copernicus.eu and create OAuth credentials.",
|
||||||
|
"category": "Imagery",
|
||||||
|
"url": "https://dataspace.copernicus.eu/",
|
||||||
|
"required": False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "sentinel_client_secret",
|
||||||
|
"env_key": "SENTINEL_CLIENT_SECRET",
|
||||||
|
"name": "Sentinel Hub / Copernicus — Client Secret",
|
||||||
|
"description": "OAuth2 client secret paired with the Client ID above. Used by the backend to mint short-lived access tokens against the CDSE identity provider. Stored in the backend .env; never sent to the browser.",
|
||||||
|
"category": "Imagery",
|
||||||
|
"url": "https://dataspace.copernicus.eu/",
|
||||||
|
"required": False,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
ALLOWED_ENV_KEYS = {
|
ALLOWED_ENV_KEYS = {
|
||||||
|
|||||||
+407
-173
@@ -1,46 +1,90 @@
|
|||||||
"""
|
"""
|
||||||
Carrier Strike Group OSINT Tracker
|
Carrier Strike Group OSINT Tracker
|
||||||
===================================
|
===================================
|
||||||
Scrapes multiple OSINT sources to maintain current estimated positions
|
Maintains estimated positions for US Navy Carrier Strike Groups with
|
||||||
for US Navy Carrier Strike Groups. Updates on startup + 00:00 & 12:00 UTC.
|
honest provenance and freshness signals.
|
||||||
|
|
||||||
Sources:
|
Issues #244 / #245 / #246 (tg12 external audit):
|
||||||
1. GDELT News API — recent carrier movement headlines
|
|
||||||
2. WikiVoyage / public port-call databases
|
The previous implementation baked a snapshot of USNI News Fleet &
|
||||||
3. Fallback — last-known or static OSINT estimates
|
Marine Tracker positions (March 9, 2026) into the registry as
|
||||||
|
``fallback_lat``/``fallback_lng`` and stamped ``updated = now()``
|
||||||
|
every time the dossier was rendered. That presented stale editorial
|
||||||
|
data as live state. It also persisted GDELT-derived positions to the
|
||||||
|
on-disk cache with no freshness signal, so a single news mention from
|
||||||
|
months ago could keep overriding the (already-stale) registry default
|
||||||
|
indefinitely.
|
||||||
|
|
||||||
|
Architecture after this PR:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
backend/data/carrier_seed.json read-only, shipped with image,
|
||||||
|
used ONCE on first-ever startup
|
||||||
|
to bootstrap carrier_cache.json.
|
||||||
|
|
||||||
|
backend/data/carrier_cache.json mutable, lives in the runtime data
|
||||||
|
volume, written by every GDELT
|
||||||
|
refresh + any future source.
|
||||||
|
|
||||||
|
Startup flow:
|
||||||
|
|
||||||
|
1. ``carrier_cache.json`` exists? → load it.
|
||||||
|
2. Otherwise, copy ``carrier_seed.json`` → ``carrier_cache.json``,
|
||||||
|
then load it. (This happens once, ever, per install.)
|
||||||
|
3. Background: GDELT fetch runs. Any carrier mentioned in fresh news
|
||||||
|
gets its entry replaced with the news-derived position.
|
||||||
|
``position_source_at`` is set to the news article timestamp.
|
||||||
|
|
||||||
|
Freshness is a *labelling* decision, not an eviction decision:
|
||||||
|
|
||||||
|
- ``position_source_at`` within the configurable freshness window
|
||||||
|
(default 14 days) → ``position_confidence = "recent"``.
|
||||||
|
- Older than that → ``position_confidence = "stale"``.
|
||||||
|
- Bootstrapped from the seed file (never updated) → ``"seed"``.
|
||||||
|
- No cache entry at all (e.g. a carrier added to the registry after
|
||||||
|
first install) → carrier renders at its homeport with
|
||||||
|
``"homeport_default"``.
|
||||||
|
|
||||||
|
Carriers are never hidden, never teleported, never disappeared. The
|
||||||
|
position the user sees is always the last position the system actually
|
||||||
|
observed, with an honest "as-of" timestamp the UI can render however
|
||||||
|
it likes. A year from now, the runtime cache reflects whatever this
|
||||||
|
install has observed via GDELT — not the seed snapshot.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import os
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
import random
|
import random
|
||||||
from datetime import datetime, timezone
|
import shutil
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
# Carrier registry: hull number → metadata + fallback position
|
# Carrier registry: hull number → identity only.
|
||||||
|
#
|
||||||
|
# Issue #244 (tg12): the previous registry carried hard-coded
|
||||||
|
# ``fallback_lat``/``fallback_lng`` that were dated editorial
|
||||||
|
# snapshots from a 2026-03-09 article. Those fields are DELETED. The
|
||||||
|
# registry is now identity + homeport only; positions are sourced
|
||||||
|
# exclusively from carrier_cache.json (and via that, from the
|
||||||
|
# bootstrap seed or live OSINT).
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
CARRIER_REGISTRY: Dict[str, dict] = {
|
CARRIER_REGISTRY: Dict[str, dict] = {
|
||||||
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
|
|
||||||
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
|
|
||||||
# --- Bremerton, WA (Naval Base Kitsap) ---
|
# --- Bremerton, WA (Naval Base Kitsap) ---
|
||||||
# Distinct pier positions along Sinclair Inlet so carriers don't stack
|
|
||||||
"CVN-68": {
|
"CVN-68": {
|
||||||
"name": "USS Nimitz (CVN-68)",
|
"name": "USS Nimitz (CVN-68)",
|
||||||
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
||||||
"homeport": "Bremerton, WA",
|
"homeport": "Bremerton, WA",
|
||||||
"homeport_lat": 47.5535,
|
"homeport_lat": 47.5535,
|
||||||
"homeport_lng": -122.6400,
|
"homeport_lng": -122.6400,
|
||||||
"fallback_lat": 47.5535,
|
|
||||||
"fallback_lng": -122.6400,
|
|
||||||
"fallback_heading": 90,
|
|
||||||
"fallback_desc": "Bremerton, WA (Maintenance)",
|
|
||||||
},
|
},
|
||||||
"CVN-76": {
|
"CVN-76": {
|
||||||
"name": "USS Ronald Reagan (CVN-76)",
|
"name": "USS Ronald Reagan (CVN-76)",
|
||||||
@@ -48,23 +92,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Bremerton, WA",
|
"homeport": "Bremerton, WA",
|
||||||
"homeport_lat": 47.5580,
|
"homeport_lat": 47.5580,
|
||||||
"homeport_lng": -122.6360,
|
"homeport_lng": -122.6360,
|
||||||
"fallback_lat": 47.5580,
|
|
||||||
"fallback_lng": -122.6360,
|
|
||||||
"fallback_heading": 90,
|
|
||||||
"fallback_desc": "Bremerton, WA (Decommissioning)",
|
|
||||||
},
|
},
|
||||||
# --- Norfolk, VA (Naval Station Norfolk) ---
|
# --- Norfolk, VA (Naval Station Norfolk) ---
|
||||||
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
|
|
||||||
"CVN-69": {
|
"CVN-69": {
|
||||||
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
||||||
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
||||||
"homeport": "Norfolk, VA",
|
"homeport": "Norfolk, VA",
|
||||||
"homeport_lat": 36.9465,
|
"homeport_lat": 36.9465,
|
||||||
"homeport_lng": -76.3265,
|
"homeport_lng": -76.3265,
|
||||||
"fallback_lat": 36.9465,
|
|
||||||
"fallback_lng": -76.3265,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)",
|
|
||||||
},
|
},
|
||||||
"CVN-78": {
|
"CVN-78": {
|
||||||
"name": "USS Gerald R. Ford (CVN-78)",
|
"name": "USS Gerald R. Ford (CVN-78)",
|
||||||
@@ -72,10 +107,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Norfolk, VA",
|
"homeport": "Norfolk, VA",
|
||||||
"homeport_lat": 36.9505,
|
"homeport_lat": 36.9505,
|
||||||
"homeport_lng": -76.3250,
|
"homeport_lng": -76.3250,
|
||||||
"fallback_lat": 18.0,
|
|
||||||
"fallback_lng": 39.5,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
|
||||||
},
|
},
|
||||||
"CVN-74": {
|
"CVN-74": {
|
||||||
"name": "USS John C. Stennis (CVN-74)",
|
"name": "USS John C. Stennis (CVN-74)",
|
||||||
@@ -83,10 +114,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Norfolk, VA",
|
"homeport": "Norfolk, VA",
|
||||||
"homeport_lat": 36.9540,
|
"homeport_lat": 36.9540,
|
||||||
"homeport_lng": -76.3235,
|
"homeport_lng": -76.3235,
|
||||||
"fallback_lat": 36.98,
|
|
||||||
"fallback_lng": -76.43,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)",
|
|
||||||
},
|
},
|
||||||
"CVN-75": {
|
"CVN-75": {
|
||||||
"name": "USS Harry S. Truman (CVN-75)",
|
"name": "USS Harry S. Truman (CVN-75)",
|
||||||
@@ -94,10 +121,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Norfolk, VA",
|
"homeport": "Norfolk, VA",
|
||||||
"homeport_lat": 36.9580,
|
"homeport_lat": 36.9580,
|
||||||
"homeport_lng": -76.3220,
|
"homeport_lng": -76.3220,
|
||||||
"fallback_lat": 36.0,
|
|
||||||
"fallback_lng": 15.0,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
|
||||||
},
|
},
|
||||||
"CVN-77": {
|
"CVN-77": {
|
||||||
"name": "USS George H.W. Bush (CVN-77)",
|
"name": "USS George H.W. Bush (CVN-77)",
|
||||||
@@ -105,23 +128,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Norfolk, VA",
|
"homeport": "Norfolk, VA",
|
||||||
"homeport_lat": 36.9620,
|
"homeport_lat": 36.9620,
|
||||||
"homeport_lng": -76.3210,
|
"homeport_lng": -76.3210,
|
||||||
"fallback_lat": 36.5,
|
|
||||||
"fallback_lng": -74.0,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
|
||||||
},
|
},
|
||||||
# --- San Diego, CA (Naval Base San Diego) ---
|
# --- San Diego, CA (Naval Base San Diego) ---
|
||||||
# Carrier piers along the east shore of San Diego Bay, spread N-S
|
|
||||||
"CVN-70": {
|
"CVN-70": {
|
||||||
"name": "USS Carl Vinson (CVN-70)",
|
"name": "USS Carl Vinson (CVN-70)",
|
||||||
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
||||||
"homeport": "San Diego, CA",
|
"homeport": "San Diego, CA",
|
||||||
"homeport_lat": 32.6840,
|
"homeport_lat": 32.6840,
|
||||||
"homeport_lng": -117.1290,
|
"homeport_lng": -117.1290,
|
||||||
"fallback_lat": 32.6840,
|
|
||||||
"fallback_lng": -117.1290,
|
|
||||||
"fallback_heading": 180,
|
|
||||||
"fallback_desc": "San Diego, CA (Homeport)",
|
|
||||||
},
|
},
|
||||||
"CVN-71": {
|
"CVN-71": {
|
||||||
"name": "USS Theodore Roosevelt (CVN-71)",
|
"name": "USS Theodore Roosevelt (CVN-71)",
|
||||||
@@ -129,10 +143,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "San Diego, CA",
|
"homeport": "San Diego, CA",
|
||||||
"homeport_lat": 32.6885,
|
"homeport_lat": 32.6885,
|
||||||
"homeport_lng": -117.1280,
|
"homeport_lng": -117.1280,
|
||||||
"fallback_lat": 32.6885,
|
|
||||||
"fallback_lng": -117.1280,
|
|
||||||
"fallback_heading": 180,
|
|
||||||
"fallback_desc": "San Diego, CA (Maintenance)",
|
|
||||||
},
|
},
|
||||||
"CVN-72": {
|
"CVN-72": {
|
||||||
"name": "USS Abraham Lincoln (CVN-72)",
|
"name": "USS Abraham Lincoln (CVN-72)",
|
||||||
@@ -140,10 +150,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "San Diego, CA",
|
"homeport": "San Diego, CA",
|
||||||
"homeport_lat": 32.6925,
|
"homeport_lat": 32.6925,
|
||||||
"homeport_lng": -117.1275,
|
"homeport_lng": -117.1275,
|
||||||
"fallback_lat": 20.0,
|
|
||||||
"fallback_lng": 64.0,
|
|
||||||
"fallback_heading": 0,
|
|
||||||
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
|
||||||
},
|
},
|
||||||
# --- Yokosuka, Japan (CFAY) ---
|
# --- Yokosuka, Japan (CFAY) ---
|
||||||
"CVN-73": {
|
"CVN-73": {
|
||||||
@@ -152,16 +158,18 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
|||||||
"homeport": "Yokosuka, Japan",
|
"homeport": "Yokosuka, Japan",
|
||||||
"homeport_lat": 35.2830,
|
"homeport_lat": 35.2830,
|
||||||
"homeport_lng": 139.6700,
|
"homeport_lng": 139.6700,
|
||||||
"fallback_lat": 35.2830,
|
|
||||||
"fallback_lng": 139.6700,
|
|
||||||
"fallback_heading": 180,
|
|
||||||
"fallback_desc": "Yokosuka, Japan (Forward deployed)",
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
# Region → approximate center coordinates
|
# Region → approximate center coordinates.
|
||||||
# Used to map textual geographic descriptions to lat/lng
|
#
|
||||||
|
# Issue #245 (tg12): converting a region name straight into precise
|
||||||
|
# map coordinates is false precision. We still use this table to
|
||||||
|
# infer a coarse position from a headline mention, but the resulting
|
||||||
|
# carrier object is now stamped ``position_confidence = "approximate"``
|
||||||
|
# so the UI can render an uncertainty radius / dimmed icon. The
|
||||||
|
# centroid is a best-effort midpoint of the named body of water.
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
REGION_COORDS: Dict[str, tuple] = {
|
REGION_COORDS: Dict[str, tuple] = {
|
||||||
# Oceans & Seas
|
# Oceans & Seas
|
||||||
@@ -220,9 +228,39 @@ REGION_COORDS: Dict[str, tuple] = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
# Cache file for persisting positions between restarts
|
# Files
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
CACHE_FILE = Path(__file__).parent.parent / "carrier_cache.json"
|
#
|
||||||
|
# The seed lives in the read-only image data dir (it ships with each
|
||||||
|
# release). The cache lives in the same data dir but is written at
|
||||||
|
# runtime; under Docker compose this dir is volume-mounted so the
|
||||||
|
# cache persists across container restarts, which is the whole point
|
||||||
|
# of the seed-then-observe model — the user's runtime observations
|
||||||
|
# survive image upgrades.
|
||||||
|
SEED_FILE = Path(__file__).parent.parent / "data" / "carrier_seed.json"
|
||||||
|
CACHE_FILE = Path(__file__).parent.parent / "data" / "carrier_cache.json"
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------
|
||||||
|
# Freshness window for position_confidence labeling. Issue #246 (tg12):
|
||||||
|
# previously persisted cache entries had no freshness signal at all.
|
||||||
|
# After this change, the position itself is preserved (we never lose
|
||||||
|
# what was last observed) but the confidence label flips from
|
||||||
|
# "recent" to "stale" once the underlying source is older than this
|
||||||
|
# window. Operator-overridable via env var.
|
||||||
|
# -----------------------------------------------------------------
|
||||||
|
_DEFAULT_FRESHNESS_WINDOW_DAYS = 14
|
||||||
|
|
||||||
|
|
||||||
|
def _freshness_window_days() -> int:
|
||||||
|
raw = str(os.environ.get("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "") or "").strip()
|
||||||
|
if not raw:
|
||||||
|
return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||||
|
try:
|
||||||
|
n = int(raw)
|
||||||
|
return n if n > 0 else _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||||
|
|
||||||
|
|
||||||
_carrier_positions: Dict[str, dict] = {}
|
_carrier_positions: Dict[str, dict] = {}
|
||||||
_positions_lock = threading.Lock()
|
_positions_lock = threading.Lock()
|
||||||
@@ -234,25 +272,159 @@ _GDELT_REQUEST_DELAY_SECONDS = 1.25
|
|||||||
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
||||||
|
|
||||||
|
|
||||||
|
def _now_iso() -> str:
|
||||||
|
return datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_iso(ts: str) -> Optional[datetime]:
|
||||||
|
if not ts:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
# Python's fromisoformat accepts +00:00 but not 'Z' until 3.11.
|
||||||
|
normalized = ts.replace("Z", "+00:00")
|
||||||
|
dt = datetime.fromisoformat(normalized)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_position_confidence(entry: dict, *, now: Optional[datetime] = None) -> str:
|
||||||
|
"""Return the public confidence label for a carrier cache entry.
|
||||||
|
|
||||||
|
Order of precedence:
|
||||||
|
- explicit "homeport_default" / "seed" labels are preserved.
|
||||||
|
- dated entries (with position_source_at) are "recent" if within
|
||||||
|
the configured freshness window, else "stale".
|
||||||
|
- missing position_source_at falls through to "stale".
|
||||||
|
"""
|
||||||
|
raw_label = str(entry.get("position_confidence", "") or "").strip()
|
||||||
|
# Explicit "kind of provenance" labels are preserved as-is. They
|
||||||
|
# describe HOW we got the position, not WHEN — a fresh headline-to-
|
||||||
|
# centroid match (#245) is still imprecise no matter how recently
|
||||||
|
# it was observed, and the seed (#244) is always the seed.
|
||||||
|
if raw_label in {"seed", "homeport_default", "approximate"}:
|
||||||
|
# Approximate entries can still age into "stale_approximate" if
|
||||||
|
# they fall out of the freshness window — that distinction lets
|
||||||
|
# the UI render a different badge for old-and-imprecise vs
|
||||||
|
# recent-and-imprecise. seed/homeport_default never age (they
|
||||||
|
# were never timestamped against real observations).
|
||||||
|
if raw_label == "approximate":
|
||||||
|
source_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
|
||||||
|
if source_at is not None:
|
||||||
|
reference = now or datetime.now(timezone.utc)
|
||||||
|
if reference - source_at > timedelta(days=_freshness_window_days()):
|
||||||
|
return "stale_approximate"
|
||||||
|
return raw_label
|
||||||
|
|
||||||
|
source_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
|
||||||
|
if not source_at:
|
||||||
|
return "stale"
|
||||||
|
|
||||||
|
reference = now or datetime.now(timezone.utc)
|
||||||
|
window = timedelta(days=_freshness_window_days())
|
||||||
|
if reference - source_at <= window:
|
||||||
|
return "recent"
|
||||||
|
return "stale"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_seed() -> Dict[str, dict]:
    """Read the seed file and return its ``carriers`` mapping.

    Returns:
        The dict stored under the top-level ``"carriers"`` key (no wrapper
        object around it). An empty dict is returned when the file does not
        exist, cannot be read or decoded, or does not have the expected
        dict-inside-dict shape.
    """
    try:
        if not SEED_FILE.exists():
            logger.info("Carrier seed file not present at %s; first-run will fall back to homeport defaults", SEED_FILE)
            return {}

        payload = json.loads(SEED_FILE.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return {}

        seed_entries = payload.get("carriers", {})
        if isinstance(seed_entries, dict):
            logger.info("Carrier seed loaded: %d entries from %s", len(seed_entries), SEED_FILE)
            return seed_entries
        return {}
    except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
        # Best effort: a broken seed must not stop startup.
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, e)
        return {}
|
||||||
|
|
||||||
|
|
||||||
def _load_cache() -> Dict[str, dict]:
|
def _load_cache() -> Dict[str, dict]:
|
||||||
"""Load cached carrier positions from disk."""
|
"""Load the mutable cache (last-known positions persisted between restarts)."""
|
||||||
try:
|
try:
|
||||||
if CACHE_FILE.exists():
|
if CACHE_FILE.exists():
|
||||||
data = json.loads(CACHE_FILE.read_text())
|
data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
|
||||||
logger.info(f"Carrier cache loaded: {len(data)} carriers from {CACHE_FILE}")
|
if isinstance(data, dict):
|
||||||
return data
|
logger.info("Carrier cache loaded: %d carriers from %s", len(data), CACHE_FILE)
|
||||||
|
return data
|
||||||
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
||||||
logger.warning(f"Failed to load carrier cache: {e}")
|
logger.warning("Failed to load carrier cache: %s", e)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def _save_cache(positions: Dict[str, dict]):
|
def _save_cache(positions: Dict[str, dict]) -> None:
|
||||||
"""Persist carrier positions to disk."""
|
"""Persist the mutable cache. Atomic write (temp + rename) so a crash
|
||||||
|
mid-write can't leave the file truncated."""
|
||||||
try:
|
try:
|
||||||
CACHE_FILE.write_text(json.dumps(positions, indent=2))
|
CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
logger.info(f"Carrier cache saved: {len(positions)} carriers")
|
tmp = CACHE_FILE.with_suffix(CACHE_FILE.suffix + ".tmp")
|
||||||
|
tmp.write_text(json.dumps(positions, indent=2), encoding="utf-8")
|
||||||
|
# On Windows os.replace is atomic and overwrites existing files.
|
||||||
|
os.replace(tmp, CACHE_FILE)
|
||||||
|
logger.info("Carrier cache saved: %d carriers", len(positions))
|
||||||
except (IOError, OSError) as e:
|
except (IOError, OSError) as e:
|
||||||
logger.warning(f"Failed to save carrier cache: {e}")
|
logger.warning("Failed to save carrier cache: %s", e)
|
||||||
|
|
||||||
|
|
||||||
|
def _homeport_entry_for(hull: str) -> Optional[dict]:
    """Build a homeport-default cache entry for ``hull``.

    Args:
        hull: Key to look up in ``CARRIER_REGISTRY``.

    Returns:
        A new entry positioned at the registry's homeport coordinates and
        labelled ``"homeport_default"``, or ``None`` when the registry has
        no (non-empty) record for the hull.
    """
    info = CARRIER_REGISTRY.get(hull)
    if info:
        return dict(
            lat=info["homeport_lat"],
            lng=info["homeport_lng"],
            heading=0,
            desc=f"{info['homeport']} (no observations yet)",
            source=f"Homeport default ({info['homeport']})",
            source_url=info.get("wiki", ""),
            # Stamped with the creation time; there is no real observation.
            position_source_at=_now_iso(),
            position_confidence="homeport_default",
        )
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _bootstrap_cache_if_missing() -> Dict[str, dict]:
    """Return the cache contents, creating the cache file if it is absent.

    - If ``CACHE_FILE`` exists, its contents are loaded and returned as-is.
    - Otherwise the seed is loaded; a non-empty seed is saved as the first
      cache and a shallow copy of it is returned.
    - With no usable seed, a homeport-default entry is built for every hull
      in ``CARRIER_REGISTRY`` and saved (when at least one was built).

    Deleting the cache file therefore makes the next call re-bootstrap.

    Returns:
        Mapping of hull to cache entry; may be empty.
    """
    if CACHE_FILE.exists():
        return _load_cache()

    seed_entries = _load_seed()
    if seed_entries:
        # Persist the seed so later runs take the "cache exists" path.
        _save_cache(seed_entries)
        logger.info("Carrier cache bootstrapped from seed (first-ever startup)")
        return dict(seed_entries)

    # No seed either: fall back to one homeport entry per registered hull.
    candidates = ((hull, _homeport_entry_for(hull)) for hull in CARRIER_REGISTRY)
    defaults: Dict[str, dict] = {
        hull: entry for hull, entry in candidates if entry is not None
    }
    if defaults:
        _save_cache(defaults)
    return defaults
|
||||||
|
|
||||||
|
|
||||||
def _match_region(text: str) -> Optional[tuple]:
|
def _match_region(text: str) -> Optional[tuple]:
|
||||||
@@ -270,10 +442,8 @@ def _match_carrier(text: str) -> Optional[str]:
|
|||||||
for hull, info in CARRIER_REGISTRY.items():
|
for hull, info in CARRIER_REGISTRY.items():
|
||||||
hull_check = hull.lower().replace("-", "")
|
hull_check = hull.lower().replace("-", "")
|
||||||
name_parts = info["name"].lower()
|
name_parts = info["name"].lower()
|
||||||
# Match hull number (e.g., "CVN-78", "CVN78")
|
|
||||||
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
||||||
return hull
|
return hull
|
||||||
# Match ship name (e.g., "Ford", "Eisenhower", "Vinson")
|
|
||||||
ship_name = name_parts.split("(")[0].strip()
|
ship_name = name_parts.split("(")[0].strip()
|
||||||
last_name = ship_name.split()[-1] if ship_name else ""
|
last_name = ship_name.split()[-1] if ship_name else ""
|
||||||
if last_name and len(last_name) > 3 and last_name in text_lower:
|
if last_name and len(last_name) > 3 and last_name in text_lower:
|
||||||
@@ -323,8 +493,9 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
|||||||
articles = data.get("articles", [])
|
articles = data.get("articles", [])
|
||||||
for art in articles:
|
for art in articles:
|
||||||
title = art.get("title", "")
|
title = art.get("title", "")
|
||||||
url = art.get("url", "")
|
article_url = art.get("url", "")
|
||||||
results.append({"title": title, "url": url})
|
article_at = art.get("seendate") or art.get("date") or ""
|
||||||
|
results.append({"title": title, "url": article_url, "seendate": article_at})
|
||||||
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
||||||
logger.debug(f"GDELT search failed for '{term}': {e}")
|
logger.debug(f"GDELT search failed for '{term}': {e}")
|
||||||
continue
|
continue
|
||||||
@@ -340,108 +511,175 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _gdelt_seendate_to_iso(seendate: str) -> Optional[str]:
|
||||||
|
"""GDELT returns YYYYMMDDhhmmss (UTC). Convert to ISO8601 for
|
||||||
|
position_source_at. Returns None if the input is unparseable."""
|
||||||
|
raw = (seendate or "").strip()
|
||||||
|
if len(raw) < 8 or not raw.isdigit():
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
dt = datetime.strptime(raw[:14] if len(raw) >= 14 else raw[:8] + "000000", "%Y%m%d%H%M%S")
|
||||||
|
return dt.replace(tzinfo=timezone.utc).isoformat()
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
|
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
|
||||||
"""Parse carrier positions from news article titles and descriptions."""
|
"""Parse carrier positions from news article titles.
|
||||||
|
|
||||||
|
Issue #245 (tg12): the position is a region centroid, which is
|
||||||
|
coarse — we now stamp ``position_confidence = "approximate"`` so
|
||||||
|
the UI can render that uncertainty. Issue #244: the
|
||||||
|
``position_source_at`` field is the news article's actual seen
|
||||||
|
date, NOT now(), so the freshness check correctly flips entries
|
||||||
|
to "stale" once they age past the configured window.
|
||||||
|
"""
|
||||||
updates: Dict[str, dict] = {}
|
updates: Dict[str, dict] = {}
|
||||||
|
|
||||||
for article in articles:
|
for article in articles:
|
||||||
title = article.get("title", "")
|
title = article.get("title", "")
|
||||||
|
|
||||||
# Try to match a carrier from the title
|
|
||||||
hull = _match_carrier(title)
|
hull = _match_carrier(title)
|
||||||
if not hull:
|
if not hull:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Try to match a region from the title
|
|
||||||
coords = _match_region(title)
|
coords = _match_region(title)
|
||||||
if not coords:
|
if not coords:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Only update if we haven't seen this carrier yet (first match wins — most recent)
|
# First match wins (most recent article, GDELT returns newest first
|
||||||
|
# per term).
|
||||||
if hull not in updates:
|
if hull not in updates:
|
||||||
|
iso_at = _gdelt_seendate_to_iso(str(article.get("seendate", ""))) or _now_iso()
|
||||||
updates[hull] = {
|
updates[hull] = {
|
||||||
"lat": coords[0],
|
"lat": coords[0],
|
||||||
"lng": coords[1],
|
"lng": coords[1],
|
||||||
|
"heading": 0,
|
||||||
"desc": title[:100],
|
"desc": title[:100],
|
||||||
"source": "GDELT News API",
|
"source": "GDELT News API (headline region match — approximate)",
|
||||||
"source_url": article.get("url", "https://api.gdeltproject.org"),
|
"source_url": article.get("url", "https://api.gdeltproject.org"),
|
||||||
"updated": datetime.now(timezone.utc).isoformat(),
|
"position_source_at": iso_at,
|
||||||
|
# Headline-to-centroid match is explicitly approximate.
|
||||||
|
"position_confidence": "approximate",
|
||||||
}
|
}
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Carrier update: {CARRIER_REGISTRY[hull]['name']} → {coords} (from: {title[:80]})"
|
"Carrier update: %s → %s (from: %s)",
|
||||||
|
CARRIER_REGISTRY[hull]["name"],
|
||||||
|
coords,
|
||||||
|
title[:80],
|
||||||
)
|
)
|
||||||
|
|
||||||
return updates
|
return updates
|
||||||
|
|
||||||
|
|
||||||
def _load_carrier_fallbacks() -> Dict[str, dict]:
|
def _enrich_for_rendering(hull: str, entry: dict, *, now: Optional[datetime] = None) -> dict:
|
||||||
"""Build carrier positions from static fallbacks + disk cache (instant, no network)."""
|
"""Add live computed fields (confidence label, last_osint_update)
|
||||||
positions: Dict[str, dict] = {}
|
on top of the persisted cache entry. The persisted entry is left
|
||||||
for hull, info in CARRIER_REGISTRY.items():
|
untouched; this function builds the public-facing object.
|
||||||
positions[hull] = {
|
"""
|
||||||
"name": info["name"],
|
info = CARRIER_REGISTRY.get(hull, {})
|
||||||
"lat": info["fallback_lat"],
|
confidence = _compute_position_confidence(entry, now=now)
|
||||||
"lng": info["fallback_lng"],
|
return {
|
||||||
"heading": info["fallback_heading"],
|
"name": entry.get("name", info.get("name", hull)),
|
||||||
"desc": info["fallback_desc"],
|
"lat": entry["lat"],
|
||||||
"wiki": info["wiki"],
|
"lng": entry["lng"],
|
||||||
"source": "USNI News Fleet & Marine Tracker",
|
"heading": entry.get("heading", 0),
|
||||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
"desc": entry.get("desc", ""),
|
||||||
"updated": datetime.now(timezone.utc).isoformat(),
|
"wiki": entry.get("wiki", info.get("wiki", "")),
|
||||||
}
|
"source": entry.get("source", "OSINT estimated position"),
|
||||||
|
"source_url": entry.get("source_url", ""),
|
||||||
# Overlay cached positions from previous runs (may have GDELT data)
|
"position_source_at": entry.get("position_source_at", ""),
|
||||||
cached = _load_cache()
|
"position_confidence": confidence,
|
||||||
for hull, cached_pos in cached.items():
|
# Existing field preserved for backward compatibility with the
|
||||||
if hull in positions:
|
# current frontend ShipPopup; now reflects the SOURCE's observed
|
||||||
if cached_pos.get("source", "").startswith("GDELT") or cached_pos.get(
|
# time (not now()), so "last reported X days ago" is honest.
|
||||||
"source", ""
|
"last_osint_update": entry.get("position_source_at", ""),
|
||||||
).startswith("News"):
|
# Convenience boolean for the UI: true when the position is
|
||||||
positions[hull].update(
|
# NOT live OSINT (used to render dimmed icons / badges).
|
||||||
{
|
"is_fallback": confidence in {"seed", "stale", "stale_approximate", "homeport_default"},
|
||||||
"lat": cached_pos["lat"],
|
}
|
||||||
"lng": cached_pos["lng"],
|
|
||||||
"desc": cached_pos.get("desc", positions[hull]["desc"]),
|
|
||||||
"source": cached_pos.get("source", "Cached OSINT"),
|
|
||||||
"updated": cached_pos.get("updated", ""),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return positions
|
|
||||||
|
|
||||||
|
|
||||||
def update_carrier_positions():
|
def update_carrier_positions() -> None:
|
||||||
"""Main update function — called on startup and every 12h.
|
"""Refresh carrier positions.
|
||||||
|
|
||||||
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
|
Phase 1 (instant): publish whatever's in carrier_cache.json (or
|
||||||
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
|
bootstrap from seed on first-ever run), so the map has carriers
|
||||||
|
immediately.
|
||||||
|
|
||||||
|
Phase 2 (slow): query GDELT and replace position entries for any
|
||||||
|
carrier mentioned in fresh news. Persist back to cache.
|
||||||
"""
|
"""
|
||||||
global _last_update
|
global _last_update
|
||||||
|
|
||||||
# --- Phase 1: instant fallback + cache ---
|
# --- Phase 1: instant cache (bootstrap from seed on first-ever run) ---
|
||||||
positions = _load_carrier_fallbacks()
|
positions = _bootstrap_cache_if_missing()
|
||||||
|
|
||||||
|
# Ensure every registered hull has SOMETHING in the cache. A hull
|
||||||
|
# the seed didn't cover (e.g. added after install) renders at its
|
||||||
|
# homeport with "homeport_default" confidence.
|
||||||
|
for hull in CARRIER_REGISTRY:
|
||||||
|
if hull not in positions:
|
||||||
|
entry = _homeport_entry_for(hull)
|
||||||
|
if entry is not None:
|
||||||
|
positions[hull] = entry
|
||||||
|
|
||||||
with _positions_lock:
|
with _positions_lock:
|
||||||
# Only overwrite if positions are currently empty (first startup).
|
|
||||||
# If we already have data from a previous cycle, keep it while GDELT runs.
|
|
||||||
if not _carrier_positions:
|
if not _carrier_positions:
|
||||||
_carrier_positions.update(positions)
|
_carrier_positions.update(positions)
|
||||||
_last_update = datetime.now(timezone.utc)
|
_last_update = datetime.now(timezone.utc)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)"
|
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||||
|
len(positions),
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Phase 2: slow GDELT enrichment ---
|
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||||
|
#
|
||||||
|
# USNI publishes a weekly editorial tracker with each carrier's
|
||||||
|
# actual operating area, parsed from explicit prose like
|
||||||
|
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||||
|
# These positions are tagged ``position_confidence: "recent"`` because
|
||||||
|
# they reflect actual reporting, not headline-keyword centroids.
|
||||||
|
# USNI updates are preferred over GDELT — they're authoritative on
|
||||||
|
# US Navy positions where GDELT is just article-title text mining.
|
||||||
|
try:
|
||||||
|
from services.fetchers.usni_fleet_tracker import (
|
||||||
|
fetch_latest_fleet_tracker_positions,
|
||||||
|
)
|
||||||
|
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||||
|
for hull, pos in usni_positions.items():
|
||||||
|
positions[hull] = pos
|
||||||
|
logger.info(
|
||||||
|
"Carrier USNI update: %s → %s",
|
||||||
|
CARRIER_REGISTRY[hull]["name"],
|
||||||
|
pos.get("desc", ""),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||||
|
|
||||||
|
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||||
|
#
|
||||||
|
# Used only to backfill carriers USNI didn't mention this week. The
|
||||||
|
# position is stamped ``approximate`` so the UI knows it's a
|
||||||
|
# headline-centroid match (Issue #245).
|
||||||
try:
|
try:
|
||||||
articles = _fetch_gdelt_carrier_news()
|
articles = _fetch_gdelt_carrier_news()
|
||||||
news_positions = _parse_carrier_positions_from_news(articles)
|
news_positions = _parse_carrier_positions_from_news(articles)
|
||||||
for hull, pos in news_positions.items():
|
for hull, pos in news_positions.items():
|
||||||
if hull in positions:
|
# Only overwrite if the existing entry is NOT a recent USNI
|
||||||
positions[hull].update(pos)
|
# observation. A "recent" USNI position is higher-confidence
|
||||||
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
|
# than a GDELT headline-centroid match — don't let GDELT
|
||||||
|
# demote a real position to an approximate one.
|
||||||
|
existing = positions.get(hull, {})
|
||||||
|
existing_conf = _compute_position_confidence(existing)
|
||||||
|
if existing_conf == "recent":
|
||||||
|
continue
|
||||||
|
positions[hull] = pos
|
||||||
|
logger.info(
|
||||||
|
"Carrier OSINT: updated %s from GDELT news",
|
||||||
|
CARRIER_REGISTRY[hull]["name"],
|
||||||
|
)
|
||||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||||
logger.warning(f"GDELT carrier fetch failed: {e}")
|
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||||
|
|
||||||
# Save and update the global state with enriched positions
|
|
||||||
with _positions_lock:
|
with _positions_lock:
|
||||||
_carrier_positions.clear()
|
_carrier_positions.clear()
|
||||||
_carrier_positions.update(positions)
|
_carrier_positions.update(positions)
|
||||||
@@ -449,21 +687,15 @@ def update_carrier_positions():
|
|||||||
|
|
||||||
_save_cache(positions)
|
_save_cache(positions)
|
||||||
|
|
||||||
sources = {}
|
confidences: Dict[str, int] = {}
|
||||||
for p in positions.values():
|
for entry in positions.values():
|
||||||
src = p.get("source", "unknown")
|
label = _compute_position_confidence(entry)
|
||||||
sources[src] = sources.get(src, 0) + 1
|
confidences[label] = confidences.get(label, 0) + 1
|
||||||
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
|
logger.info("Carrier tracker: %d carriers updated. Confidence: %s", len(positions), confidences)
|
||||||
|
|
||||||
|
|
||||||
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||||
"""Offset carriers that share identical coordinates so they don't stack.
|
"""Offset carriers that share identical coordinates so they don't stack."""
|
||||||
|
|
||||||
At port: offset along the pier axis (~500m / 0.004° apart).
|
|
||||||
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
|
|
||||||
so they're visibly separate but clearly operating together.
|
|
||||||
"""
|
|
||||||
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
groups: dict[str, list[int]] = defaultdict(list)
|
groups: dict[str, list[int]] = defaultdict(list)
|
||||||
@@ -475,7 +707,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
|||||||
if len(indices) < 2:
|
if len(indices) < 2:
|
||||||
continue
|
continue
|
||||||
n = len(indices)
|
n = len(indices)
|
||||||
# Determine if this is a port (near a homeport) or at sea
|
|
||||||
sample = result[indices[0]]
|
sample = result[indices[0]]
|
||||||
at_port = any(
|
at_port = any(
|
||||||
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
||||||
@@ -484,7 +715,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if at_port:
|
if at_port:
|
||||||
# Use each carrier's distinct homeport pier coordinates
|
|
||||||
for idx in indices:
|
for idx in indices:
|
||||||
carrier = result[idx]
|
carrier = result[idx]
|
||||||
hull = None
|
hull = None
|
||||||
@@ -497,8 +727,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
|||||||
carrier["lat"] = info["homeport_lat"]
|
carrier["lat"] = info["homeport_lat"]
|
||||||
carrier["lng"] = info["homeport_lng"]
|
carrier["lng"] = info["homeport_lng"]
|
||||||
else:
|
else:
|
||||||
# At sea: spread in a line perpendicular to travel (~0.08° apart)
|
spacing = 0.08
|
||||||
spacing = 0.08 # ~9km — close enough to see they're together
|
|
||||||
start_offset = -(n - 1) * spacing / 2
|
start_offset = -(n - 1) * spacing / 2
|
||||||
for j, idx in enumerate(indices):
|
for j, idx in enumerate(indices):
|
||||||
result[idx]["lng"] += start_offset + j * spacing
|
result[idx]["lng"] += start_offset + j * spacing
|
||||||
@@ -507,36 +736,44 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
|||||||
|
|
||||||
|
|
||||||
def get_carrier_positions() -> List[dict]:
|
def get_carrier_positions() -> List[dict]:
|
||||||
"""Return current carrier positions for the data pipeline."""
|
"""Return current carrier positions for the data pipeline.
|
||||||
|
|
||||||
|
Each entry has the full provenance + freshness fields; the UI can
|
||||||
|
decide how to render them. Carriers are never hidden — only
|
||||||
|
labeled.
|
||||||
|
"""
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
with _positions_lock:
|
with _positions_lock:
|
||||||
result = []
|
result: List[dict] = []
|
||||||
for hull, pos in _carrier_positions.items():
|
for hull, entry in _carrier_positions.items():
|
||||||
info = CARRIER_REGISTRY.get(hull, {})
|
enriched = _enrich_for_rendering(hull, entry, now=now)
|
||||||
result.append(
|
result.append(
|
||||||
{
|
{
|
||||||
"name": pos.get("name", info.get("name", hull)),
|
"name": enriched["name"],
|
||||||
"type": "carrier",
|
"type": "carrier",
|
||||||
"lat": pos["lat"],
|
"lat": enriched["lat"],
|
||||||
"lng": pos["lng"],
|
"lng": enriched["lng"],
|
||||||
"heading": None, # Heading unknown for carriers — OSINT cannot determine true heading
|
"heading": None, # OSINT cannot determine true heading.
|
||||||
"sog": 0,
|
"sog": 0,
|
||||||
"cog": 0,
|
"cog": 0,
|
||||||
"country": "United States",
|
"country": "United States",
|
||||||
"desc": pos.get("desc", ""),
|
"desc": enriched["desc"],
|
||||||
"wiki": pos.get("wiki", info.get("wiki", "")),
|
"wiki": enriched["wiki"],
|
||||||
"estimated": True,
|
"estimated": True,
|
||||||
"source": pos.get("source", "OSINT estimated position"),
|
"source": enriched["source"],
|
||||||
"source_url": pos.get(
|
"source_url": enriched["source_url"],
|
||||||
"source_url", "https://news.usni.org/category/fleet-tracker"
|
"last_osint_update": enriched["last_osint_update"],
|
||||||
),
|
# New fields (additive — existing UI continues to work):
|
||||||
"last_osint_update": pos.get("updated", ""),
|
"position_source_at": enriched["position_source_at"],
|
||||||
|
"position_confidence": enriched["position_confidence"],
|
||||||
|
"is_fallback": enriched["is_fallback"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return _deconflict_positions(result)
|
return _deconflict_positions(result)
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily
|
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily.
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
_scheduler_thread: Optional[threading.Thread] = None
|
_scheduler_thread: Optional[threading.Thread] = None
|
||||||
_scheduler_stop = threading.Event()
|
_scheduler_stop = threading.Event()
|
||||||
@@ -544,7 +781,6 @@ _scheduler_stop = threading.Event()
|
|||||||
|
|
||||||
def _scheduler_loop():
|
def _scheduler_loop():
|
||||||
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
||||||
# Initial update on startup
|
|
||||||
try:
|
try:
|
||||||
update_carrier_positions()
|
update_carrier_positions()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -552,7 +788,6 @@ def _scheduler_loop():
|
|||||||
|
|
||||||
while not _scheduler_stop.is_set():
|
while not _scheduler_stop.is_set():
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
# Next target: 00:00 or 12:00 UTC, whichever is sooner
|
|
||||||
hour = now.hour
|
hour = now.hour
|
||||||
if hour < 12:
|
if hour < 12:
|
||||||
next_hour = 12
|
next_hour = 12
|
||||||
@@ -561,18 +796,17 @@ def _scheduler_loop():
|
|||||||
|
|
||||||
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
||||||
if next_hour == 24:
|
if next_hour == 24:
|
||||||
from datetime import timedelta
|
|
||||||
|
|
||||||
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
wait_seconds = (next_run - now).total_seconds()
|
wait_seconds = (next_run - now).total_seconds()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Carrier tracker: next update at {next_run.isoformat()} ({wait_seconds/3600:.1f}h)"
|
"Carrier tracker: next update at %s (%.1fh)",
|
||||||
|
next_run.isoformat(),
|
||||||
|
wait_seconds / 3600,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Wait until next scheduled time, or until stop event
|
|
||||||
if _scheduler_stop.wait(timeout=wait_seconds):
|
if _scheduler_stop.wait(timeout=wait_seconds):
|
||||||
break # Stop event was set
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
update_carrier_positions()
|
update_carrier_positions()
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class Settings(BaseSettings):
|
|||||||
MESH_ARTI_ENABLED: bool = False
|
MESH_ARTI_ENABLED: bool = False
|
||||||
MESH_ARTI_SOCKS_PORT: int = 9050
|
MESH_ARTI_SOCKS_PORT: int = 9050
|
||||||
MESH_RELAY_PEERS: str = ""
|
MESH_RELAY_PEERS: str = ""
|
||||||
|
MESH_PUBLIC_PEER_URL: str = ""
|
||||||
# Bootstrap seeds are discovery hints, not authoritative network roots.
|
# Bootstrap seeds are discovery hints, not authoritative network roots.
|
||||||
# Nodes promote healthy discovered peers from the store/manifest over time.
|
# Nodes promote healthy discovered peers from the store/manifest over time.
|
||||||
MESH_BOOTSTRAP_SEED_PEERS: str = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
MESH_BOOTSTRAP_SEED_PEERS: str = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||||
@@ -53,6 +54,12 @@ class Settings(BaseSettings):
|
|||||||
MESH_RELAY_FAILURE_COOLDOWN_S: int = 120
|
MESH_RELAY_FAILURE_COOLDOWN_S: int = 120
|
||||||
MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S: int = 15
|
MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S: int = 15
|
||||||
MESH_PEER_PUSH_SECRET: str = ""
|
MESH_PEER_PUSH_SECRET: str = ""
|
||||||
|
# Issue #256 (tg12): optional per-peer HMAC secret map. Comma-separated
|
||||||
|
# `url=secret` pairs. When a peer URL appears here, only that per-peer
|
||||||
|
# secret is accepted for it — the global MESH_PEER_PUSH_SECRET above is
|
||||||
|
# ignored for that specific URL. Single-peer installs and unmigrated
|
||||||
|
# multi-peer installs leave this empty and behavior is unchanged.
|
||||||
|
MESH_PEER_SECRETS: str = ""
|
||||||
MESH_RNS_APP_NAME: str = "shadowbroker"
|
MESH_RNS_APP_NAME: str = "shadowbroker"
|
||||||
MESH_RNS_ASPECT: str = "infonet"
|
MESH_RNS_ASPECT: str = "infonet"
|
||||||
MESH_RNS_IDENTITY_PATH: str = ""
|
MESH_RNS_IDENTITY_PATH: str = ""
|
||||||
@@ -110,6 +117,21 @@ class Settings(BaseSettings):
|
|||||||
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
||||||
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
||||||
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
||||||
|
# Anti-spam: cap on distinct UNACKED messages a single sender can have
|
||||||
|
# parked in a single recipient's mailbox at any one time. Once the
|
||||||
|
# recipient pulls (acks) a message, the sender's quota for that pair
|
||||||
|
# frees up. Default 2 — a sender who wants to deliver more must wait
|
||||||
|
# for the recipient to actually read the prior messages.
|
||||||
|
#
|
||||||
|
# This cap is enforced TWICE: once on the local deposit path (the
|
||||||
|
# sender's own node refuses to spool the 3rd message) AND once on
|
||||||
|
# the replication-acceptance path (honest peer relays refuse to
|
||||||
|
# accept inbound replicas that would put them over the cap). The
|
||||||
|
# double enforcement makes the rule a NETWORK rule — patching out
|
||||||
|
# the local check on a hostile sender's relay doesn't let extras
|
||||||
|
# propagate, because every honest peer enforces the same cap on
|
||||||
|
# inbound replication.
|
||||||
|
MESH_DM_PENDING_PER_SENDER_LIMIT: int = 2
|
||||||
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
||||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
||||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
||||||
@@ -289,6 +311,19 @@ class Settings(BaseSettings):
|
|||||||
# service operator can identify per-install traffic instead of a generic
|
# service operator can identify per-install traffic instead of a generic
|
||||||
# "ShadowBroker" aggregate.
|
# "ShadowBroker" aggregate.
|
||||||
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
||||||
|
# Per-install operator handle used in the User-Agent for EVERY third-party
|
||||||
|
# API the backend calls (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz,
|
||||||
|
# Broadcastify, weather.gov, NUFORC, etc.). The default is empty, in which
|
||||||
|
# case backend/services/network_utils.py auto-generates a stable
|
||||||
|
# pseudonymous handle like "operator-7f3a92" on first use and caches it.
|
||||||
|
# Operators who want to identify themselves with a real handle can set
|
||||||
|
# this; operators who want to stay pseudonymous can leave it empty.
|
||||||
|
#
|
||||||
|
# The handle is sent ONLY to public third-party APIs. It is NEVER mixed
|
||||||
|
# into mesh / Wormhole / Infonet identity (those have their own crypto
|
||||||
|
# identity layer; conflating the two would leak public attribution into
|
||||||
|
# private mesh state).
|
||||||
|
OPERATOR_HANDLE: str = ""
|
||||||
|
|
||||||
# SAR (Synthetic Aperture Radar) data layer
|
# SAR (Synthetic Aperture Radar) data layer
|
||||||
# Mode A — free catalog metadata, no account, default-on
|
# Mode A — free catalog metadata, no account, default-on
|
||||||
|
|||||||
@@ -11,8 +11,13 @@ DEFAULT_TRAIL_TTL_S = 300 # 5 min - trail TTL for non-tracked flights
|
|||||||
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
|
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
|
||||||
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
|
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
|
||||||
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
|
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
|
||||||
GPS_JAMMING_MIN_RATIO = 0.30 # 30% degraded aircraft to flag zone
|
# Tuned 2026-05: previously 0.30 / 5 aircraft which — combined with the
|
||||||
GPS_JAMMING_MIN_AIRCRAFT = 5 # Min aircraft in grid cell for statistical significance
|
# -1 noise cushion in the detector AND the pre-fix nac_p==0 filter that
|
||||||
|
# discarded jamming victims — meant the layer almost never lit up.
|
||||||
|
# Lowering the bar so genuine jamming zones with sparser ADS-B coverage
|
||||||
|
# clear (eastern Med, Russia/Ukraine border, Iran/Iraq).
|
||||||
|
GPS_JAMMING_MIN_RATIO = 0.20 # 20% degraded aircraft to flag zone
|
||||||
|
GPS_JAMMING_MIN_AIRCRAFT = 3 # Min aircraft in grid cell for statistical significance
|
||||||
|
|
||||||
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
|
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
|
||||||
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
|
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
|
||||||
|
|||||||
@@ -777,6 +777,39 @@ def start_scheduler():
|
|||||||
misfire_grace_time=60,
|
misfire_grace_time=60,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Flight observation pruning — drops icao24 → first_seen_at entries we
|
||||||
|
# haven't seen in an hour. Same cadence as AIS prune for symmetry; the
|
||||||
|
# per-tick scan is O(in-flight aircraft) so it's cheap.
|
||||||
|
from services.fetchers.flight_observations import prune as _prune_flight_observations
|
||||||
|
_scheduler.add_job(
|
||||||
|
lambda: _run_task_with_health(_prune_flight_observations, "prune_flight_observations"),
|
||||||
|
"interval",
|
||||||
|
minutes=5,
|
||||||
|
id="flight_observation_prune",
|
||||||
|
max_instances=1,
|
||||||
|
misfire_grace_time=60,
|
||||||
|
)
|
||||||
|
|
||||||
|
# AISHub REST fallback — slow polling when the AISStream WebSocket
|
||||||
|
# primary is offline. Configurable interval via
|
||||||
|
# AISHUB_POLL_INTERVAL_MINUTES env (default 20 min). Operator must
|
||||||
|
# set AISHUB_USERNAME to opt in. The fetcher is gated internally on
|
||||||
|
# the primary being disconnected, so this job is cheap when the
|
||||||
|
# WebSocket is healthy (early-returns after a status check).
|
||||||
|
from services.fetchers.aishub_fallback import (
|
||||||
|
aishub_poll_interval_minutes,
|
||||||
|
fetch_aishub_vessels,
|
||||||
|
)
|
||||||
|
_aishub_interval = aishub_poll_interval_minutes()
|
||||||
|
_scheduler.add_job(
|
||||||
|
lambda: _run_task_with_health(fetch_aishub_vessels, "fetch_aishub_vessels"),
|
||||||
|
"interval",
|
||||||
|
minutes=_aishub_interval,
|
||||||
|
id="aishub_fallback",
|
||||||
|
max_instances=1,
|
||||||
|
misfire_grace_time=120,
|
||||||
|
)
|
||||||
|
|
||||||
# Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
|
# Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
|
||||||
# days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
|
# days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
|
||||||
# and broken upstream). Airline schedules change on a quarterly cycle,
|
# and broken upstream). Airline schedules change on a quarterly cycle,
|
||||||
@@ -960,16 +993,19 @@ def start_scheduler():
|
|||||||
misfire_grace_time=600,
|
misfire_grace_time=600,
|
||||||
)
|
)
|
||||||
|
|
||||||
# UAP sightings (NUFORC) — daily at 12:00 UTC
|
# UAP sightings (NUFORC) — weekly on Mondays at 12:00 UTC. The layer is a
|
||||||
|
# rolling last-60-days digest; refreshing once a week is enough cadence
|
||||||
|
# for human-readable map exploration and keeps load on nuforc.org light.
|
||||||
_scheduler.add_job(
|
_scheduler.add_job(
|
||||||
lambda: _run_task_with_health(
|
lambda: _run_task_with_health(
|
||||||
lambda: fetch_uap_sightings(force_refresh=True),
|
lambda: fetch_uap_sightings(force_refresh=True),
|
||||||
"fetch_uap_sightings",
|
"fetch_uap_sightings",
|
||||||
),
|
),
|
||||||
"cron",
|
"cron",
|
||||||
|
day_of_week="mon",
|
||||||
hour=12,
|
hour=12,
|
||||||
minute=0,
|
minute=0,
|
||||||
id="uap_sightings_daily",
|
id="uap_sightings_weekly",
|
||||||
max_instances=1,
|
max_instances=1,
|
||||||
misfire_grace_time=3600,
|
misfire_grace_time=3600,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,8 +16,15 @@ from typing import Any
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _feed_ingester_user_agent() -> str:
    """Return the User-Agent string used when fetching operator-curated feed URLs.

    Round 7a: delegates to ``outbound_user_agent`` with the ``"feed-ingester"``
    purpose tag for per-install attribution. The value is computed on each
    call rather than frozen at import time.
    """
    return outbound_user_agent("feed-ingester")
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# State
|
# State
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -157,7 +164,7 @@ def _fetch_layer_feed(layer: dict[str, Any]) -> None:
|
|||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
feed_url,
|
feed_url,
|
||||||
timeout=_FETCH_TIMEOUT,
|
timeout=_FETCH_TIMEOUT,
|
||||||
headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
|
headers={"User-Agent": _feed_ingester_user_agent()},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
|
|||||||
@@ -21,6 +21,13 @@ from typing import Any
|
|||||||
import defusedxml.ElementTree as ET
|
import defusedxml.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _aircraft_db_user_agent() -> str:
    """Return the User-Agent string for aircraft-database downloads.

    Round 7a: the import is deliberately lazy (function scope) so the
    per-install operator handle is resolved at call time.
    """
    from services.network_utils import outbound_user_agent

    return outbound_user_agent("aircraft-database")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_BUCKET_LIST_URL = (
|
_BUCKET_LIST_URL = (
|
||||||
@@ -44,7 +51,7 @@ def _latest_snapshot_key() -> str:
|
|||||||
response = requests.get(
|
response = requests.get(
|
||||||
_BUCKET_LIST_URL,
|
_BUCKET_LIST_URL,
|
||||||
timeout=_LIST_TIMEOUT_S,
|
timeout=_LIST_TIMEOUT_S,
|
||||||
headers={"User-Agent": _USER_AGENT},
|
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
root = ET.fromstring(response.text)
|
root = ET.fromstring(response.text)
|
||||||
@@ -71,7 +78,7 @@ def _stream_csv_index(url: str) -> dict[str, dict[str, str]]:
|
|||||||
url,
|
url,
|
||||||
timeout=_DOWNLOAD_TIMEOUT_S,
|
timeout=_DOWNLOAD_TIMEOUT_S,
|
||||||
stream=True,
|
stream=True,
|
||||||
headers={"User-Agent": _USER_AGENT},
|
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||||
) as response:
|
) as response:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
line_iter = (
|
line_iter = (
|
||||||
|
|||||||
@@ -0,0 +1,290 @@
|
|||||||
|
"""AISHub REST fallback for ship tracking when AISStream is unreachable.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
On 2026-05-23 ``stream.aisstream.io`` (the primary live AIS WebSocket feed)
|
||||||
|
went fully offline. Backend's only ship signal vanished. This module polls
|
||||||
|
``data.aishub.net``'s free REST API on a slow cadence (default 20 min) when
|
||||||
|
the WebSocket primary is disconnected, so the ships layer doesn't go fully
|
||||||
|
dark during upstream outages.
|
||||||
|
|
||||||
|
Why 20 minutes
|
||||||
|
--------------
|
||||||
|
AISHub's free tier is rate-limited and explicitly asks consumers to be
|
||||||
|
courteous. 20 minutes is well inside their limits, gives ships time to
|
||||||
|
move enough to look "alive" on the map, and won't drain their service.
|
||||||
|
Configurable via the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (clamped to
|
||||||
|
[1, 360]).
|
||||||
|
|
||||||
|
Why slow vs primary
|
||||||
|
-------------------
|
||||||
|
This is degraded mode, not a replacement. A ship at 20 knots moves about
|
||||||
|
6 nautical miles in 20 minutes — visible on the map but coarser than the
|
||||||
|
real-time WebSocket signal. When AISStream comes back online, the
|
||||||
|
WebSocket data will overwrite these records via the same ``_vessels``
|
||||||
|
dict and ``source`` will flip from ``"aishub"`` back to upstream-live.
|
||||||
|
|
||||||
|
Opt-in
|
||||||
|
------
|
||||||
|
Operator must set ``AISHUB_USERNAME`` (free registration at
|
||||||
|
https://www.aishub.net/api). If unset, this fetcher is a no-op.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from services.network_utils import fetch_with_curl
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
AISHUB_URL = "https://data.aishub.net/ws.php"
|
||||||
|
|
||||||
|
|
||||||
|
def aishub_username() -> str:
    """Return the AISHub username from the ``AISHUB_USERNAME`` env var.

    Surrounding whitespace is stripped; an unset variable yields ``""``.
    """
    return str(os.environ.get("AISHUB_USERNAME", "")).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def aishub_fallback_enabled() -> bool:
    """Return True only when ``AISHUB_USERNAME`` is set to a non-blank value.

    The presence of the username is the opt-in: an operator who has
    registered with AISHub sets it, everyone else leaves it empty.
    """
    return bool(aishub_username())
|
||||||
|
|
||||||
|
|
||||||
|
def aishub_poll_interval_minutes() -> int:
    """Return the AISHub polling interval in minutes.

    Read from the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (default 20).
    Unparseable values fall back to 20, and the result is clamped to
    [1, 360] so a hostile or misconfigured env var can neither hammer the
    upstream nor silence the fallback for more than six hours.
    """
    raw = os.environ.get("AISHUB_POLL_INTERVAL_MINUTES", "20")
    try:
        value = int(str(raw).strip())
    except (TypeError, ValueError):
        value = 20
    return max(1, min(360, value))
|
||||||
|
|
||||||
|
|
||||||
|
def _should_run_fallback() -> bool:
    """Decide whether the AISHub poll should run on this tick.

    Only runs when the primary WebSocket is not connected, to avoid
    stomping over fresher live data when AISStream is healthy.

    Returns False if:
      * AISHub isn't configured (no username), or
      * ``ais_proxy_status()`` reports ``connected`` as exactly ``True``.

    Returns True in every other case, including when ``services.ais_stream``
    cannot be imported or its status call raises. Because the only gate is
    the ``connected`` flag, an install that set ``AISHUB_USERNAME`` but whose
    status never reports a connected primary still gets AISHub data on its
    own slow cadence.
    """
    if not aishub_fallback_enabled():
        return False
    try:
        from services.ais_stream import ais_proxy_status

        status = ais_proxy_status() or {}
    except Exception:
        return True  # ais_stream not importable? still try AISHub.
    # If the WebSocket primary is connected, skip the fallback — fresher
    # data is already flowing.
    if status.get("connected") is True:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_aishub_response(payload: str) -> list[dict]:
|
||||||
|
"""Parse the AISHub JSON response into a list of vessel records.
|
||||||
|
|
||||||
|
Successful response shape::
|
||||||
|
|
||||||
|
[
|
||||||
|
{"ERROR": false, "USERNAME": "...", "FORMAT": "1", "RECORDS": N},
|
||||||
|
[{"MMSI": ..., "LATITUDE": ..., "LONGITUDE": ..., ...}, ...]
|
||||||
|
]
|
||||||
|
|
||||||
|
Error response shape::
|
||||||
|
|
||||||
|
[{"ERROR": true, "ERROR_MESSAGE": "..."}]
|
||||||
|
|
||||||
|
Empty payload (e.g. silent rate-limit drop) returns ``[]``.
|
||||||
|
"""
|
||||||
|
if not payload or not payload.strip():
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
data = json.loads(payload)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning("AISHub: response is not JSON: %s", e)
|
||||||
|
return []
|
||||||
|
if not isinstance(data, list) or not data:
|
||||||
|
return []
|
||||||
|
header = data[0] if isinstance(data[0], dict) else {}
|
||||||
|
if header.get("ERROR") is True:
|
||||||
|
logger.warning(
|
||||||
|
"AISHub: upstream error: %s",
|
||||||
|
header.get("ERROR_MESSAGE", "<unspecified>"),
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
if len(data) < 2 or not isinstance(data[1], list):
|
||||||
|
return []
|
||||||
|
return [row for row in data[1] if isinstance(row, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_record(row: dict) -> dict | None:
|
||||||
|
"""Map an AISHub vessel record to our internal vessel schema.
|
||||||
|
|
||||||
|
Returns None when the record can't be used (no MMSI, bad position,
|
||||||
|
sentinel "not available" lat/lng).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
mmsi = int(row.get("MMSI") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
if not mmsi:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
lat = float(row.get("LATITUDE"))
|
||||||
|
lng = float(row.get("LONGITUDE"))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
# AIS uses 91/181 as "no position available" sentinels.
|
||||||
|
if abs(lat) > 90 or abs(lng) > 180:
|
||||||
|
return None
|
||||||
|
if lat == 91.0 or lng == 181.0:
|
||||||
|
return None
|
||||||
|
# SOG raw 102.3 is "speed not available"; sanitize to 0.
|
||||||
|
try:
|
||||||
|
sog_raw = float(row.get("SOG") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
sog_raw = 0.0
|
||||||
|
sog = 0.0 if sog_raw >= 102.2 else sog_raw
|
||||||
|
try:
|
||||||
|
cog = float(row.get("COG") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
cog = 0.0
|
||||||
|
try:
|
||||||
|
heading_raw = int(row.get("HEADING") or 511)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
heading_raw = 511
|
||||||
|
# AIS heading sentinel 511 = "not available" — fall back to COG.
|
||||||
|
heading = heading_raw if heading_raw != 511 else cog
|
||||||
|
try:
|
||||||
|
ais_type = int(row.get("TYPE") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
ais_type = 0
|
||||||
|
return {
|
||||||
|
"mmsi": mmsi,
|
||||||
|
"lat": lat,
|
||||||
|
"lng": lng,
|
||||||
|
"sog": sog,
|
||||||
|
"cog": cog,
|
||||||
|
"heading": heading,
|
||||||
|
"name": str(row.get("NAME") or "").strip() or "UNKNOWN",
|
||||||
|
"callsign": str(row.get("CALLSIGN") or "").strip(),
|
||||||
|
"destination": str(row.get("DEST") or "").strip().replace("@", "") or "",
|
||||||
|
"imo": int(row.get("IMO") or 0),
|
||||||
|
"ais_type_code": ais_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_aishub_vessels() -> int:
    """Poll AISHub and merge vessels into the shared ``_vessels`` store.

    Returns the number of vessels updated (0 on skip, error, or no data).
    Designed to be called by the scheduler's ``aishub_fallback`` interval
    job; every failure mode is logged and reported as 0 rather than raised.
    """
    if not _should_run_fallback():
        logger.debug("AISHub fallback skipped: primary connected or not configured")
        return 0

    # Function-scope import: only this block needs it. The query string is
    # built with urlencode so a username containing '&', '=', spaces or
    # non-ASCII characters cannot corrupt or inject request parameters.
    from urllib.parse import urlencode

    query = urlencode(
        {
            "username": aishub_username(),
            "format": 1,
            "output": "json",
            "compress": 0,
        }
    )
    url = f"{AISHUB_URL}?{query}"

    try:
        response = fetch_with_curl(url, timeout=30)
    except Exception as e:
        logger.warning("AISHub fetch failed: %s", e)
        return 0

    if not response or response.status_code != 200:
        logger.warning(
            "AISHub HTTP %s",
            getattr(response, "status_code", "None"),
        )
        return 0

    rows = _parse_aishub_response(getattr(response, "text", "") or "")
    if not rows:
        return 0

    # Normalize before taking the lock so the critical section only covers
    # the actual merge into the shared store.
    records = [rec for rec in map(_normalize_record, rows) if rec is not None]
    if not records:
        return 0

    # Inline imports to avoid a circular dependency at module load time
    # (ais_stream imports lots of things and is loaded by main.py).
    from services.ais_stream import (
        _vessels,
        _vessels_lock,
        _record_vessel_trail_locked,
        classify_vessel,
        get_country_from_mmsi,
    )

    now = time.time()
    count = 0
    with _vessels_lock:
        for normalized in records:
            mmsi = normalized["mmsi"]
            vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
            # Don't overwrite fresher live data: if the WebSocket pushed an
            # update for this MMSI more recently than now-1s (race during
            # the brief reconnection window) keep the live one.
            last = float(vessel.get("_updated") or 0)
            if last > now - 1:
                continue
            vessel.update(
                {
                    "lat": normalized["lat"],
                    "lng": normalized["lng"],
                    "sog": normalized["sog"],
                    "cog": normalized["cog"],
                    "heading": normalized["heading"],
                    "_updated": now,
                    "source": "aishub",
                }
            )
            # Static fields only overwrite existing values when AISHub
            # actually supplied something, so a sparse record cannot blank
            # out data learned earlier.
            if normalized["name"] and normalized["name"] != "UNKNOWN":
                vessel["name"] = normalized["name"]
            if normalized["callsign"]:
                vessel["callsign"] = normalized["callsign"]
            if normalized["destination"]:
                vessel["destination"] = normalized["destination"]
            if normalized["imo"]:
                vessel["imo"] = normalized["imo"]
            if normalized["ais_type_code"]:
                vessel["ais_type_code"] = normalized["ais_type_code"]
                vessel["type"] = classify_vessel(normalized["ais_type_code"], mmsi)
            if not vessel.get("country"):
                vessel["country"] = get_country_from_mmsi(mmsi)
            _record_vessel_trail_locked(
                mmsi,
                normalized["lat"],
                normalized["lng"],
                normalized["sog"],
                now,
            )
            count += 1

    if count:
        logger.info(
            "AISHub fallback: merged %d vessels (poll interval %d min)",
            count,
            aishub_poll_interval_minutes(),
        )
    return count
|
||||||
@@ -15,7 +15,11 @@ import time
|
|||||||
import heapq
|
import heapq
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from services.network_utils import external_curl_fallback_enabled, fetch_with_curl
|
from services.network_utils import (
|
||||||
|
external_curl_fallback_enabled,
|
||||||
|
fetch_with_curl,
|
||||||
|
outbound_user_agent,
|
||||||
|
)
|
||||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||||
from services.fetchers.nuforc_enrichment import enrich_sighting
|
from services.fetchers.nuforc_enrichment import enrich_sighting
|
||||||
from services.fetchers.retry import with_retry
|
from services.fetchers.retry import with_retry
|
||||||
@@ -279,13 +283,13 @@ def fetch_weather_alerts():
|
|||||||
return
|
return
|
||||||
alerts = []
|
alerts = []
|
||||||
try:
|
try:
|
||||||
# weather.gov requires a User-Agent per their API policy, but it
|
# weather.gov requires a User-Agent per their API policy. Round 7a:
|
||||||
# need not identify the operator. Use a project-generic string and
|
# send the per-install operator handle so they can rate-limit per
|
||||||
# let the user override via SHADOWBROKER_USER_AGENT if needed.
|
# operator instead of treating "Shadowbroker" as one entity.
|
||||||
from services.network_utils import DEFAULT_USER_AGENT
|
from services.network_utils import outbound_user_agent
|
||||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": DEFAULT_USER_AGENT,
|
"User-Agent": outbound_user_agent("weather-gov"),
|
||||||
"Accept": "application/geo+json",
|
"Accept": "application/geo+json",
|
||||||
}
|
}
|
||||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||||
@@ -713,7 +717,12 @@ _NUFORC_LIVE_NONCE_RE = re.compile(
|
|||||||
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
||||||
)
|
)
|
||||||
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
||||||
_NUFORC_LIVE_USER_AGENT = "Mozilla/5.0 (ShadowBroker-OSINT NUFORC-fetcher)"
|
# Round 7a: NUFORC's site is sensitive to non-browser UAs but we send a
|
||||||
|
# per-install operator handle prefixed by Mozilla/5.0 so we're identifiable
|
||||||
|
# without being aggregately blocked. Operators who want stricter privacy
|
||||||
|
# can override the entire UA via SHADOWBROKER_USER_AGENT.
|
||||||
|
def _nuforc_live_user_agent() -> str:
    """Return the User-Agent for live NUFORC requests.

    Wraps the ``outbound_user_agent("nuforc-live")`` string in a
    ``Mozilla/5.0 (...)`` envelope, evaluated on each call.
    """
    return f"Mozilla/5.0 ({outbound_user_agent('nuforc-live')})"
|
||||||
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
||||||
|
|
||||||
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
||||||
@@ -957,7 +966,7 @@ def _photon_lookup(query: str) -> list[float] | None:
|
|||||||
res = fetch_with_curl(
|
res = fetch_with_curl(
|
||||||
url,
|
url,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": "ShadowBroker-OSINT/1.0 (NUFORC-UAP-layer)",
|
"User-Agent": outbound_user_agent("nuforc-uap-geocode"),
|
||||||
"Accept-Language": "en",
|
"Accept-Language": "en",
|
||||||
},
|
},
|
||||||
timeout=10,
|
timeout=10,
|
||||||
@@ -1053,7 +1062,7 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
|||||||
index_res = subprocess.run(
|
index_res = subprocess.run(
|
||||||
[
|
[
|
||||||
curl_bin, "-sL",
|
curl_bin, "-sL",
|
||||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
"-A", _nuforc_live_user_agent(),
|
||||||
"-c", str(cookie_jar),
|
"-c", str(cookie_jar),
|
||||||
"-b", str(cookie_jar),
|
"-b", str(cookie_jar),
|
||||||
index_url,
|
index_url,
|
||||||
@@ -1089,7 +1098,7 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
|||||||
ajax_res = subprocess.run(
|
ajax_res = subprocess.run(
|
||||||
[
|
[
|
||||||
curl_bin, "-sL",
|
curl_bin, "-sL",
|
||||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
"-A", _nuforc_live_user_agent(),
|
||||||
"-c", str(cookie_jar),
|
"-c", str(cookie_jar),
|
||||||
"-b", str(cookie_jar),
|
"-b", str(cookie_jar),
|
||||||
"-X", "POST",
|
"-X", "POST",
|
||||||
@@ -1374,10 +1383,21 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
|||||||
This is a resilience fallback for local/Windows runs where nuforc.org is
|
This is a resilience fallback for local/Windows runs where nuforc.org is
|
||||||
Cloudflare-gated and the Mapbox token is not configured. It is not as fresh
|
Cloudflare-gated and the Mapbox token is not configured. It is not as fresh
|
||||||
as the live NUFORC AJAX feed, but it keeps the layer visible and cached.
|
as the live NUFORC AJAX feed, but it keeps the layer visible and cached.
|
||||||
|
|
||||||
|
Date-cutoff guard: the kcimc/NUFORC HF dataset is a static snapshot whose
|
||||||
|
maintainer refreshes it sporadically. Without a cutoff, sorting by
|
||||||
|
occurred-desc and taking the top N rows returns whatever the mirror's
|
||||||
|
newest rows happen to be — which can be years old if the snapshot is
|
||||||
|
stale. We apply the same ``_NUFORC_RECENT_DAYS`` window the live path
|
||||||
|
uses (60 days). If the HF mirror has nothing inside the window we return
|
||||||
|
``[]`` rather than silently serving 3-year-old "newest" rows.
|
||||||
"""
|
"""
|
||||||
from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
|
from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
|
||||||
from services.geocode_validate import coord_in_country
|
from services.geocode_validate import coord_in_country
|
||||||
|
|
||||||
|
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||||
|
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
|
response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
|
||||||
if not response or response.status_code != 200:
|
if not response or response.status_code != 200:
|
||||||
@@ -1391,6 +1411,7 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
candidates: list[dict] = []
|
candidates: list[dict] = []
|
||||||
|
stale_rows_dropped = 0
|
||||||
try:
|
try:
|
||||||
reader = csv.DictReader(io.StringIO(response.text))
|
reader = csv.DictReader(io.StringIO(response.text))
|
||||||
for row in reader:
|
for row in reader:
|
||||||
@@ -1401,6 +1422,9 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
|||||||
)
|
)
|
||||||
if not occurred:
|
if not occurred:
|
||||||
continue
|
continue
|
||||||
|
if occurred < cutoff_str:
|
||||||
|
stale_rows_dropped += 1
|
||||||
|
continue
|
||||||
raw_location = _normalize_uap_location(
|
raw_location = _normalize_uap_location(
|
||||||
row.get("Location", "")
|
row.get("Location", "")
|
||||||
or row.get("City", "")
|
or row.get("City", "")
|
||||||
@@ -1435,6 +1459,19 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
|||||||
logger.warning("UAP sightings: HF fallback parse failed: %s", e)
|
logger.warning("UAP sightings: HF fallback parse failed: %s", e)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
if not candidates:
|
||||||
|
# HF mirror returned rows, but none inside the rolling window. This is
|
||||||
|
# the smoking gun for "the public HF dataset hasn't been refreshed in
|
||||||
|
# years" — log loudly so the operator sees it instead of guessing.
|
||||||
|
logger.error(
|
||||||
|
"UAP sightings: HF fallback yielded 0 rows within last %d days "
|
||||||
|
"(dropped %d stale rows). HF mirror is likely stale; the layer "
|
||||||
|
"will be empty until the live NUFORC path recovers.",
|
||||||
|
_NUFORC_RECENT_DAYS,
|
||||||
|
stale_rows_dropped,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
candidates.sort(key=lambda row: (row["occurred"], row["posted"], row["id"]), reverse=True)
|
candidates.sort(key=lambda row: (row["occurred"], row["posted"], row["id"]), reverse=True)
|
||||||
candidates = candidates[:_NUFORC_HF_FALLBACK_LIMIT]
|
candidates = candidates[:_NUFORC_HF_FALLBACK_LIMIT]
|
||||||
|
|
||||||
@@ -1506,13 +1543,29 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
|||||||
|
|
||||||
sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
|
sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
|
||||||
if sightings is None:
|
if sightings is None:
|
||||||
|
live_error: Exception | None = None
|
||||||
try:
|
try:
|
||||||
sightings = _build_recent_uap_sightings()
|
sightings = _build_recent_uap_sightings()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
live_error = e
|
||||||
logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
|
logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
|
||||||
sightings = _build_uap_sightings_from_hf_mirror()
|
sightings = _build_uap_sightings_from_hf_mirror()
|
||||||
if sightings:
|
if sightings:
|
||||||
_save_nuforc_sightings_cache(sightings)
|
_save_nuforc_sightings_cache(sightings)
|
||||||
|
elif live_error is not None:
|
||||||
|
# Both paths failed: live raised AND HF fallback returned empty
|
||||||
|
# (either the HF mirror is stale beyond the cutoff or the network
|
||||||
|
# is gone entirely). The previous code silently set the layer to
|
||||||
|
# ``[]`` and kept marking it fresh; that masked the failure for
|
||||||
|
# days. Surface it via assert_canary so the health registry shows
|
||||||
|
# the layer as broken instead of "fresh and empty".
|
||||||
|
from services.slo import assert_canary
|
||||||
|
assert_canary("uap_sightings", 0)
|
||||||
|
logger.error(
|
||||||
|
"UAP sightings: both live NUFORC and HF fallback produced 0 "
|
||||||
|
"rows; layer is unavailable. Live error: %s",
|
||||||
|
live_error,
|
||||||
|
)
|
||||||
|
|
||||||
with _data_lock:
|
with _data_lock:
|
||||||
latest_data["uap_sightings"] = sightings or []
|
latest_data["uap_sightings"] = sightings or []
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
"""Per-aircraft observation tracking for cumulative fuel/CO2 estimates.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
The pre-existing emissions enrichment attached a *rate* to each flight
|
||||||
|
(GPH and kg/hr) based on aircraft model. Users — reasonably — wanted the
|
||||||
|
running total: how much fuel HAS this plane burned since we started
|
||||||
|
seeing it? Multiplying the rate by elapsed observation time gets us
|
||||||
|
there, but it requires somewhere to remember "when did this icao24
|
||||||
|
first appear on our radar?"
|
||||||
|
|
||||||
|
Why this lives outside ``flight_trails``
|
||||||
|
----------------------------------------
|
||||||
|
``flight_trails`` is sized and pruned aggressively for map rendering
|
||||||
|
(5-minute TTL for untracked aircraft, 200 trail points max). That's
|
||||||
|
wrong for cumulative burn: if a plane has been airborne 2 hours but
|
||||||
|
its trail was pruned 30 min in, the "first trail point" timestamp is
|
||||||
|
30 min ago, not 2h ago. Worse, when the trail expires and re-creates,
|
||||||
|
the cumulative counter would reset mid-flight.
|
||||||
|
|
||||||
|
This module tracks observation lifecycle separately:
|
||||||
|
|
||||||
|
* When a hex is first observed: start a new flight session.
|
||||||
|
* While observed regularly (gap < ``REOPEN_GAP_S``): keep accumulating.
|
||||||
|
* When unseen for longer than ``REOPEN_GAP_S``: treat next sighting as
|
||||||
|
a new session (the plane landed and took off again, or it's a
|
||||||
|
different leg). Reset ``first_seen_at``.
|
||||||
|
* Stale sessions are pruned every ``PRUNE_INTERVAL_S`` so memory stays
|
||||||
|
bounded.
|
||||||
|
|
||||||
|
The user explicitly asked for this counting semantic: "as soon as a
|
||||||
|
plane appears there should be a counter that keeps a running count of
|
||||||
|
the fuel being burned... If there is no estimate take off time then it
|
||||||
|
can just be from the time the server starts to keep a log of whats in
|
||||||
|
the air."
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
# Gap between sightings that resets the session. ADS-B refreshes the
# whole aircraft list every minute or two, so anything over a few
# minutes means the plane left our coverage window (landed, transit
# through dead zone, etc). 15 minutes is conservative.
REOPEN_GAP_S = 15 * 60

# Don't accumulate runaway memory: drop entries unseen for an hour.
PRUNE_AFTER_S = 60 * 60

# Cap on accumulated airtime per session so a single bug elsewhere
# (e.g. ts clock skew) can't produce comically large numbers.
MAX_SESSION_SECONDS = 24 * 3600  # 24h — longest realistic civilian leg


# Session table keyed by the normalised (stripped, lower-cased) ICAO hex.
# Each value holds two timestamps, ``first_seen_at`` and ``last_seen_at``,
# taken from the caller-supplied ``now`` or from ``time.time()``.
_observations: dict[str, dict[str, float]] = {}
# Serialises every read and write of ``_observations``.
_lock = threading.Lock()
# NOTE(review): not read or written by any function in this module as shown —
# confirm whether an external scheduler relies on it before removing.
_last_prune_at = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def record_observation(icao_hex: str, *, now: float | None = None) -> int:
    """Register one sighting of ``icao_hex`` and report the session airtime.

    The hex is normalised (stripped, lower-cased) before lookup. A sighting
    either continues the existing session or, when the aircraft is unknown
    or was last seen more than ``REOPEN_GAP_S`` seconds ago, opens a fresh
    one.

    Args:
        icao_hex: Aircraft identifier; falsy or whitespace-only values are
            ignored.
        now: Timestamp of the sighting in seconds. Defaults to
            ``time.time()``.

    Returns:
        Whole seconds between the session's first sighting and this one,
        clamped to ``[0, MAX_SESSION_SECONDS]``. ``0`` for ignored input
        and whenever a new session is opened. Multiply by
        ``rate_per_hour / 3600`` to get cumulative consumption.
    """
    key = str(icao_hex).strip().lower() if icao_hex else ""
    if not key:
        return 0
    current = float(time.time() if now is None else now)

    with _lock:
        session = _observations.get(key)
        if session is not None:
            # Compare against ``None`` explicitly: 0.0 is a valid timestamp
            # and must not be mistaken for "missing".
            last_raw = session.get("last_seen_at")
            last_seen = current if last_raw is None else float(last_raw)
            reopened = (current - last_seen) > REOPEN_GAP_S
            if not reopened:
                first_raw = session.get("first_seen_at")
                started = current if first_raw is None else float(first_raw)
                # Clamp so clock skew or bad input cannot yield negative
                # or absurdly large airtime.
                airtime = max(0, min(int(current - started), MAX_SESSION_SECONDS))
                session["last_seen_at"] = current
                return airtime

        # Unknown aircraft, or it vanished long enough that the earlier
        # running total belongs to a different flight: start over.
        _observations[key] = {"first_seen_at": current, "last_seen_at": current}
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def prune(*, now: float | None = None) -> int:
    """Evict sessions whose last sighting is older than ``PRUNE_AFTER_S``.

    A session with no ``last_seen_at`` value is treated as last seen at
    timestamp 0.0. The whole table is scanned once under the module lock.

    Args:
        now: Reference timestamp in seconds. Defaults to ``time.time()``.

    Returns:
        The number of sessions removed.
    """
    current = float(time.time() if now is None else now)

    def _last_seen(session: dict) -> float:
        raw = session.get("last_seen_at")
        return 0.0 if raw is None else float(raw)

    with _lock:
        # Collect first, delete afterwards: a dict cannot be resized while
        # it is being iterated.
        expired = [
            hex_key
            for hex_key, session in _observations.items()
            if current - _last_seen(session) > PRUNE_AFTER_S
        ]
        for hex_key in expired:
            del _observations[hex_key]
    return len(expired)
|
||||||
|
|
||||||
|
|
||||||
|
def get_session_seconds(icao_hex: str, *, now: float | None = None) -> int:
    """Return the current session airtime for ``icao_hex`` without side effects.

    Unlike ``record_observation`` this never creates a session and never
    touches ``last_seen_at``, so it is suitable for snapshot rendering and
    for tests.

    Args:
        icao_hex: Aircraft identifier; normalised (stripped, lower-cased)
            before lookup.
        now: Reference timestamp in seconds. Defaults to ``time.time()``.

    Returns:
        Whole seconds since the session's ``first_seen_at``, clamped to
        ``[0, MAX_SESSION_SECONDS]``; ``0`` when the hex is falsy or has
        no session.
    """
    if not icao_hex:
        return 0
    key = str(icao_hex).strip().lower()

    with _lock:
        session = _observations.get(key)
        if session is None:
            return 0
        current = float(time.time() if now is None else now)
        first_raw = session.get("first_seen_at")
        # ``None`` check rather than truthiness: 0.0 is a real timestamp.
        started = current if first_raw is None else float(first_raw)
        airtime = int(current - started)
        if airtime < 0:
            return 0
        if airtime > MAX_SESSION_SECONDS:
            return MAX_SESSION_SECONDS
        return airtime
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_for_tests() -> None:
|
||||||
|
"""Drop all observations. Test helper only."""
|
||||||
|
with _lock:
|
||||||
|
_observations.clear()
|
||||||
@@ -17,6 +17,7 @@ from services.network_utils import fetch_with_curl
|
|||||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||||
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
|
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
|
||||||
from services.fetchers.emissions import get_emissions_info
|
from services.fetchers.emissions import get_emissions_info
|
||||||
|
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||||
from services.fetchers.retry import with_retry
|
from services.fetchers.retry import with_retry
|
||||||
from services.fetchers.route_database import lookup_route
|
from services.fetchers.route_database import lookup_route
|
||||||
from services.fetchers.aircraft_database import lookup_aircraft_type
|
from services.fetchers.aircraft_database import lookup_aircraft_type
|
||||||
@@ -29,6 +30,88 @@ _RE_AIRLINE_CODE_1 = re.compile(r"^([A-Z]{3})\d")
|
|||||||
_RE_AIRLINE_CODE_2 = re.compile(r"^([A-Z]{3})[A-Z\d]")
|
_RE_AIRLINE_CODE_2 = re.compile(r"^([A-Z]{3})[A-Z\d]")
|
||||||
|
|
||||||
|
|
||||||
|
def detect_gps_jamming_zones(
    raw_flights: list[dict],
    *,
    min_aircraft: int | None = None,
    min_ratio: float | None = None,
    nacp_threshold: int | None = None,
) -> list[dict]:
    """Detect GPS interference zones from a snapshot of raw ADS-B aircraft.

    Methodology mirrors GPSJam.org / Flightradar24: bin aircraft into 1°x1°
    grid cells, flag cells where the fraction of aircraft reporting degraded
    NACp clears a threshold.

    Args:
        raw_flights: Iterable of dicts (``None`` is treated as empty). Each
            item is expected to carry ``lat``, ``lng`` (or ``lon``), and
            ``nac_p``. Records missing position OR missing ``nac_p``
            entirely (typical for OpenSky-sourced flights) are skipped —
            absence-of-data isn't evidence of anything. Records whose
            position or ``nac_p`` is not a finite number are skipped too,
            so one malformed record cannot abort the whole snapshot.
        min_aircraft: Minimum aircraft per cell for statistical validity.
        min_ratio: The adjusted degraded ratio must be strictly greater
            than this value.
        nacp_threshold: ``nac_p`` values below this count as degraded.

        All thresholds default to the module-level ``GPS_JAMMING_*``
        constants but can be overridden for testing.

    Returns:
        One dict per flagged cell with ``lat``/``lng`` (cell centre),
        ``severity`` (``"low"`` below 0.5, ``"medium"`` below 0.75, else
        ``"high"``), ``ratio`` (rounded to 2 places), and the raw
        ``degraded`` and ``total`` counts.

    nac_p == 0 IS counted as degraded. Earlier code skipped it on the theory
    that "0 = old transponder, never computed accuracy." That's only half
    right: modern Mode-S Enhanced Surveillance transponders also fall back
    to nac_p=0 when they lose GPS lock entirely — which is exactly the
    jamming signature we're trying to detect.

    Denoising:
      1. Require ``min_aircraft`` per grid cell.
      2. Subtract 1 from the degraded count per cell (GPSJam's technique)
         so a single quirky transponder can't flag an entire zone.
      3. Require ratio ``adjusted_degraded / total > min_ratio``.
    """
    import math  # function-scope import keeps this block self-contained

    min_aircraft = GPS_JAMMING_MIN_AIRCRAFT if min_aircraft is None else int(min_aircraft)
    min_ratio = GPS_JAMMING_MIN_RATIO if min_ratio is None else float(min_ratio)
    nacp_threshold = (
        GPS_JAMMING_NACP_THRESHOLD if nacp_threshold is None else int(nacp_threshold)
    )

    # Keyed by the integer south-west corner of each 1°x1° cell.
    jamming_grid: dict[tuple[int, int], dict[str, int]] = {}
    for rf in raw_flights or []:
        rlat = rf.get("lat")
        rlng = rf.get("lng")
        if rlng is None:
            rlng = rf.get("lon")
        nacp = rf.get("nac_p")
        if rlat is None or rlng is None or nacp is None:
            continue
        try:
            # floor(), not int(): int() truncates toward zero, which merges
            # the -1..0 and 0..1 bands into one cell and shifts every
            # southern/western cell centre one degree away from the
            # aircraft that populated it.
            cell_key = (math.floor(float(rlat)), math.floor(float(rlng)))
            is_degraded = float(nacp) < nacp_threshold
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite values: unusable record.
            continue
        cell = jamming_grid.setdefault(cell_key, {"degraded": 0, "total": 0})
        cell["total"] += 1
        if is_degraded:
            cell["degraded"] += 1

    jamming_zones: list[dict] = []
    for (cell_lat, cell_lng), counts in jamming_grid.items():
        if counts["total"] < min_aircraft:
            continue
        adjusted_degraded = max(counts["degraded"] - 1, 0)
        if adjusted_degraded == 0:
            continue
        ratio = adjusted_degraded / counts["total"]
        if ratio > min_ratio:
            severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
            jamming_zones.append(
                {
                    "lat": cell_lat + 0.5,
                    "lng": cell_lng + 0.5,
                    "severity": severity,
                    "ratio": round(ratio, 2),
                    "degraded": counts["degraded"],
                    "total": counts["total"],
                }
            )
    return jamming_zones
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# OpenSky Network API Client (OAuth2)
|
# OpenSky Network API Client (OAuth2)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -459,6 +542,18 @@ def _classify_and_publish(all_adsb_flights):
|
|||||||
|
|
||||||
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
||||||
|
|
||||||
|
# Source attribution: prefer the explicit ``source`` tag stamped
|
||||||
|
# at fetch time (adsb.lol, OpenSky). If absent, fall back to the
|
||||||
|
# legacy ``supplemental_source`` (airplanes.live, adsb.fi) so
|
||||||
|
# supplementals are still attributed without changing their
|
||||||
|
# tagger. Final fallback "adsb.lol" preserves prior behavior for
|
||||||
|
# any caller that synthesizes records without going through one
|
||||||
|
# of our fetchers (e.g. tests).
|
||||||
|
source = (
|
||||||
|
f.get("source")
|
||||||
|
or f.get("supplemental_source")
|
||||||
|
or "adsb.lol"
|
||||||
|
)
|
||||||
flights.append(
|
flights.append(
|
||||||
{
|
{
|
||||||
"callsign": flight_str,
|
"callsign": flight_str,
|
||||||
@@ -480,6 +575,7 @@ def _classify_and_publish(all_adsb_flights):
|
|||||||
"airline_code": airline_code,
|
"airline_code": airline_code,
|
||||||
"aircraft_category": ac_category,
|
"aircraft_category": ac_category,
|
||||||
"nac_p": f.get("nac_p"),
|
"nac_p": f.get("nac_p"),
|
||||||
|
"source": source,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
||||||
@@ -506,6 +602,22 @@ def _classify_and_publish(all_adsb_flights):
|
|||||||
if model:
|
if model:
|
||||||
emi = get_emissions_info(model)
|
emi = get_emissions_info(model)
|
||||||
if emi:
|
if emi:
|
||||||
|
# Cumulative fuel/CO2: multiply the per-hour rate by how
|
||||||
|
# long we've been observing this airframe. Users want to
|
||||||
|
# see the *amount* burned, not just the rate. If we've
|
||||||
|
# never seen this hex before, observed_seconds is 0 and
|
||||||
|
# the cumulative values are 0 until the next refresh —
|
||||||
|
# the rate is still useful info on its own.
|
||||||
|
observed_seconds = _record_flight_observation(
|
||||||
|
f.get("icao24") or ""
|
||||||
|
)
|
||||||
|
elapsed_h = observed_seconds / 3600.0
|
||||||
|
emi = {
|
||||||
|
**emi,
|
||||||
|
"observed_seconds": observed_seconds,
|
||||||
|
"fuel_gallons_burned": round(emi["fuel_gph"] * elapsed_h, 1),
|
||||||
|
"co2_kg_emitted": round(emi["co2_kg_per_hour"] * elapsed_h, 1),
|
||||||
|
}
|
||||||
f["emissions"] = emi
|
f["emissions"] = emi
|
||||||
|
|
||||||
callsign = f.get("callsign", "").strip().upper()
|
callsign = f.get("callsign", "").strip().upper()
|
||||||
@@ -724,56 +836,8 @@ def _classify_and_publish(all_adsb_flights):
|
|||||||
latest_data["military_flights"] = military_snapshot
|
latest_data["military_flights"] = military_snapshot
|
||||||
|
|
||||||
# --- GPS Jamming Detection ---
|
# --- GPS Jamming Detection ---
|
||||||
# Uses NACp (Navigation Accuracy Category – Position) from ADS-B to infer
|
|
||||||
# GPS interference zones, similar to GPSJam.org / Flightradar24.
|
|
||||||
# NACp < 8 = position accuracy worse than the FAA-mandated 0.05 NM.
|
|
||||||
#
|
|
||||||
# Denoising (to suppress false positives from old GA transponders):
|
|
||||||
# 1. Skip nac_p == 0 ("unknown accuracy") — old transponders that never
|
|
||||||
# computed accuracy, NOT evidence of jamming. Real jamming shows 1-7.
|
|
||||||
# 2. Require minimum aircraft per grid cell for statistical validity.
|
|
||||||
# 3. Subtract 1 from degraded count per cell (GPSJam's technique) so a
|
|
||||||
# single quirky transponder can't flag an entire zone.
|
|
||||||
# 4. Require the adjusted ratio to exceed the threshold.
|
|
||||||
try:
|
try:
|
||||||
jamming_grid = {}
|
jamming_zones = detect_gps_jamming_zones(raw_flights_snapshot)
|
||||||
raw_flights = raw_flights_snapshot
|
|
||||||
for rf in raw_flights:
|
|
||||||
rlat = rf.get("lat")
|
|
||||||
rlng = rf.get("lng") or rf.get("lon")
|
|
||||||
if rlat is None or rlng is None:
|
|
||||||
continue
|
|
||||||
nacp = rf.get("nac_p")
|
|
||||||
if nacp is None or nacp == 0:
|
|
||||||
continue
|
|
||||||
grid_key = f"{int(rlat)},{int(rlng)}"
|
|
||||||
if grid_key not in jamming_grid:
|
|
||||||
jamming_grid[grid_key] = {"degraded": 0, "total": 0}
|
|
||||||
jamming_grid[grid_key]["total"] += 1
|
|
||||||
if nacp < GPS_JAMMING_NACP_THRESHOLD:
|
|
||||||
jamming_grid[grid_key]["degraded"] += 1
|
|
||||||
|
|
||||||
jamming_zones = []
|
|
||||||
for gk, counts in jamming_grid.items():
|
|
||||||
if counts["total"] < GPS_JAMMING_MIN_AIRCRAFT:
|
|
||||||
continue
|
|
||||||
adjusted_degraded = max(counts["degraded"] - 1, 0)
|
|
||||||
if adjusted_degraded == 0:
|
|
||||||
continue
|
|
||||||
ratio = adjusted_degraded / counts["total"]
|
|
||||||
if ratio > GPS_JAMMING_MIN_RATIO:
|
|
||||||
lat_i, lng_i = gk.split(",")
|
|
||||||
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
|
|
||||||
jamming_zones.append(
|
|
||||||
{
|
|
||||||
"lat": int(lat_i) + 0.5,
|
|
||||||
"lng": int(lng_i) + 0.5,
|
|
||||||
"severity": severity,
|
|
||||||
"ratio": round(ratio, 2),
|
|
||||||
"degraded": counts["degraded"],
|
|
||||||
"total": counts["total"],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
with _data_lock:
|
with _data_lock:
|
||||||
latest_data["gps_jamming"] = jamming_zones
|
latest_data["gps_jamming"] = jamming_zones
|
||||||
if jamming_zones:
|
if jamming_zones:
|
||||||
@@ -849,7 +913,15 @@ def _fetch_adsb_lol_regions():
|
|||||||
res = fetch_with_curl(url, timeout=10)
|
res = fetch_with_curl(url, timeout=10)
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
data = res.json()
|
data = res.json()
|
||||||
return data.get("ac", [])
|
aircraft = data.get("ac", [])
|
||||||
|
# Stamp the source at the fetch site so attribution survives
|
||||||
|
# the OpenSky/supplemental dedupe-by-hex merge downstream.
|
||||||
|
# Previously adsb.lol records carried no marker while OpenSky
|
||||||
|
# records got ``is_opensky: True`` — which made flight tooltips
|
||||||
|
# look like everything came from OpenSky.
|
||||||
|
for a in aircraft:
|
||||||
|
a["source"] = "adsb.lol"
|
||||||
|
return aircraft
|
||||||
except (
|
except (
|
||||||
requests.RequestException,
|
requests.RequestException,
|
||||||
ConnectionError,
|
ConnectionError,
|
||||||
@@ -932,6 +1004,7 @@ def _enrich_with_opensky_and_supplemental(adsb_flights):
|
|||||||
"gs": (s[9] * 1.94384) if s[9] else 0,
|
"gs": (s[9] * 1.94384) if s[9] else 0,
|
||||||
"t": "Unknown",
|
"t": "Unknown",
|
||||||
"is_opensky": True,
|
"is_opensky": True,
|
||||||
|
"source": "OpenSky",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
elif os_res.status_code == 429:
|
elif os_res.status_code == 429:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import heapq
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from cachetools import TTLCache
|
from cachetools import TTLCache
|
||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||||
from services.fetchers.retry import with_retry
|
from services.fetchers.retry import with_retry
|
||||||
|
|
||||||
@@ -29,7 +29,7 @@ def _geocode_region(region_name: str, country_name: str) -> tuple:
|
|||||||
|
|
||||||
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
||||||
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
||||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": outbound_user_agent("infrastructure-data")})
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
results = response.json()
|
results = response.json()
|
||||||
if results:
|
if results:
|
||||||
|
|||||||
@@ -191,8 +191,13 @@ def fetch_meshtastic_nodes():
|
|||||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||||
|
|
||||||
from services.network_utils import DEFAULT_USER_AGENT
|
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||||
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
|
# The optional Meshtastic callsign is appended as additional context so
|
||||||
|
# meshtastic.liamcottle.net's operator can identify both the install AND
|
||||||
|
# the registered radio operator (when MESHTASTIC_OPERATOR_CALLSIGN is set
|
||||||
|
# and MESHTASTIC_SEND_CALLSIGN_HEADER is true; see issue #203).
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
ua_base = f"{outbound_user_agent('meshtastic-map')}; 24h polling"
|
||||||
if callsign and send_callsign_header:
|
if callsign and send_callsign_header:
|
||||||
user_agent = f"{ua_base}; node={callsign}"
|
user_agent = f"{ua_base}; node={callsign}"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import requests
|
|||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl
|
||||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||||
from services.fetchers.emissions import get_emissions_info
|
from services.fetchers.emissions import get_emissions_info
|
||||||
|
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||||
from services.fetchers.plane_alert import enrich_with_plane_alert
|
from services.fetchers.plane_alert import enrich_with_plane_alert
|
||||||
|
|
||||||
logger = logging.getLogger("services.data_fetcher")
|
logger = logging.getLogger("services.data_fetcher")
|
||||||
@@ -171,6 +172,7 @@ def fetch_military_flights():
|
|||||||
h = a.get("hex", "").lower()
|
h = a.get("hex", "").lower()
|
||||||
if h and h not in seen_hex:
|
if h and h not in seen_hex:
|
||||||
seen_hex.add(h)
|
seen_hex.add(h)
|
||||||
|
a["source"] = "adsb.lol"
|
||||||
all_mil_ac.append(a)
|
all_mil_ac.append(a)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
||||||
@@ -182,6 +184,7 @@ def fetch_military_flights():
|
|||||||
h = a.get("hex", "").lower()
|
h = a.get("hex", "").lower()
|
||||||
if h and h not in seen_hex:
|
if h and h not in seen_hex:
|
||||||
seen_hex.add(h)
|
seen_hex.add(h)
|
||||||
|
a["source"] = "airplanes.live"
|
||||||
all_mil_ac.append(a)
|
all_mil_ac.append(a)
|
||||||
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -234,6 +237,7 @@ def fetch_military_flights():
|
|||||||
"registration": f.get("r", "N/A"),
|
"registration": f.get("r", "N/A"),
|
||||||
"icao24": icao_hex,
|
"icao24": icao_hex,
|
||||||
"squawk": f.get("squawk", ""),
|
"squawk": f.get("squawk", ""),
|
||||||
|
"source": f.get("source") or "adsb.lol",
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -258,7 +262,8 @@ def fetch_military_flights():
|
|||||||
"model": f.get("t", "Unknown"),
|
"model": f.get("t", "Unknown"),
|
||||||
"icao24": icao_hex,
|
"icao24": icao_hex,
|
||||||
"speed_knots": speed_knots,
|
"speed_knots": speed_knots,
|
||||||
"squawk": f.get("squawk", "")
|
"squawk": f.get("squawk", ""),
|
||||||
|
"source": f.get("source") or "adsb.lol",
|
||||||
})
|
})
|
||||||
except Exception as loop_e:
|
except Exception as loop_e:
|
||||||
logger.error(f"Mil flight interpolation error: {loop_e}")
|
logger.error(f"Mil flight interpolation error: {loop_e}")
|
||||||
@@ -296,6 +301,18 @@ def fetch_military_flights():
|
|||||||
if model:
|
if model:
|
||||||
emissions = get_emissions_info(model)
|
emissions = get_emissions_info(model)
|
||||||
if emissions:
|
if emissions:
|
||||||
|
# Cumulative fuel/CO2 since first observation — mirrors
|
||||||
|
# the civilian path in flights._classify_and_publish.
|
||||||
|
observed_seconds = _record_flight_observation(
|
||||||
|
mf.get("icao24") or ""
|
||||||
|
)
|
||||||
|
elapsed_h = observed_seconds / 3600.0
|
||||||
|
emissions = {
|
||||||
|
**emissions,
|
||||||
|
"observed_seconds": observed_seconds,
|
||||||
|
"fuel_gallons_burned": round(emissions["fuel_gph"] * elapsed_h, 1),
|
||||||
|
"co2_kg_emitted": round(emissions["co2_kg_per_hour"] * elapsed_h, 1),
|
||||||
|
}
|
||||||
mf["emissions"] = emissions
|
mf["emissions"] = emissions
|
||||||
if mf.get("alert_category"):
|
if mf.get("alert_category"):
|
||||||
mf["type"] = "tracked_flight"
|
mf["type"] = "tracked_flight"
|
||||||
|
|||||||
@@ -17,6 +17,12 @@ from typing import Any
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _route_db_user_agent() -> str:
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("route-database")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
||||||
@@ -37,7 +43,7 @@ def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
|||||||
response = requests.get(
|
response = requests.get(
|
||||||
url,
|
url,
|
||||||
timeout=_HTTP_TIMEOUT_S,
|
timeout=_HTTP_TIMEOUT_S,
|
||||||
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
headers={"User-Agent": _route_db_user_agent(), "Accept-Encoding": "gzip"},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
text = gzip.decompress(response.content).decode("utf-8-sig")
|
text = gzip.decompress(response.content).decode("utf-8-sig")
|
||||||
|
|||||||
@@ -10,6 +10,12 @@ from datetime import datetime, timezone
|
|||||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _trains_user_agent() -> str:
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("trains")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_EARTH_RADIUS_KM = 6371.0
|
_EARTH_RADIUS_KM = 6371.0
|
||||||
@@ -379,7 +385,7 @@ def _fetch_digitraffic() -> list[dict]:
|
|||||||
timeout=15,
|
timeout=15,
|
||||||
headers={
|
headers={
|
||||||
"Accept-Encoding": "gzip",
|
"Accept-Encoding": "gzip",
|
||||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
"User-Agent": _trains_user_agent(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
|
|||||||
@@ -0,0 +1,457 @@
|
|||||||
|
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||||
|
position publication.
|
||||||
|
|
||||||
|
Why this exists
|
||||||
|
---------------
|
||||||
|
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||||
|
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||||
|
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||||
|
- low-precision (audit issue #245 — false precision from text mentions),
|
||||||
|
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||||
|
certain network paths, including Docker Desktop on some Windows hosts).
|
||||||
|
|
||||||
|
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||||
|
carrier is operating. The article body uses extremely consistent phrasing:
|
||||||
|
|
||||||
|
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||||
|
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||||
|
Yokosuka, Japan."
|
||||||
|
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||||
|
|
||||||
|
Those are deterministic to parse. This module:
|
||||||
|
|
||||||
|
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||||
|
site-wide feed often has fresher posts before the category feed
|
||||||
|
catches up, so we union them.
|
||||||
|
2. Picks the most recent post by parsed ``pubDate``.
|
||||||
|
3. For each carrier in the registry, scans the article body for a
|
||||||
|
"is operating in / is in port in / departed from" pattern near
|
||||||
|
the carrier's name.
|
||||||
|
4. Maps the extracted region phrase to coordinates via the carrier
|
||||||
|
tracker's existing REGION_COORDS.
|
||||||
|
|
||||||
|
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||||
|
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||||
|
with ``position_source_at`` set to the article's actual publication
|
||||||
|
timestamp (not ``now()``).
|
||||||
|
|
||||||
|
Politeness
|
||||||
|
----------
|
||||||
|
We send the per-install operator handle via ``outbound_user_agent``
|
||||||
|
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||||
|
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||||
|
but WordPress RSS feeds are open and serve the full article in
|
||||||
|
``<content:encoded>`` — that's the supported path for aggregators and
|
||||||
|
the one we use. We do not spoof browser headers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_RSS_URLS: tuple[str, ...] = (
    # Site-wide feed often has the freshest posts before the category
    # feed catches up. We try this first.
    "https://news.usni.org/feed",
    # Category feed has older fleet trackers for backfill.
    "https://news.usni.org/category/fleet-tracker/feed",
)

# Prefix-to-URI map so ElementTree can resolve the ``content:encoded`` element.
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}

# Case-insensitive match for "fleet and marine tracker" (any whitespace
# between the words) anywhere in a feed item's title.
_FLEET_TRACKER_TITLE_RE = re.compile(
    r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
)

# Matches one ``<...>`` tag.
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
# Matches a run of one or more whitespace characters.
_WHITESPACE_RE = re.compile(r"\s+")
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(html: str) -> str:
|
||||||
|
text = _TAG_STRIP_RE.sub(" ", html or "")
|
||||||
|
return _WHITESPACE_RE.sub(" ", text).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _request_headers() -> dict[str, str]:
|
||||||
|
"""Headers USNI's WordPress feed accepts from a legitimate aggregator.
|
||||||
|
|
||||||
|
The ``Referer`` is the category index page — that's where a real
|
||||||
|
feed reader navigates from. ``Accept`` declares RSS preference but
|
||||||
|
falls back to HTML. No browser UA spoofing.
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"User-Agent": outbound_user_agent("usni-fleet-tracker"),
|
||||||
|
"Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.1",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
"Referer": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pubdate(raw: str) -> datetime | None:
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
dt = parsedate_to_datetime(raw)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
    """Collect every fleet-tracker post found in the given RSS feeds.

    Feeds are fetched in the order supplied. A feed that raises, returns a
    non-200 / empty response, or fails XML parsing is logged and skipped; it
    never aborts the remaining feeds. Posts are de-duplicated by article
    link, with the first feed that supplies a link winning.

    Args:
        rss_urls: Feed URLs to poll.

    Returns:
        One dict per post, in first-seen order:
        ``{"title", "link", "pub_date" (datetime or None), "body" (plain text)}``.
    """

    def _load_feed(feed_url: str):
        """Fetch and parse one feed; return its XML root or ``None`` on failure."""
        try:
            resp = fetch_with_curl(feed_url, timeout=15, headers=_request_headers())
        except Exception as exc:
            logger.debug("USNI RSS %s exception: %s", feed_url, exc)
            return None
        usable = bool(resp) and resp.status_code == 200 and bool(resp.text)
        if not usable:
            logger.debug(
                "USNI RSS %s returned status=%s body=%d",
                feed_url,
                getattr(resp, "status_code", "?"),
                len(getattr(resp, "text", "") or ""),
            )
            return None
        try:
            return ET.fromstring(resp.text)
        except ET.ParseError as exc:
            logger.warning("USNI RSS parse error from %s: %s", feed_url, exc)
            return None

    collected: dict[str, dict] = {}
    for feed_url in rss_urls:
        root = _load_feed(feed_url)
        if root is None:
            continue
        for node in root.findall(".//item"):
            headline = (node.findtext("title") or "").strip()
            if _FLEET_TRACKER_TITLE_RE.search(headline) is None:
                continue
            article_url = (node.findtext("link") or "").strip()
            if article_url == "" or article_url in collected:
                continue
            published = _parse_pubdate(node.findtext("pubDate") or "")
            # Prefer the full article markup; fall back to the summary.
            raw_body = node.findtext("content:encoded", default="", namespaces=_RSS_NS)
            if not raw_body:
                raw_body = node.findtext("description", default="") or ""
            collected[article_url] = {
                "title": headline,
                "link": article_url,
                "pub_date": published,
                "body": _strip_html(raw_body),
            }
    return list(collected.values())
|
||||||
|
|
||||||
|
|
||||||
|
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||||
|
# The carrier_tracker table already covers most named bodies of water and
|
||||||
|
# major ports — we just need to teach this module to RECOGNIZE the
|
||||||
|
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||||
|
# the same body of water differently.
|
||||||
|
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||||
|
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||||
|
("eastern mediterranean", "eastern mediterranean"),
|
||||||
|
("western mediterranean", "western mediterranean"),
|
||||||
|
("mediterranean sea", "mediterranean"),
|
||||||
|
("the mediterranean", "mediterranean"),
|
||||||
|
("red sea", "red sea"),
|
||||||
|
("arabian sea area of responsibility", "arabian sea"),
|
||||||
|
("north arabian sea", "north arabian sea"),
|
||||||
|
("arabian sea", "arabian sea"),
|
||||||
|
("persian gulf", "persian gulf"),
|
||||||
|
("gulf of oman", "gulf of oman"),
|
||||||
|
("strait of hormuz", "strait of hormuz"),
|
||||||
|
("south china sea", "south china sea"),
|
||||||
|
("east china sea", "east china sea"),
|
||||||
|
("philippine sea", "philippine sea"),
|
||||||
|
("sea of japan", "sea of japan"),
|
||||||
|
("taiwan strait", "taiwan strait"),
|
||||||
|
("western pacific", "western pacific"),
|
||||||
|
("pacific ocean", "pacific"),
|
||||||
|
("indian ocean", "indian ocean"),
|
||||||
|
("north atlantic", "north atlantic"),
|
||||||
|
("western atlantic", "atlantic"),
|
||||||
|
("eastern atlantic", "atlantic"),
|
||||||
|
("atlantic ocean", "atlantic"),
|
||||||
|
("gulf of aden", "gulf of aden"),
|
||||||
|
("horn of africa", "horn of africa"),
|
||||||
|
("bab el-mandeb", "bab el-mandeb"),
|
||||||
|
("suez canal", "suez canal"),
|
||||||
|
("baltic sea", "baltic sea"),
|
||||||
|
("north sea", "north sea"),
|
||||||
|
("black sea", "black sea"),
|
||||||
|
("south atlantic", "south atlantic"),
|
||||||
|
("coral sea", "coral sea"),
|
||||||
|
("gulf of mexico", "gulf of mexico"),
|
||||||
|
("caribbean sea", "caribbean"),
|
||||||
|
("caribbean", "caribbean"),
|
||||||
|
# Specific ports
|
||||||
|
("naval station norfolk", "norfolk"),
|
||||||
|
("norfolk naval shipyard", "newport news"),
|
||||||
|
("newport news shipbuilding", "newport news"),
|
||||||
|
("newport news", "newport news"),
|
||||||
|
# USNI tags Norfolk mentions with state suffix; match both.
|
||||||
|
("norfolk, va", "norfolk"),
|
||||||
|
("norfolk", "norfolk"),
|
||||||
|
("naval station everett", "puget sound"),
|
||||||
|
("naval base kitsap", "bremerton"),
|
||||||
|
("bremerton", "bremerton"),
|
||||||
|
("puget sound", "puget sound"),
|
||||||
|
("naval base san diego", "san diego"),
|
||||||
|
("san diego, calif", "san diego"),
|
||||||
|
("san diego", "san diego"),
|
||||||
|
("yokosuka, japan", "yokosuka"),
|
||||||
|
("yokosuka", "yokosuka"),
|
||||||
|
("pearl harbor", "pearl harbor"),
|
||||||
|
("apra harbor, guam", "guam"),
|
||||||
|
("guam", "guam"),
|
||||||
|
("bahrain", "bahrain"),
|
||||||
|
("naval station rota", "rota"),
|
||||||
|
("rota, spain", "rota"),
|
||||||
|
("naples, italy", "naples"),
|
||||||
|
# Fleets / AORs
|
||||||
|
("5th fleet", "5th fleet"),
|
||||||
|
("6th fleet", "6th fleet"),
|
||||||
|
("7th fleet", "7th fleet"),
|
||||||
|
("3rd fleet", "3rd fleet"),
|
||||||
|
("2nd fleet", "2nd fleet"),
|
||||||
|
("centcom", "centcom"),
|
||||||
|
("indo-pacific command", "indopacom"),
|
||||||
|
("eucom", "eucom"),
|
||||||
|
("southcom", "southcom"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
    """Translate a free-text USNI location phrase into a known region.

    The phrase is lower-cased and trimmed, then ``_USNI_REGION_ALIASES`` is
    scanned in order; the first alias that occurs anywhere inside the phrase
    wins.

    Args:
        phrase: Location text lifted from the article; may be empty/None.

    Returns:
        ``(canonical_key, display)`` where ``canonical_key`` is the key used
        by ``carrier_tracker.REGION_COORDS`` and ``display`` is the matched
        alias text shown in the dossier description, or ``None`` when the
        phrase is empty or matches no alias.
    """
    needle = (phrase or "").lower().strip()
    if needle:
        hits = (
            (canonical, alias)
            for alias, canonical in _USNI_REGION_ALIASES
            if alias in needle
        )
        return next(hits, None)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# Operating-verb phrases USNI uses, with a capture group for the region
# phrase that immediately follows. Each pattern is designed to swallow
# the optional editorial filler that often appears between verb and
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
# filler; "Norfolk" is the location).
#
# Order matters: patterns are tried in sequence and the first one that
# matches wins, so the more specific phrasings are listed first.
#
# NOTE(review): every pattern below is compiled with re.IGNORECASE, which
# also applies to the character classes in _DAY_FILLER. The filler therefore
# matches ANY alphabetic word followed by optional comma + whitespace, not
# just capitalized day names. In the "departed" pattern (where "from" is
# optional) this lets the filler consume the first word of the location:
# "departed Norfolk, Va., on ..." captures "Va., on ..." instead of
# "Norfolk, Va.". Tightening the filler to real weekday names would fix that
# but would also stop "returned home to Norfolk" from matching — confirm the
# intended behavior before changing it.
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?"  # optional "Friday" / "Monday" / etc.
# Location capture: starts with a letter, then 2-80 more characters drawn from
# letters, digits, whitespace, comma, period, hyphen and apostrophe. It is
# greedy, so callers trim trailing filler from the captured text afterwards.
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"

_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
    # "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
    re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is conducting <stuff> in [the] {REGION}"
    re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is in port in {LOCATION}"
    re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
    # "is in port" (no location — degenerate, use carrier's homeport via separate path)
    #   → not captured here; falls through to homeport
    # "is underway in [the] {REGION}"
    re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is deployed to [the] {REGION}" / "deployed in"
    re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
    re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
    re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "arrived [Day] in/at {LOCATION}"
    re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
    # "departed [Day] from {LOCATION}" ("from" is optional in the pattern)
    re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "transiting [the] {REGION}" / "sailing through [the] {REGION}"
    re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is homeported at {LOCATION}"
    re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_region_for_carrier(
    body: str,
    carrier_names: list[str],
    hull_code: str,
) -> str | None:
    """Return the best-guess region phrase for one carrier from the
    article body, or None if no confident match.

    Algorithm:
      1. Find every mention of the carrier (any name variant of at least
         four characters, or the hull code) in the body, case-insensitively.
      2. For each mention, take the 320 characters AFTER it and cut that
         window at the FIRST sentence break (``.``/``!``/``?`` followed by
         whitespace and an uppercase letter).
      3. Try the operating-verb patterns in priority order on that window
         and return the first captured location, with trailing editorial
         filler (", according to ...", ", as of ...", ...) removed.

    The first hit in document order wins even if a later mention would give
    a "better" verb — USNI's structure puts the position-update sentence
    near the top of each carrier's section, and the homeport mention later.

    Args:
        body: Plain-text article body.
        carrier_names: Name variants to search for; entries shorter than
            four characters are ignored as too ambiguous.
        hull_code: Hull designation (e.g. "CVN-78"); may be empty.

    Returns:
        The raw location phrase as written in the article (NOT yet resolved
        to a known region), or ``None`` when nothing matched.
    """
    if not body:
        return None

    # Build a master mention regex covering every name variant + the hull.
    candidates = [
        re.escape(name) for name in carrier_names if name and len(name) >= 4
    ]
    if hull_code:
        candidates.append(re.escape(hull_code))
    if not candidates:
        return None
    mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)

    window_chars = 320
    for mention in mention_re.finditer(body):
        end = mention.end()
        window = body[end : end + window_chars]
        # Tighten the window to the current sentence. A break needs
        # whitespace + an uppercase letter after the terminator, so the
        # ubiquitous "Norfolk, Va., according to ..." (period followed by a
        # comma) is not mistaken for a sentence end.
        sent_break = re.search(r"[\.!?]\s+[A-Z]", window)
        if sent_break:
            # Keep the terminator itself, drop everything after it.
            window = window[: sent_break.start() + 1]
        # Try patterns in priority order.
        for pat in _OPERATING_PATTERNS:
            m = pat.search(window)
            if not m:
                continue
            phrase = m.group(1).strip().rstrip(",.;: ")
            if not phrase:
                continue
            # Strip trailing editorial filler — USNI often writes
            # "Norfolk, Va., according to ship spotters" or
            # "Yokosuka, Japan, according to..."
            return re.split(
                r",\s+(?:according|as of|for|while|where|in support|in the)",
                phrase,
                maxsplit=1,
            )[0].strip()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _display_region_name(phrase: str) -> str:
    """Capitalize the first letter of every word in a lowercase alias.

    Unlike ``str.title()``, a letter that directly follows a digit is left
    alone, so "5th fleet" becomes "5th Fleet" rather than "5Th Fleet".
    """
    return re.sub(r"\b[a-z]", lambda m: m.group(0).upper(), phrase)


def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` for the latest USNI fleet tracker.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Carriers whose section can't be parsed (e.g. an off-week with no
    mention), whose phrase matches no known region, or whose region has no
    coordinates are simply absent from the result — the caller keeps
    whatever position they had before.

    Args:
        carrier_registry: ``{hull: {"name": "USS X (CVN-NN)", ...}}``.
            ``None`` means "use ``carrier_tracker.CARRIER_REGISTRY``"; an
            explicitly passed (even empty) dict is used as given.
        region_coords: ``{region_key: (lat, lng)}``. ``None`` means "use
            ``carrier_tracker.REGION_COORDS``". Passed in for testability.

    Returns:
        Mapping of hull code to position entry; ``{}`` when no feed item
        could be fetched or the newest item has no body.
    """
    if carrier_registry is None or region_coords is None:
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        # Compare against None (not truthiness) so a caller that passes an
        # empty table gets an empty result instead of the production data.
        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Pick the most recent by parsed pubDate. Items without a parseable
    # date are treated as the epoch so any dated item beats them; on a tie
    # the item seen first wins.
    latest = max(
        items,
        key=lambda it: it["pub_date"] or datetime(1970, 1, 1, tzinfo=timezone.utc),
    )

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        # Build name variants we'll try in the body. A malformed registry
        # entry (no name) degrades to a hull-code-only search instead of
        # aborting the whole refresh.
        full_name = str((info or {}).get("name") or "")  # "USS Gerald R. Ford (CVN-78)"
        without_hull = full_name.split("(")[0].strip()  # "USS Gerald R. Ford"
        name_words = without_hull.split()
        last_word = name_words[-1] if name_words else ""  # "Ford"
        # Only strip the prefix when it is really there.
        if without_hull.upper().startswith("USS "):
            ship_only = without_hull[4:].strip()  # "Gerald R. Ford"
        else:
            ship_only = without_hull
        prefixed = f"USS {ship_only}" if ship_only else ""

        # Variants ordered most-specific first.
        variants: list[str] = []
        for v in (without_hull, prefixed, ship_only, last_word):
            if v and v not in variants and len(v) >= 4:
                variants.append(v)

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{_display_region_name(display_phrase)} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||||
@@ -21,9 +21,17 @@ _cache_lock = threading.Lock()
|
|||||||
_local_search_cache: List[Dict[str, Any]] | None = None
|
_local_search_cache: List[Dict[str, Any]] | None = None
|
||||||
_local_search_lock = threading.Lock()
|
_local_search_lock = threading.Lock()
|
||||||
|
|
||||||
_USER_AGENT = os.environ.get(
|
# Round 7a: per-install operator handle threads through every Nominatim
|
||||||
"NOMINATIM_USER_AGENT", "ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)"
|
# call. NOMINATIM_USER_AGENT env override is still honored for operators
|
||||||
)
|
# who run a custom relay / known good identity, but the default uses the
|
||||||
|
# per-install handle so OpenStreetMap can rate-limit per install instead
|
||||||
|
# of treating "Shadowbroker" as one big offender.
|
||||||
|
def _nominatim_user_agent() -> str:
|
||||||
|
override = os.environ.get("NOMINATIM_USER_AGENT", "").strip()
|
||||||
|
if override:
|
||||||
|
return override
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("nominatim")
|
||||||
|
|
||||||
|
|
||||||
def _get_cache(key: str):
|
def _get_cache(key: str):
|
||||||
@@ -178,7 +186,7 @@ def search_geocode(query: str, limit: int = 5, local_only: bool = False) -> List
|
|||||||
res = fetch_with_curl(
|
res = fetch_with_curl(
|
||||||
url,
|
url,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": _USER_AGENT,
|
"User-Agent": _nominatim_user_agent(),
|
||||||
"Accept-Language": "en",
|
"Accept-Language": "en",
|
||||||
},
|
},
|
||||||
timeout=6,
|
timeout=6,
|
||||||
@@ -241,7 +249,7 @@ def reverse_geocode(lat: float, lng: float, local_only: bool = False) -> Dict[st
|
|||||||
res = fetch_with_curl(
|
res = fetch_with_curl(
|
||||||
url,
|
url,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": _USER_AGENT,
|
"User-Agent": _nominatim_user_agent(),
|
||||||
"Accept-Language": "en",
|
"Accept-Language": "en",
|
||||||
},
|
},
|
||||||
timeout=6,
|
timeout=6,
|
||||||
|
|||||||
@@ -8,6 +8,13 @@ from datetime import datetime
|
|||||||
from urllib.parse import urljoin, urlparse
|
from urllib.parse import urljoin, urlparse
|
||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _geopolitics_user_agent() -> str:
    """Return the ``User-Agent`` used by the GDELT geopolitics fetcher.

    Round 7a: delegates to ``outbound_user_agent`` with the
    ``"geopolitics-gdelt"`` purpose tag. The helper is imported at call
    time rather than at module import.
    """
    from services import network_utils

    return network_utils.outbound_user_agent("geopolitics-gdelt")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||||
@@ -316,7 +323,7 @@ def _fetch_article_title(url):
|
|||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
current_url,
|
current_url,
|
||||||
timeout=4,
|
timeout=4,
|
||||||
headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT Dashboard/1.0)"},
|
headers={"User-Agent": _geopolitics_user_agent()},
|
||||||
stream=True,
|
stream=True,
|
||||||
allow_redirects=False,
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
@@ -521,10 +528,29 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
|
|||||||
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# GDELT's data.gdeltproject.org is a CNAME to a Google Cloud Storage
|
||||||
|
# bucket of the same name. GCS returns the wildcard ``*.storage.googleapis.com``
|
||||||
|
# certificate, which legitimately does NOT cover the GDELT custom domain
|
||||||
|
# — Python's TLS verification correctly refuses it. Some networks/POPs
|
||||||
|
# happen to route through a path where this works; many do not (notably
|
||||||
|
# Docker Desktop's outbound NAT on local installs).
|
||||||
|
#
|
||||||
|
# Fix: rewrite the URL to hit GCS directly with a path-style bucket
|
||||||
|
# reference, where the standard GCS cert is genuinely valid. Same data,
|
||||||
|
# verified TLS, no operator-side workaround needed.
|
||||||
|
def _gcs_direct_gdelt_url(url: str) -> str:
|
||||||
|
"""If ``url`` points at data.gdeltproject.org, return the equivalent
|
||||||
|
GCS-direct URL. Otherwise return the URL unchanged."""
|
||||||
|
prefix = "://data.gdeltproject.org/"
|
||||||
|
if prefix in url:
|
||||||
|
return url.replace(prefix, "://storage.googleapis.com/data.gdeltproject.org/", 1)
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
def _download_gdelt_export(url):
|
def _download_gdelt_export(url):
|
||||||
"""Download a single GDELT export file, return bytes or None."""
|
"""Download a single GDELT export file, return bytes or None."""
|
||||||
try:
|
try:
|
||||||
res = fetch_with_curl(url, timeout=15)
|
res = fetch_with_curl(_gcs_direct_gdelt_url(url), timeout=15)
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
return res.content
|
return res.content
|
||||||
except (ConnectionError, TimeoutError, OSError): # non-critical
|
except (ConnectionError, TimeoutError, OSError): # non-critical
|
||||||
@@ -620,8 +646,12 @@ def fetch_global_military_incidents():
|
|||||||
# HTTPS is used to prevent passive network observers from injecting
|
# HTTPS is used to prevent passive network observers from injecting
|
||||||
# poisoned export records into the global incident map via MITM.
|
# poisoned export records into the global incident map via MITM.
|
||||||
# GDELT serves the same content over HTTPS as HTTP.
|
# GDELT serves the same content over HTTPS as HTTP.
|
||||||
|
# Use the GCS-direct URL because data.gdeltproject.org's CNAME
|
||||||
|
# serves a wildcard *.storage.googleapis.com cert that legitimately
|
||||||
|
# doesn't cover the GDELT hostname. See _gcs_direct_gdelt_url above.
|
||||||
index_res = fetch_with_curl(
|
index_res = fetch_with_curl(
|
||||||
"https://data.gdeltproject.org/gdeltv2/lastupdate.txt", timeout=10
|
_gcs_direct_gdelt_url("https://data.gdeltproject.org/gdeltv2/lastupdate.txt"),
|
||||||
|
timeout=10,
|
||||||
)
|
)
|
||||||
if index_res.status_code != 200:
|
if index_res.status_code != 200:
|
||||||
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
||||||
|
|||||||
@@ -69,6 +69,115 @@ def _derive_peer_key(shared_secret: str, peer_url: str) -> bytes:
|
|||||||
).digest()
|
).digest()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Issue #256 (tg12): per-peer HMAC secrets
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Before this change, ALL peer-push HMACs were derived from a single
|
||||||
|
# fleet-shared ``MESH_PEER_PUSH_SECRET``. The receiver could prove a
|
||||||
|
# request was signed by *someone who knows the fleet secret*, but it
|
||||||
|
# could NOT prove which peer signed it — any peer could compute the
|
||||||
|
# expected HMAC for any other peer's URL and impersonate that peer.
|
||||||
|
#
|
||||||
|
# Fix: an optional ``MESH_PEER_SECRETS`` env var maps specific peer URLs
|
||||||
|
# to per-peer secrets. When a peer URL is listed there, only that
|
||||||
|
# per-peer secret is accepted for that URL — the global secret is
|
||||||
|
# ignored for that peer. Peer A no longer learns peer B's secret, so
|
||||||
|
# peer A cannot forge a request claiming to be peer B.
|
||||||
|
#
|
||||||
|
# Backwards-compatible by design:
|
||||||
|
#
|
||||||
|
# - Single-peer installs (``MESH_PEER_SECRETS`` empty) keep using the
|
||||||
|
# global secret. Zero behavior change. Zero operator action required.
|
||||||
|
# - Multi-peer installs that haven't migrated yet keep using the global
|
||||||
|
# secret for every peer. Same behavior as before — same exposure.
|
||||||
|
# - Multi-peer installs that have migrated configure
|
||||||
|
# ``MESH_PEER_SECRETS=urlA=secretA,urlB=secretB`` and immediately get
|
||||||
|
# per-peer identity. Migration is incremental: peers not yet listed
|
||||||
|
# continue using the global secret until both sides of that peering
|
||||||
|
# add their entry.
|
||||||
|
|
||||||
|
_PEER_SECRETS_CACHE: dict[str, str] = {}
|
||||||
|
_PEER_SECRETS_CACHE_RAW: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def _lookup_per_peer_secret(normalized_url: str) -> str:
|
||||||
|
"""Return the per-peer secret for ``normalized_url`` from MESH_PEER_SECRETS.
|
||||||
|
|
||||||
|
Returns "" if no per-peer entry is configured for that URL. The parser
|
||||||
|
is forgiving:
|
||||||
|
|
||||||
|
- Whitespace around items, URLs, and secrets is stripped.
|
||||||
|
- Items without ``=`` or with empty URL/secret halves are skipped.
|
||||||
|
- The URL half is normalized via ``normalize_peer_url`` so config
|
||||||
|
authors don't have to match scheme/port/path quirks exactly.
|
||||||
|
|
||||||
|
The cache is invalidated whenever the env var's raw value changes,
|
||||||
|
which keeps tests' ``monkeypatch.setenv`` calls effective without
|
||||||
|
forcing a process restart.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
raw = str(os.environ.get("MESH_PEER_SECRETS", "") or "").strip()
|
||||||
|
|
||||||
|
global _PEER_SECRETS_CACHE, _PEER_SECRETS_CACHE_RAW
|
||||||
|
if raw != _PEER_SECRETS_CACHE_RAW:
|
||||||
|
new_cache: dict[str, str] = {}
|
||||||
|
for chunk in raw.split(","):
|
||||||
|
chunk = chunk.strip()
|
||||||
|
if not chunk or "=" not in chunk:
|
||||||
|
continue
|
||||||
|
url_part, _, secret_part = chunk.partition("=")
|
||||||
|
normalized = normalize_peer_url(url_part.strip())
|
||||||
|
secret = secret_part.strip()
|
||||||
|
if normalized and secret:
|
||||||
|
new_cache[normalized] = secret
|
||||||
|
_PEER_SECRETS_CACHE = new_cache
|
||||||
|
_PEER_SECRETS_CACHE_RAW = raw
|
||||||
|
|
||||||
|
return _PEER_SECRETS_CACHE.get(normalized_url, "")
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_peer_key_for_url(peer_url: str) -> bytes:
    """Derive the HMAC key to use for ``peer_url`` (issue #256).

    Resolution order:

    1. If the normalized URL has an entry in ``MESH_PEER_SECRETS``, that
       per-peer secret is used and the fleet-wide secret is not consulted.
    2. Otherwise the fleet-wide ``MESH_PEER_PUSH_SECRET`` setting is used,
       which keeps single-peer installs and not-yet-migrated peers working.

    Sender (computing X-Peer-HMAC) and receiver (verifying it) both call
    this with the SENDER's URL, so both ends need matching configuration
    for that URL to arrive at the same key.

    Returns:
        The derived key, or ``b""`` when the URL does not normalize, the
        settings cannot be loaded, or no secret is configured. Callers must
        treat ``b""`` as fail-closed (skip the push / reject verification).
    """
    canonical = normalize_peer_url(peer_url)
    if not canonical:
        return b""

    secret = _lookup_per_peer_secret(canonical)
    if not secret:
        # No dedicated entry: fall back to the legacy fleet-wide secret.
        try:
            from services.config import get_settings

            secret = str(
                getattr(get_settings(), "MESH_PEER_PUSH_SECRET", "") or ""
            ).strip()
        except Exception:
            return b""

    return _derive_peer_key(secret, canonical) if secret else b""
|
||||||
|
|
||||||
|
|
||||||
def _node_digest(public_key_b64: str) -> str:
|
def _node_digest(public_key_b64: str) -> str:
|
||||||
raw = base64.b64decode(public_key_b64)
|
raw = base64.b64decode(public_key_b64)
|
||||||
return hashlib.sha256(raw).hexdigest()
|
return hashlib.sha256(raw).hexdigest()
|
||||||
|
|||||||
@@ -317,6 +317,39 @@ class DMRelay:
|
|||||||
def _self_mailbox_limit(self) -> int:
|
def _self_mailbox_limit(self) -> int:
|
||||||
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
||||||
|
|
||||||
|
def _per_sender_pending_limit(self) -> int:
|
||||||
|
"""Anti-spam cap on UNACKED messages a single sender can have parked
|
||||||
|
in a single recipient mailbox at any one time. See ``config.py``
|
||||||
|
``MESH_DM_PENDING_PER_SENDER_LIMIT`` for the threat model — this
|
||||||
|
rule is enforced both at ``deposit`` (local) and at
|
||||||
|
``accept_replica`` (peer push acceptance), making it a network
|
||||||
|
rule rather than a client-side honor system."""
|
||||||
|
try:
|
||||||
|
limit = int(getattr(self._settings(), "MESH_DM_PENDING_PER_SENDER_LIMIT", 2) or 2)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
limit = 2
|
||||||
|
return max(1, limit)
|
||||||
|
|
||||||
|
def _per_sender_pending_count(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
mailbox_key: str,
|
||||||
|
sender_block_ref: str,
|
||||||
|
) -> int:
|
||||||
|
"""Count UNACKED messages from ``sender_block_ref`` currently parked
|
||||||
|
in ``mailbox_key``. Caller already holds ``self._lock``.
|
||||||
|
|
||||||
|
Messages that have been claimed/acked are removed from the mailbox
|
||||||
|
list (see ``claim_message_ids``), so anything still here is by
|
||||||
|
definition unacked. We count by exact ``sender_block_ref`` match
|
||||||
|
— that's the per-pair sender identity used for blocking too, so
|
||||||
|
the cap is naturally per-(sender, recipient).
|
||||||
|
"""
|
||||||
|
if not mailbox_key or not sender_block_ref:
|
||||||
|
return 0
|
||||||
|
messages = self._mailboxes.get(mailbox_key, [])
|
||||||
|
return sum(1 for m in messages if m.sender_block_ref == sender_block_ref)
|
||||||
|
|
||||||
def _nonce_ttl_seconds(self) -> int:
|
def _nonce_ttl_seconds(self) -> int:
|
||||||
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
||||||
|
|
||||||
@@ -1515,6 +1548,29 @@ class DMRelay:
|
|||||||
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
||||||
metrics_inc("dm_drop_full")
|
metrics_inc("dm_drop_full")
|
||||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||||
|
# Anti-spam: per-(sender, recipient) cap on unacked messages.
|
||||||
|
# A sender who already has the configured number of messages
|
||||||
|
# parked in this mailbox can't deposit more until the recipient
|
||||||
|
# pulls (acks) at least one. The same cap is re-enforced on
|
||||||
|
# inbound replication in ``accept_replica`` so this rule isn't
|
||||||
|
# bypassable by patching out the local check on a hostile
|
||||||
|
# sender's relay — see config.py
|
||||||
|
# MESH_DM_PENDING_PER_SENDER_LIMIT for the threat model.
|
||||||
|
per_sender_limit = self._per_sender_pending_limit()
|
||||||
|
pending = self._per_sender_pending_count(
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
sender_block_ref=sender_block_ref,
|
||||||
|
)
|
||||||
|
if pending >= per_sender_limit:
|
||||||
|
metrics_inc("dm_drop_per_sender_cap")
|
||||||
|
return {
|
||||||
|
"ok": False,
|
||||||
|
"detail": (
|
||||||
|
f"Recipient already has {pending} unread message"
|
||||||
|
f"{'s' if pending != 1 else ''} from you. Wait for "
|
||||||
|
"them to read your messages before sending more."
|
||||||
|
),
|
||||||
|
}
|
||||||
if not msg_id:
|
if not msg_id:
|
||||||
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
||||||
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
||||||
@@ -1539,8 +1595,245 @@ class DMRelay:
|
|||||||
)
|
)
|
||||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||||
self._save()
|
self._save()
|
||||||
|
# Cross-node mailbox replication: push the freshly-stored
|
||||||
|
# envelope to every authenticated relay peer so the recipient
|
||||||
|
# can log into ANY node and find their messages. The push is
|
||||||
|
# async (fire-and-forget thread) so deposit() returns
|
||||||
|
# immediately — slow Tor peers can't block the sender's UX.
|
||||||
|
# Each receiving peer re-enforces the per-sender cap on
|
||||||
|
# acceptance, so hostile relays can't widen the cap.
|
||||||
|
try:
|
||||||
|
envelope_for_push = self.envelope_for_replication(
|
||||||
|
mailbox_key=mailbox_key, msg_id=msg_id,
|
||||||
|
)
|
||||||
|
if envelope_for_push:
|
||||||
|
self._replicate_envelope_to_peers_async(
|
||||||
|
envelope=envelope_for_push,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
metrics_inc("dm_replication_push_error")
|
||||||
return {"ok": True, "msg_id": msg_id}
|
return {"ok": True, "msg_id": msg_id}
|
||||||
|
|
||||||
|
def accept_replica(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
envelope: dict[str, Any],
|
||||||
|
originating_peer_url: str = "",
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Receive a DM envelope replicated from a peer relay.
|
||||||
|
|
||||||
|
Cross-node mailbox replication entry point. When a sender's local
|
||||||
|
relay accepts a ``deposit`` and pushes the envelope to
|
||||||
|
``MESH_RELAY_PEERS`` (so the recipient can log into any peer
|
||||||
|
node and find their messages), each receiving peer calls
|
||||||
|
``accept_replica`` to ingest it.
|
||||||
|
|
||||||
|
The per-(sender, recipient) cap is re-enforced HERE. That's what
|
||||||
|
makes the rule a NETWORK rule rather than a client-side honor
|
||||||
|
system: a hostile sender who patches out the local ``deposit``
|
||||||
|
check still can't get a 3rd unacked message to spread, because
|
||||||
|
every honest peer enforces the same cap on inbound replicas.
|
||||||
|
Result: hostile relays can hold extras locally, but those extras
|
||||||
|
never reach any node a legitimate recipient is polling from.
|
||||||
|
|
||||||
|
Returns the same shape as ``deposit`` so the calling endpoint can
|
||||||
|
forward the result back to the originating peer.
|
||||||
|
"""
|
||||||
|
if not isinstance(envelope, dict):
|
||||||
|
return {"ok": False, "detail": "envelope must be an object"}
|
||||||
|
msg_id = str(envelope.get("msg_id", "") or "").strip()
|
||||||
|
mailbox_key = str(envelope.get("mailbox_key", "") or "").strip()
|
||||||
|
sender_block_ref = str(envelope.get("sender_block_ref", "") or "").strip()
|
||||||
|
ciphertext = str(envelope.get("ciphertext", "") or "")
|
||||||
|
if not msg_id or not mailbox_key or not sender_block_ref or not ciphertext:
|
||||||
|
return {"ok": False, "detail": "envelope missing required fields"}
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
self._refresh_from_shared_relay()
|
||||||
|
self._cleanup_expired()
|
||||||
|
|
||||||
|
# Idempotent — if we already hold this exact msg_id, the
|
||||||
|
# replication round-tripped or a peer pushed the same
|
||||||
|
# envelope through multiple paths. Accept silently.
|
||||||
|
if any(m.msg_id == msg_id for m in self._mailboxes.get(mailbox_key, [])):
|
||||||
|
metrics_inc("dm_replica_duplicate")
|
||||||
|
return {"ok": True, "msg_id": msg_id, "duplicate": True}
|
||||||
|
|
||||||
|
# Same per-class cap as the deposit path — defense in depth
|
||||||
|
# against a peer that wraps a "deposit" as a "replica" to
|
||||||
|
# bypass the class limit.
|
||||||
|
delivery_class = str(envelope.get("delivery_class", "") or "")
|
||||||
|
if delivery_class in ("request", "shared", "self"):
|
||||||
|
class_limit = self._mailbox_limit_for_class(delivery_class)
|
||||||
|
else:
|
||||||
|
class_limit = self._shared_mailbox_limit()
|
||||||
|
if len(self._mailboxes.get(mailbox_key, [])) >= class_limit:
|
||||||
|
metrics_inc("dm_replica_drop_full")
|
||||||
|
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||||
|
|
||||||
|
# THE network rule: per-(sender, recipient) anti-spam cap.
|
||||||
|
per_sender_limit = self._per_sender_pending_limit()
|
||||||
|
pending = self._per_sender_pending_count(
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
sender_block_ref=sender_block_ref,
|
||||||
|
)
|
||||||
|
if pending >= per_sender_limit:
|
||||||
|
metrics_inc("dm_replica_drop_per_sender_cap")
|
||||||
|
# Returning a structured rejection — the sender's relay
|
||||||
|
# learns its envelope was rejected by an honest peer and
|
||||||
|
# can stop trying to push it.
|
||||||
|
return {
|
||||||
|
"ok": False,
|
||||||
|
"detail": (
|
||||||
|
"Per-sender cap reached on this relay; refusing replica"
|
||||||
|
),
|
||||||
|
"cap_violation": True,
|
||||||
|
"pending": pending,
|
||||||
|
"limit": per_sender_limit,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Accept the replica into the local mailbox.
|
||||||
|
self._mailboxes[mailbox_key].append(
|
||||||
|
DMMessage(
|
||||||
|
sender_id=str(envelope.get("sender_id", "") or ""),
|
||||||
|
ciphertext=ciphertext,
|
||||||
|
timestamp=float(envelope.get("timestamp", time.time()) or time.time()),
|
||||||
|
msg_id=msg_id,
|
||||||
|
delivery_class=str(envelope.get("delivery_class", "shared") or "shared"),
|
||||||
|
sender_seal=str(envelope.get("sender_seal", "") or ""),
|
||||||
|
relay_salt=str(envelope.get("relay_salt", "") or ""),
|
||||||
|
sender_block_ref=sender_block_ref,
|
||||||
|
payload_format=str(envelope.get("payload_format", "dm1") or "dm1"),
|
||||||
|
session_welcome=str(envelope.get("session_welcome", "") or ""),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||||
|
self._save()
|
||||||
|
metrics_inc("dm_replica_accepted")
|
||||||
|
return {"ok": True, "msg_id": msg_id}
|
||||||
|
|
||||||
|
def _replicate_envelope_to_peers_async(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
envelope: dict[str, Any],
|
||||||
|
) -> None:
|
||||||
|
"""Push an outbound DM envelope to every authenticated relay peer.
|
||||||
|
|
||||||
|
Fire-and-forget: spawned in a background thread so ``deposit``
|
||||||
|
returns to the caller immediately. Per-peer errors are logged
|
||||||
|
and swallowed — the sender's UX must not block on slow Tor
|
||||||
|
peers, and a peer that's down today gets the next message
|
||||||
|
whenever it comes back. Inbound recipient polling from a healthy
|
||||||
|
peer keeps the system functional during peer failures.
|
||||||
|
|
||||||
|
Each peer is authed with the existing per-peer HMAC pattern
|
||||||
|
(#256) — same headers and key resolver gate-message replication
|
||||||
|
uses, so a hostile node that doesn't know any peer's HMAC key
|
||||||
|
can't impersonate a legitimate relay.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
|
||||||
|
def _do_push():
|
||||||
|
try:
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import requests as _requests
|
||||||
|
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
normalize_peer_url,
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
)
|
||||||
|
from services.mesh.mesh_router import (
|
||||||
|
authenticated_push_peer_urls,
|
||||||
|
)
|
||||||
|
|
||||||
|
peers = authenticated_push_peer_urls()
|
||||||
|
if not peers:
|
||||||
|
return
|
||||||
|
|
||||||
|
payload = json.dumps(
|
||||||
|
{"envelope": envelope},
|
||||||
|
separators=(",", ":"),
|
||||||
|
ensure_ascii=False,
|
||||||
|
).encode("utf-8")
|
||||||
|
|
||||||
|
timeout = max(
|
||||||
|
1,
|
||||||
|
int(getattr(self._settings(), "MESH_RELAY_PUSH_TIMEOUT_S", 10) or 10),
|
||||||
|
)
|
||||||
|
|
||||||
|
for peer_url in peers:
|
||||||
|
try:
|
||||||
|
normalized = normalize_peer_url(peer_url)
|
||||||
|
headers = {"Content-Type": "application/json"}
|
||||||
|
peer_key = resolve_peer_key_for_url(normalized)
|
||||||
|
if peer_key:
|
||||||
|
headers["X-Peer-Url"] = normalized
|
||||||
|
headers["X-Peer-HMAC"] = hmac.new(
|
||||||
|
peer_key, payload, hashlib.sha256
|
||||||
|
).hexdigest()
|
||||||
|
url = f"{peer_url}/api/mesh/dm/replicate-envelope"
|
||||||
|
resp = _requests.post(
|
||||||
|
url, data=payload, timeout=timeout, headers=headers,
|
||||||
|
)
|
||||||
|
if resp.status_code == 200:
|
||||||
|
metrics_inc("dm_replication_push_ok")
|
||||||
|
else:
|
||||||
|
# 4xx including the structured cap_violation
|
||||||
|
# rejection from accept_replica — sender's
|
||||||
|
# relay learns and stops retrying this msg_id.
|
||||||
|
metrics_inc("dm_replication_push_rejected")
|
||||||
|
except Exception:
|
||||||
|
# Per-peer failure is non-fatal — log to metrics
|
||||||
|
# but don't break the loop. Other peers and a
|
||||||
|
# future retry can still propagate the envelope.
|
||||||
|
metrics_inc("dm_replication_push_error")
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
# Outer guard — never let replication errors propagate
|
||||||
|
# back to the sender's deposit() caller.
|
||||||
|
metrics_inc("dm_replication_push_error")
|
||||||
|
|
||||||
|
thread = threading.Thread(
|
||||||
|
target=_do_push,
|
||||||
|
name="dm-replicate-push",
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
def envelope_for_replication(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
mailbox_key: str,
|
||||||
|
msg_id: str,
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
"""Return the wire-form envelope for a stored message, suitable
|
||||||
|
for POSTing to a peer relay's replicate-envelope endpoint.
|
||||||
|
|
||||||
|
Returns ``None`` if the message isn't in the mailbox (already
|
||||||
|
acked, expired, never existed). The caller holds the
|
||||||
|
responsibility for transport security (Tor SOCKS for .onion
|
||||||
|
peers, per-peer HMAC) and for not leaking the envelope to
|
||||||
|
clearnet peers when private transport is required.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
for m in self._mailboxes.get(mailbox_key, []):
|
||||||
|
if m.msg_id == msg_id:
|
||||||
|
return {
|
||||||
|
"msg_id": m.msg_id,
|
||||||
|
"mailbox_key": mailbox_key,
|
||||||
|
"sender_id": m.sender_id,
|
||||||
|
"sender_block_ref": m.sender_block_ref,
|
||||||
|
"sender_seal": m.sender_seal,
|
||||||
|
"ciphertext": m.ciphertext,
|
||||||
|
"timestamp": m.timestamp,
|
||||||
|
"delivery_class": m.delivery_class,
|
||||||
|
"relay_salt": m.relay_salt,
|
||||||
|
"payload_format": m.payload_format,
|
||||||
|
"session_welcome": m.session_welcome,
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
|
||||||
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self._refresh_from_shared_relay()
|
self._refresh_from_shared_relay()
|
||||||
|
|||||||
@@ -33,8 +33,9 @@ Each event contains:
|
|||||||
|
|
||||||
Persistence: JSON file at backend/data/infonet.json
|
Persistence: JSON file at backend/data/infonet.json
|
||||||
|
|
||||||
Encrypted gate chat events are intentionally kept off the public chain and
|
Encrypted gate chat events are private-chain ciphertext records. They are
|
||||||
persisted separately via GateMessageStore.
|
excluded from public read surfaces and replicated only over private Infonet
|
||||||
|
transports.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -64,6 +65,8 @@ from services.mesh.mesh_schema import (
|
|||||||
ACTIVE_PUBLIC_LEDGER_EVENT_TYPES,
|
ACTIVE_PUBLIC_LEDGER_EVENT_TYPES,
|
||||||
PUBLIC_LEDGER_EVENT_TYPES,
|
PUBLIC_LEDGER_EVENT_TYPES,
|
||||||
validate_event_payload,
|
validate_event_payload,
|
||||||
|
validate_private_dm_ledger_payload,
|
||||||
|
validate_private_gate_ledger_payload,
|
||||||
validate_protocol_fields,
|
validate_protocol_fields,
|
||||||
validate_public_ledger_payload,
|
validate_public_ledger_payload,
|
||||||
)
|
)
|
||||||
@@ -127,6 +130,12 @@ GATE_SEGMENT_MAX_COMPRESSED_BYTES = max(
|
|||||||
int(os.environ.get("MESH_GATE_SEGMENT_MAX_COMPRESSED_BYTES", str(2 * 1024 * 1024)) or str(2 * 1024 * 1024)),
|
int(os.environ.get("MESH_GATE_SEGMENT_MAX_COMPRESSED_BYTES", str(2 * 1024 * 1024)) or str(2 * 1024 * 1024)),
|
||||||
)
|
)
|
||||||
GATE_SEGMENT_STORAGE_VERSION = 1
|
GATE_SEGMENT_STORAGE_VERSION = 1
|
||||||
|
DM_HASHCHAIN_SPOOL_LIMIT = max(1, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_LIMIT", "2") or "2"))
|
||||||
|
DM_HASHCHAIN_SPOOL_SENDER_LIMIT = max(
|
||||||
|
1,
|
||||||
|
int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_SENDER_LIMIT", "1") or "1"),
|
||||||
|
)
|
||||||
|
DM_HASHCHAIN_SPOOL_TTL_S = max(60, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_TTL_S", "3600") or "3600"))
|
||||||
_PUBLIC_EVENT_APPEND_HOOKS: list[Any] = []
|
_PUBLIC_EVENT_APPEND_HOOKS: list[Any] = []
|
||||||
_PUBLIC_EVENT_APPEND_HOOKS_LOCK = threading.Lock()
|
_PUBLIC_EVENT_APPEND_HOOKS_LOCK = threading.Lock()
|
||||||
|
|
||||||
@@ -216,18 +225,19 @@ def _peer_pair_ref_key(peer_url: str) -> bytes:
|
|||||||
Returns an empty key on misconfiguration so callers fail closed.
|
Returns an empty key on misconfiguration so callers fail closed.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from services.config import get_settings
|
from services.mesh.mesh_crypto import (
|
||||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
normalize_peer_url,
|
||||||
|
resolve_peer_key_for_url,
|
||||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
return b""
|
return b""
|
||||||
if not secret:
|
|
||||||
return b""
|
|
||||||
normalized = normalize_peer_url(peer_url or "")
|
normalized = normalize_peer_url(peer_url or "")
|
||||||
if not normalized:
|
if not normalized:
|
||||||
return b""
|
return b""
|
||||||
peer_key = _derive_peer_key(secret, normalized)
|
# Issue #256: resolve_peer_key_for_url() prefers per-peer secrets
|
||||||
|
# from MESH_PEER_SECRETS and falls back to the global
|
||||||
|
# MESH_PEER_PUSH_SECRET only when the URL has no per-peer entry.
|
||||||
|
peer_key = resolve_peer_key_for_url(normalized)
|
||||||
if not peer_key:
|
if not peer_key:
|
||||||
return b""
|
return b""
|
||||||
# Domain-separate from the transport HMAC key so the two
|
# Domain-separate from the transport HMAC key so the two
|
||||||
@@ -339,6 +349,32 @@ def _private_gate_event_id(
|
|||||||
).hexdigest()
|
).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _private_gate_signature_payload_variants(gate_id: str, event: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
payload = _private_gate_signature_payload(gate_id, event)
|
||||||
|
variants: list[dict[str, Any]] = [payload]
|
||||||
|
event_payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
||||||
|
reply_to = str(event_payload.get("reply_to", "") or "").strip()
|
||||||
|
if reply_to:
|
||||||
|
variants.append(_private_gate_signature_payload(gate_id, event, include_reply_to=False))
|
||||||
|
if "epoch" in payload:
|
||||||
|
no_epoch = dict(payload)
|
||||||
|
no_epoch.pop("epoch", None)
|
||||||
|
variants.append(no_epoch)
|
||||||
|
if reply_to:
|
||||||
|
no_epoch_no_reply = _private_gate_signature_payload(gate_id, event, include_reply_to=False)
|
||||||
|
no_epoch_no_reply.pop("epoch", None)
|
||||||
|
variants.append(no_epoch_no_reply)
|
||||||
|
deduped: list[dict[str, Any]] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for variant in variants:
|
||||||
|
material = json.dumps(variant, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
if material in seen:
|
||||||
|
continue
|
||||||
|
seen.add(material)
|
||||||
|
deduped.append(variant)
|
||||||
|
return deduped
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_private_gate_event(gate_id: str, event: dict[str, Any]) -> dict[str, Any]:
|
def _sanitize_private_gate_event(gate_id: str, event: dict[str, Any]) -> dict[str, Any]:
|
||||||
payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
||||||
sanitized = {
|
sanitized = {
|
||||||
@@ -1567,11 +1603,18 @@ class Infonet:
|
|||||||
def _rebuild_state(self) -> None:
|
def _rebuild_state(self) -> None:
|
||||||
self.event_index = {}
|
self.event_index = {}
|
||||||
self.node_sequences = {}
|
self.node_sequences = {}
|
||||||
# Keep private signed-write replay domains across public-chain
|
# Keep private signed-write replay domains that are not represented
|
||||||
# rebuilds; these domains protect local side effects that are not
|
# on-chain, but rebuild the gate_message sequence domain from chain
|
||||||
# represented as public Infonet events.
|
# events so reloads/fork application do not mix it with public
|
||||||
if not isinstance(getattr(self, "sequence_domains", None), dict):
|
# per-node message sequences.
|
||||||
self.sequence_domains = {}
|
preserved_domains = {}
|
||||||
|
if isinstance(getattr(self, "sequence_domains", None), dict):
|
||||||
|
preserved_domains = {
|
||||||
|
key: value
|
||||||
|
for key, value in self.sequence_domains.items()
|
||||||
|
if not str(key or "").endswith("|gate_message")
|
||||||
|
}
|
||||||
|
self.sequence_domains = dict(preserved_domains)
|
||||||
self.public_key_bindings = {}
|
self.public_key_bindings = {}
|
||||||
self.revocations = {}
|
self.revocations = {}
|
||||||
self._replay_filter = ReplayFilter()
|
self._replay_filter = ReplayFilter()
|
||||||
@@ -1583,9 +1626,12 @@ class Infonet:
|
|||||||
node_id = evt.get("node_id", "")
|
node_id = evt.get("node_id", "")
|
||||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||||
if node_id and sequence:
|
if node_id and sequence:
|
||||||
last = self.node_sequences.get(node_id, 0)
|
sequence_table, sequence_key = self._sequence_table_for_event(
|
||||||
|
evt.get("event_type", ""), node_id
|
||||||
|
)
|
||||||
|
last = sequence_table.get(sequence_key, 0)
|
||||||
if sequence > last:
|
if sequence > last:
|
||||||
self.node_sequences[node_id] = sequence
|
sequence_table[sequence_key] = sequence
|
||||||
public_key = str(evt.get("public_key", "") or "")
|
public_key = str(evt.get("public_key", "") or "")
|
||||||
if public_key and node_id:
|
if public_key and node_id:
|
||||||
existing = self.public_key_bindings.get(public_key)
|
existing = self.public_key_bindings.get(public_key)
|
||||||
@@ -1897,6 +1943,295 @@ class Infonet:
|
|||||||
self._save()
|
self._save()
|
||||||
return True, "ok"
|
return True, "ok"
|
||||||
|
|
||||||
|
def _sequence_table_for_event(self, event_type: str, node_id: str) -> tuple[dict[str, int], str]:
|
||||||
|
normalized = str(event_type or "").strip().lower()
|
||||||
|
if normalized == "gate_message":
|
||||||
|
return self.sequence_domains, f"{node_id}|gate_message"
|
||||||
|
if normalized == "dm_message":
|
||||||
|
return self.sequence_domains, f"{node_id}|dm_message"
|
||||||
|
return self.node_sequences, node_id
|
||||||
|
|
||||||
|
def _dm_spool_target_key(self, payload: dict[str, Any]) -> tuple[str, str]:
|
||||||
|
delivery_class = str(payload.get("delivery_class", "") or "").strip().lower()
|
||||||
|
if delivery_class == "shared":
|
||||||
|
key = str(payload.get("recipient_token", "") or "").strip()
|
||||||
|
else:
|
||||||
|
key = str(payload.get("recipient_id", "") or "").strip()
|
||||||
|
return delivery_class, key
|
||||||
|
|
||||||
|
def _dm_spool_active_counts(
|
||||||
|
self,
|
||||||
|
payload: dict[str, Any],
|
||||||
|
*,
|
||||||
|
sender_id: str = "",
|
||||||
|
now: float | None = None,
|
||||||
|
) -> tuple[int, int]:
|
||||||
|
delivery_class, key = self._dm_spool_target_key(payload)
|
||||||
|
if not key:
|
||||||
|
return 0, 0
|
||||||
|
sender_id = str(sender_id or "").strip()
|
||||||
|
current = time.time() if now is None else float(now)
|
||||||
|
total_count = 0
|
||||||
|
sender_count = 0
|
||||||
|
for evt in reversed(self.events):
|
||||||
|
if evt.get("event_type") != "dm_message":
|
||||||
|
continue
|
||||||
|
evt_payload = evt.get("payload") if isinstance(evt.get("payload"), dict) else {}
|
||||||
|
evt_delivery_class, evt_key = self._dm_spool_target_key(evt_payload)
|
||||||
|
if evt_delivery_class != delivery_class:
|
||||||
|
continue
|
||||||
|
if evt_key != key:
|
||||||
|
continue
|
||||||
|
evt_ts = float(evt_payload.get("timestamp", evt.get("timestamp", 0)) or 0)
|
||||||
|
if evt_ts > 0 and current - evt_ts > DM_HASHCHAIN_SPOOL_TTL_S:
|
||||||
|
continue
|
||||||
|
total_count += 1
|
||||||
|
if sender_id and str(evt.get("node_id", "") or "").strip() == sender_id:
|
||||||
|
sender_count += 1
|
||||||
|
if total_count >= DM_HASHCHAIN_SPOOL_LIMIT and (
|
||||||
|
not sender_id or sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT
|
||||||
|
):
|
||||||
|
break
|
||||||
|
return total_count, sender_count
|
||||||
|
|
||||||
|
def _dm_spool_active_count(self, payload: dict[str, Any], *, now: float | None = None) -> int:
|
||||||
|
total_count, _sender_count = self._dm_spool_active_counts(payload, now=now)
|
||||||
|
return total_count
|
||||||
|
|
||||||
|
def append_private_dm_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
node_id: str,
|
||||||
|
payload: dict,
|
||||||
|
signature: str,
|
||||||
|
sequence: int,
|
||||||
|
public_key: str,
|
||||||
|
public_key_algo: str,
|
||||||
|
protocol_version: str = "",
|
||||||
|
timestamp: float = 0,
|
||||||
|
) -> dict:
|
||||||
|
"""Append an encrypted DM dead-drop message to the private Infonet ledger.
|
||||||
|
|
||||||
|
The event is a small offline spool, capped per mailbox target, so the
|
||||||
|
hashchain can carry a couple of sealed DMs without becoming an
|
||||||
|
unbounded global mailbox.
|
||||||
|
"""
|
||||||
|
event_type = "dm_message"
|
||||||
|
if sequence <= 0:
|
||||||
|
raise ValueError("sequence is required and must be > 0")
|
||||||
|
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||||
|
last = sequence_table.get(sequence_key, 0)
|
||||||
|
if sequence <= last:
|
||||||
|
raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")
|
||||||
|
|
||||||
|
raw_payload = dict(payload or {})
|
||||||
|
if "message" in raw_payload or "plaintext" in raw_payload or "_local_plaintext" in raw_payload:
|
||||||
|
raise ValueError("private DM ledger payload must not contain plaintext")
|
||||||
|
if str(raw_payload.get("transport_lock", "") or "").strip().lower() != "private_strong":
|
||||||
|
raise ValueError("DM hashchain spool requires private_strong transport_lock")
|
||||||
|
|
||||||
|
payload = normalize_payload(event_type, raw_payload)
|
||||||
|
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||||
|
if not ok:
|
||||||
|
raise ValueError(reason)
|
||||||
|
total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=node_id)
|
||||||
|
if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
|
||||||
|
raise ValueError("DM hashchain sender spool full for recipient")
|
||||||
|
if total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
|
||||||
|
raise ValueError("DM hashchain spool full for recipient")
|
||||||
|
|
||||||
|
payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
if len(payload_json.encode("utf-8")) > MAX_PAYLOAD_BYTES:
|
||||||
|
raise ValueError("payload exceeds max size")
|
||||||
|
|
||||||
|
protocol_version = str(protocol_version or PROTOCOL_VERSION)
|
||||||
|
ok, reason = validate_protocol_fields(protocol_version, NETWORK_ID)
|
||||||
|
if not ok:
|
||||||
|
raise ValueError(reason)
|
||||||
|
|
||||||
|
if not (signature and public_key and public_key_algo):
|
||||||
|
raise ValueError("Missing signature fields")
|
||||||
|
algo = parse_public_key_algo(public_key_algo)
|
||||||
|
if not algo:
|
||||||
|
raise ValueError("Unsupported public_key_algo")
|
||||||
|
if not verify_node_binding(node_id, public_key):
|
||||||
|
raise ValueError("node_id mismatch")
|
||||||
|
bound, bind_reason = self._bind_public_key(public_key, node_id)
|
||||||
|
if not bound:
|
||||||
|
raise ValueError(bind_reason)
|
||||||
|
sig_payload = build_signature_payload(
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=payload,
|
||||||
|
)
|
||||||
|
if not verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
raise ValueError("Invalid signature")
|
||||||
|
|
||||||
|
revoked, _info = self._revocation_status(public_key)
|
||||||
|
if revoked:
|
||||||
|
raise ValueError("public key is revoked")
|
||||||
|
|
||||||
|
event = ChainEvent(
|
||||||
|
prev_hash=self.head_hash,
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
timestamp=float(timestamp or time.time()),
|
||||||
|
sequence=sequence,
|
||||||
|
signature=signature,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
protocol_version=protocol_version,
|
||||||
|
)
|
||||||
|
event_dict = event.to_dict()
|
||||||
|
self._write_wal(event_dict)
|
||||||
|
self.events.append(event_dict)
|
||||||
|
self.event_index[event.event_id] = len(self.events) - 1
|
||||||
|
self.head_hash = event.event_id
|
||||||
|
sequence_table[sequence_key] = sequence
|
||||||
|
self._replay_filter.add(event.event_id)
|
||||||
|
self._invalidate_merkle_cache()
|
||||||
|
self._update_counters_for_event(event_dict)
|
||||||
|
self._save()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from services.mesh.mesh_rns import rns_bridge
|
||||||
|
|
||||||
|
rns_bridge.publish_event(event_dict)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
_notify_public_event_append_hooks(event_dict)
|
||||||
|
logger.info(
|
||||||
|
f"Infonet append [dm_message] by {_redact_node(node_id)} seq={sequence} "
|
||||||
|
f"id={event.event_id[:16]}..."
|
||||||
|
)
|
||||||
|
return event_dict
|
||||||
|
|
||||||
|
def append_private_gate_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
node_id: str,
|
||||||
|
payload: dict,
|
||||||
|
signature: str,
|
||||||
|
sequence: int,
|
||||||
|
public_key: str,
|
||||||
|
public_key_algo: str,
|
||||||
|
protocol_version: str = "",
|
||||||
|
timestamp: float = 0,
|
||||||
|
) -> dict:
|
||||||
|
"""Append an encrypted gate message to the private Infonet ledger.
|
||||||
|
|
||||||
|
Gate messages use their own sequence domain so a gate post cannot
|
||||||
|
consume or replay-block the author's public broadcast sequence.
|
||||||
|
"""
|
||||||
|
event_type = "gate_message"
|
||||||
|
if sequence <= 0:
|
||||||
|
raise ValueError("sequence is required and must be > 0")
|
||||||
|
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||||
|
last = sequence_table.get(sequence_key, 0)
|
||||||
|
if sequence <= last:
|
||||||
|
raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")
|
||||||
|
|
||||||
|
raw_payload = dict(payload or {})
|
||||||
|
if "message" in raw_payload or "_local_plaintext" in raw_payload or "_local_reply_to" in raw_payload:
|
||||||
|
raise ValueError("private gate ledger payload must not contain plaintext")
|
||||||
|
if str(raw_payload.get("transport_lock", "") or "").strip().lower() != "private_strong":
|
||||||
|
raise ValueError("gate messages require private_strong transport_lock")
|
||||||
|
|
||||||
|
payload = normalize_payload(event_type, raw_payload)
|
||||||
|
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||||
|
if not ok:
|
||||||
|
raise ValueError(reason)
|
||||||
|
|
||||||
|
payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
if len(payload_json.encode("utf-8")) > MAX_PAYLOAD_BYTES:
|
||||||
|
raise ValueError("payload exceeds max size")
|
||||||
|
|
||||||
|
protocol_version = str(protocol_version or PROTOCOL_VERSION)
|
||||||
|
ok, reason = validate_protocol_fields(protocol_version, NETWORK_ID)
|
||||||
|
if not ok:
|
||||||
|
raise ValueError(reason)
|
||||||
|
|
||||||
|
if not (signature and public_key and public_key_algo):
|
||||||
|
raise ValueError("Missing signature fields")
|
||||||
|
algo = parse_public_key_algo(public_key_algo)
|
||||||
|
if not algo:
|
||||||
|
raise ValueError("Unsupported public_key_algo")
|
||||||
|
if not verify_node_binding(node_id, public_key):
|
||||||
|
raise ValueError("node_id mismatch")
|
||||||
|
bound, bind_reason = self._bind_public_key(public_key, node_id)
|
||||||
|
if not bound:
|
||||||
|
raise ValueError(bind_reason)
|
||||||
|
event_for_signature = {"payload": payload}
|
||||||
|
signature_ok = False
|
||||||
|
for signature_payload in _private_gate_signature_payload_variants(
|
||||||
|
str(payload.get("gate", "") or ""),
|
||||||
|
event_for_signature,
|
||||||
|
):
|
||||||
|
sig_payload = build_signature_payload(
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=signature_payload,
|
||||||
|
)
|
||||||
|
if verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
signature_ok = True
|
||||||
|
break
|
||||||
|
if not signature_ok:
|
||||||
|
raise ValueError("Invalid signature")
|
||||||
|
|
||||||
|
revoked, _info = self._revocation_status(public_key)
|
||||||
|
if revoked:
|
||||||
|
raise ValueError("public key is revoked")
|
||||||
|
|
||||||
|
event = ChainEvent(
|
||||||
|
prev_hash=self.head_hash,
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
timestamp=float(timestamp or time.time()),
|
||||||
|
sequence=sequence,
|
||||||
|
signature=signature,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
protocol_version=protocol_version,
|
||||||
|
)
|
||||||
|
event_dict = event.to_dict()
|
||||||
|
self._write_wal(event_dict)
|
||||||
|
self.events.append(event_dict)
|
||||||
|
self.event_index[event.event_id] = len(self.events) - 1
|
||||||
|
self.head_hash = event.event_id
|
||||||
|
sequence_table[sequence_key] = sequence
|
||||||
|
self._replay_filter.add(event.event_id)
|
||||||
|
self._invalidate_merkle_cache()
|
||||||
|
self._update_counters_for_event(event_dict)
|
||||||
|
self._save()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from services.mesh.mesh_rns import rns_bridge
|
||||||
|
|
||||||
|
rns_bridge.publish_event(event_dict)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
_notify_public_event_append_hooks(event_dict)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Infonet append [gate_message] by {_redact_node(node_id)} seq={sequence} "
|
||||||
|
f"id={event.event_id[:16]}..."
|
||||||
|
)
|
||||||
|
return event_dict
|
||||||
|
|
||||||
def append(
|
def append(
|
||||||
self,
|
self,
|
||||||
event_type: str,
|
event_type: str,
|
||||||
@@ -2077,6 +2412,18 @@ class Infonet:
|
|||||||
if not event_id or not prev_hash:
|
if not event_id or not prev_hash:
|
||||||
rejected.append({"index": idx, "reason": "Missing event_id or prev_hash"})
|
rejected.append({"index": idx, "reason": "Missing event_id or prev_hash"})
|
||||||
continue
|
continue
|
||||||
|
if event_id in self.event_index:
|
||||||
|
duplicates += 1
|
||||||
|
continue
|
||||||
|
if self._replay_filter.seen(event_id):
|
||||||
|
try:
|
||||||
|
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||||
|
|
||||||
|
metrics_inc("ingest_replay_seen")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
duplicates += 1
|
||||||
|
continue
|
||||||
if prev_hash != expected_prev:
|
if prev_hash != expected_prev:
|
||||||
try:
|
try:
|
||||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||||
@@ -2095,25 +2442,14 @@ class Infonet:
|
|||||||
pass
|
pass
|
||||||
rejected.append({"index": idx, "reason": "network_id mismatch"})
|
rejected.append({"index": idx, "reason": "network_id mismatch"})
|
||||||
continue
|
continue
|
||||||
if event_id in self.event_index:
|
|
||||||
duplicates += 1
|
|
||||||
continue
|
|
||||||
if self._replay_filter.seen(event_id):
|
|
||||||
try:
|
|
||||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
|
||||||
|
|
||||||
metrics_inc("ingest_replay_seen")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
duplicates += 1
|
|
||||||
continue
|
|
||||||
if prev_hash != self.head_hash:
|
if prev_hash != self.head_hash:
|
||||||
rejected.append({"index": idx, "reason": "prev_hash does not match head"})
|
rejected.append({"index": idx, "reason": "prev_hash does not match head"})
|
||||||
continue
|
continue
|
||||||
if sequence <= 0:
|
if sequence <= 0:
|
||||||
rejected.append({"index": idx, "reason": "Invalid sequence"})
|
rejected.append({"index": idx, "reason": "Invalid sequence"})
|
||||||
continue
|
continue
|
||||||
last = self.node_sequences.get(node_id, 0)
|
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||||
|
last = sequence_table.get(sequence_key, 0)
|
||||||
if sequence <= last:
|
if sequence <= last:
|
||||||
rejected.append({"index": idx, "reason": "Replay detected"})
|
rejected.append({"index": idx, "reason": "Replay detected"})
|
||||||
continue
|
continue
|
||||||
@@ -2148,7 +2484,18 @@ class Infonet:
|
|||||||
if not ok:
|
if not ok:
|
||||||
rejected.append({"index": idx, "reason": reason})
|
rejected.append({"index": idx, "reason": reason})
|
||||||
continue
|
continue
|
||||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
if event_type == "gate_message":
|
||||||
|
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||||
|
elif event_type == "dm_message":
|
||||||
|
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||||
|
if ok:
|
||||||
|
total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=str(evt.get("node_id", "") or ""))
|
||||||
|
if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
|
||||||
|
ok, reason = False, "DM hashchain sender spool full for recipient"
|
||||||
|
elif total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
|
||||||
|
ok, reason = False, "DM hashchain spool full for recipient"
|
||||||
|
else:
|
||||||
|
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||||
if not ok:
|
if not ok:
|
||||||
rejected.append({"index": idx, "reason": reason})
|
rejected.append({"index": idx, "reason": reason})
|
||||||
continue
|
continue
|
||||||
@@ -2224,7 +2571,7 @@ class Infonet:
|
|||||||
pass
|
pass
|
||||||
rejected.append({"index": idx, "reason": "public key is revoked"})
|
rejected.append({"index": idx, "reason": "public key is revoked"})
|
||||||
continue
|
continue
|
||||||
last_seq = self.node_sequences.get(node_id, 0)
|
last_seq = sequence_table.get(sequence_key, 0)
|
||||||
if sequence <= last_seq:
|
if sequence <= last_seq:
|
||||||
try:
|
try:
|
||||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||||
@@ -2260,18 +2607,30 @@ class Infonet:
|
|||||||
rejected.append({"index": idx, "reason": bind_reason})
|
rejected.append({"index": idx, "reason": bind_reason})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sig_payload = build_signature_payload(
|
if event_type == "gate_message":
|
||||||
event_type=event_type,
|
signature_payloads = _private_gate_signature_payload_variants(
|
||||||
node_id=node_id,
|
str(payload.get("gate", "") or ""),
|
||||||
sequence=sequence,
|
evt,
|
||||||
payload=payload,
|
)
|
||||||
)
|
else:
|
||||||
if not verify_signature(
|
signature_payloads = [payload]
|
||||||
public_key_b64=public_key,
|
signature_ok = False
|
||||||
public_key_algo=public_key_algo,
|
for signature_payload in signature_payloads:
|
||||||
signature_hex=signature,
|
sig_payload = build_signature_payload(
|
||||||
payload=sig_payload,
|
event_type=event_type,
|
||||||
):
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=signature_payload,
|
||||||
|
)
|
||||||
|
if verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
signature_ok = True
|
||||||
|
break
|
||||||
|
if not signature_ok:
|
||||||
try:
|
try:
|
||||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||||
|
|
||||||
@@ -2301,7 +2660,7 @@ class Infonet:
|
|||||||
self.events.append(evt)
|
self.events.append(evt)
|
||||||
self.event_index[event_id] = len(self.events) - 1
|
self.event_index[event_id] = len(self.events) - 1
|
||||||
self.head_hash = event_id
|
self.head_hash = event_id
|
||||||
self.node_sequences[node_id] = sequence
|
sequence_table[sequence_key] = sequence
|
||||||
self._update_counters_for_event(evt)
|
self._update_counters_for_event(evt)
|
||||||
accepted += 1
|
accepted += 1
|
||||||
expected_prev = event_id
|
expected_prev = event_id
|
||||||
@@ -2364,6 +2723,7 @@ class Infonet:
|
|||||||
verify_node_binding,
|
verify_node_binding,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
event_type = evt_dict.get("event_type", "")
|
||||||
node_id = evt_dict.get("node_id", "")
|
node_id = evt_dict.get("node_id", "")
|
||||||
if not parse_public_key_algo(public_key_algo):
|
if not parse_public_key_algo(public_key_algo):
|
||||||
return False, f"Unsupported public_key_algo at index {i}"
|
return False, f"Unsupported public_key_algo at index {i}"
|
||||||
@@ -2374,21 +2734,41 @@ class Infonet:
|
|||||||
return False, f"public key binding conflict at index {i}"
|
return False, f"public key binding conflict at index {i}"
|
||||||
seen_public_keys[public_key] = node_id
|
seen_public_keys[public_key] = node_id
|
||||||
|
|
||||||
normalized = normalize_payload(
|
payload = evt_dict.get("payload", {})
|
||||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
if event_type == "gate_message":
|
||||||
)
|
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||||
sig_payload = build_signature_payload(
|
if not ok:
|
||||||
event_type=evt_dict.get("event_type", ""),
|
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||||
node_id=node_id,
|
signature_payloads = _private_gate_signature_payload_variants(
|
||||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
str(payload.get("gate", "") or ""),
|
||||||
payload=normalized,
|
evt_dict,
|
||||||
)
|
)
|
||||||
if not verify_signature(
|
elif event_type == "dm_message":
|
||||||
public_key_b64=public_key,
|
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||||
public_key_algo=public_key_algo,
|
if not ok:
|
||||||
signature_hex=signature,
|
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||||
payload=sig_payload,
|
signature_payloads = [normalize_payload(event_type, payload)]
|
||||||
):
|
else:
|
||||||
|
signature_payloads = [
|
||||||
|
normalize_payload(event_type, payload)
|
||||||
|
]
|
||||||
|
signature_ok = False
|
||||||
|
for signature_payload in signature_payloads:
|
||||||
|
sig_payload = build_signature_payload(
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||||
|
payload=signature_payload,
|
||||||
|
)
|
||||||
|
if verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
signature_ok = True
|
||||||
|
break
|
||||||
|
if not signature_ok:
|
||||||
return False, f"Invalid signature at index {i}"
|
return False, f"Invalid signature at index {i}"
|
||||||
|
|
||||||
prev = evt_dict["event_id"]
|
prev = evt_dict["event_id"]
|
||||||
@@ -2453,27 +2833,48 @@ class Infonet:
|
|||||||
verify_node_binding,
|
verify_node_binding,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
event_type = evt_dict.get("event_type", "")
|
||||||
node_id = evt_dict.get("node_id", "")
|
node_id = evt_dict.get("node_id", "")
|
||||||
if not parse_public_key_algo(public_key_algo):
|
if not parse_public_key_algo(public_key_algo):
|
||||||
return False, f"Unsupported public_key_algo at index {i}"
|
return False, f"Unsupported public_key_algo at index {i}"
|
||||||
if not verify_node_binding(node_id, public_key):
|
if not verify_node_binding(node_id, public_key):
|
||||||
return False, f"node_id mismatch at index {i}"
|
return False, f"node_id mismatch at index {i}"
|
||||||
|
|
||||||
normalized = normalize_payload(
|
payload = evt_dict.get("payload", {})
|
||||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
if event_type == "gate_message":
|
||||||
)
|
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||||
sig_payload = build_signature_payload(
|
if not ok:
|
||||||
event_type=evt_dict.get("event_type", ""),
|
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||||
node_id=node_id,
|
signature_payloads = _private_gate_signature_payload_variants(
|
||||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
str(payload.get("gate", "") or ""),
|
||||||
payload=normalized,
|
evt_dict,
|
||||||
)
|
)
|
||||||
if not verify_signature(
|
elif event_type == "dm_message":
|
||||||
public_key_b64=public_key,
|
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||||
public_key_algo=public_key_algo,
|
if not ok:
|
||||||
signature_hex=signature,
|
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||||
payload=sig_payload,
|
signature_payloads = [normalize_payload(event_type, payload)]
|
||||||
):
|
else:
|
||||||
|
signature_payloads = [
|
||||||
|
normalize_payload(event_type, payload)
|
||||||
|
]
|
||||||
|
signature_ok = False
|
||||||
|
for signature_payload in signature_payloads:
|
||||||
|
sig_payload = build_signature_payload(
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||||
|
payload=signature_payload,
|
||||||
|
)
|
||||||
|
if verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
signature_ok = True
|
||||||
|
break
|
||||||
|
if not signature_ok:
|
||||||
return False, f"Invalid signature at index {i}"
|
return False, f"Invalid signature at index {i}"
|
||||||
prev = evt_dict["event_id"]
|
prev = evt_dict["event_id"]
|
||||||
|
|
||||||
@@ -2537,7 +2938,14 @@ class Infonet:
|
|||||||
node_id = evt.get("node_id", "")
|
node_id = evt.get("node_id", "")
|
||||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||||
if node_id and sequence:
|
if node_id and sequence:
|
||||||
last_seq[node_id] = max(last_seq.get(node_id, 0), sequence)
|
sequence_key = (
|
||||||
|
f"{node_id}|gate_message"
|
||||||
|
if str(evt.get("event_type", "") or "").strip().lower() == "gate_message"
|
||||||
|
else f"{node_id}|dm_message"
|
||||||
|
if str(evt.get("event_type", "") or "").strip().lower() == "dm_message"
|
||||||
|
else node_id
|
||||||
|
)
|
||||||
|
last_seq[sequence_key] = max(last_seq.get(sequence_key, 0), sequence)
|
||||||
public_key = str(evt.get("public_key", "") or "")
|
public_key = str(evt.get("public_key", "") or "")
|
||||||
if public_key and node_id:
|
if public_key and node_id:
|
||||||
seen_public_keys.setdefault(public_key, node_id)
|
seen_public_keys.setdefault(public_key, node_id)
|
||||||
@@ -2557,8 +2965,21 @@ class Infonet:
|
|||||||
existing_idx = self.event_index.get(event_id)
|
existing_idx = self.event_index.get(event_id)
|
||||||
if existing_idx is not None and existing_idx <= prev_index:
|
if existing_idx is not None and existing_idx <= prev_index:
|
||||||
return False, "duplicate event_id"
|
return False, "duplicate event_id"
|
||||||
payload = normalize_payload(event_type, dict(payload or {}))
|
if event_type == "gate_message":
|
||||||
|
payload = dict(payload or {})
|
||||||
|
elif event_type == "dm_message":
|
||||||
|
payload = normalize_payload(event_type, dict(payload or {}))
|
||||||
|
else:
|
||||||
|
payload = normalize_payload(event_type, dict(payload or {}))
|
||||||
ok, reason = validate_event_payload(event_type, payload)
|
ok, reason = validate_event_payload(event_type, payload)
|
||||||
|
if not ok:
|
||||||
|
return False, reason
|
||||||
|
if event_type == "gate_message":
|
||||||
|
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||||
|
elif event_type == "dm_message":
|
||||||
|
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||||
|
else:
|
||||||
|
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||||
if not ok:
|
if not ok:
|
||||||
return False, reason
|
return False, reason
|
||||||
proto = evt.get("protocol_version") or PROTOCOL_VERSION
|
proto = evt.get("protocol_version") or PROTOCOL_VERSION
|
||||||
@@ -2572,7 +2993,14 @@ class Infonet:
|
|||||||
revoked, _info = self._revocation_status(public_key)
|
revoked, _info = self._revocation_status(public_key)
|
||||||
if revoked and event_type != "key_revoke":
|
if revoked and event_type != "key_revoke":
|
||||||
return False, "public key revoked"
|
return False, "public key revoked"
|
||||||
last = last_seq.get(node_id, 0)
|
sequence_key = (
|
||||||
|
f"{node_id}|gate_message"
|
||||||
|
if event_type == "gate_message"
|
||||||
|
else f"{node_id}|dm_message"
|
||||||
|
if event_type == "dm_message"
|
||||||
|
else node_id
|
||||||
|
)
|
||||||
|
last = last_seq.get(sequence_key, 0)
|
||||||
if sequence <= last:
|
if sequence <= last:
|
||||||
return False, "sequence replay"
|
return False, "sequence replay"
|
||||||
from services.mesh.mesh_crypto import (
|
from services.mesh.mesh_crypto import (
|
||||||
@@ -2590,23 +3018,35 @@ class Infonet:
|
|||||||
if existing and existing != node_id:
|
if existing and existing != node_id:
|
||||||
return False, "public key binding conflict"
|
return False, "public key binding conflict"
|
||||||
seen_public_keys[public_key] = node_id
|
seen_public_keys[public_key] = node_id
|
||||||
sig_payload = build_signature_payload(
|
if event_type == "gate_message":
|
||||||
event_type=event_type,
|
signature_payloads = _private_gate_signature_payload_variants(
|
||||||
node_id=node_id,
|
str(payload.get("gate", "") or ""),
|
||||||
sequence=sequence,
|
evt,
|
||||||
payload=payload,
|
)
|
||||||
)
|
else:
|
||||||
if not verify_signature(
|
signature_payloads = [payload]
|
||||||
public_key_b64=public_key,
|
signature_ok = False
|
||||||
public_key_algo=public_key_algo,
|
for signature_payload in signature_payloads:
|
||||||
signature_hex=signature,
|
sig_payload = build_signature_payload(
|
||||||
payload=sig_payload,
|
event_type=event_type,
|
||||||
):
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=signature_payload,
|
||||||
|
)
|
||||||
|
if verify_signature(
|
||||||
|
public_key_b64=public_key,
|
||||||
|
public_key_algo=public_key_algo,
|
||||||
|
signature_hex=signature,
|
||||||
|
payload=sig_payload,
|
||||||
|
):
|
||||||
|
signature_ok = True
|
||||||
|
break
|
||||||
|
if not signature_ok:
|
||||||
return False, "invalid signature"
|
return False, "invalid signature"
|
||||||
computed = ChainEvent.from_dict(evt).event_id
|
computed = ChainEvent.from_dict(evt).event_id
|
||||||
if computed != event_id:
|
if computed != event_id:
|
||||||
return False, "event_id mismatch"
|
return False, "event_id mismatch"
|
||||||
last_seq[node_id] = sequence
|
last_seq[sequence_key] = sequence
|
||||||
|
|
||||||
# Apply fork
|
# Apply fork
|
||||||
self.events = prefix + ordered
|
self.events = prefix + ordered
|
||||||
|
|||||||
@@ -2,10 +2,64 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from dataclasses import asdict, dataclass
|
from dataclasses import asdict, dataclass
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
from datetime import timezone
|
||||||
|
|
||||||
from services.mesh.mesh_peer_store import PeerRecord
|
from services.mesh.mesh_peer_store import PeerRecord
|
||||||
|
|
||||||
|
|
||||||
|
class PeerSyncRateLimited(Exception):
    """Raised when an upstream peer answers a sync request with HTTP 429.

    Attributes:
        retry_after_s: Seconds the peer asked us to wait, already parsed
            from its ``Retry-After`` header. Never negative; ``0`` means
            no usable header was supplied, in which case the caller is
            expected to fall back to its own backoff.
        status: The HTTP status code that triggered the exception
            (429 unless the caller says otherwise).
    """

    def __init__(self, message: str, retry_after_s: int = 0, status: int = 429):
        super().__init__(message)
        # Falsy inputs (None, 0, "") collapse to 0; negatives are clamped.
        requested_wait = int(retry_after_s or 0)
        self.retry_after_s = requested_wait if requested_wait > 0 else 0
        # A falsy status falls back to the canonical 429.
        self.status = int(status or 429)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_retry_after_header(header_value: str, *, now: float | None = None) -> int:
    """Parse the ``Retry-After`` HTTP header into a wait in seconds.

    Two forms are accepted:

    * Delay-seconds: a run of ASCII digits (e.g. ``Retry-After: 120``)
    * HTTP-date: an absolute time
      (e.g. ``Retry-After: Wed, 21 Oct 2026 07:28:00 GMT``)

    Args:
        header_value: Raw header value; ``None``/empty is tolerated.
        now: Reference time as a Unix timestamp. Defaults to
            ``time.time()``; injectable so callers and tests can pin it.

    Returns:
        The wait in **seconds from now**, clamped to ``[0, 3600]`` so a
        typo'd or hostile peer can't pin us silent for days. Unparseable
        or empty headers return 0 (caller falls back to exponential
        backoff). This function never raises on malformed input.
    """
    value = str(header_value or "").strip()
    if not value:
        return 0
    upper_bound = 3600  # never trust a peer to silence us > 1h

    # Form 1: pure integer seconds. ``str.isdigit()`` alone also accepts
    # non-ASCII digit characters such as "²", which ``int()`` rejects with
    # ValueError, so require ASCII before converting.
    if value.isascii() and value.isdigit():
        digits = value.lstrip("0") or "0"
        # More significant digits than the clamp has means the value is
        # certainly above it. Short-circuiting also avoids int()'s limit on
        # very long digit strings, which would otherwise raise ValueError.
        if len(digits) > len(str(upper_bound)):
            return upper_bound
        return min(int(digits), upper_bound)

    # Form 2: HTTP-date.
    try:
        target = parsedate_to_datetime(value)
        if target is None:
            return 0
        if target.tzinfo is None:
            # A date without a zone is interpreted as UTC.
            target = target.replace(tzinfo=timezone.utc)
        current = float(now if now is not None else time.time())
        delta = int(target.timestamp() - current)
    except (TypeError, ValueError, OverflowError):
        # TypeError/ValueError: unparseable date. OverflowError: a date or
        # reference time outside the representable timestamp range.
        return 0
    return min(max(0, delta), upper_bound)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class SyncWorkerState:
|
class SyncWorkerState:
|
||||||
last_sync_started_at: int = 0
|
last_sync_started_at: int = 0
|
||||||
@@ -72,6 +126,59 @@ def begin_sync(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _failure_backoff_seconds(
|
||||||
|
*,
|
||||||
|
base_backoff_s: int,
|
||||||
|
consecutive_failures: int,
|
||||||
|
retry_after_s: int,
|
||||||
|
cap_s: int = 1800,
|
||||||
|
) -> int:
|
||||||
|
"""Compute the next-attempt delay after a failed sync.
|
||||||
|
|
||||||
|
Two inputs combine:
|
||||||
|
|
||||||
|
* ``retry_after_s`` — when an upstream peer answered HTTP 429
|
||||||
|
with a ``Retry-After`` header, we honor it exactly. Continuing
|
||||||
|
to hammer the upstream every 60s is the bug this fix exists to
|
||||||
|
close: it keeps the upstream's rate-limit bucket full
|
||||||
|
indefinitely and no sync ever lands.
|
||||||
|
|
||||||
|
* Exponential growth on ``consecutive_failures`` — even without an
|
||||||
|
explicit Retry-After, repeated failures should slow us down. The
|
||||||
|
first failure waits ``base`` (preserves pre-fix behavior for
|
||||||
|
one-off blips). Each subsequent failure doubles the wait, capped
|
||||||
|
to ``cap_s`` (default 30 minutes). With base=60 and cap=1800,
|
||||||
|
the schedule is 60s → 120s → 240s → 480s → 960s → 1800s →
|
||||||
|
1800s → … .
|
||||||
|
|
||||||
|
The actual delay is the MAX of the two — whichever asks for more
|
||||||
|
patience wins. ``retry_after_s == 0`` (no header) falls back to
|
||||||
|
pure exponential. An aggressive ``Retry-After`` (say 600s while
|
||||||
|
we're only at 1 failure) wins over the exponential ladder.
|
||||||
|
"""
|
||||||
|
base = max(0, int(base_backoff_s or 0))
|
||||||
|
failures = max(0, int(consecutive_failures or 0))
|
||||||
|
cap = max(0, int(cap_s or 0))
|
||||||
|
retry_after = max(0, int(retry_after_s or 0))
|
||||||
|
# ``cap_s=0`` explicitly disables the exponential ladder entirely
|
||||||
|
# — operators who want the pre-fix "honor Retry-After only" behavior
|
||||||
|
# can set this. The default cap of 1800s is what saturates the
|
||||||
|
# ladder at the 5th-6th failure for base=60.
|
||||||
|
if cap == 0:
|
||||||
|
return retry_after
|
||||||
|
# 2^(failures-1) — so failure #1 = base (preserves the pre-fix
|
||||||
|
# default for transient blips), failure #2 = 2*base, etc. Cap on
|
||||||
|
# the exponent (16) is defense against integer overflow on a
|
||||||
|
# hostile or very large failures counter.
|
||||||
|
if base > 0 and failures > 0:
|
||||||
|
exponent = min(max(0, failures - 1), 16)
|
||||||
|
grown = base * (2 ** exponent)
|
||||||
|
else:
|
||||||
|
grown = 0
|
||||||
|
exponential = min(max(0, grown), cap)
|
||||||
|
return max(exponential, retry_after)
|
||||||
|
|
||||||
|
|
||||||
def finish_sync(
|
def finish_sync(
|
||||||
state: SyncWorkerState,
|
state: SyncWorkerState,
|
||||||
*,
|
*,
|
||||||
@@ -83,7 +190,26 @@ def finish_sync(
|
|||||||
now: float | None = None,
|
now: float | None = None,
|
||||||
interval_s: int = 300,
|
interval_s: int = 300,
|
||||||
failure_backoff_s: int = 60,
|
failure_backoff_s: int = 60,
|
||||||
|
retry_after_s: int = 0,
|
||||||
|
failure_backoff_cap_s: int = 1800,
|
||||||
) -> SyncWorkerState:
|
) -> SyncWorkerState:
|
||||||
|
"""Finalise a sync attempt and compute when the next one should run.
|
||||||
|
|
||||||
|
New args (added for the 429 retry storm fix):
|
||||||
|
|
||||||
|
* ``retry_after_s`` — if the peer responded with HTTP 429 + a
|
||||||
|
``Retry-After`` header, pass that value here. ``finish_sync``
|
||||||
|
will use ``max(exponential, retry_after_s)`` for the delay so
|
||||||
|
we never hammer a peer that asked us to back off.
|
||||||
|
* ``failure_backoff_cap_s`` — upper bound on the exponential
|
||||||
|
ladder. Default 1800 (30 min) — keeps a sync queue from going
|
||||||
|
silent for hours while still cutting the request rate to
|
||||||
|
something the upstream can absorb.
|
||||||
|
|
||||||
|
The pre-fix behavior (constant 60s on every failure) is recoverable
|
||||||
|
by passing ``failure_backoff_cap_s=0`` and ``retry_after_s=0``, but
|
||||||
|
there's no reason to.
|
||||||
|
"""
|
||||||
timestamp = int(now if now is not None else time.time())
|
timestamp = int(now if now is not None else time.time())
|
||||||
if ok:
|
if ok:
|
||||||
return SyncWorkerState(
|
return SyncWorkerState(
|
||||||
@@ -99,17 +225,25 @@ def finish_sync(
|
|||||||
consecutive_failures=0,
|
consecutive_failures=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
next_failures = state.consecutive_failures + 1
|
||||||
|
delay_s = _failure_backoff_seconds(
|
||||||
|
base_backoff_s=failure_backoff_s,
|
||||||
|
consecutive_failures=next_failures,
|
||||||
|
retry_after_s=retry_after_s,
|
||||||
|
cap_s=failure_backoff_cap_s,
|
||||||
|
)
|
||||||
|
|
||||||
return SyncWorkerState(
|
return SyncWorkerState(
|
||||||
last_sync_started_at=state.last_sync_started_at,
|
last_sync_started_at=state.last_sync_started_at,
|
||||||
last_sync_finished_at=timestamp,
|
last_sync_finished_at=timestamp,
|
||||||
last_sync_ok_at=state.last_sync_ok_at,
|
last_sync_ok_at=state.last_sync_ok_at,
|
||||||
next_sync_due_at=timestamp + max(0, int(failure_backoff_s or 0)),
|
next_sync_due_at=timestamp + delay_s,
|
||||||
last_peer_url=peer_url or state.last_peer_url,
|
last_peer_url=peer_url or state.last_peer_url,
|
||||||
last_error=str(error or "").strip(),
|
last_error=str(error or "").strip(),
|
||||||
last_outcome="fork" if fork_detected else "error",
|
last_outcome="fork" if fork_detected else "error",
|
||||||
current_head=current_head or state.current_head,
|
current_head=current_head or state.current_head,
|
||||||
fork_detected=bool(fork_detected),
|
fork_detected=bool(fork_detected),
|
||||||
consecutive_failures=state.consecutive_failures + 1,
|
consecutive_failures=next_failures,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -142,5 +276,6 @@ def should_run_sync(
|
|||||||
) -> bool:
|
) -> bool:
|
||||||
current_time = int(now if now is not None else time.time())
|
current_time = int(now if now is not None else time.time())
|
||||||
if state.last_outcome == "running":
|
if state.last_outcome == "running":
|
||||||
return False
|
started_at = int(state.last_sync_started_at or 0)
|
||||||
|
return started_at <= 0 or current_time - started_at >= 300
|
||||||
return int(state.next_sync_due_at or 0) <= current_time
|
return int(state.next_sync_due_at or 0) <= current_time
|
||||||
|
|||||||
@@ -26,7 +26,11 @@ from enum import Enum
|
|||||||
from typing import Any, Callable, Optional
|
from typing import Any, Callable, Optional
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
from services.mesh.mesh_crypto import (
|
||||||
|
_derive_peer_key,
|
||||||
|
normalize_peer_url,
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
)
|
||||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||||
from services.mesh.mesh_privacy_policy import (
|
from services.mesh.mesh_privacy_policy import (
|
||||||
TRANSPORT_TIER_ORDER as _TIER_RANK,
|
TRANSPORT_TIER_ORDER as _TIER_RANK,
|
||||||
@@ -703,7 +707,6 @@ class InternetTransport(_PeerPushTransportMixin):
|
|||||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
return TransportResult(False, self.NAME, str(exc))
|
return TransportResult(False, self.NAME, str(exc))
|
||||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
|
||||||
|
|
||||||
delivered = 0
|
delivered = 0
|
||||||
last_error = ""
|
last_error = ""
|
||||||
@@ -713,10 +716,13 @@ class InternetTransport(_PeerPushTransportMixin):
|
|||||||
try:
|
try:
|
||||||
normalized_peer_url = normalize_peer_url(peer_url)
|
normalized_peer_url = normalize_peer_url(peer_url)
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
if secret:
|
# Issue #256: per-peer secret takes precedence over the
|
||||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
# global MESH_PEER_PUSH_SECRET. When neither is set the
|
||||||
if not peer_key:
|
# key is empty and we skip the HMAC header entirely so a
|
||||||
raise ValueError("invalid peer URL for HMAC derivation")
|
# bare (unsigned) push still works on test deployments
|
||||||
|
# that have not yet configured any secret at all.
|
||||||
|
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||||
|
if peer_key:
|
||||||
headers["X-Peer-Url"] = normalized_peer_url
|
headers["X-Peer-Url"] = normalized_peer_url
|
||||||
headers["X-Peer-HMAC"] = hmac.new(
|
headers["X-Peer-HMAC"] = hmac.new(
|
||||||
peer_key,
|
peer_key,
|
||||||
@@ -798,7 +804,6 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
|||||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
return TransportResult(False, self.NAME, str(exc))
|
return TransportResult(False, self.NAME, str(exc))
|
||||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
|
||||||
|
|
||||||
delivered = 0
|
delivered = 0
|
||||||
last_error = ""
|
last_error = ""
|
||||||
@@ -808,10 +813,10 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
|||||||
try:
|
try:
|
||||||
normalized_peer_url = normalize_peer_url(peer_url)
|
normalized_peer_url = normalize_peer_url(peer_url)
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
if secret:
|
# Issue #256: per-peer secret takes precedence; see the
|
||||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
# other transport above for the rationale.
|
||||||
if not peer_key:
|
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||||
raise ValueError("invalid peer URL for HMAC derivation")
|
if peer_key:
|
||||||
headers["X-Peer-Url"] = normalized_peer_url
|
headers["X-Peer-Url"] = normalized_peer_url
|
||||||
headers["X-Peer-HMAC"] = hmac.new(
|
headers["X-Peer-HMAC"] = hmac.new(
|
||||||
peer_key,
|
peer_key,
|
||||||
|
|||||||
@@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import math
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Callable
|
from typing import Any, Callable
|
||||||
|
|
||||||
@@ -33,6 +36,58 @@ def _require_fields(payload: dict[str, Any], fields: tuple[str, ...]) -> tuple[b
|
|||||||
return True, "ok"
|
return True, "ok"
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_base64ish(value: Any) -> bytes | None:
|
||||||
|
raw = str(value or "").strip()
|
||||||
|
if not raw or any(ch.isspace() for ch in raw):
|
||||||
|
return None
|
||||||
|
padded = raw + ("=" * (-len(raw) % 4))
|
||||||
|
for altchars in (None, b"-_"):
|
||||||
|
try:
|
||||||
|
return base64.b64decode(padded.encode("ascii"), altchars=altchars, validate=True)
|
||||||
|
except (binascii.Error, UnicodeEncodeError, ValueError):
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _byte_entropy(data: bytes) -> float:
|
||||||
|
if not data:
|
||||||
|
return 0.0
|
||||||
|
counts = [0] * 256
|
||||||
|
for byte in data:
|
||||||
|
counts[byte] += 1
|
||||||
|
total = float(len(data))
|
||||||
|
return -sum((count / total) * math.log2(count / total) for count in counts if count)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_sealed_bytes_field(
    payload: dict[str, Any],
    field: str,
    *,
    min_bytes: int = 8,
    entropy_floor: float = 2.5,
) -> tuple[bool, str]:
    """Check that ``payload[field]`` holds base64 data that looks like sealed bytes.

    Args:
        payload: Event payload to read the field from.
        field: Name of the field to validate; also used in error messages.
        min_bytes: Minimum decoded length.
        entropy_floor: Minimum Shannon entropy (bits per byte) required of
            blobs that are long enough for the heuristics to apply.

    Returns:
        ``(True, "ok")`` when the field passes, otherwise ``(False, reason)``.
    """
    sealed = _decode_base64ish(payload.get(field, ""))
    if sealed is None:
        return False, f"{field} must be base64-encoded sealed bytes"

    size = len(sealed)
    if size < min_bytes:
        return False, f"{field} is too short"

    # Short test vectors and compact envelopes can be low entropy, so the
    # plaintext heuristics below only run once there are at least 32 bytes.
    # NOTE(review): the entropy check is placed under this length gate based
    # on the comment that accompanied it; confirm against the repository.
    if size < 32:
        return True, "ok"

    printable_count = 0
    for byte in sealed:
        if 32 <= byte <= 126 or byte in (9, 10, 13):
            printable_count += 1

    if printable_count / size > 0.9:
        # Mostly printable AND valid UTF-8: this is text, not ciphertext.
        try:
            sealed.decode("utf-8")
        except UnicodeDecodeError:
            pass
        else:
            return False, f"{field} looks like encoded plaintext"

    if _byte_entropy(sealed) < entropy_floor:
        return False, f"{field} entropy is too low for sealed bytes"
    return True, "ok"
|
||||||
|
|
||||||
|
|
||||||
def _validate_message(payload: dict[str, Any]) -> tuple[bool, str]:
|
def _validate_message(payload: dict[str, Any]) -> tuple[bool, str]:
|
||||||
ok, reason = _require_fields(
|
ok, reason = _require_fields(
|
||||||
payload, ("message", "destination", "channel", "priority", "ephemeral")
|
payload, ("message", "destination", "channel", "priority", "ephemeral")
|
||||||
@@ -331,6 +386,7 @@ ACTIVE_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
|||||||
LEGACY_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
LEGACY_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
||||||
{
|
{
|
||||||
"gate_message",
|
"gate_message",
|
||||||
|
"dm_message",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
"""Event types that exist historically on the public chain and must remain
|
"""Event types that exist historically on the public chain and must remain
|
||||||
@@ -425,6 +481,8 @@ def validate_event_payload(event_type: str, payload: dict[str, Any]) -> tuple[bo
|
|||||||
|
|
||||||
|
|
||||||
def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) -> tuple[bool, str]:
|
def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) -> tuple[bool, str]:
|
||||||
|
if event_type == "gate_message":
|
||||||
|
return validate_private_gate_ledger_payload(payload)
|
||||||
if event_type not in PUBLIC_LEDGER_EVENT_TYPES and event_type not in _EXTENSION_VALIDATORS:
|
if event_type not in PUBLIC_LEDGER_EVENT_TYPES and event_type not in _EXTENSION_VALIDATORS:
|
||||||
return False, f"{event_type} is not allowed on the public ledger"
|
return False, f"{event_type} is not allowed on the public ledger"
|
||||||
forbidden = sorted(
|
forbidden = sorted(
|
||||||
@@ -441,6 +499,92 @@ def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) ->
|
|||||||
return True, "ok"
|
return True, "ok"
|
||||||
|
|
||||||
|
|
||||||
|
# Payload keys accepted by validate_private_gate_ledger_payload(). Incoming
# keys are stripped and lowercased before being compared against this set;
# any key outside it causes the payload to be rejected.
_PRIVATE_GATE_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
    {
        "gate",
        "ciphertext",
        "nonce",
        "sender_ref",
        "format",
        "epoch",
        "gate_envelope",
        "envelope_hash",
        "reply_to",
        "transport_lock",
        "signed_context",
    }
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_private_gate_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate ciphertext-only gate events for private Infonet replication.

    Runs the generic ``gate_message`` validation first, then enforces the
    private-ledger restrictions: only allow-listed keys, no plaintext keys,
    a private ``transport_lock`` when one is supplied, and sealed-looking
    ``ciphertext`` and ``nonce`` values.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)``.
    """
    base_ok, base_reason = validate_event_payload("gate_message", payload)
    if not base_ok:
        return base_ok, base_reason

    rejected = sorted(
        name
        for name in payload.keys()
        if str(name or "").strip().lower() not in _PRIVATE_GATE_LEDGER_ALLOWED_FIELDS
    )
    if rejected:
        return False, f"private gate ledger payload contains unsupported fields: {', '.join(rejected)}"

    plaintext_keys = ("message", "_local_plaintext", "_local_reply_to")
    if any(name in payload for name in plaintext_keys):
        return False, "private gate ledger payload must not contain plaintext"

    # An absent / empty transport_lock is tolerated; a present one must be private.
    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock and lock not in {"private", "private_strong", "rns", "onion"}:
        return False, "gate messages require private transport_lock"

    for sealed_field in ("ciphertext", "nonce"):
        sealed_ok, sealed_reason = _validate_sealed_bytes_field(payload, sealed_field)
        if not sealed_ok:
            return sealed_ok, sealed_reason
    return True, "ok"
|
||||||
|
|
||||||
|
|
||||||
|
# Payload keys accepted by validate_private_dm_ledger_payload(). Incoming
# keys are stripped and lowercased before being compared against this set;
# any key outside it causes the payload to be rejected.
_PRIVATE_DM_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
    {
        "recipient_id",
        "delivery_class",
        "recipient_token",
        "ciphertext",
        "msg_id",
        "timestamp",
        "format",
        "session_welcome",
        "sender_seal",
        "relay_salt",
        "transport_lock",
        "signed_context",
    }
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_private_dm_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate ciphertext-only DM dead-drop events for private Infonet replication.

    Runs the generic ``dm_message`` validation first, then enforces the
    private-ledger restrictions: only allow-listed keys, no plaintext keys,
    ``transport_lock`` exactly ``private_strong``, and a non-empty,
    sealed-looking ``ciphertext``.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)``.
    """
    base_ok, base_reason = validate_event_payload("dm_message", payload)
    if not base_ok:
        return base_ok, base_reason

    rejected = sorted(
        name
        for name in payload.keys()
        if str(name or "").strip().lower() not in _PRIVATE_DM_LEDGER_ALLOWED_FIELDS
    )
    if rejected:
        return False, f"private DM ledger payload contains unsupported fields: {', '.join(rejected)}"

    plaintext_keys = ("message", "plaintext", "_local_plaintext")
    if any(name in payload for name in plaintext_keys):
        return False, "private DM ledger payload must not contain plaintext"

    # Unlike gate events, the lock is mandatory here and only one value is accepted.
    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock != "private_strong":
        return False, "DM hashchain spool requires private_strong transport_lock"

    ciphertext_text = str(payload.get("ciphertext", "") or "").strip()
    if not ciphertext_text:
        return False, "ciphertext cannot be empty"

    sealed_ok, sealed_reason = _validate_sealed_bytes_field(payload, "ciphertext")
    if not sealed_ok:
        return sealed_ok, sealed_reason
    return True, "ok"
|
||||||
|
|
||||||
|
|
||||||
def validate_protocol_fields(protocol_version: str, network_id: str) -> tuple[bool, str]:
|
def validate_protocol_fields(protocol_version: str, network_id: str) -> tuple[bool, str]:
|
||||||
if protocol_version != PROTOCOL_VERSION:
|
if protocol_version != PROTOCOL_VERSION:
|
||||||
return False, "Unsupported protocol_version"
|
return False, "Unsupported protocol_version"
|
||||||
|
|||||||
@@ -91,13 +91,15 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
|||||||
return {"ok": False, "detail": "lookup token required"}
|
return {"ok": False, "detail": "lookup token required"}
|
||||||
try:
|
try:
|
||||||
from services.config import get_settings
|
from services.config import get_settings
|
||||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
from services.mesh.mesh_crypto import (
|
||||||
|
normalize_peer_url,
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
)
|
||||||
from services.mesh.mesh_router import configured_relay_peer_urls
|
from services.mesh.mesh_router import configured_relay_peer_urls
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
secret = str(getattr(settings, "MESH_PEER_PUSH_SECRET", "") or "").strip()
|
# Issue #256: secret check moved per-peer below. We still bail out
|
||||||
if not secret:
|
# cleanly when there are no peers configured at all.
|
||||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
|
||||||
peers = configured_relay_peer_urls()
|
peers = configured_relay_peer_urls()
|
||||||
if not peers:
|
if not peers:
|
||||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||||
@@ -121,7 +123,8 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
|||||||
or os.environ.get("SB_TEST_NODE_URL", "").strip()
|
or os.environ.get("SB_TEST_NODE_URL", "").strip()
|
||||||
or normalized_peer_url
|
or normalized_peer_url
|
||||||
)
|
)
|
||||||
peer_key = _derive_peer_key(secret, sender_peer_url)
|
# Issue #256: prefer per-peer secret keyed by the sender URL.
|
||||||
|
peer_key = resolve_peer_key_for_url(sender_peer_url)
|
||||||
if not peer_key:
|
if not peer_key:
|
||||||
continue
|
continue
|
||||||
headers = {
|
headers = {
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ import subprocess
|
|||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
|
import uuid
|
||||||
import requests
|
import requests
|
||||||
|
from pathlib import Path
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
@@ -20,14 +22,211 @@ _session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
|
|||||||
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||||
|
|
||||||
|
|
||||||
# Default outbound User-Agent. Generic by design — does NOT include any
|
# ---------------------------------------------------------------------------
|
||||||
# personal contact info or a fork-specific repo URL. Operators who run a
|
# Per-operator outbound identification
|
||||||
# public-facing relay and want to identify themselves to upstreams (e.g.
|
# ---------------------------------------------------------------------------
|
||||||
# for Nominatim / weather.gov usage-policy compliance) can override this
|
#
|
||||||
# via the SHADOWBROKER_USER_AGENT env var.
|
# Issues #289 / #290 / #291 and the retrofit of PR #284 (#218 / #219 / #220):
|
||||||
|
# every third-party API the backend calls used to identify itself with a
|
||||||
|
# single "Shadowbroker" aggregate User-Agent. From the upstream's
|
||||||
|
# perspective, that meant every Shadowbroker install in the world looked
|
||||||
|
# like one giant entity hammering them. If one install misbehaved, the
|
||||||
|
# upstream's only recourse was to block "Shadowbroker" as a whole — which
|
||||||
|
# would take out every other install too.
|
||||||
|
#
|
||||||
|
# Fix: give each install a stable pseudonymous handle and include it in
|
||||||
|
# the User-Agent. Now an upstream can rate-limit or block the offending
|
||||||
|
# operator without affecting anyone else.
|
||||||
|
#
|
||||||
|
# The handle:
|
||||||
|
#
|
||||||
|
# - Is auto-generated on first call if no `OPERATOR_HANDLE` is configured
|
||||||
|
# (looks like "operator-7f3a92" — 6 hex chars from uuid4()).
|
||||||
|
# - Is persisted to ``backend/data/operator_handle.json`` so it survives
|
||||||
|
# restarts. Under Docker compose that file lives in the volume mount
|
||||||
|
# alongside `carrier_cache.json` and the other persistent state.
|
||||||
|
# - Can be overridden by the operator via the `OPERATOR_HANDLE` setting
|
||||||
|
# (env var or settings UI). Operators with their own GitHub handle,
|
||||||
|
# organization name, etc. can use that for traceability.
|
||||||
|
# - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is
|
||||||
|
# strictly for public third-party API attribution.
|
||||||
|
|
||||||
|
# Version token placed after "Shadowbroker/" in outbound User-Agent strings.
_SHADOWBROKER_VERSION = "0.9"
# JSON file holding the persisted handle: ``data/operator_handle.json`` two
# directory levels up from this module.
_OPERATOR_HANDLE_FILE = (
    Path(__file__).parent.parent / "data" / "operator_handle.json"
)
# In-process cache of the resolved handle; the empty string means "not resolved yet".
_OPERATOR_HANDLE_CACHE: str = ""
# Held while get_operator_handle() and the test reset helper touch the cache.
_OPERATOR_HANDLE_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_operator_handle() -> str:
|
||||||
|
"""Produce a stable pseudonymous handle for first-launch installs.
|
||||||
|
|
||||||
|
Format: ``operator-7f3a92`` (6 hex chars from a fresh uuid4()).
|
||||||
|
Distinct per install. Carries no real-world identity by default —
|
||||||
|
operators who want one can override via ``OPERATOR_HANDLE``.
|
||||||
|
|
||||||
|
Note: the prefix is deliberately neutral. Earlier drafts used
|
||||||
|
``shadow-`` which, while accurate to the project name, looks
|
||||||
|
exactly like the kind of pattern a third-party abuse-detection
|
||||||
|
system would auto-block as suspicious. ``operator-`` describes
|
||||||
|
what the value actually is and doesn't pattern-match malware.
|
||||||
|
"""
|
||||||
|
return f"operator-{uuid.uuid4().hex[:6]}"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_persisted_operator_handle() -> str:
    """Return the previously-saved handle from disk, or ``""`` if none.

    Reads ``_OPERATOR_HANDLE_FILE`` when it exists and returns its stripped
    ``"handle"`` value. Every failure mode yields the empty string so the
    caller generates a fresh handle rather than failing the request:

    - the file is missing or unreadable (``OSError``),
    - the content is not valid UTF-8 / JSON (``ValueError`` and subclasses),
    - the JSON is valid but is not an object (list, string, number, ...),
      which previously raised an uncaught ``AttributeError`` on ``.get``.
    """
    try:
        if not _OPERATOR_HANDLE_FILE.exists():
            return ""
        data = json.loads(_OPERATOR_HANDLE_FILE.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, ValueError):
        return ""
    # json.loads can return any JSON type; only an object can carry "handle".
    if not isinstance(data, dict):
        return ""
    return str(data.get("handle", "") or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_operator_handle(handle: str) -> None:
    """Save the auto-generated handle so later restarts reuse it.

    The JSON document is written to a sibling ``.tmp`` file and then moved
    over ``_OPERATOR_HANDLE_FILE`` with ``os.replace``. An ``OSError`` at any
    step is logged at debug level and otherwise ignored: the caller keeps
    the in-memory handle, and a different one may be generated after the
    next process restart.
    """
    record = {
        "handle": handle,
        "_meta": {
            "purpose": "Per-install operator handle for outbound third-party API attribution.",
            "see": "backend/services/network_utils.py:outbound_user_agent",
        },
    }
    try:
        _OPERATOR_HANDLE_FILE.parent.mkdir(parents=True, exist_ok=True)
        scratch = _OPERATOR_HANDLE_FILE.with_suffix(_OPERATOR_HANDLE_FILE.suffix + ".tmp")
        serialized = json.dumps(record, indent=2)
        scratch.write_text(serialized, encoding="utf-8")
        os.replace(scratch, _OPERATOR_HANDLE_FILE)
    except OSError as exc:
        logger.debug("Could not persist operator_handle (continuing in-memory): %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def get_operator_handle() -> str:
    """Return the stable per-install operator handle.

    Sources are consulted in this order, all under ``_OPERATOR_HANDLE_LOCK``:

    1. A non-empty ``OPERATOR_HANDLE`` setting, normalized and returned
       without touching the cache.
    2. The value cached by an earlier call in this process.
    3. The value stored in ``operator_handle.json`` by a previous run
       (normalized, then cached).
    4. A newly generated pseudonymous handle, which is persisted and cached.

    Normalization (see ``_normalize_handle``) strips whitespace, lowercases,
    and replaces characters other than alphanumerics, ``-`` and ``_`` with
    ``-``.
    """
    global _OPERATOR_HANDLE_CACHE
    with _OPERATOR_HANDLE_LOCK:
        # Configured override always wins; any failure reading settings is
        # treated as "no override".
        try:
            from services.config import get_settings

            override = str(getattr(get_settings(), "OPERATOR_HANDLE", "") or "").strip()
        except Exception:
            override = ""
        if override:
            return _normalize_handle(override)

        if not _OPERATOR_HANDLE_CACHE:
            # Cache miss: prefer the handle from a previous run, else mint one.
            saved = _load_persisted_operator_handle()
            if saved:
                _OPERATOR_HANDLE_CACHE = _normalize_handle(saved)
            else:
                minted = _generate_operator_handle()
                _persist_operator_handle(minted)
                _OPERATOR_HANDLE_CACHE = minted
        return _OPERATOR_HANDLE_CACHE
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_handle(raw: str) -> str:
|
||||||
|
"""Strip whitespace, lowercase, replace unsafe characters with dashes."""
|
||||||
|
safe = "".join(
|
||||||
|
ch if (ch.isalnum() or ch in "-_") else "-"
|
||||||
|
for ch in raw.strip().lower()
|
||||||
|
)
|
||||||
|
# Collapse runs of dashes and trim to a reasonable length so an
|
||||||
|
# operator can't make our outbound logs unreadable.
|
||||||
|
while "--" in safe:
|
||||||
|
safe = safe.replace("--", "-")
|
||||||
|
safe = safe.strip("-")
|
||||||
|
return safe[:48] if safe else "anonymous"
|
||||||
|
|
||||||
|
|
||||||
|
_CONTACT_URL = "https://github.com/BigBodyCobain/Shadowbroker/issues"
|
||||||
|
|
||||||
|
|
||||||
|
def outbound_user_agent(purpose: str = "") -> str:
    """Build the User-Agent for an outbound third-party HTTP request.

    Example result::

        Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia;
        +https://github.com/BigBodyCobain/Shadowbroker/issues)

    Args:
        purpose: Optional label for the feature making the call
            (``wikipedia``, ``openmhz``, ``nominatim``, ...). When non-empty
            it is passed through ``_normalize_handle`` and included as a
            ``purpose:`` field; when empty the field is omitted.

    Returns:
        ``Shadowbroker/<version> (<fields>)`` where the fields, joined by
        ``"; "``, are the operator handle, the optional purpose, and the
        contact URL prefixed with ``+``.

    Building the string in one place keeps the contact URL, the version,
    and the per-install handle consistent across every call site.
    """
    fields = [f"operator: {get_operator_handle()}"]
    if purpose:
        fields.append(f"purpose: {_normalize_handle(purpose)}")
    fields.append(f"+{_CONTACT_URL}")
    comment = "; ".join(fields)
    return f"Shadowbroker/{_SHADOWBROKER_VERSION} ({comment})"
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_operator_handle_cache_for_tests() -> None:
    """Test-only: clear the in-memory handle cache while holding the handle lock.

    After this call, the next ``get_operator_handle()`` that is not answered
    by a configured ``OPERATOR_HANDLE`` override re-reads the persisted file
    (or generates a fresh handle) instead of returning the cached value.
    """
    global _OPERATOR_HANDLE_CACHE
    with _OPERATOR_HANDLE_LOCK:
        _OPERATOR_HANDLE_CACHE = ""
|
||||||
|
|
||||||
|
|
||||||
|
# Default outbound User-Agent. Retained for backwards compatibility with
|
||||||
|
# call sites that haven't been migrated to ``outbound_user_agent()`` yet.
|
||||||
|
# Operators who want full per-install attribution should set the
|
||||||
|
# ``OPERATOR_HANDLE`` setting and migrate call sites incrementally.
|
||||||
|
#
|
||||||
|
# Operators who run a public-facing relay can also override the whole UA
|
||||||
|
# string via the ``SHADOWBROKER_USER_AGENT`` env var. That override
|
||||||
|
# completely bypasses the per-operator helper; only use it if you know
|
||||||
|
# what you're doing.
|
||||||
DEFAULT_USER_AGENT = os.environ.get(
|
DEFAULT_USER_AGENT = os.environ.get(
|
||||||
"SHADOWBROKER_USER_AGENT",
|
"SHADOWBROKER_USER_AGENT",
|
||||||
"ShadowBroker-OSINT/0.9",
|
f"Shadowbroker/{_SHADOWBROKER_VERSION}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||||
|
|||||||
@@ -2,14 +2,34 @@ import requests
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import logging
|
import logging
|
||||||
from cachetools import cached, TTLCache
|
from cachetools import cached, TTLCache
|
||||||
import cloudscraper
|
|
||||||
import reverse_geocoder as rg
|
import reverse_geocoder as rg
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_OPENMHZ_AUDIO_HOSTS = {"media.openmhz.com", "media2.openmhz.com", "media3.openmhz.com"}
|
_OPENMHZ_AUDIO_HOSTS = {"media.openmhz.com", "media2.openmhz.com", "media3.openmhz.com"}
|
||||||
|
|
||||||
|
|
||||||
|
# Round 7a / Issues #289, #290, #291 (tg12 audit):
|
||||||
|
# We previously sent a spoofed Chrome User-Agent and (for OpenMHz) used
|
||||||
|
# cloudscraper to bypass anti-bot challenges. Both are dishonest and ToS-
|
||||||
|
# unfriendly. We now send the per-install Shadowbroker UA — the upstream
|
||||||
|
# can identify us, rate-limit us per install, and contact us if needed.
|
||||||
|
#
|
||||||
|
# If the upstream actively blocks our honest UA, the feature degrades
|
||||||
|
# gracefully (returns an empty list / cached results) rather than
|
||||||
|
# escalating to deception.
|
||||||
|
|
||||||
|
|
||||||
|
def _broadcastify_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``broadcastify``."""
    return outbound_user_agent("broadcastify")
|
||||||
|
|
||||||
|
|
||||||
|
def _openmhz_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``openmhz``."""
    return outbound_user_agent("openmhz")
|
||||||
|
|
||||||
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
|
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
|
||||||
radio_cache = TTLCache(maxsize=1, ttl=300)
|
radio_cache = TTLCache(maxsize=1, ttl=300)
|
||||||
|
|
||||||
@@ -22,8 +42,12 @@ def get_top_broadcastify_feeds():
|
|||||||
"""
|
"""
|
||||||
logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")
|
logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
# Issue #289 (tg12) + Round 7a: identify ourselves honestly as a
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
# per-install Shadowbroker scraper. Broadcastify can rate-limit
|
||||||
|
# us per install or block us; either way we stop pretending to be
|
||||||
|
# a browser. If they block, the panel degrades gracefully.
|
||||||
|
"User-Agent": _broadcastify_user_agent(),
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,21 +113,32 @@ openmhz_systems_cache = TTLCache(maxsize=1, ttl=3600)
|
|||||||
|
|
||||||
@cached(openmhz_systems_cache)
|
@cached(openmhz_systems_cache)
|
||||||
def get_openmhz_systems():
|
def get_openmhz_systems():
|
||||||
"""Fetches the full directory of OpenMHZ systems."""
|
"""Fetches the full directory of OpenMHZ systems.
|
||||||
logger.info("Scraping OpenMHZ Systems (Cache Miss)")
|
|
||||||
scraper = cloudscraper.create_scraper(
|
|
||||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
Issue #290 (tg12) + Round 7a: replaced cloudscraper-based Chrome
|
||||||
|
impersonation with an honest per-install Shadowbroker User-Agent.
|
||||||
|
If OpenMHz's Cloudflare layer blocks honest traffic, we accept
|
||||||
|
that degradation (return empty list) rather than spoof a browser.
|
||||||
|
"""
|
||||||
|
logger.info("Fetching OpenMHZ Systems (Cache Miss)")
|
||||||
try:
|
try:
|
||||||
res = scraper.get("https://api.openmhz.com/systems", timeout=15)
|
res = requests.get(
|
||||||
|
"https://api.openmhz.com/systems",
|
||||||
|
timeout=15,
|
||||||
|
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||||
|
)
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
data = res.json()
|
data = res.json()
|
||||||
# Return list of systems
|
|
||||||
return data.get("systems", []) if isinstance(data, dict) else []
|
return data.get("systems", []) if isinstance(data, dict) else []
|
||||||
|
if res.status_code in (403, 503):
|
||||||
|
logger.warning(
|
||||||
|
"OpenMHZ returned %s for systems directory — Cloudflare may "
|
||||||
|
"be blocking our honest UA. Feature degrades to empty result.",
|
||||||
|
res.status_code,
|
||||||
|
)
|
||||||
return []
|
return []
|
||||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||||
logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
|
logger.error(f"OpenMHZ Systems Fetch Exception: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
@@ -113,21 +148,25 @@ openmhz_calls_cache = TTLCache(maxsize=100, ttl=20)
|
|||||||
|
|
||||||
@cached(openmhz_calls_cache)
|
@cached(openmhz_calls_cache)
|
||||||
def get_recent_openmhz_calls(sys_name: str):
|
def get_recent_openmhz_calls(sys_name: str):
|
||||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata')."""
|
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata').
|
||||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
|
||||||
scraper = cloudscraper.create_scraper(
|
|
||||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
Issue #290 (tg12) + Round 7a: same honest-UA model as
|
||||||
|
``get_openmhz_systems``.
|
||||||
|
"""
|
||||||
|
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||||
try:
|
try:
|
||||||
url = f"https://api.openmhz.com/{sys_name}/calls"
|
url = f"https://api.openmhz.com/{sys_name}/calls"
|
||||||
res = scraper.get(url, timeout=15)
|
res = requests.get(
|
||||||
|
url,
|
||||||
|
timeout=15,
|
||||||
|
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||||
|
)
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
data = res.json()
|
data = res.json()
|
||||||
return data.get("calls", []) if isinstance(data, dict) else []
|
return data.get("calls", []) if isinstance(data, dict) else []
|
||||||
return []
|
return []
|
||||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||||
logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
|
logger.error(f"OpenMHZ Calls Fetch Exception ({sys_name}): {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
@@ -163,9 +202,11 @@ def openmhz_audio_response(target_url: str):
|
|||||||
timeout=(5, 20),
|
timeout=(5, 20),
|
||||||
allow_redirects=False,
|
allow_redirects=False,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": "Mozilla/5.0",
|
# Issue #291 (tg12) + Round 7a: drop spoofed Mozilla
|
||||||
|
# UA and the fake first-party Referer. Identify as
|
||||||
|
# the per-install Shadowbroker proxy honestly.
|
||||||
|
"User-Agent": _openmhz_user_agent(),
|
||||||
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
||||||
"Referer": "https://openmhz.com/",
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if upstream.is_redirect or upstream.status_code in (301, 302, 303, 307, 308):
|
if upstream.is_redirect or upstream.status_code in (301, 302, 303, 307, 308):
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import concurrent.futures
|
|||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
import requests as _requests
|
import requests as _requests
|
||||||
from cachetools import TTLCache
|
from cachetools import TTLCache
|
||||||
from services.network_utils import fetch_with_curl
|
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -15,6 +15,31 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
|||||||
# Nominatim requires max 1 req/sec — track last call time
|
# Nominatim requires max 1 req/sec — track last call time
|
||||||
_nominatim_last_call = 0.0
|
_nominatim_last_call = 0.0
|
||||||
|
|
||||||
|
# Issues #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||||
|
# clients to identify themselves with a stable User-Agent that includes
|
||||||
|
# a contact path.
|
||||||
|
#
|
||||||
|
# Round 7a: the original fix in PR #284 used a single project-wide
|
||||||
|
# identifier, which from Wikimedia's perspective made every Shadowbroker
|
||||||
|
# install in the world look like one giant scraper. If one install
|
||||||
|
# misbehaved, their only recourse was to block "Shadowbroker" as a
|
||||||
|
# whole. We now build the headers from ``outbound_user_agent('wikimedia')``
|
||||||
|
# which embeds the per-install operator handle (auto-generated or
|
||||||
|
# operator-chosen), so Wikimedia can rate-limit / contact the specific
|
||||||
|
# install instead of the project.
|
||||||
|
|
||||||
|
|
||||||
|
def _wikimedia_request_headers() -> dict[str, str]:
    """Return request headers identifying this install to Wikimedia services.

    Both ``User-Agent`` and ``Api-User-Agent`` carry the same value, built by
    ``outbound_user_agent("wikimedia")``.
    """
    ua = outbound_user_agent("wikimedia")
    return {
        "User-Agent": ua,
        # Browser-JS-style header that Wikimedia's policy explicitly
        # accepts on top of (or instead of) User-Agent. We send both so
        # whichever the upstream prefers, the per-operator handle is
        # always available.
        "Api-User-Agent": ua,
    }
|
||||||
|
|
||||||
|
|
||||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||||
@@ -45,9 +70,7 @@ def _reverse_geocode(lat: float, lng: float) -> dict:
|
|||||||
f"https://nominatim.openstreetmap.org/reverse?"
|
f"https://nominatim.openstreetmap.org/reverse?"
|
||||||
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
|
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
|
||||||
)
|
)
|
||||||
headers = {
|
headers = {"User-Agent": outbound_user_agent("nominatim")}
|
||||||
"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard; contact@shadowbroker.app)"
|
|
||||||
}
|
|
||||||
|
|
||||||
for attempt in range(2):
|
for attempt in range(2):
|
||||||
# Enforce Nominatim's 1 req/sec policy
|
# Enforce Nominatim's 1 req/sec policy
|
||||||
@@ -121,7 +144,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
|||||||
"""
|
"""
|
||||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||||
try:
|
try:
|
||||||
res = fetch_with_curl(url, timeout=6)
|
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||||
|
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||||
|
# sends the project default, and we also add the Wikimedia-
|
||||||
|
# specific Api-User-Agent that the policy specifically asks
|
||||||
|
# for, since this request originates from a backend service
|
||||||
|
# that proxies on behalf of (potentially many) browser users.
|
||||||
|
res = fetch_with_curl(url, timeout=6, headers=_wikimedia_request_headers())
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
results = res.json().get("results", {}).get("bindings", [])
|
results = res.json().get("results", {}).get("bindings", [])
|
||||||
if results:
|
if results:
|
||||||
@@ -147,7 +176,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
|||||||
slug = quote(name.replace(" ", "_"))
|
slug = quote(name.replace(" ", "_"))
|
||||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||||
try:
|
try:
|
||||||
res = fetch_with_curl(url, timeout=5)
|
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||||
|
# their UA policy; see _fetch_wikidata_leader above.
|
||||||
|
res = fetch_with_curl(url, timeout=5, headers=_wikimedia_request_headers())
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
data = res.json()
|
data = res.json()
|
||||||
if data.get("type") != "disambiguation":
|
if data.get("type") != "disambiguation":
|
||||||
|
|||||||
@@ -34,6 +34,11 @@ from services.sar.sar_config import (
|
|||||||
copernicus_token,
|
copernicus_token,
|
||||||
earthdata_token,
|
earthdata_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _sar_user_agent() -> str:
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("sar-products")
|
||||||
from services.sar.sar_normalize import (
|
from services.sar.sar_normalize import (
|
||||||
SarAnomaly,
|
SarAnomaly,
|
||||||
evidence_hash_for_payload,
|
evidence_hash_for_payload,
|
||||||
@@ -442,7 +447,7 @@ def _fetch_unosat_packages() -> list[dict[str, Any]]:
|
|||||||
# HDX CKAN returns 406 without explicit Accept + a browser-ish UA.
|
# HDX CKAN returns 406 without explicit Accept + a browser-ish UA.
|
||||||
hdx_headers = {
|
hdx_headers = {
|
||||||
"Accept": "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker-SAR/1.0)",
|
"User-Agent": _sar_user_agent(),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
resp = fetch_with_curl(url, timeout=20, headers=hdx_headers)
|
resp = fetch_with_curl(url, timeout=20, headers=hdx_headers)
|
||||||
|
|||||||
@@ -11,12 +11,21 @@ import requests
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from cachetools import TTLCache
|
from cachetools import TTLCache
|
||||||
|
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
|
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
|
||||||
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
|
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
|
||||||
|
|
||||||
|
|
||||||
|
def _planetary_user_agent() -> str:
|
||||||
|
# Round 7a: per-install handle so Microsoft Planetary Computer can
|
||||||
|
# attribute requests to the specific operator rather than treating
|
||||||
|
# the whole Shadowbroker user base as one entity.
|
||||||
|
return outbound_user_agent("sentinel2-planetary-computer")
|
||||||
|
|
||||||
|
|
||||||
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||||
lat_span = 0.18
|
lat_span = 0.18
|
||||||
lng_span = 0.24
|
lng_span = 0.24
|
||||||
@@ -64,7 +73,7 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
|||||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||||
json=search_payload,
|
json=search_payload,
|
||||||
timeout=8,
|
timeout=8,
|
||||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard)"},
|
headers={"User-Agent": _planetary_user_agent()},
|
||||||
)
|
)
|
||||||
search_res.raise_for_status()
|
search_res.raise_for_status()
|
||||||
data = search_res.json()
|
data = search_res.json()
|
||||||
|
|||||||
@@ -20,7 +20,11 @@ from cachetools import TTLCache
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_SHODAN_BASE = "https://api.shodan.io"
|
_SHODAN_BASE = "https://api.shodan.io"
|
||||||
_USER_AGENT = "ShadowBroker/0.9.79 local Shodan connector"
|
# Round 7a: per-install attribution. Shodan already has the operator API
|
||||||
|
# key for billing, but the UA still identifies the install.
|
||||||
|
def _shodan_user_agent():
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("shodan")
|
||||||
_REQUEST_TIMEOUT = 15
|
_REQUEST_TIMEOUT = 15
|
||||||
_MIN_INTERVAL_SECONDS = 1.05 # Shodan docs say API plans are rate limited to ~1 req/sec.
|
_MIN_INTERVAL_SECONDS = 1.05 # Shodan docs say API plans are rate limited to ~1 req/sec.
|
||||||
_DEFAULT_SEARCH_PAGES = 1
|
_DEFAULT_SEARCH_PAGES = 1
|
||||||
@@ -179,7 +183,7 @@ def _request(path: str, *, params: dict[str, Any], cache: TTLCache[str, dict[str
|
|||||||
f"{_SHODAN_BASE}{path}",
|
f"{_SHODAN_BASE}{path}",
|
||||||
params=payload,
|
params=payload,
|
||||||
timeout=_REQUEST_TIMEOUT,
|
timeout=_REQUEST_TIMEOUT,
|
||||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
headers={"User-Agent": _shodan_user_agent(), "Accept": "application/json"},
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
_last_request_at = time.monotonic()
|
_last_request_at = time.monotonic()
|
||||||
|
|||||||
@@ -19,6 +19,13 @@ from pathlib import Path
|
|||||||
import requests
|
import requests
|
||||||
from sgp4.api import Satrec, WGS72, jday
|
from sgp4.api import Satrec, WGS72, jday
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _tinygs_user_agent(purpose: str) -> str:
|
||||||
|
"""Round 7a: per-install handle for CelesTrak / TinyGS attribution."""
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent(f"tinygs-{purpose}")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -113,7 +120,7 @@ def _fetch_celestrak_tles() -> list[dict]:
|
|||||||
params={"GROUP": group, "FORMAT": "json"},
|
params={"GROUP": group, "FORMAT": "json"},
|
||||||
timeout=20,
|
timeout=20,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": "ShadowBroker-OSINT/1.0 (CelesTrak fair-use)",
|
"User-Agent": _tinygs_user_agent("celestrak"),
|
||||||
"Accept": "application/json",
|
"Accept": "application/json",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -259,7 +266,7 @@ def _fetch_tinygs_telemetry() -> None:
|
|||||||
timeout=15,
|
timeout=15,
|
||||||
headers={
|
headers={
|
||||||
"Accept": "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
"User-Agent": _tinygs_user_agent("tinygs"),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|||||||
@@ -173,6 +173,94 @@ def _verify_tor_bundle(archive_path: Path, bundle_url: str) -> tuple[bool, str]:
|
|||||||
return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)"
|
return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_tor_bundle_safely(archive_path: Path, install_dir: Path) -> bool:
|
||||||
|
"""Extract a Tor Expert Bundle tar.gz safely.
|
||||||
|
|
||||||
|
Issue #251: the previous extractor checked tarinfo.name against path
|
||||||
|
traversal but never inspected tarinfo.linkname for symlink/hardlink
|
||||||
|
members. Python 3.11's tarfile honors symlinks during extractall(),
|
||||||
|
so a malicious archive could ship a member like::
|
||||||
|
|
||||||
|
name = "innocent.txt" # passes the path check
|
||||||
|
type = SYMTYPE
|
||||||
|
linkname = "C:\\Windows\\System32\\config\\system"
|
||||||
|
|
||||||
|
and extractall() would then create that symlink. Subsequent reads
|
||||||
|
of innocent.txt dereference to a sensitive system file; subsequent
|
||||||
|
writes corrupt one. Tor bundles never legitimately contain symlinks
|
||||||
|
or hardlinks, so we refuse all link members categorically rather
|
||||||
|
than trying to validate linkname targets (which has its own pitfalls
|
||||||
|
around relative path resolution).
|
||||||
|
|
||||||
|
Also refuses non-regular-non-directory members (devices, FIFOs,
|
||||||
|
character/block special files) for completeness — none of those
|
||||||
|
belong in a Tor Expert Bundle and accepting them is a category of
|
||||||
|
bug we don't need to debug later.
|
||||||
|
|
||||||
|
Returns True on success, False on rejection (and logs the reason).
|
||||||
|
The caller is responsible for cleaning up the archive file.
|
||||||
|
"""
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
install_resolved = install_dir.resolve()
|
||||||
|
|
||||||
|
try:
|
||||||
|
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||||
|
for member in tar.getmembers():
|
||||||
|
# Reject anything that isn't a regular file or directory.
|
||||||
|
# Symlinks (SYMTYPE) and hardlinks (LNKTYPE) are the
|
||||||
|
# path-traversal vectors; the others (CHRTYPE, BLKTYPE,
|
||||||
|
# FIFOTYPE) have no legitimate use in a Tor
|
||||||
|
# Expert Bundle.
|
||||||
|
if member.issym() or member.islnk():
|
||||||
|
logger.error(
|
||||||
|
"Tor bundle extraction blocked: link member %s -> %s "
|
||||||
|
"(symlinks/hardlinks are not allowed in Tor bundles; "
|
||||||
|
"this archive is malformed or hostile)",
|
||||||
|
member.name,
|
||||||
|
member.linkname,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
if not (member.isfile() or member.isdir()):
|
||||||
|
logger.error(
|
||||||
|
"Tor bundle extraction blocked: unexpected member type "
|
||||||
|
"for %s (only regular files and directories are allowed)",
|
||||||
|
member.name,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Path traversal check (preserves the original guard).
|
||||||
|
try:
|
||||||
|
member_path = (install_dir / member.name).resolve()
|
||||||
|
except OSError as exc:
|
||||||
|
logger.error(
|
||||||
|
"Tor bundle extraction blocked: cannot resolve member "
|
||||||
|
"path %s: %s",
|
||||||
|
member.name,
|
||||||
|
exc,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
member_path.relative_to(install_resolved)
|
||||||
|
except ValueError:
|
||||||
|
logger.error(
|
||||||
|
"Tor bundle extraction blocked: path traversal on %s "
|
||||||
|
"(resolves to %s, outside install dir %s)",
|
||||||
|
member.name,
|
||||||
|
member_path,
|
||||||
|
install_resolved,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# All members validated — extract.
|
||||||
|
tar.extractall(path=str(install_dir))
|
||||||
|
except tarfile.TarError as exc:
|
||||||
|
logger.error("Tor bundle extraction failed: malformed tar (%s)", exc)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _auto_install_tor() -> str | None:
|
def _auto_install_tor() -> str | None:
|
||||||
"""Install or download Tor when it is safe to do so."""
|
"""Install or download Tor when it is safe to do so."""
|
||||||
if os.name != "nt":
|
if os.name != "nt":
|
||||||
@@ -203,14 +291,9 @@ def _auto_install_tor() -> str | None:
|
|||||||
logger.info("Download complete, extracting...")
|
logger.info("Download complete, extracting...")
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
if not _extract_tor_bundle_safely(archive_path, TOR_INSTALL_DIR):
|
||||||
for member in tar.getmembers():
|
archive_path.unlink(missing_ok=True)
|
||||||
member_path = (TOR_INSTALL_DIR / member.name).resolve()
|
return None
|
||||||
if not str(member_path).startswith(str(TOR_INSTALL_DIR.resolve())):
|
|
||||||
logger.error("Tar path traversal blocked: %s", member.name)
|
|
||||||
archive_path.unlink(missing_ok=True)
|
|
||||||
return None
|
|
||||||
tar.extractall(path=str(TOR_INSTALL_DIR))
|
|
||||||
|
|
||||||
archive_path.unlink(missing_ok=True)
|
archive_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ from cachetools import TTLCache
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_FINNHUB_BASE = "https://finnhub.io/api/v1"
|
_FINNHUB_BASE = "https://finnhub.io/api/v1"
|
||||||
_USER_AGENT = "ShadowBroker/0.9.79 Finnhub connector"
|
def _finnhub_user_agent():
|
||||||
|
from services.network_utils import outbound_user_agent
|
||||||
|
return outbound_user_agent("finnhub")
|
||||||
_REQUEST_TIMEOUT = 12
|
_REQUEST_TIMEOUT = 12
|
||||||
_MIN_INTERVAL_SECONDS = 0.35 # Stay well under 60 calls/min
|
_MIN_INTERVAL_SECONDS = 0.35 # Stay well under 60 calls/min
|
||||||
|
|
||||||
@@ -89,7 +91,7 @@ def _request(path: str, params: dict[str, Any] | None = None) -> Any:
|
|||||||
f"{_FINNHUB_BASE}{path}",
|
f"{_FINNHUB_BASE}{path}",
|
||||||
params=payload,
|
params=payload,
|
||||||
timeout=_REQUEST_TIMEOUT,
|
timeout=_REQUEST_TIMEOUT,
|
||||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
headers={"User-Agent": _finnhub_user_agent(), "Accept": "application/json"},
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
_last_request_at = time.monotonic()
|
_last_request_at = time.monotonic()
|
||||||
|
|||||||
+232
-14
@@ -6,9 +6,11 @@ Public API:
|
|||||||
schedule_restart(project_root) (spawn detached start script, then exit)
|
schedule_restart(project_root) (spawn detached start script, then exit)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
@@ -29,6 +31,19 @@ DOCKER_UPDATE_COMMANDS = (
|
|||||||
"docker compose pull && docker compose up -d"
|
"docker compose pull && docker compose up -d"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Issue #231: baked-in release digests. Loaded lazily, used as a fallback
|
||||||
|
# verification source when the release's SHA256SUMS.txt asset can't be
|
||||||
|
# fetched (e.g. transient network failure during update).
|
||||||
|
_RELEASE_DIGESTS_FILE = (
|
||||||
|
Path(__file__).resolve().parent.parent / "data" / "release_digests.json"
|
||||||
|
)
|
||||||
|
# Pattern for the maintainer's signed source-archive release asset. This
|
||||||
|
# is the file we prefer over the auto-generated ``zipball_url`` because
|
||||||
|
# the maintainer's build process publishes it with a matching entry in
|
||||||
|
# SHA256SUMS.txt — the zipball does not have a signed digest.
|
||||||
|
_SOURCE_ASSET_PATTERN = re.compile(r"^ShadowBroker_v\d", re.IGNORECASE)
|
||||||
|
_SHA256SUMS_ASSET_NAME = "SHA256SUMS.txt"
|
||||||
|
|
||||||
|
|
||||||
def _is_docker() -> bool:
|
def _is_docker() -> bool:
|
||||||
"""Detect if we're running inside a Docker container."""
|
"""Detect if we're running inside a Docker container."""
|
||||||
@@ -40,7 +55,6 @@ def _is_docker() -> bool:
|
|||||||
except (FileNotFoundError, PermissionError):
|
except (FileNotFoundError, PermissionError):
|
||||||
pass
|
pass
|
||||||
return os.environ.get("container") == "docker"
|
return os.environ.get("container") == "docker"
|
||||||
_EXPECTED_SHA256 = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
|
|
||||||
_ALLOWED_UPDATE_HOSTS = {
|
_ALLOWED_UPDATE_HOSTS = {
|
||||||
"api.github.com",
|
"api.github.com",
|
||||||
"codeload.github.com",
|
"codeload.github.com",
|
||||||
@@ -119,7 +133,16 @@ def _validate_update_url(url: str, *, allow_release_page: bool = False) -> str:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
def _download_release(temp_dir: str) -> tuple:
|
def _download_release(temp_dir: str) -> tuple:
|
||||||
"""Fetch latest release info and download the source zip archive.
|
"""Fetch latest release info and download the source zip archive.
|
||||||
Returns (zip_path, version_tag, download_url, release_url).
|
|
||||||
|
Issue #231: prefer the maintainer's signed release asset (matching
|
||||||
|
``ShadowBroker_v*.zip``) over the auto-generated ``zipball_url``,
|
||||||
|
because the maintainer's release process publishes a matching entry
|
||||||
|
in SHA256SUMS.txt for the named asset but NOT for the zipball.
|
||||||
|
|
||||||
|
Returns (zip_path, version_tag, download_url, release_url, asset_name,
|
||||||
|
sha256sums_url) — the last two are empty strings when the release
|
||||||
|
doesn't publish a signed asset, falling back to the legacy zipball
|
||||||
|
path.
|
||||||
"""
|
"""
|
||||||
logger.info("Fetching latest release info from GitHub...")
|
logger.info("Fetching latest release info from GitHub...")
|
||||||
_validate_update_url(GITHUB_RELEASES_URL)
|
_validate_update_url(GITHUB_RELEASES_URL)
|
||||||
@@ -131,9 +154,42 @@ def _download_release(temp_dir: str) -> tuple:
|
|||||||
tag = release.get("tag_name", "unknown")
|
tag = release.get("tag_name", "unknown")
|
||||||
release_url = str(release.get("html_url") or GITHUB_RELEASES_PAGE_URL).strip()
|
release_url = str(release.get("html_url") or GITHUB_RELEASES_PAGE_URL).strip()
|
||||||
_validate_update_url(release_url, allow_release_page=True)
|
_validate_update_url(release_url, allow_release_page=True)
|
||||||
zip_url = str(release.get("zipball_url") or "").strip()
|
|
||||||
if not zip_url:
|
# Prefer the maintainer-signed release asset. Fall back to the
|
||||||
raise RuntimeError("Latest release is missing a source archive URL")
|
# auto-generated zipball if the release doesn't publish one.
|
||||||
|
assets = release.get("assets") or []
|
||||||
|
asset_name = ""
|
||||||
|
asset_url = ""
|
||||||
|
sha256sums_url = ""
|
||||||
|
for a in assets:
|
||||||
|
name = str(a.get("name") or "").strip()
|
||||||
|
download = str(a.get("browser_download_url") or "").strip()
|
||||||
|
if not name or not download:
|
||||||
|
continue
|
||||||
|
if _SOURCE_ASSET_PATTERN.match(name) and name.lower().endswith(".zip"):
|
||||||
|
asset_name = name
|
||||||
|
asset_url = download
|
||||||
|
elif name == _SHA256SUMS_ASSET_NAME:
|
||||||
|
sha256sums_url = download
|
||||||
|
|
||||||
|
if asset_url:
|
||||||
|
zip_url = asset_url
|
||||||
|
logger.info(
|
||||||
|
"Using signed release asset %s (sha256sums=%s)",
|
||||||
|
asset_name,
|
||||||
|
"yes" if sha256sums_url else "no",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
zip_url = str(release.get("zipball_url") or "").strip()
|
||||||
|
if not zip_url:
|
||||||
|
raise RuntimeError("Latest release is missing a source archive URL")
|
||||||
|
logger.warning(
|
||||||
|
"Release does not publish a signed ShadowBroker_v*.zip asset — "
|
||||||
|
"falling back to auto-generated zipball_url. Integrity will be "
|
||||||
|
"verified against the baked-in release_digests.json (if present) "
|
||||||
|
"or HTTPS-only otherwise."
|
||||||
|
)
|
||||||
|
|
||||||
_validate_update_url(zip_url)
|
_validate_update_url(zip_url)
|
||||||
|
|
||||||
logger.info(f"Downloading {zip_url} ...")
|
logger.info(f"Downloading {zip_url} ...")
|
||||||
@@ -150,19 +206,174 @@ def _download_release(temp_dir: str) -> tuple:
|
|||||||
|
|
||||||
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
|
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
|
||||||
logger.info(f"Downloaded {size_mb:.1f} MB — ZIP validated OK")
|
logger.info(f"Downloaded {size_mb:.1f} MB — ZIP validated OK")
|
||||||
return zip_path, tag, zip_url, release_url
|
return zip_path, tag, zip_url, release_url, asset_name, sha256sums_url
|
||||||
|
|
||||||
|
|
||||||
def _validate_zip_hash(zip_path: str) -> None:
|
def _compute_sha256(zip_path: str) -> str:
|
||||||
if not _EXPECTED_SHA256:
|
"""Return the hex SHA-256 of the file at ``zip_path`` (lowercase)."""
|
||||||
return
|
|
||||||
h = hashlib.sha256()
|
h = hashlib.sha256()
|
||||||
with open(zip_path, "rb") as f:
|
with open(zip_path, "rb") as f:
|
||||||
for chunk in iter(lambda: f.read(1024 * 128), b""):
|
for chunk in iter(lambda: f.read(1024 * 128), b""):
|
||||||
h.update(chunk)
|
h.update(chunk)
|
||||||
digest = h.hexdigest().lower()
|
return h.hexdigest().lower()
|
||||||
if digest != _EXPECTED_SHA256:
|
|
||||||
raise RuntimeError("Update SHA-256 mismatch")
|
|
||||||
|
def _load_baked_in_release_digests() -> dict:
|
||||||
|
"""Return the ``release_digests.json`` mapping, or an empty dict.
|
||||||
|
|
||||||
|
Schema (issue #231):
|
||||||
|
{
|
||||||
|
"<release_tag>": {
|
||||||
|
"<asset_filename>": "<sha256_hex>",
|
||||||
|
...
|
||||||
|
},
|
||||||
|
...
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
raw = _RELEASE_DIGESTS_FILE.read_text(encoding="utf-8")
|
||||||
|
parsed = json.loads(raw)
|
||||||
|
except (OSError, ValueError) as exc:
|
||||||
|
logger.debug("Release digest file unreadable: %s", exc)
|
||||||
|
return {}
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
return {}
|
||||||
|
cleaned: dict[str, dict[str, str]] = {}
|
||||||
|
for k, v in parsed.items():
|
||||||
|
if not isinstance(k, str) or k.startswith("_"):
|
||||||
|
continue
|
||||||
|
if isinstance(v, dict):
|
||||||
|
entries = {
|
||||||
|
fname: digest.strip().lower()
|
||||||
|
for fname, digest in v.items()
|
||||||
|
if isinstance(fname, str) and isinstance(digest, str)
|
||||||
|
}
|
||||||
|
if entries:
|
||||||
|
cleaned[k] = entries
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_sha256sums(sha256sums_url: str) -> dict[str, str]:
|
||||||
|
"""Download a SHA256SUMS.txt and return {filename: digest_hex_lower}.
|
||||||
|
|
||||||
|
Standard ``sha256sum`` format: ``<digest> <filename>`` per line. The
|
||||||
|
leading ``*`` binary-mode marker (e.g. ``<digest> *<filename>``) is
|
||||||
|
handled.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
_validate_update_url(sha256sums_url)
|
||||||
|
except RuntimeError as exc:
|
||||||
|
logger.warning("SHA256SUMS URL rejected: %s", exc)
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
resp = requests.get(sha256sums_url, timeout=15)
|
||||||
|
resp.raise_for_status()
|
||||||
|
except requests.RequestException as exc:
|
||||||
|
logger.info("SHA256SUMS fetch failed: %s", exc)
|
||||||
|
return {}
|
||||||
|
out: dict[str, str] = {}
|
||||||
|
for line in resp.text.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
# Tolerant split: handle both `<digest> <name>` and `<digest> *<name>`.
|
||||||
|
parts = line.split(None, 1)
|
||||||
|
if len(parts) != 2:
|
||||||
|
continue
|
||||||
|
digest, fname = parts
|
||||||
|
fname = fname.lstrip("*").strip()
|
||||||
|
digest = digest.strip().lower()
|
||||||
|
if len(digest) == 64 and all(c in "0123456789abcdef" for c in digest) and fname:
|
||||||
|
out[fname] = digest
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_zip_hash(
|
||||||
|
zip_path: str,
|
||||||
|
*,
|
||||||
|
asset_name: str = "",
|
||||||
|
sha256sums_url: str = "",
|
||||||
|
release_tag: str = "",
|
||||||
|
) -> str:
|
||||||
|
"""Verify the downloaded archive against trusted digest sources.
|
||||||
|
|
||||||
|
Issue #231: previously this returned silently when ``MESH_UPDATE_SHA256``
|
||||||
|
was unset, which made the auto-updater a supply-chain RCE vector on any
|
||||||
|
compromise of the GitHub release pipeline. The chain now is:
|
||||||
|
|
||||||
|
1. ``MESH_UPDATE_SHA256`` env var (operator override — preserved for
|
||||||
|
power-users who want to pin an exact digest manually)
|
||||||
|
2. ``SHA256SUMS.txt`` release asset (primary — the maintainer's
|
||||||
|
release process already publishes this)
|
||||||
|
3. Baked-in ``backend/data/release_digests.json`` (second line of
|
||||||
|
defense for releases that lack the SHA256SUMS asset, or when the
|
||||||
|
asset can't be fetched at update time)
|
||||||
|
4. HTTPS-only fallback with a loud warning (preserves the auto-update
|
||||||
|
flow during transient outages — but never silently)
|
||||||
|
|
||||||
|
A mismatch from a source that DID respond is fatal: the update is
|
||||||
|
refused and the existing install keeps running. Only the "no source
|
||||||
|
reachable at all" case falls back to HTTPS-only.
|
||||||
|
|
||||||
|
Returns a short human-readable description of which source verified
|
||||||
|
the archive (used in the update-success message).
|
||||||
|
"""
|
||||||
|
actual = _compute_sha256(zip_path)
|
||||||
|
|
||||||
|
# Source 1: explicit operator override.
|
||||||
|
override = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
|
||||||
|
if override:
|
||||||
|
if actual == override:
|
||||||
|
return f"verified via MESH_UPDATE_SHA256 ({actual[:16]}...)"
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Update SHA-256 mismatch vs MESH_UPDATE_SHA256: archive={actual[:16]}..., "
|
||||||
|
f"expected={override[:16]}..."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Source 2: SHA256SUMS.txt asset from the release.
|
||||||
|
sums_map: dict[str, str] = {}
|
||||||
|
if sha256sums_url and asset_name:
|
||||||
|
sums_map = _fetch_sha256sums(sha256sums_url)
|
||||||
|
|
||||||
|
sums_expected = sums_map.get(asset_name) if asset_name else None
|
||||||
|
if sums_expected:
|
||||||
|
if actual == sums_expected:
|
||||||
|
return f"verified via release SHA256SUMS.txt ({actual[:16]}...)"
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Update SHA-256 mismatch vs release SHA256SUMS.txt: "
|
||||||
|
f"archive={actual[:16]}..., expected={sums_expected[:16]}..."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Source 3: baked-in digest list.
|
||||||
|
baked = _load_baked_in_release_digests()
|
||||||
|
baked_expected = ""
|
||||||
|
if release_tag and asset_name:
|
||||||
|
baked_expected = baked.get(release_tag, {}).get(asset_name, "")
|
||||||
|
if baked_expected:
|
||||||
|
if actual == baked_expected:
|
||||||
|
return f"verified via baked-in digest list ({actual[:16]}...)"
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Update SHA-256 mismatch vs baked-in digest list: "
|
||||||
|
f"archive={actual[:16]}..., expected={baked_expected[:16]}..."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Source 4: HTTPS-only fallback. We keep onboarding/auto-update working
|
||||||
|
# during transient outages (no SHA256SUMS reachable AND no baked-in
|
||||||
|
# entry for this release), but surface the degraded posture loudly so
|
||||||
|
# the operator can see it in logs and the maintainer can populate the
|
||||||
|
# digest list on the next release bump.
|
||||||
|
logger.warning(
|
||||||
|
"Update integrity check fell back to HTTPS-only trust "
|
||||||
|
"(no SHA256SUMS.txt response and no baked-in digest for "
|
||||||
|
"release=%s asset=%s). The archive SHA-256 is %s. Once the "
|
||||||
|
"release ships a SHA256SUMS.txt asset OR backend/data/"
|
||||||
|
"release_digests.json is updated with this release, the secure "
|
||||||
|
"path will activate automatically.",
|
||||||
|
release_tag or "unknown",
|
||||||
|
asset_name or "unknown",
|
||||||
|
actual,
|
||||||
|
)
|
||||||
|
return f"https-only (no digest source reachable, archive={actual[:16]}...)"
|
||||||
|
|
||||||
|
|
||||||
def _is_source_checkout(project_root: str) -> bool:
|
def _is_source_checkout(project_root: str) -> bool:
|
||||||
@@ -334,7 +545,7 @@ def perform_update(project_root: str) -> dict:
|
|||||||
temp_dir = tempfile.mkdtemp(prefix="sb_update_")
|
temp_dir = tempfile.mkdtemp(prefix="sb_update_")
|
||||||
manual_url = GITHUB_RELEASES_PAGE_URL
|
manual_url = GITHUB_RELEASES_PAGE_URL
|
||||||
try:
|
try:
|
||||||
zip_path, version, url, release_url = _download_release(temp_dir)
|
zip_path, version, url, release_url, asset_name, sha256sums_url = _download_release(temp_dir)
|
||||||
manual_url = release_url or manual_url
|
manual_url = release_url or manual_url
|
||||||
|
|
||||||
if in_docker:
|
if in_docker:
|
||||||
@@ -366,7 +577,13 @@ def perform_update(project_root: str) -> dict:
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
_validate_zip_hash(zip_path)
|
verification_note = _validate_zip_hash(
|
||||||
|
zip_path,
|
||||||
|
asset_name=asset_name,
|
||||||
|
sha256sums_url=sha256sums_url,
|
||||||
|
release_tag=version,
|
||||||
|
)
|
||||||
|
logger.info("Update archive %s", verification_note)
|
||||||
backup_path = _backup_current(project_root, temp_dir)
|
backup_path = _backup_current(project_root, temp_dir)
|
||||||
copied = _extract_and_copy(zip_path, project_root, temp_dir)
|
copied = _extract_and_copy(zip_path, project_root, temp_dir)
|
||||||
|
|
||||||
@@ -378,6 +595,7 @@ def perform_update(project_root: str) -> dict:
|
|||||||
"manual_url": manual_url,
|
"manual_url": manual_url,
|
||||||
"release_url": release_url,
|
"release_url": release_url,
|
||||||
"download_url": url,
|
"download_url": url,
|
||||||
|
"integrity": verification_note,
|
||||||
"message": f"Updated to {version} — {copied} files replaced. Restarting...",
|
"message": f"Updated to {version} — {copied} files replaced. Restarting...",
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -0,0 +1,677 @@
|
|||||||
|
{
|
||||||
|
"_meta": {
|
||||||
|
"issue": "#239",
|
||||||
|
"note": "Snapshot of currently-tolerated duplicate route registrations. The test in test_no_new_duplicate_routes.py fails if any NEW (method, path) duplicate appears outside this list. Removing entries (by actually deduping) is fine and the test stays green. New entries here require explicit, reviewed updates.",
|
||||||
|
"generated_with": "python -c 'see tests/test_no_new_duplicate_routes.py'"
|
||||||
|
},
|
||||||
|
"duplicates": {
|
||||||
|
"DELETE /api/mesh/peers": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_operator",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"DELETE /api/wormhole/dm/contact/{peer_id}": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"DELETE /api/wormhole/dm/invite/handles/{handle}": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/cctv/media": [
|
||||||
|
"main",
|
||||||
|
"routers.cctv"
|
||||||
|
],
|
||||||
|
"GET /api/debug-latest": [
|
||||||
|
"main",
|
||||||
|
"routers.health"
|
||||||
|
],
|
||||||
|
"GET /api/geocode/reverse": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/geocode/search": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/health": [
|
||||||
|
"main",
|
||||||
|
"routers.health"
|
||||||
|
],
|
||||||
|
"GET /api/live-data": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"GET /api/live-data/fast": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"GET /api/live-data/slow": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/channels": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/dm/count": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/dm/poll": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/dm/prekey-bundle": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/dm/pubkey": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/dm/witness": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/gate/list": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/gate/{gate_id}": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/gate/{gate_id}/messages": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/event/{event_id}": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/events": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/locator": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/merkle": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/messages": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/messages/wait": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/node/{node_id}": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/status": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/infonet/sync": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/log": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/messages": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/metrics": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/consensus": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/markets": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/markets/more": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/predictions": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/profile": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/search": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/oracle/stakes/{message_id}": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/peers": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_operator",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/reputation": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/reputation/all": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/reputation/batch": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/rns/status": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/signals": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/status": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"GET /api/mesh/trust/vouches": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"GET /api/oracle/region-intel": [
|
||||||
|
"main",
|
||||||
|
"routers.sigint"
|
||||||
|
],
|
||||||
|
"GET /api/radio/nearest": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/radio/nearest-list": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/radio/openmhz/audio": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/radio/openmhz/calls/{sys_name}": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/radio/openmhz/systems": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/radio/top": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/refresh": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"GET /api/region-dossier": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/route/{callsign}": [
|
||||||
|
"main",
|
||||||
|
"routers.radio"
|
||||||
|
],
|
||||||
|
"GET /api/sentinel2/search": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/settings/api-keys": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"GET /api/settings/api-keys/meta": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"GET /api/settings/news-feeds": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"GET /api/settings/node": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"GET /api/settings/privacy-profile": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/settings/wormhole": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/settings/wormhole-status": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/sigint/nearest-sdr": [
|
||||||
|
"main",
|
||||||
|
"routers.sigint"
|
||||||
|
],
|
||||||
|
"GET /api/thermal/verify": [
|
||||||
|
"main",
|
||||||
|
"routers.sigint"
|
||||||
|
],
|
||||||
|
"GET /api/tools/shodan/status": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/tools/uw/status": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/dm/contacts": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/dm/identity": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/dm/invite": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/dm/invite/handles": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/gate/{gate_id}/identity": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/gate/{gate_id}/key": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/gate/{gate_id}/personas": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/health": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/identity": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"GET /api/wormhole/status": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"PATCH /api/mesh/peers": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_operator",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/ais/feed": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"POST /api/layers": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/block": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/count": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/poll": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/register": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/send": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/dm/witness": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/gate/create": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/gate/peer-pull": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_peer_sync"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/gate/peer-push": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_peer_sync"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/gate/{gate_id}/message": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/identity/revoke": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/identity/rotate": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/infonet/ingest": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/infonet/peer-push": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_peer_sync"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/infonet/sync": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/oracle/predict": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/oracle/resolve": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/oracle/resolve-stakes": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/oracle/stake": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_oracle"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/peers": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_operator",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/report": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/send": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/trust/vouch": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_dm"
|
||||||
|
],
|
||||||
|
"POST /api/mesh/vote": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"POST /api/sentinel/tile": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/sentinel/token": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/settings/news-feeds/reset": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"POST /api/sigint/transmit": [
|
||||||
|
"main",
|
||||||
|
"routers.sigint"
|
||||||
|
],
|
||||||
|
"POST /api/system/update": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"POST /api/tools/shodan/count": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/tools/shodan/host": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/tools/shodan/search": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/tools/uw/congress": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/tools/uw/darkpool": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/tools/uw/flow": [
|
||||||
|
"main",
|
||||||
|
"routers.tools"
|
||||||
|
],
|
||||||
|
"POST /api/viewport": [
|
||||||
|
"main",
|
||||||
|
"routers.data"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/connect": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/disconnect": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/bootstrap-decrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/bootstrap-encrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/build-seal": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/compose": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/dead-drop-token": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/dead-drop-tokens": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/decrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/encrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/invite/import": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/open-seal": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/pairwise-alias": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/pairwise-alias/rotate": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/prekey/register": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/register-key": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/reset": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/sas": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/dm/sender-token": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/enter": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/key/grant": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/key/rotate": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/leave": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/message/compose": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/message/decrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/message/post": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/message/post-encrypted": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/message/sign-encrypted": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/messages/decrypt": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/persona/activate": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/persona/clear": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/persona/create": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/persona/retire": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/proof": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/gate/state/export": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/identity/bootstrap": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/join": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/leave": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/restart": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/sign": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"POST /api/wormhole/sign-raw": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"PUT /api/mesh/gate/{gate_id}/envelope_policy": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"PUT /api/mesh/gate/{gate_id}/legacy_envelope_fallback": [
|
||||||
|
"main",
|
||||||
|
"routers.mesh_public"
|
||||||
|
],
|
||||||
|
"PUT /api/settings/news-feeds": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"PUT /api/settings/node": [
|
||||||
|
"main",
|
||||||
|
"routers.admin"
|
||||||
|
],
|
||||||
|
"PUT /api/settings/privacy-profile": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"PUT /api/settings/wormhole": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
],
|
||||||
|
"PUT /api/wormhole/dm/contact": [
|
||||||
|
"main",
|
||||||
|
"routers.wormhole"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -87,11 +87,28 @@ def _run_gate_release_once(monkeypatch, *, transport_tier="private_strong"):
|
|||||||
def _patch_for_successful_post(monkeypatch, module):
|
def _patch_for_successful_post(monkeypatch, module):
|
||||||
"""Apply standard monkeypatches so a gate_message post succeeds."""
|
"""Apply standard monkeypatches so a gate_message post succeeds."""
|
||||||
import main
|
import main
|
||||||
|
from services.mesh import mesh_hashchain
|
||||||
|
|
||||||
_setup_gate_outbox(monkeypatch)
|
_setup_gate_outbox(monkeypatch)
|
||||||
monkeypatch.setattr(main, "_verify_gate_message_signed_write", lambda **kw: (True, "ok", kw.get("reply_to", "")))
|
monkeypatch.setattr(main, "_verify_gate_message_signed_write", lambda **kw: (True, "ok", kw.get("reply_to", "")))
|
||||||
monkeypatch.setattr(main, "_resolve_envelope_policy", lambda _gate_id: "envelope_disabled")
|
monkeypatch.setattr(main, "_resolve_envelope_policy", lambda _gate_id: "envelope_disabled")
|
||||||
|
|
||||||
|
def _fake_private_gate_append(**kwargs):
|
||||||
|
return {
|
||||||
|
"event_id": f"ledger-ev-{kwargs.get('sequence', 0)}",
|
||||||
|
"event_type": "gate_message",
|
||||||
|
"node_id": kwargs["node_id"],
|
||||||
|
"payload": dict(kwargs["payload"]),
|
||||||
|
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||||
|
"sequence": kwargs["sequence"],
|
||||||
|
"signature": kwargs["signature"],
|
||||||
|
"public_key": kwargs["public_key"],
|
||||||
|
"public_key_algo": kwargs["public_key_algo"],
|
||||||
|
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||||
|
}
|
||||||
|
|
||||||
|
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", _fake_private_gate_append)
|
||||||
|
|
||||||
from services.mesh.mesh_reputation import gate_manager, reputation_ledger
|
from services.mesh.mesh_reputation import gate_manager, reputation_ledger
|
||||||
|
|
||||||
monkeypatch.setattr(gate_manager, "can_enter", lambda *a, **kw: (True, "ok"))
|
monkeypatch.setattr(gate_manager, "can_enter", lambda *a, **kw: (True, "ok"))
|
||||||
@@ -255,19 +272,30 @@ def test_gate_post_preserves_gate_envelope_in_store(monkeypatch):
|
|||||||
|
|
||||||
|
|
||||||
def test_gate_post_advances_sequence(monkeypatch):
|
def test_gate_post_advances_sequence(monkeypatch):
|
||||||
"""validate_and_set_sequence must be called to advance the counter."""
|
"""append_private_gate_message must receive the gate sequence."""
|
||||||
import main
|
import main
|
||||||
from services.mesh import mesh_hashchain
|
from services.mesh import mesh_hashchain
|
||||||
|
|
||||||
_patch_for_successful_post(monkeypatch, main)
|
_patch_for_successful_post(monkeypatch, main)
|
||||||
|
|
||||||
seq_calls = []
|
append_calls = []
|
||||||
|
|
||||||
def track_seq(node_id, seq, *, domain=""):
|
def track_private_append(**kwargs):
|
||||||
seq_calls.append((node_id, seq, domain))
|
append_calls.append(kwargs)
|
||||||
return (True, "ok")
|
return {
|
||||||
|
"event_id": "ev-seq",
|
||||||
|
"event_type": "gate_message",
|
||||||
|
"node_id": kwargs["node_id"],
|
||||||
|
"payload": dict(kwargs["payload"]),
|
||||||
|
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||||
|
"sequence": kwargs["sequence"],
|
||||||
|
"signature": kwargs["signature"],
|
||||||
|
"public_key": kwargs["public_key"],
|
||||||
|
"public_key_algo": kwargs["public_key_algo"],
|
||||||
|
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||||
|
}
|
||||||
|
|
||||||
monkeypatch.setattr(mesh_hashchain.infonet, "validate_and_set_sequence", track_seq)
|
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", track_private_append)
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
mesh_hashchain.gate_store,
|
mesh_hashchain.gate_store,
|
||||||
"append",
|
"append",
|
||||||
@@ -280,8 +308,9 @@ def test_gate_post_advances_sequence(monkeypatch):
|
|||||||
|
|
||||||
assert result["ok"] is True
|
assert result["ok"] is True
|
||||||
assert result["queued"] is True
|
assert result["queued"] is True
|
||||||
assert len(seq_calls) == 1
|
assert len(append_calls) == 1
|
||||||
assert seq_calls[0] == ("!sb_test1234567890", 42, "gate_message")
|
assert append_calls[0]["node_id"] == "!sb_test1234567890"
|
||||||
|
assert append_calls[0]["sequence"] == 42
|
||||||
|
|
||||||
|
|
||||||
def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
||||||
@@ -290,11 +319,11 @@ def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
|||||||
from services.mesh import mesh_hashchain
|
from services.mesh import mesh_hashchain
|
||||||
|
|
||||||
_patch_for_successful_post(monkeypatch, main)
|
_patch_for_successful_post(monkeypatch, main)
|
||||||
monkeypatch.setattr(
|
|
||||||
mesh_hashchain.infonet,
|
def reject_private_append(**_kwargs):
|
||||||
"validate_and_set_sequence",
|
raise ValueError("Replay detected: sequence 1 <= last 1")
|
||||||
lambda node_id, seq: (False, "Replay detected: sequence 1 <= last 1"),
|
|
||||||
)
|
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", reject_private_append)
|
||||||
|
|
||||||
gate_id = "infonet"
|
gate_id = "infonet"
|
||||||
body = _build_gate_message_body(gate_id, sequence=1)
|
body = _build_gate_message_body(gate_id, sequence=1)
|
||||||
|
|||||||
@@ -0,0 +1,261 @@
|
|||||||
|
"""Infonet sync respects upstream HTTP 429 + applies exponential backoff.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Before this fix, ``finish_sync`` used a constant 60s ``failure_backoff_s``
|
||||||
|
regardless of how many consecutive failures preceded. When an upstream
|
||||||
|
peer (e.g. the seed onion) returned HTTP 429 "Too Many Requests", the
|
||||||
|
sync worker would:
|
||||||
|
|
||||||
|
1. Receive 429
|
||||||
|
2. Stringify the status into a generic ``ValueError``
|
||||||
|
3. Call ``finish_sync(error=str(exc))`` -- losing the status code
|
||||||
|
4. Schedule next attempt for ``now + 60s``
|
||||||
|
5. Retry. Upstream's rate-limit bucket is still full. 429 again. Loop.
|
||||||
|
|
||||||
|
Net effect: a node with one transient 429 would hammer the upstream
|
||||||
|
every 60s forever, keeping the bucket full and never recovering. This
|
||||||
|
is what kept the user's Infonet node from reaching the seed peer.
|
||||||
|
|
||||||
|
What the fix does
|
||||||
|
-----------------
|
||||||
|
* New typed exception ``PeerSyncRateLimited`` carries the parsed
|
||||||
|
``Retry-After`` value out of the HTTP layer.
|
||||||
|
* ``_sync_from_peer`` returns ``(ok, error, forked, retry_after_s)``
|
||||||
|
instead of the old 3-tuple.
|
||||||
|
* ``finish_sync`` honors ``retry_after_s`` AND applies exponential
|
||||||
|
backoff: ``delay = max(retry_after_s, min(base * 2^(failures - 1), cap))``, cap = 1800s.
|
||||||
|
* ``parse_retry_after_header`` handles both RFC 7231 forms (delay
|
||||||
|
seconds, and HTTP-date).
|
||||||
|
|
||||||
|
These tests pin every part of the new contract.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# parse_retry_after_header — both RFC 7231 forms + edge cases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseRetryAfter:
|
||||||
|
def test_integer_seconds(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
assert parse_retry_after_header("120") == 120
|
||||||
|
assert parse_retry_after_header(" 30 ") == 30
|
||||||
|
assert parse_retry_after_header("0") == 0
|
||||||
|
|
||||||
|
def test_http_date(self):
|
||||||
|
"""RFC 7231 §7.1.3 explicitly allows ``Retry-After: <HTTP-date>``.
|
||||||
|
We compute seconds-from-now so callers can use the same field
|
||||||
|
regardless of which form the upstream chose."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
# Pin "now" so the test is deterministic.
|
||||||
|
now = 1_700_000_000.0 # 2023-11-14T22:13:20Z
|
||||||
|
# 300 seconds in the future, formatted per RFC 7231.
|
||||||
|
future = "Tue, 14 Nov 2023 22:18:20 GMT"
|
||||||
|
result = parse_retry_after_header(future, now=now)
|
||||||
|
assert 295 <= result <= 305, f"expected ~300s, got {result}"
|
||||||
|
|
||||||
|
def test_http_date_in_past_returns_zero(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
now = 1_700_000_000.0
|
||||||
|
past = "Mon, 13 Nov 2023 00:00:00 GMT"
|
||||||
|
assert parse_retry_after_header(past, now=now) == 0
|
||||||
|
|
||||||
|
def test_empty_and_whitespace_return_zero(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
assert parse_retry_after_header("") == 0
|
||||||
|
assert parse_retry_after_header(" ") == 0
|
||||||
|
|
||||||
|
def test_malformed_returns_zero(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
assert parse_retry_after_header("not a header") == 0
|
||||||
|
assert parse_retry_after_header("xyz") == 0
|
||||||
|
|
||||||
|
def test_clamps_to_one_hour(self):
|
||||||
|
"""A hostile peer can't silence us for a week by claiming a
|
||||||
|
24h Retry-After. We cap at 1 hour."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
assert parse_retry_after_header("86400") == 3600 # 24h -> 1h
|
||||||
|
assert parse_retry_after_header("99999999") == 3600
|
||||||
|
|
||||||
|
def test_negative_returns_zero(self):
|
||||||
|
"""RFC 7231 says ``Retry-After`` is a non-negative integer;
|
||||||
|
leading-minus parses as a non-digit and yields 0 here."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import parse_retry_after_header
|
||||||
|
assert parse_retry_after_header("-10") == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _failure_backoff_seconds — exponential growth, retry-after override, cap
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFailureBackoffSeconds:
|
||||||
|
def test_exponential_growth(self):
|
||||||
|
"""First failure uses the base (preserves pre-fix behavior
|
||||||
|
for one-off blips). Each subsequent failure doubles the wait,
|
||||||
|
capped at 1800s. With base=60: 60, 120, 240, 480, 960, 1800,
|
||||||
|
1800, 1800."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds
|
||||||
|
delays = [
|
||||||
|
_failure_backoff_seconds(
|
||||||
|
base_backoff_s=60,
|
||||||
|
consecutive_failures=n,
|
||||||
|
retry_after_s=0,
|
||||||
|
cap_s=1800,
|
||||||
|
)
|
||||||
|
for n in range(1, 9)
|
||||||
|
]
|
||||||
|
assert delays == [60, 120, 240, 480, 960, 1800, 1800, 1800], delays
|
||||||
|
|
||||||
|
def test_retry_after_wins_when_larger(self):
|
||||||
|
"""If the upstream says ``Retry-After: 600`` but exponential
|
||||||
|
would only ask for 60s (one failure), we honor the upstream."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds
|
||||||
|
assert _failure_backoff_seconds(
|
||||||
|
base_backoff_s=60,
|
||||||
|
consecutive_failures=1,
|
||||||
|
retry_after_s=600,
|
||||||
|
cap_s=1800,
|
||||||
|
) == 600
|
||||||
|
|
||||||
|
def test_exponential_wins_when_larger(self):
|
||||||
|
"""If exponential is asking for 1800s (6+ failures) but
|
||||||
|
upstream only sent ``Retry-After: 30``, we honor exponential.
|
||||||
|
The 30s was the upstream's view at one moment; our exponential
|
||||||
|
reflects sustained failure."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds
|
||||||
|
result = _failure_backoff_seconds(
|
||||||
|
base_backoff_s=60,
|
||||||
|
consecutive_failures=7,
|
||||||
|
retry_after_s=30,
|
||||||
|
cap_s=1800,
|
||||||
|
)
|
||||||
|
assert result == 1800
|
||||||
|
|
||||||
|
def test_cap_zero_disables_exponential(self):
|
||||||
|
"""Operators who want pre-fix behavior can set cap=0; only the
|
||||||
|
upstream's Retry-After is respected. (Pre-fix had no
|
||||||
|
exponential growth at all.)"""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds
|
||||||
|
assert _failure_backoff_seconds(
|
||||||
|
base_backoff_s=60,
|
||||||
|
consecutive_failures=10,
|
||||||
|
retry_after_s=120,
|
||||||
|
cap_s=0,
|
||||||
|
) == 120
|
||||||
|
|
||||||
|
def test_zero_inputs_return_zero(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds
|
||||||
|
assert _failure_backoff_seconds(
|
||||||
|
base_backoff_s=0,
|
||||||
|
consecutive_failures=0,
|
||||||
|
retry_after_s=0,
|
||||||
|
) == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# finish_sync end-to-end — failure path with retry-after + growing counter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFinishSyncBackoff:
|
||||||
|
def _state(self, **overrides):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import SyncWorkerState
|
||||||
|
base = {
|
||||||
|
"last_sync_started_at": 0,
|
||||||
|
"last_sync_finished_at": 0,
|
||||||
|
"last_sync_ok_at": 0,
|
||||||
|
"next_sync_due_at": 0,
|
||||||
|
"last_peer_url": "",
|
||||||
|
"last_error": "",
|
||||||
|
"last_outcome": "idle",
|
||||||
|
"current_head": "",
|
||||||
|
"fork_detected": False,
|
||||||
|
"consecutive_failures": 0,
|
||||||
|
}
|
||||||
|
base.update(overrides)
|
||||||
|
return SyncWorkerState(**base)
|
||||||
|
|
||||||
|
def test_first_failure_uses_base_unchanged(self):
|
||||||
|
"""One failure means consecutive_failures becomes 1, which uses
|
||||||
|
``base * 2^0 = base``. Preserves the pre-fix behavior so a
|
||||||
|
single transient upstream blip doesn't suddenly take 2 minutes
|
||||||
|
to retry — that change has to be earned by sustained failure."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import finish_sync
|
||||||
|
result = finish_sync(
|
||||||
|
self._state(),
|
||||||
|
ok=False,
|
||||||
|
error="some upstream blip",
|
||||||
|
now=1000.0,
|
||||||
|
failure_backoff_s=60,
|
||||||
|
)
|
||||||
|
assert result.consecutive_failures == 1
|
||||||
|
assert result.next_sync_due_at == 1000 + 60
|
||||||
|
assert result.last_error == "some upstream blip"
|
||||||
|
assert result.last_outcome == "error"
|
||||||
|
|
||||||
|
def test_consecutive_failures_grow_the_delay(self):
|
||||||
|
"""After 5 prior failures already in state, the next failure
|
||||||
|
sets consecutive=6 and uses the cap (1800s = 60 * 2^5)."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import finish_sync
|
||||||
|
result = finish_sync(
|
||||||
|
self._state(consecutive_failures=5),
|
||||||
|
ok=False,
|
||||||
|
error="HTTP 429",
|
||||||
|
now=2000.0,
|
||||||
|
failure_backoff_s=60,
|
||||||
|
)
|
||||||
|
assert result.consecutive_failures == 6
|
||||||
|
assert result.next_sync_due_at == 2000 + 1800
|
||||||
|
|
||||||
|
def test_retry_after_honored_at_low_failure_count(self):
|
||||||
|
"""When the upstream says ``Retry-After: 900`` but we'd
|
||||||
|
otherwise only wait 240s (4 failures = 60*2^3), wait 900s."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import finish_sync
|
||||||
|
result = finish_sync(
|
||||||
|
self._state(consecutive_failures=3),
|
||||||
|
ok=False,
|
||||||
|
error="HTTP 429",
|
||||||
|
now=5000.0,
|
||||||
|
failure_backoff_s=60,
|
||||||
|
retry_after_s=900,
|
||||||
|
)
|
||||||
|
assert result.consecutive_failures == 4
|
||||||
|
assert result.next_sync_due_at == 5000 + 900
|
||||||
|
|
||||||
|
def test_success_resets_consecutive_failures(self):
|
||||||
|
from services.mesh.mesh_infonet_sync_support import finish_sync
|
||||||
|
result = finish_sync(
|
||||||
|
self._state(consecutive_failures=4),
|
||||||
|
ok=True,
|
||||||
|
now=7000.0,
|
||||||
|
interval_s=300,
|
||||||
|
)
|
||||||
|
assert result.consecutive_failures == 0
|
||||||
|
assert result.next_sync_due_at == 7000 + 300
|
||||||
|
assert result.last_outcome == "ok"
|
||||||
|
|
||||||
|
def test_last_error_carries_status_string(self):
|
||||||
|
"""The pre-fix path stringified exceptions into ``last_error``
|
||||||
|
but the string was often empty (HTTP layer raised ValueError
|
||||||
|
with no message). We now require callers to pass something
|
||||||
|
meaningful — see the typed exception path in main.py."""
|
||||||
|
from services.mesh.mesh_infonet_sync_support import finish_sync
|
||||||
|
result = finish_sync(
|
||||||
|
self._state(),
|
||||||
|
ok=False,
|
||||||
|
error="HTTP 429 from peer (retry_after=120s): rate-limited",
|
||||||
|
now=1000.0,
|
||||||
|
failure_backoff_s=60,
|
||||||
|
retry_after_s=120,
|
||||||
|
)
|
||||||
|
assert "HTTP 429" in result.last_error
|
||||||
|
assert "retry_after=120s" in result.last_error
|
||||||
@@ -117,3 +117,11 @@ def test_finish_solo_sync_marks_first_node_ready_without_peer_failure():
|
|||||||
assert finished.next_sync_due_at == 500
|
assert finished.next_sync_due_at == 500
|
||||||
assert should_run_sync(finished, now=499) is False
|
assert should_run_sync(finished, now=499) is False
|
||||||
assert should_run_sync(finished, now=500) is True
|
assert should_run_sync(finished, now=500) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_run_sync_recovers_stale_running_state():
|
||||||
|
fresh = SyncWorkerState(last_sync_started_at=100, last_outcome="running")
|
||||||
|
stale = SyncWorkerState(last_sync_started_at=100, last_outcome="running")
|
||||||
|
|
||||||
|
assert should_run_sync(fresh, now=399) is False
|
||||||
|
assert should_run_sync(stale, now=400) is True
|
||||||
|
|||||||
@@ -8,6 +8,53 @@ from cryptography.hazmat.primitives.asymmetric import ed25519
|
|||||||
from httpx import ASGITransport, AsyncClient
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
|
||||||
|
def test_onion_peer_requests_use_arti_socks_proxy(monkeypatch):
|
||||||
|
import main
|
||||||
|
from services import wormhole_supervisor
|
||||||
|
|
||||||
|
monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
main,
|
||||||
|
"get_settings",
|
||||||
|
lambda: SimpleNamespace(MESH_ARTI_ENABLED=True, MESH_ARTI_SOCKS_PORT=19050),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(wormhole_supervisor, "_check_arti_ready", lambda: True)
|
||||||
|
|
||||||
|
proxies = main._infonet_peer_requests_proxies("http://exampleabcd.onion:8000")
|
||||||
|
|
||||||
|
assert proxies == {
|
||||||
|
"http": "socks5h://127.0.0.1:19050",
|
||||||
|
"https": "socks5h://127.0.0.1:19050",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_peer_requests_reject_clearnet(monkeypatch):
|
||||||
|
import main
|
||||||
|
|
||||||
|
monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
main._infonet_peer_requests_proxies("https://seed.example")
|
||||||
|
except RuntimeError as exc:
|
||||||
|
assert "private Infonet requires onion/RNS transport" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("clearnet peer was allowed while private transport is required")
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_peer_url_prefers_configured_public_peer_url(monkeypatch):
|
||||||
|
import main
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
main,
|
||||||
|
"get_settings",
|
||||||
|
lambda: SimpleNamespace(
|
||||||
|
MESH_PUBLIC_PEER_URL="HTTP://LOCALPEEREXAMPLE.onion:8000/",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert main._local_infonet_peer_url() == "http://localpeerexample.onion:8000"
|
||||||
|
|
||||||
|
|
||||||
def _write_signed_manifest(path, *, private_key):
|
def _write_signed_manifest(path, *, private_key):
|
||||||
from services.mesh.mesh_bootstrap_manifest import BOOTSTRAP_MANIFEST_VERSION
|
from services.mesh.mesh_bootstrap_manifest import BOOTSTRAP_MANIFEST_VERSION
|
||||||
from services.mesh.mesh_crypto import canonical_json
|
from services.mesh.mesh_crypto import canonical_json
|
||||||
@@ -142,6 +189,134 @@ def test_refresh_node_peer_store_suppresses_clearnet_seed_by_default(tmp_path, m
|
|||||||
assert store.records_for_bucket("sync") == []
|
assert store.records_for_bucket("sync") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_refresh_node_peer_store_prunes_persisted_clearnet_records_in_private_mode(tmp_path, monkeypatch):
|
||||||
|
import main
|
||||||
|
from services.config import get_settings
|
||||||
|
from services.mesh import mesh_peer_store as peer_store_mod
|
||||||
|
|
||||||
|
peer_store_path = tmp_path / "peer_store.json"
|
||||||
|
monkeypatch.setattr(peer_store_mod, "DEFAULT_PEER_STORE_PATH", peer_store_path)
|
||||||
|
store = peer_store_mod.PeerStore(peer_store_path)
|
||||||
|
store.upsert(
|
||||||
|
peer_store_mod.make_bootstrap_peer_record(
|
||||||
|
peer_url="https://node.shadowbroker.info",
|
||||||
|
transport="clearnet",
|
||||||
|
role="seed",
|
||||||
|
signer_id="shadowbroker-default",
|
||||||
|
now=1_749_999_900,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
store.upsert(
|
||||||
|
peer_store_mod.make_sync_peer_record(
|
||||||
|
peer_url="https://node.shadowbroker.info",
|
||||||
|
transport="clearnet",
|
||||||
|
role="seed",
|
||||||
|
source="bundle",
|
||||||
|
now=1_749_999_900,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
store.upsert(
|
||||||
|
peer_store_mod.make_push_peer_record(
|
||||||
|
peer_url="https://node.shadowbroker.info",
|
||||||
|
transport="clearnet",
|
||||||
|
role="relay",
|
||||||
|
now=1_749_999_900,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
store.save()
|
||||||
|
|
||||||
|
onion_seed = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||||
|
monkeypatch.setenv("MESH_RELAY_PEERS", "")
|
||||||
|
monkeypatch.setenv("MESH_BOOTSTRAP_SEED_PEERS", onion_seed)
|
||||||
|
monkeypatch.setenv("MESH_DEFAULT_SYNC_PEERS", "")
|
||||||
|
monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
|
||||||
|
monkeypatch.setenv("MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY", "")
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
try:
|
||||||
|
snapshot = main._refresh_node_peer_store(now=1_750_000_000)
|
||||||
|
store = peer_store_mod.PeerStore(peer_store_path)
|
||||||
|
store.load()
|
||||||
|
finally:
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
assert snapshot["private_transport_required"] is True
|
||||||
|
assert snapshot["pruned_clearnet_peer_count"] == 3
|
||||||
|
assert [record.peer_url for record in store.records()] == [onion_seed, onion_seed]
|
||||||
|
assert {record.bucket for record in store.records()} == {"bootstrap", "sync"}
|
||||||
|
assert all(record.transport == "onion" for record in store.records())
|
||||||
|
|
||||||
|
|
||||||
|
def test_infonet_peer_url_filter_excludes_clearnet_in_private_mode(monkeypatch):
|
||||||
|
import main
|
||||||
|
from services.config import get_settings
|
||||||
|
|
||||||
|
monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
try:
|
||||||
|
assert main._filter_infonet_peer_urls(
|
||||||
|
[
|
||||||
|
"https://node.shadowbroker.info",
|
||||||
|
"http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000",
|
||||||
|
]
|
||||||
|
) == ["http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"]
|
||||||
|
finally:
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
|
def test_public_sync_cycle_backs_off_on_429_retry_after(tmp_path, monkeypatch):
|
||||||
|
import time
|
||||||
|
|
||||||
|
import main
|
||||||
|
from services.config import get_settings
|
||||||
|
from services.mesh import mesh_peer_store as peer_store_mod
|
||||||
|
|
||||||
|
peer_store_path = tmp_path / "peer_store.json"
|
||||||
|
monkeypatch.setattr(peer_store_mod, "DEFAULT_PEER_STORE_PATH", peer_store_path)
|
||||||
|
onion_seed = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||||
|
store = peer_store_mod.PeerStore(peer_store_path)
|
||||||
|
store.upsert(
|
||||||
|
peer_store_mod.make_sync_peer_record(
|
||||||
|
peer_url=onion_seed,
|
||||||
|
transport="onion",
|
||||||
|
role="seed",
|
||||||
|
source="bundle",
|
||||||
|
now=1_750_000_000,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
store.save()
|
||||||
|
|
||||||
|
monkeypatch.delenv("MESH_INFONET_ALLOW_CLEARNET_SYNC", raising=False)
|
||||||
|
monkeypatch.setenv("MESH_SYNC_FAILURE_BACKOFF_S", "60")
|
||||||
|
monkeypatch.setenv("MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S", "15")
|
||||||
|
get_settings.cache_clear()
|
||||||
|
monkeypatch.setattr(main, "_participant_node_enabled", lambda: True)
|
||||||
|
monkeypatch.setattr(main, "_ensure_infonet_private_transport_ready", lambda reason="": True)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
main,
|
||||||
|
"_sync_from_peer",
|
||||||
|
lambda peer_url: (_ for _ in ()).throw(
|
||||||
|
main.PeerSyncHTTPError(429, "rate limited", retry_after_s=180)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
main.set_sync_state(main.SyncWorkerState())
|
||||||
|
|
||||||
|
try:
|
||||||
|
before = int(time.time())
|
||||||
|
state = main._run_public_sync_cycle()
|
||||||
|
store = peer_store_mod.PeerStore(peer_store_path)
|
||||||
|
store.load()
|
||||||
|
finally:
|
||||||
|
get_settings.cache_clear()
|
||||||
|
main.set_sync_state(main.SyncWorkerState())
|
||||||
|
|
||||||
|
record = store.records_for_bucket("sync")[0]
|
||||||
|
assert state.last_error == "HTTP 429: rate limited"
|
||||||
|
assert state.next_sync_due_at >= before + 180
|
||||||
|
assert record.cooldown_until >= before + 180
|
||||||
|
|
||||||
|
|
||||||
def test_verify_peer_push_hmac_requires_allowlisted_peer(monkeypatch):
|
def test_verify_peer_push_hmac_requires_allowlisted_peer(monkeypatch):
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
@@ -225,3 +400,29 @@ def test_public_sync_cycle_allows_first_node_without_peers(tmp_path, monkeypatch
|
|||||||
assert result.last_error == ""
|
assert result.last_error == ""
|
||||||
assert result.last_peer_url == ""
|
assert result.last_peer_url == ""
|
||||||
assert result.consecutive_failures == 0
|
assert result.consecutive_failures == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_headless_mesh_node_runtime_is_explicit(monkeypatch):
|
||||||
|
import main
|
||||||
|
|
||||||
|
monkeypatch.setattr(main, "_MESH_ONLY", True)
|
||||||
|
monkeypatch.setattr(main, "_HEADLESS_MESH_NODE_RUNTIME", False)
|
||||||
|
assert main._infonet_node_runtime_requested() is False
|
||||||
|
|
||||||
|
monkeypatch.setattr(main, "_HEADLESS_MESH_NODE_RUNTIME", True)
|
||||||
|
assert main._infonet_node_runtime_requested() is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_meshnode_scripts_enable_private_hashchain_runtime():
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
root = Path(__file__).resolve().parents[3]
|
||||||
|
bat = (root / "meshnode.bat").read_text(encoding="utf-8")
|
||||||
|
sh = (root / "meshnode.sh").read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
for script in (bat, sh):
|
||||||
|
assert "SHADOWBROKER_MESH_NODE_RUNTIME=true" in script
|
||||||
|
assert "MESH_INFONET_ALLOW_CLEARNET_SYNC=false" in script
|
||||||
|
assert "MESH_ARTI_ENABLED=true" in script
|
||||||
|
assert "MESH_DM_HASHCHAIN_SPOOL_LIMIT=2" in script
|
||||||
|
assert "gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000" in script
|
||||||
|
|||||||
@@ -0,0 +1,213 @@
|
|||||||
|
import base64
|
||||||
|
import time
|
||||||
|
|
||||||
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||||
|
|
||||||
|
from services.config import get_settings
|
||||||
|
from services.mesh import mesh_crypto, mesh_dm_relay, mesh_hashchain, mesh_protocol, mesh_secure_storage
|
||||||
|
|
||||||
|
|
||||||
|
def _keypair():
|
||||||
|
private_key = ed25519.Ed25519PrivateKey.generate()
|
||||||
|
public_raw = private_key.public_key().public_bytes(
|
||||||
|
encoding=serialization.Encoding.Raw,
|
||||||
|
format=serialization.PublicFormat.Raw,
|
||||||
|
)
|
||||||
|
public_key = base64.b64encode(public_raw).decode("utf-8")
|
||||||
|
node_id = mesh_crypto.derive_node_id(public_key)
|
||||||
|
return private_key, public_key, node_id
|
||||||
|
|
||||||
|
|
||||||
|
def _payload(recipient_id: str = "recipient-a", msg_id: str = "dm-1") -> dict:
|
||||||
|
return mesh_protocol.normalize_payload(
|
||||||
|
"dm_message",
|
||||||
|
{
|
||||||
|
"recipient_id": recipient_id,
|
||||||
|
"delivery_class": "request",
|
||||||
|
"recipient_token": "",
|
||||||
|
"ciphertext": base64.b64encode(f"cipher-{msg_id}".encode("utf-8")).decode("ascii"),
|
||||||
|
"msg_id": msg_id,
|
||||||
|
"timestamp": int(time.time()),
|
||||||
|
"format": "mls1",
|
||||||
|
"transport_lock": "private_strong",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _signature(private_key, node_id: str, sequence: int, payload: dict) -> str:
|
||||||
|
signature_payload = mesh_crypto.build_signature_payload(
|
||||||
|
event_type="dm_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=payload,
|
||||||
|
)
|
||||||
|
return private_key.sign(signature_payload.encode("utf-8")).hex()
|
||||||
|
|
||||||
|
|
||||||
|
def _fresh_infonet(tmp_path, monkeypatch) -> mesh_hashchain.Infonet:
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "DATA_DIR", tmp_path)
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "CHAIN_FILE", tmp_path / "infonet.json")
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "WAL_FILE", tmp_path / "infonet.wal")
|
||||||
|
return mesh_hashchain.Infonet()
|
||||||
|
|
||||||
|
|
||||||
|
def _fresh_relay(tmp_path, monkeypatch) -> mesh_dm_relay.DMRelay:
|
||||||
|
monkeypatch.setattr(mesh_dm_relay, "DATA_DIR", tmp_path)
|
||||||
|
monkeypatch.setattr(mesh_dm_relay, "RELAY_FILE", tmp_path / "dm_relay.json")
|
||||||
|
monkeypatch.setattr(mesh_secure_storage, "DATA_DIR", tmp_path)
|
||||||
|
monkeypatch.setattr(mesh_secure_storage, "MASTER_KEY_FILE", tmp_path / "wormhole_secure_store.key")
|
||||||
|
get_settings.cache_clear()
|
||||||
|
return mesh_dm_relay.DMRelay()
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_dm_hashchain_spools_two_ciphertexts_per_recipient_from_distinct_senders(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
senders = [_keypair(), _keypair()]
|
||||||
|
|
||||||
|
for idx, (private_key, public_key, node_id) in enumerate(senders, start=1):
|
||||||
|
payload = _payload(msg_id=f"dm-{idx}")
|
||||||
|
event = inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
signature=_signature(private_key, node_id, 1, payload),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=float(payload["timestamp"]),
|
||||||
|
)
|
||||||
|
assert event["event_type"] == "dm_message"
|
||||||
|
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
third = _payload(msg_id="dm-3")
|
||||||
|
try:
|
||||||
|
inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=third,
|
||||||
|
signature=_signature(private_key, node_id, 1, third),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=float(third["timestamp"]),
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "spool full" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("third DM spool event was accepted")
|
||||||
|
|
||||||
|
for _private_key, _public_key, sender_node_id in senders:
|
||||||
|
assert inf.sequence_domains[f"{sender_node_id}|dm_message"] == 1
|
||||||
|
assert inf.validate_chain(verify_signatures=True)[0] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_dm_hashchain_limits_one_active_spool_per_sender_recipient_pair(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
|
||||||
|
first = _payload(msg_id="dm-1")
|
||||||
|
inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=first,
|
||||||
|
signature=_signature(private_key, node_id, 1, first),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=float(first["timestamp"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
second = _payload(msg_id="dm-2")
|
||||||
|
try:
|
||||||
|
inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=second,
|
||||||
|
signature=_signature(private_key, node_id, 2, second),
|
||||||
|
sequence=2,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=float(second["timestamp"]),
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "sender spool full" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("second DM from same sender to same recipient was accepted")
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_dm_hashchain_rejects_plaintext(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _payload()
|
||||||
|
payload["message"] = "plaintext"
|
||||||
|
|
||||||
|
try:
|
||||||
|
inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
signature=_signature(private_key, node_id, 1, _payload()),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "plaintext" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("private DM append accepted plaintext")
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_dm_hashchain_rejects_non_sealed_ciphertext_shape(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _payload()
|
||||||
|
payload["ciphertext"] = "not sealed plaintext"
|
||||||
|
|
||||||
|
try:
|
||||||
|
inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
signature=_signature(private_key, node_id, 1, payload),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "sealed bytes" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("private DM append accepted non-base64 ciphertext")
|
||||||
|
|
||||||
|
|
||||||
|
def test_hydrate_dm_relay_from_chain_delivers_to_poll_claim(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path / "chain", monkeypatch)
|
||||||
|
relay = _fresh_relay(tmp_path / "relay", monkeypatch)
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "infonet", inf)
|
||||||
|
monkeypatch.setattr(mesh_dm_relay, "dm_relay", relay)
|
||||||
|
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _payload(recipient_id="recipient-a", msg_id="dm-chain-1")
|
||||||
|
event = inf.append_private_dm_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
signature=_signature(private_key, node_id, 1, payload),
|
||||||
|
sequence=1,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=float(payload["timestamp"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
from main import _hydrate_dm_relay_from_chain
|
||||||
|
|
||||||
|
assert _hydrate_dm_relay_from_chain([event]) == 1
|
||||||
|
messages, more = relay.collect_claims(
|
||||||
|
"recipient-a",
|
||||||
|
[{"type": "requests", "token": "recipient-request-token"}],
|
||||||
|
limit=8,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert more is False
|
||||||
|
assert [message["msg_id"] for message in messages] == ["dm-chain-1"]
|
||||||
|
assert messages[0]["ciphertext"] == payload["ciphertext"]
|
||||||
@@ -0,0 +1,269 @@
|
|||||||
|
import base64
|
||||||
|
|
||||||
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||||
|
|
||||||
|
from services.mesh import mesh_crypto, mesh_hashchain, mesh_protocol
|
||||||
|
|
||||||
|
|
||||||
|
def _keypair():
|
||||||
|
private_key = ed25519.Ed25519PrivateKey.generate()
|
||||||
|
public_raw = private_key.public_key().public_bytes(
|
||||||
|
encoding=serialization.Encoding.Raw,
|
||||||
|
format=serialization.PublicFormat.Raw,
|
||||||
|
)
|
||||||
|
public_key = base64.b64encode(public_raw).decode("utf-8")
|
||||||
|
node_id = mesh_crypto.derive_node_id(public_key)
|
||||||
|
return private_key, public_key, node_id
|
||||||
|
|
||||||
|
|
||||||
|
def _sign(private_key, *, event_type: str, node_id: str, sequence: int, payload: dict) -> str:
|
||||||
|
signature_payload = mesh_crypto.build_signature_payload(
|
||||||
|
event_type=event_type,
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=payload,
|
||||||
|
)
|
||||||
|
return private_key.sign(signature_payload.encode("utf-8")).hex()
|
||||||
|
|
||||||
|
|
||||||
|
def _message_payload(text: str) -> dict:
|
||||||
|
return mesh_protocol.normalize_payload(
|
||||||
|
"message",
|
||||||
|
{
|
||||||
|
"message": text,
|
||||||
|
"destination": "broadcast",
|
||||||
|
"channel": "LongFast",
|
||||||
|
"priority": "normal",
|
||||||
|
"ephemeral": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _gate_payload(gate_id: str = "ops-gate", *, epoch: int = 2, plaintext: bool = False) -> dict:
|
||||||
|
payload = {
|
||||||
|
"gate": gate_id,
|
||||||
|
"ciphertext": base64.b64encode(b"encrypted-gate-ciphertext").decode("ascii"),
|
||||||
|
"nonce": base64.b64encode(b"nonce-value-1234").decode("ascii"),
|
||||||
|
"sender_ref": "sender-ref-1",
|
||||||
|
"format": "mls1",
|
||||||
|
"transport_lock": "private_strong",
|
||||||
|
}
|
||||||
|
if epoch > 0:
|
||||||
|
payload["epoch"] = epoch
|
||||||
|
if plaintext:
|
||||||
|
payload["message"] = "this must never land on the chain"
|
||||||
|
return mesh_protocol.normalize_payload("gate_message", payload) if not plaintext else payload
|
||||||
|
|
||||||
|
|
||||||
|
def _gate_event(
|
||||||
|
private_key,
|
||||||
|
public_key: str,
|
||||||
|
node_id: str,
|
||||||
|
*,
|
||||||
|
sequence: int,
|
||||||
|
prev_hash: str,
|
||||||
|
payload: dict,
|
||||||
|
signature_payload: dict | None = None,
|
||||||
|
) -> dict:
|
||||||
|
signature = _sign(
|
||||||
|
private_key,
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=signature_payload or payload,
|
||||||
|
)
|
||||||
|
return mesh_hashchain.ChainEvent(
|
||||||
|
prev_hash=prev_hash,
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
timestamp=1234.0 + sequence,
|
||||||
|
sequence=sequence,
|
||||||
|
signature=signature,
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
network_id=mesh_protocol.NETWORK_ID,
|
||||||
|
).to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def _fresh_infonet(tmp_path, monkeypatch) -> mesh_hashchain.Infonet:
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "DATA_DIR", tmp_path)
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "CHAIN_FILE", tmp_path / "infonet.json")
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "WAL_FILE", tmp_path / "infonet.wal")
|
||||||
|
return mesh_hashchain.Infonet()
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_gate_fork_uses_gate_sequence_domain_and_signature_variants(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
|
||||||
|
public_payload = _message_payload("public prefix")
|
||||||
|
public_event = inf.append(
|
||||||
|
event_type="message",
|
||||||
|
node_id=node_id,
|
||||||
|
payload=public_payload,
|
||||||
|
sequence=1,
|
||||||
|
signature=_sign(
|
||||||
|
private_key,
|
||||||
|
event_type="message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=1,
|
||||||
|
payload=public_payload,
|
||||||
|
),
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
|
||||||
|
gate_payload = _gate_payload(epoch=3)
|
||||||
|
signature_payload = dict(gate_payload)
|
||||||
|
signature_payload.pop("epoch", None)
|
||||||
|
gate_event = _gate_event(
|
||||||
|
private_key,
|
||||||
|
public_key,
|
||||||
|
node_id,
|
||||||
|
sequence=1,
|
||||||
|
prev_hash=public_event["event_id"],
|
||||||
|
payload=gate_payload,
|
||||||
|
signature_payload=signature_payload,
|
||||||
|
)
|
||||||
|
|
||||||
|
ok, reason = inf.apply_fork([gate_event], gate_event["event_id"], proof_count=2, quorum=2)
|
||||||
|
|
||||||
|
assert ok is True, reason
|
||||||
|
assert inf.events[-1]["event_type"] == "gate_message"
|
||||||
|
assert inf.node_sequences[node_id] == 1
|
||||||
|
assert inf.sequence_domains[f"{node_id}|gate_message"] == 1
|
||||||
|
assert inf.validate_chain(verify_signatures=True)[0] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_gate_fork_rejects_plaintext_payload(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
|
||||||
|
public_payload = _message_payload("public prefix")
|
||||||
|
public_event = inf.append(
|
||||||
|
event_type="message",
|
||||||
|
node_id=node_id,
|
||||||
|
payload=public_payload,
|
||||||
|
sequence=1,
|
||||||
|
signature=_sign(
|
||||||
|
private_key,
|
||||||
|
event_type="message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=1,
|
||||||
|
payload=public_payload,
|
||||||
|
),
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
|
||||||
|
plaintext_payload = _gate_payload(plaintext=True)
|
||||||
|
gate_event = _gate_event(
|
||||||
|
private_key,
|
||||||
|
public_key,
|
||||||
|
node_id,
|
||||||
|
sequence=1,
|
||||||
|
prev_hash=public_event["event_id"],
|
||||||
|
payload=plaintext_payload,
|
||||||
|
)
|
||||||
|
|
||||||
|
ok, reason = inf.apply_fork([gate_event], gate_event["event_id"], proof_count=2, quorum=2)
|
||||||
|
|
||||||
|
assert ok is False
|
||||||
|
assert "normalized" in reason or "plaintext" in reason
|
||||||
|
assert len(inf.events) == 1
|
||||||
|
assert "gate_message" not in inf.get_info()["event_types"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_private_gate_message_rejects_plaintext_before_normalizing(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _gate_payload()
|
||||||
|
payload["message"] = "plaintext should not be silently dropped"
|
||||||
|
|
||||||
|
try:
|
||||||
|
inf.append_private_gate_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
sequence=1,
|
||||||
|
signature=_sign(
|
||||||
|
private_key,
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=1,
|
||||||
|
payload=_gate_payload(),
|
||||||
|
),
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "plaintext" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("private gate append accepted plaintext")
|
||||||
|
|
||||||
|
assert inf.events == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_private_gate_message_requires_private_strong_transport_lock(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _gate_payload()
|
||||||
|
payload.pop("transport_lock", None)
|
||||||
|
|
||||||
|
try:
|
||||||
|
inf.append_private_gate_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
sequence=1,
|
||||||
|
signature=_sign(
|
||||||
|
private_key,
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=1,
|
||||||
|
payload=_gate_payload(),
|
||||||
|
),
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "private_strong" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("private gate append accepted missing transport_lock")
|
||||||
|
|
||||||
|
assert inf.events == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_private_gate_message_rejects_non_sealed_ciphertext_shape(tmp_path, monkeypatch):
|
||||||
|
inf = _fresh_infonet(tmp_path, monkeypatch)
|
||||||
|
private_key, public_key, node_id = _keypair()
|
||||||
|
payload = _gate_payload()
|
||||||
|
payload["ciphertext"] = "not sealed plaintext"
|
||||||
|
|
||||||
|
try:
|
||||||
|
inf.append_private_gate_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
sequence=1,
|
||||||
|
signature=_sign(
|
||||||
|
private_key,
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=1,
|
||||||
|
payload=payload,
|
||||||
|
),
|
||||||
|
public_key=public_key,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
)
|
||||||
|
except ValueError as exc:
|
||||||
|
assert "sealed bytes" in str(exc)
|
||||||
|
else:
|
||||||
|
raise AssertionError("private gate append accepted non-base64 ciphertext")
|
||||||
|
|
||||||
|
assert inf.events == []
|
||||||
@@ -1,14 +1,12 @@
|
|||||||
"""S14B Public Sync Gate Event Filter.
|
"""S14B private sync gate event policy.
|
||||||
|
|
||||||
Tests:
|
Private Infonet sync carries encrypted gate_message ledger events. If a node
|
||||||
- GET /api/mesh/infonet/sync excludes gate_message when local infonet contains legacy gate_message plus public events
|
is configured to allow clearnet-compatible sync, those gate events are filtered
|
||||||
- POST /api/mesh/infonet/sync excludes gate_message under the same condition
|
out of the sync response.
|
||||||
- Both main app and router-served paths are covered
|
|
||||||
- Non-gate public redactions still hold (vote gate label stripped, key_rotate identity stripped)
|
|
||||||
- Do not overclaim that gate_message is removed from historical infonet storage or ingest
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from starlette.requests import Request
|
from starlette.requests import Request
|
||||||
@@ -17,9 +15,6 @@ import main
|
|||||||
from services.mesh import mesh_hashchain
|
from services.mesh import mesh_hashchain
|
||||||
|
|
||||||
|
|
||||||
# ── Helpers ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _message_event() -> dict:
|
def _message_event() -> dict:
|
||||||
return {
|
return {
|
||||||
"event_id": "msg-1",
|
"event_id": "msg-1",
|
||||||
@@ -83,6 +78,7 @@ def _gate_message_event() -> dict:
|
|||||||
"nonce": "nonce-1",
|
"nonce": "nonce-1",
|
||||||
"sender_ref": "sender-ref-1",
|
"sender_ref": "sender-ref-1",
|
||||||
"format": "mls1",
|
"format": "mls1",
|
||||||
|
"transport_lock": "private_strong",
|
||||||
},
|
},
|
||||||
"timestamp": 103.0,
|
"timestamp": 103.0,
|
||||||
"sequence": 4,
|
"sequence": 4,
|
||||||
@@ -93,9 +89,31 @@ def _gate_message_event() -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class _FakeInfonet:
|
def _dm_message_event() -> dict:
|
||||||
"""Minimal fake infonet with a gate_message among public events."""
|
return {
|
||||||
|
"event_id": "dm-1",
|
||||||
|
"event_type": "dm_message",
|
||||||
|
"node_id": "!node-5",
|
||||||
|
"payload": {
|
||||||
|
"recipient_id": "recipient-a",
|
||||||
|
"delivery_class": "request",
|
||||||
|
"recipient_token": "",
|
||||||
|
"ciphertext": base64.b64encode(b"sealed-dm-ciphertext").decode("ascii"),
|
||||||
|
"msg_id": "dm-1",
|
||||||
|
"timestamp": 104,
|
||||||
|
"format": "mls1",
|
||||||
|
"transport_lock": "private_strong",
|
||||||
|
},
|
||||||
|
"timestamp": 104.0,
|
||||||
|
"sequence": 5,
|
||||||
|
"signature": "sig",
|
||||||
|
"public_key": "pub",
|
||||||
|
"public_key_algo": "Ed25519",
|
||||||
|
"protocol_version": "infonet/2",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeInfonet:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.head_hash = "head-1"
|
self.head_hash = "head-1"
|
||||||
self.events = [
|
self.events = [
|
||||||
@@ -113,12 +131,10 @@ class _FakeInfonet:
|
|||||||
return int(getattr(limit, "default", 100) or 100)
|
return int(getattr(limit, "default", 100) or 100)
|
||||||
|
|
||||||
def get_events_after(self, after_hash: str, limit=100):
|
def get_events_after(self, after_hash: str, limit=100):
|
||||||
resolved = self._limit_value(limit)
|
return [dict(e) for e in self.events[: self._limit_value(limit)]]
|
||||||
return [dict(e) for e in self.events[:resolved]]
|
|
||||||
|
|
||||||
def get_events_after_locator(self, locator: list[str], limit=100):
|
def get_events_after_locator(self, locator: list[str], limit=100):
|
||||||
resolved = self._limit_value(limit)
|
return self.head_hash, 0, [dict(e) for e in self.events[: self._limit_value(limit)]]
|
||||||
return self.head_hash, 0, [dict(e) for e in self.events[:resolved]]
|
|
||||||
|
|
||||||
def get_merkle_proofs(self, start_index: int, count: int):
|
def get_merkle_proofs(self, start_index: int, count: int):
|
||||||
return {"root": "merkle-root", "total": len(self.events), "start": start_index, "proofs": []}
|
return {"root": "merkle-root", "total": len(self.events), "start": start_index, "proofs": []}
|
||||||
@@ -127,7 +143,7 @@ class _FakeInfonet:
|
|||||||
return "merkle-root"
|
return "merkle-root"
|
||||||
|
|
||||||
|
|
||||||
def _json_request(path: str, body: dict) -> Request:
|
def _json_request(path: str, body: dict, *, client_host: str = "127.0.0.1", headers: dict[str, str] | None = None) -> Request:
|
||||||
payload = json.dumps(body).encode("utf-8")
|
payload = json.dumps(body).encode("utf-8")
|
||||||
sent = {"value": False}
|
sent = {"value": False}
|
||||||
|
|
||||||
@@ -137,11 +153,14 @@ def _json_request(path: str, body: dict) -> Request:
|
|||||||
sent["value"] = True
|
sent["value"] = True
|
||||||
return {"type": "http.request", "body": payload, "more_body": False}
|
return {"type": "http.request", "body": payload, "more_body": False}
|
||||||
|
|
||||||
|
raw_headers = [(b"content-type", b"application/json")]
|
||||||
|
for key, value in dict(headers or {}).items():
|
||||||
|
raw_headers.append((key.lower().encode("ascii"), str(value).encode("ascii")))
|
||||||
return Request(
|
return Request(
|
||||||
{
|
{
|
||||||
"type": "http",
|
"type": "http",
|
||||||
"headers": [(b"content-type", b"application/json")],
|
"headers": raw_headers,
|
||||||
"client": ("test", 12345),
|
"client": (client_host, 12345),
|
||||||
"method": "POST",
|
"method": "POST",
|
||||||
"path": path,
|
"path": path,
|
||||||
},
|
},
|
||||||
@@ -149,20 +168,15 @@ def _json_request(path: str, body: dict) -> Request:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _get_request(path: str) -> Request:
|
def _get_request(path: str, *, client_host: str = "127.0.0.1", headers: dict[str, str] | None = None) -> Request:
|
||||||
sent = {"value": False}
|
|
||||||
|
|
||||||
async def receive():
|
async def receive():
|
||||||
if sent["value"]:
|
|
||||||
return {"type": "http.request", "body": b"", "more_body": False}
|
|
||||||
sent["value"] = True
|
|
||||||
return {"type": "http.request", "body": b"", "more_body": False}
|
return {"type": "http.request", "body": b"", "more_body": False}
|
||||||
|
|
||||||
return Request(
|
return Request(
|
||||||
{
|
{
|
||||||
"type": "http",
|
"type": "http",
|
||||||
"headers": [],
|
"headers": [(key.lower().encode("ascii"), str(value).encode("ascii")) for key, value in dict(headers or {}).items()],
|
||||||
"client": ("test", 12345),
|
"client": (client_host, 12345),
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"path": path,
|
"path": path,
|
||||||
},
|
},
|
||||||
@@ -170,120 +184,166 @@ def _get_request(path: str) -> Request:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ── GET sync excludes gate_message (main app) ──────────────────────────
|
def _force_private_sync(monkeypatch):
|
||||||
|
monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)
|
||||||
|
monkeypatch.setattr(main, "_request_appears_private_infonet_transport", lambda request: True)
|
||||||
|
|
||||||
|
|
||||||
def test_get_sync_excludes_gate_message(client, monkeypatch):
|
def _force_private_policy_only(monkeypatch):
|
||||||
"""GET /api/mesh/infonet/sync must not return gate_message events."""
|
monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: True)
|
||||||
|
|
||||||
|
|
||||||
|
def _force_clearnet_sync(monkeypatch):
|
||||||
|
monkeypatch.setattr(main, "_infonet_private_transport_required", lambda: False)
|
||||||
|
|
||||||
|
|
||||||
|
def _event_types(events: list[dict]) -> list[str]:
|
||||||
|
return [str(e.get("event_type", "")) for e in events]
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_sync_redacts_private_events_from_exposed_clearnet_request(monkeypatch):
|
||||||
|
_force_private_policy_only(monkeypatch)
|
||||||
|
request = _get_request("/api/mesh/infonet/sync", client_host="203.0.113.10")
|
||||||
|
|
||||||
|
events = main._infonet_sync_response_events(
|
||||||
|
[_message_event(), _gate_message_event(), _dm_message_event()],
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert _event_types(events) == ["message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_sync_includes_private_events_for_loopback_request(monkeypatch):
|
||||||
|
_force_private_policy_only(monkeypatch)
|
||||||
|
request = _get_request("/api/mesh/infonet/sync", client_host="127.0.0.1")
|
||||||
|
|
||||||
|
events = main._infonet_sync_response_events(
|
||||||
|
[_message_event(), _gate_message_event(), _dm_message_event()],
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert _event_types(events) == ["message", "gate_message", "dm_message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_sync_redacts_private_events_when_forwarded_for_is_clearnet(monkeypatch):
|
||||||
|
_force_private_policy_only(monkeypatch)
|
||||||
|
request = _get_request(
|
||||||
|
"/api/mesh/infonet/sync",
|
||||||
|
client_host="127.0.0.1",
|
||||||
|
headers={"x-forwarded-for": "198.51.100.44"},
|
||||||
|
)
|
||||||
|
|
||||||
|
events = main._infonet_sync_response_events(
|
||||||
|
[_message_event(), _gate_message_event(), _dm_message_event()],
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert _event_types(events) == ["message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_sync_includes_gate_message_on_private_transport(client, monkeypatch):
|
||||||
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
resp = client.get("/api/mesh/infonet/sync")
|
|
||||||
data = resp.json()
|
data = client.get("/api/mesh/infonet/sync").json()
|
||||||
event_types = [e["event_type"] for e in data["events"]]
|
|
||||||
assert "gate_message" not in event_types
|
assert "gate_message" in _event_types(data["events"])
|
||||||
assert "message" in event_types
|
assert data["count"] == 4
|
||||||
assert "vote" in event_types
|
|
||||||
assert "key_rotate" in event_types
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_sync_count_excludes_gate_message(client, monkeypatch):
|
def test_post_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||||
"""GET sync count field must reflect filtered events (gate_message excluded)."""
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
resp = client.get("/api/mesh/infonet/sync")
|
|
||||||
data = resp.json()
|
|
||||||
assert data["count"] == 3 # message, vote, key_rotate — not gate_message
|
|
||||||
|
|
||||||
|
|
||||||
# ── POST sync excludes gate_message (main app) ─────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_post_sync_excludes_gate_message(monkeypatch):
|
|
||||||
"""POST /api/mesh/infonet/sync must not return gate_message events."""
|
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
|
||||||
result = asyncio.run(
|
result = asyncio.run(
|
||||||
main.infonet_sync_post(
|
main.infonet_sync_post(
|
||||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
event_types = [e["event_type"] for e in result["events"]]
|
|
||||||
assert "gate_message" not in event_types
|
assert "gate_message" in _event_types(result["events"])
|
||||||
assert "message" in event_types
|
assert result["count"] == 4
|
||||||
assert "vote" in event_types
|
|
||||||
assert "key_rotate" in event_types
|
|
||||||
|
|
||||||
|
|
||||||
def test_post_sync_count_excludes_gate_message(monkeypatch):
|
def test_router_get_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||||
"""POST sync count field must reflect filtered events."""
|
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
|
||||||
result = asyncio.run(
|
|
||||||
main.infonet_sync_post(
|
|
||||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
|
||||||
)
|
|
||||||
)
|
|
||||||
assert result["count"] == 3
|
|
||||||
|
|
||||||
|
|
||||||
# ── Router-served paths ────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_router_get_sync_excludes_gate_message(monkeypatch):
|
|
||||||
"""Router GET /api/mesh/infonet/sync must not return gate_message."""
|
|
||||||
from routers.mesh_public import infonet_sync
|
from routers.mesh_public import infonet_sync
|
||||||
|
|
||||||
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
|
|
||||||
result = asyncio.run(infonet_sync(_get_request("/api/mesh/infonet/sync")))
|
result = asyncio.run(infonet_sync(_get_request("/api/mesh/infonet/sync")))
|
||||||
event_types = [e["event_type"] for e in result["events"]]
|
|
||||||
assert "gate_message" not in event_types
|
assert "gate_message" in _event_types(result["events"])
|
||||||
assert "message" in event_types
|
assert result["count"] == len(result["events"])
|
||||||
assert data_count_matches(result)
|
|
||||||
|
|
||||||
|
|
||||||
def test_router_post_sync_excludes_gate_message(monkeypatch):
|
def test_router_post_sync_includes_gate_message_on_private_transport(monkeypatch):
|
||||||
"""Router POST /api/mesh/infonet/sync must not return gate_message."""
|
|
||||||
from routers.mesh_public import infonet_sync_post
|
from routers.mesh_public import infonet_sync_post
|
||||||
|
|
||||||
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
|
|
||||||
result = asyncio.run(
|
result = asyncio.run(
|
||||||
infonet_sync_post(
|
infonet_sync_post(
|
||||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
event_types = [e["event_type"] for e in result["events"]]
|
|
||||||
assert "gate_message" not in event_types
|
assert "gate_message" in _event_types(result["events"])
|
||||||
assert "message" in event_types
|
assert result["count"] == len(result["events"])
|
||||||
assert data_count_matches(result)
|
|
||||||
|
|
||||||
|
|
||||||
def data_count_matches(result: dict) -> bool:
|
def test_get_sync_excludes_gate_message_when_clearnet_sync_allowed(client, monkeypatch):
|
||||||
return result["count"] == len(result["events"])
|
_force_clearnet_sync(monkeypatch)
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
|
|
||||||
|
data = client.get("/api/mesh/infonet/sync").json()
|
||||||
|
|
||||||
|
assert "gate_message" not in _event_types(data["events"])
|
||||||
|
assert data["count"] == 3
|
||||||
|
|
||||||
|
|
||||||
# ── Non-gate redactions still hold ─────────────────────────────────────
|
def test_post_sync_excludes_gate_message_when_clearnet_sync_allowed(monkeypatch):
|
||||||
|
_force_clearnet_sync(monkeypatch)
|
||||||
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
|
|
||||||
|
result = asyncio.run(
|
||||||
|
main.infonet_sync_post(
|
||||||
|
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "gate_message" not in _event_types(result["events"])
|
||||||
|
assert result["count"] == 3
|
||||||
|
|
||||||
|
|
||||||
def test_get_sync_still_redacts_vote_gate_label(client, monkeypatch):
|
def test_get_sync_still_redacts_vote_gate_label(client, monkeypatch):
|
||||||
"""Public sync must still strip gate label from vote payload."""
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
resp = client.get("/api/mesh/infonet/sync")
|
|
||||||
events = resp.json()["events"]
|
events = client.get("/api/mesh/infonet/sync").json()["events"]
|
||||||
vote = next(e for e in events if e["event_type"] == "vote")
|
vote = next(e for e in events if e["event_type"] == "vote")
|
||||||
|
|
||||||
assert "gate" not in vote.get("payload", {})
|
assert "gate" not in vote.get("payload", {})
|
||||||
|
|
||||||
|
|
||||||
def test_get_sync_still_redacts_key_rotate_identity(client, monkeypatch):
|
def test_get_sync_still_redacts_key_rotate_identity(client, monkeypatch):
|
||||||
"""Public sync must still strip old identity fields from key_rotate payload."""
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
resp = client.get("/api/mesh/infonet/sync")
|
|
||||||
events = resp.json()["events"]
|
events = client.get("/api/mesh/infonet/sync").json()["events"]
|
||||||
rotate = next(e for e in events if e["event_type"] == "key_rotate")
|
rotate = next(e for e in events if e["event_type"] == "key_rotate")
|
||||||
payload = rotate.get("payload", {})
|
payload = rotate.get("payload", {})
|
||||||
|
|
||||||
assert "old_node_id" not in payload
|
assert "old_node_id" not in payload
|
||||||
assert "old_public_key" not in payload
|
assert "old_public_key" not in payload
|
||||||
assert "old_signature" not in payload
|
assert "old_signature" not in payload
|
||||||
|
|
||||||
|
|
||||||
def test_post_sync_still_redacts_vote_and_rotate(monkeypatch):
|
def test_post_sync_still_redacts_vote_and_rotate(monkeypatch):
|
||||||
"""POST sync must still apply standard public redactions to non-gate events."""
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _FakeInfonet(), raising=False)
|
||||||
|
|
||||||
result = asyncio.run(
|
result = asyncio.run(
|
||||||
main.infonet_sync_post(
|
main.infonet_sync_post(
|
||||||
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
_json_request("/api/mesh/infonet/sync", {"locator": ["head-1"]})
|
||||||
@@ -291,24 +351,17 @@ def test_post_sync_still_redacts_vote_and_rotate(monkeypatch):
|
|||||||
)
|
)
|
||||||
vote = next(e for e in result["events"] if e["event_type"] == "vote")
|
vote = next(e for e in result["events"] if e["event_type"] == "vote")
|
||||||
rotate = next(e for e in result["events"] if e["event_type"] == "key_rotate")
|
rotate = next(e for e in result["events"] if e["event_type"] == "key_rotate")
|
||||||
|
|
||||||
assert "gate" not in vote.get("payload", {})
|
assert "gate" not in vote.get("payload", {})
|
||||||
assert "old_node_id" not in rotate.get("payload", {})
|
assert "old_node_id" not in rotate.get("payload", {})
|
||||||
|
|
||||||
|
|
||||||
# ── No overclaim ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_gate_message_still_in_fake_infonet_storage():
|
def test_gate_message_still_in_fake_infonet_storage():
|
||||||
"""The filter does NOT remove gate_message from underlying storage.
|
|
||||||
This test documents that the infonet still holds gate_message events;
|
|
||||||
only the public sync response surface filters them out."""
|
|
||||||
fake = _FakeInfonet()
|
fake = _FakeInfonet()
|
||||||
all_types = [e["event_type"] for e in fake.events]
|
assert "gate_message" in _event_types(fake.events)
|
||||||
assert "gate_message" in all_types
|
|
||||||
|
|
||||||
|
|
||||||
def test_sync_with_only_gate_messages_returns_empty(client, monkeypatch):
|
def test_private_sync_with_only_gate_messages_returns_gate_events(client, monkeypatch):
|
||||||
"""If infonet contains only gate_message events, sync returns empty list."""
|
|
||||||
class _GateOnlyInfonet:
|
class _GateOnlyInfonet:
|
||||||
head_hash = "head-1"
|
head_hash = "head-1"
|
||||||
events = [_gate_message_event()]
|
events = [_gate_message_event()]
|
||||||
@@ -325,8 +378,10 @@ def test_sync_with_only_gate_messages_returns_empty(client, monkeypatch):
|
|||||||
def get_merkle_root(self):
|
def get_merkle_root(self):
|
||||||
return "r"
|
return "r"
|
||||||
|
|
||||||
|
_force_private_sync(monkeypatch)
|
||||||
monkeypatch.setattr(mesh_hashchain, "infonet", _GateOnlyInfonet(), raising=False)
|
monkeypatch.setattr(mesh_hashchain, "infonet", _GateOnlyInfonet(), raising=False)
|
||||||
resp = client.get("/api/mesh/infonet/sync")
|
|
||||||
data = resp.json()
|
data = client.get("/api/mesh/infonet/sync").json()
|
||||||
assert data["events"] == []
|
|
||||||
assert data["count"] == 0
|
assert _event_types(data["events"]) == ["gate_message"]
|
||||||
|
assert data["count"] == 1
|
||||||
|
|||||||
@@ -66,6 +66,20 @@ def _make_gate_message_event(priv, pub_b64, node_id, sequence, prev_hash, gate_i
|
|||||||
return evt.to_dict()
|
return evt.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def _make_gate_payload(gate_id="test-gate") -> dict:
|
||||||
|
return mesh_protocol.normalize_payload(
|
||||||
|
"gate_message",
|
||||||
|
{
|
||||||
|
"gate": gate_id,
|
||||||
|
"ciphertext": base64.b64encode(b"encrypted-data").decode(),
|
||||||
|
"nonce": base64.b64encode(b"nonce-value-1234").decode(),
|
||||||
|
"sender_ref": "sender-abc",
|
||||||
|
"format": "mls1",
|
||||||
|
"transport_lock": "private_strong",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def fresh_env(tmp_path, monkeypatch):
|
def fresh_env(tmp_path, monkeypatch):
|
||||||
"""Set up isolated infonet + gate_store, return (infonet, gate_store)."""
|
"""Set up isolated infonet + gate_store, return (infonet, gate_store)."""
|
||||||
@@ -89,6 +103,74 @@ def fresh_env(tmp_path, monkeypatch):
|
|||||||
# ── Rejected gate_message must NOT hydrate gate_store ─────────────────────
|
# ── Rejected gate_message must NOT hydrate gate_store ─────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_private_gate_message_uses_hashchain_gate_sequence(fresh_env):
|
||||||
|
"""Local gate posts become private hashchain events in a gate sequence domain."""
|
||||||
|
inf, _gs = fresh_env
|
||||||
|
priv, pub_b64, node_id = _make_keypair()
|
||||||
|
sequence = 1
|
||||||
|
payload = _make_gate_payload("test-gate")
|
||||||
|
sig_payload = mesh_crypto.build_signature_payload(
|
||||||
|
event_type="gate_message",
|
||||||
|
node_id=node_id,
|
||||||
|
sequence=sequence,
|
||||||
|
payload=payload,
|
||||||
|
)
|
||||||
|
signature = priv.sign(sig_payload.encode("utf-8")).hex()
|
||||||
|
|
||||||
|
event = inf.append_private_gate_message(
|
||||||
|
node_id=node_id,
|
||||||
|
payload=payload,
|
||||||
|
signature=signature,
|
||||||
|
sequence=sequence,
|
||||||
|
public_key=pub_b64,
|
||||||
|
public_key_algo="Ed25519",
|
||||||
|
protocol_version=mesh_protocol.PROTOCOL_VERSION,
|
||||||
|
timestamp=123.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert event["event_type"] == "gate_message"
|
||||||
|
assert inf.head_hash == event["event_id"]
|
||||||
|
assert inf.sequence_domains[f"{node_id}|gate_message"] == sequence
|
||||||
|
assert inf.node_sequences.get(node_id, 0) == 0
|
||||||
|
assert event["payload"]["transport_lock"] == "private_strong"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_accepts_new_suffix_after_duplicate_prefix(fresh_env):
|
||||||
|
"""Peer-push batches may include events the receiver already has."""
|
||||||
|
inf, _gs = fresh_env
|
||||||
|
priv, pub_b64, node_id = _make_keypair()
|
||||||
|
evt1 = _make_gate_message_event(
|
||||||
|
priv,
|
||||||
|
pub_b64,
|
||||||
|
node_id,
|
||||||
|
sequence=1,
|
||||||
|
prev_hash=mesh_hashchain.GENESIS_HASH,
|
||||||
|
)
|
||||||
|
assert inf.ingest_events([evt1])["accepted"] == 1
|
||||||
|
evt2 = _make_gate_message_event(
|
||||||
|
priv,
|
||||||
|
pub_b64,
|
||||||
|
node_id,
|
||||||
|
sequence=2,
|
||||||
|
prev_hash=evt1["event_id"],
|
||||||
|
)
|
||||||
|
assert inf.ingest_events([evt2])["accepted"] == 1
|
||||||
|
evt3 = _make_gate_message_event(
|
||||||
|
priv,
|
||||||
|
pub_b64,
|
||||||
|
node_id,
|
||||||
|
sequence=3,
|
||||||
|
prev_hash=evt2["event_id"],
|
||||||
|
)
|
||||||
|
|
||||||
|
result = inf.ingest_events([evt1, evt2, evt3])
|
||||||
|
|
||||||
|
assert result["duplicates"] == 2
|
||||||
|
assert result["accepted"] == 1
|
||||||
|
assert result["rejected"] == []
|
||||||
|
assert inf.head_hash == evt3["event_id"]
|
||||||
|
|
||||||
|
|
||||||
def test_rejected_event_does_not_hydrate_gate_store(fresh_env):
|
def test_rejected_event_does_not_hydrate_gate_store(fresh_env):
|
||||||
"""A gate_message rejected by ingest must not appear in gate_store."""
|
"""A gate_message rejected by ingest must not appear in gate_store."""
|
||||||
inf, gs = fresh_env
|
inf, gs = fresh_env
|
||||||
|
|||||||
@@ -0,0 +1,166 @@
|
|||||||
|
"""AIS upstream-connectivity telemetry.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
On 2026-05-23, stream.aisstream.io went fully offline (TCP timeouts on port
|
||||||
|
443). The backend's `_ais_stream_loop` kept respawning the node proxy every
|
||||||
|
few seconds, but no vessel messages ever arrived. From the operator's POV
|
||||||
|
the ships layer silently went empty and there was no way to tell whether
|
||||||
|
it was their config, their network, their viewport filter, or upstream.
|
||||||
|
|
||||||
|
The fix surfaces three signals from ``ais_proxy_status()``:
|
||||||
|
|
||||||
|
* ``connected`` — bool, true when we received a vessel message in the
|
||||||
|
last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
|
||||||
|
* ``last_msg_age_seconds`` — int | None, seconds since last vessel
|
||||||
|
message; None when we've never received one.
|
||||||
|
* ``proxy_spawn_count`` — int, how many times we've spawned the node
|
||||||
|
proxy. Sustained increase without ``connected`` means upstream is dead.
|
||||||
|
|
||||||
|
Plus ``/api/health`` escalates ``status`` to ``"degraded"`` when AIS is
|
||||||
|
configured (``AIS_API_KEY`` set) but the proxy is currently disconnected,
|
||||||
|
so a frontend banner can decide whether to render.
|
||||||
|
|
||||||
|
These tests pin every signal.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_ais_module():
|
||||||
|
"""Reset module-level state so tests don't bleed into each other."""
|
||||||
|
from services import ais_stream as ais
|
||||||
|
with ais._vessels_lock:
|
||||||
|
ais._proxy_status.clear()
|
||||||
|
ais._last_msg_at = 0.0
|
||||||
|
ais._proxy_spawn_count = 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestAisProxyStatusShape:
|
||||||
|
def test_fresh_module_reports_disconnected(self):
|
||||||
|
"""Before any vessel messages have arrived (e.g. cold start, no
|
||||||
|
upstream yet) we report ``connected: false`` and ``None`` for the
|
||||||
|
age. Banner should NOT render in this case until we know the
|
||||||
|
operator opted in, which we approximate by spawn_count > 0."""
|
||||||
|
_reset_ais_module()
|
||||||
|
from services.ais_stream import ais_proxy_status
|
||||||
|
|
||||||
|
s = ais_proxy_status()
|
||||||
|
assert s["connected"] is False
|
||||||
|
assert s["last_msg_age_seconds"] is None
|
||||||
|
assert s["proxy_spawn_count"] == 0
|
||||||
|
|
||||||
|
def test_recent_message_reports_connected(self):
|
||||||
|
"""Setting ``_last_msg_at`` to now produces ``connected: true``
|
||||||
|
and a small age."""
|
||||||
|
_reset_ais_module()
|
||||||
|
from services import ais_stream as ais
|
||||||
|
|
||||||
|
with ais._vessels_lock:
|
||||||
|
ais._last_msg_at = time.time() - 5
|
||||||
|
s = ais.ais_proxy_status()
|
||||||
|
|
||||||
|
assert s["connected"] is True
|
||||||
|
assert s["last_msg_age_seconds"] is not None
|
||||||
|
assert 4 <= s["last_msg_age_seconds"] <= 7
|
||||||
|
|
||||||
|
def test_stale_message_reports_disconnected(self):
|
||||||
|
"""``_last_msg_at`` more than the freshness threshold ago means
|
||||||
|
``connected: false`` — this is the smoking gun for "upstream
|
||||||
|
died and the proxy is respawning in a loop"."""
|
||||||
|
_reset_ais_module()
|
||||||
|
from services import ais_stream as ais
|
||||||
|
|
||||||
|
with ais._vessels_lock:
|
||||||
|
# 5 minutes ago — well past the 60s freshness window.
|
||||||
|
ais._last_msg_at = time.time() - 300
|
||||||
|
s = ais.ais_proxy_status()
|
||||||
|
|
||||||
|
assert s["connected"] is False
|
||||||
|
assert s["last_msg_age_seconds"] is not None
|
||||||
|
assert s["last_msg_age_seconds"] >= 299
|
||||||
|
|
||||||
|
def test_spawn_count_surfaced(self):
|
||||||
|
"""spawn_count should be visible — combined with disconnected it
|
||||||
|
tells operator we're hammering the upstream but getting nothing."""
|
||||||
|
_reset_ais_module()
|
||||||
|
from services import ais_stream as ais
|
||||||
|
|
||||||
|
with ais._vessels_lock:
|
||||||
|
ais._proxy_spawn_count = 42
|
||||||
|
s = ais.ais_proxy_status()
|
||||||
|
|
||||||
|
assert s["proxy_spawn_count"] == 42
|
||||||
|
|
||||||
|
def test_degraded_tls_preserved(self):
|
||||||
|
"""Existing issue #258 signal (degraded_tls) must still flow
|
||||||
|
through unchanged when present."""
|
||||||
|
_reset_ais_module()
|
||||||
|
from services import ais_stream as ais
|
||||||
|
|
||||||
|
with ais._vessels_lock:
|
||||||
|
ais._proxy_status["degraded_tls"] = True
|
||||||
|
s = ais.ais_proxy_status()
|
||||||
|
|
||||||
|
assert s.get("degraded_tls") is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestHealthEndpointEscalation:
    """``/api/health`` top-level status versus AIS proxy connectivity."""

    def test_disconnected_with_api_key_escalates_to_degraded(
        self, client, monkeypatch
    ):
        """With ``AIS_API_KEY`` configured and the proxy disconnected, the
        health endpoint must report something other than ``"ok"``."""
        _reset_ais_module()
        monkeypatch.setenv("AIS_API_KEY", "test-key")

        from services import ais_stream as ais

        # Simulate "AIS upstream offline": the proxy has been spawned
        # (count > 0) but the last message is ten minutes old.
        ten_minutes_ago = time.time() - 600
        with ais._vessels_lock:
            ais._proxy_spawn_count = 5
            ais._last_msg_at = ten_minutes_ago

        response = client.get("/api/health")
        assert response.status_code == 200

        payload = response.json()
        proxy = payload["ais_proxy"]
        assert proxy["connected"] is False
        assert proxy["proxy_spawn_count"] == 5

        # "error" is tolerated because an unrelated SLO may also be red;
        # the only forbidden value here is "ok".
        status = payload["status"]
        assert status in ("degraded", "error"), (
            f"with AIS_API_KEY set + connected=false, status must NOT be 'ok'; "
            f"got {status!r}"
        )

    def test_no_api_key_does_not_escalate(self, client, monkeypatch):
        """Without ``AIS_API_KEY`` the operator has not opted in to AIS, so
        a disconnected proxy is the intended state rather than a fault."""
        _reset_ais_module()
        monkeypatch.delenv("AIS_API_KEY", raising=False)

        from services import ais_stream as ais

        # Proxy never ran and no message was ever seen: the "disconnected"
        # signal is true even though nothing is wrong.
        with ais._vessels_lock:
            ais._proxy_spawn_count = 0
            ais._last_msg_at = 0.0

        response = client.get("/api/health")
        assert response.status_code == 200

        payload = response.json()
        # The top-level status is deliberately NOT asserted: other SLOs may
        # have tripped during the session, so "ok" cannot be guaranteed.
        # This test pins that the endpoint answers and still reports the
        # proxy as disconnected on this path.
        assert payload["ais_proxy"]["connected"] is False
|
||||||
@@ -0,0 +1,432 @@
|
|||||||
|
"""AISHub REST fallback for ship tracking.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
When ``stream.aisstream.io`` (the WebSocket primary) is unreachable, the
|
||||||
|
ships layer goes empty. ``aishub_fallback.py`` polls ``data.aishub.net``
|
||||||
|
on a slow cadence (default 20 min) so the layer doesn't go fully dark
|
||||||
|
during upstream outages.
|
||||||
|
|
||||||
|
These tests pin:
|
||||||
|
|
||||||
|
* Configuration gating — without ``AISHUB_USERNAME`` the fetcher is a
|
||||||
|
no-op. The username's presence is the opt-in.
|
||||||
|
* Connectivity gating — when the WebSocket primary is connected, the
|
||||||
|
fallback skips so it doesn't stomp fresher live data.
|
||||||
|
* Response parsing — successful, error, and empty AISHub payloads.
|
||||||
|
* Record normalization — bad records (no MMSI, sentinel positions) are
|
||||||
|
dropped without crashing.
|
||||||
|
* Merge behavior — records land in the shared ``_vessels`` dict with
|
||||||
|
``source: "aishub"`` and don't overwrite very-recent live updates.
|
||||||
|
* Poll interval clamping — env var overrides honored within [1, 360].
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Configuration / gating
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGating:
    """Opt-in (``AISHUB_USERNAME``) and primary-connectivity gates of the
    AISHub fallback fetcher."""

    @staticmethod
    def _record_network_calls(monkeypatch, module):
        """Swap ``module.fetch_with_curl`` for a stub that records every call.

        The stub raises as well, but the returned list is the real signal:
        it stays observable even if the code under test catches the
        exception, so a test cannot pass vacuously.

        Returns:
            list: one ``(args, kwargs)`` tuple per call made to the stub.
        """
        calls = []

        def _forbidden_fetch(*args, **kwargs):
            calls.append((args, kwargs))
            raise AssertionError(
                "network call must not happen while the fallback is gated off"
            )

        monkeypatch.setattr(module, "fetch_with_curl", _forbidden_fetch)
        return calls

    def test_no_username_means_disabled(self, monkeypatch):
        """No ``AISHUB_USERNAME`` → disabled, returns 0, no network call."""
        from services.fetchers import aishub_fallback

        monkeypatch.delenv("AISHUB_USERNAME", raising=False)
        calls = self._record_network_calls(monkeypatch, aishub_fallback)

        assert aishub_fallback.aishub_fallback_enabled() is False
        # The full fetch path must early-return 0 without touching the
        # network; the recorded-call list verifies that directly.
        assert aishub_fallback.fetch_aishub_vessels() == 0
        assert calls == [], "fetch_with_curl must not be called without a username"

    def test_username_set_means_enabled(self, monkeypatch):
        """Setting ``AISHUB_USERNAME`` is the opt-in."""
        from services.fetchers.aishub_fallback import aishub_fallback_enabled

        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")

        assert aishub_fallback_enabled() is True

    def test_skips_when_websocket_primary_is_connected(self, monkeypatch):
        """If the AISStream WebSocket is currently delivering messages,
        the fallback should skip — fresher live data is already flowing."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")

        # Force "connected" state in the ais_stream module. monkeypatch is
        # used so the previous values come back at teardown instead of
        # leaking a "connected" primary into later tests.
        with ais._vessels_lock:
            monkeypatch.setattr(ais, "_last_msg_at", time.time() - 5)  # 5s ago
            monkeypatch.setattr(ais, "_proxy_spawn_count", 1)
        # Sanity check the gate:
        assert ais.ais_proxy_status()["connected"] is True

        # And confirm the fallback skips without any network call:
        calls = self._record_network_calls(monkeypatch, aishub_fallback)

        assert aishub_fallback.fetch_aishub_vessels() == 0
        assert calls == [], "network call must not happen when primary is connected"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Response parsing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestResponseParsing:
    """``_parse_aishub_response`` across good, error, empty and odd payloads."""

    def test_successful_response_parsed(self):
        """A ``[header, records]`` payload yields the records in order."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        header = {"ERROR": False, "USERNAME": "test", "FORMAT": "1", "RECORDS": 2}
        vessels = [
            {"MMSI": 123, "LATITUDE": 40.0, "LONGITUDE": -73.0},
            {"MMSI": 456, "LATITUDE": 51.5, "LONGITUDE": -0.1},
        ]

        parsed = _parse_aishub_response(json.dumps([header, vessels]))

        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first["MMSI"] == 123
        assert second["MMSI"] == 456

    def test_error_response_returns_empty(self):
        """A header carrying ``ERROR=True`` is treated as "no data"."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        error_header = {"ERROR": True, "ERROR_MESSAGE": "Invalid username"}
        raw = json.dumps([error_header])

        assert _parse_aishub_response(raw) == []

    def test_empty_payload_returns_empty(self):
        """An empty or whitespace-only body (seen on silent rate-limit
        drops, which still answer 200) parses to no rows."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        for blank in ("", "   "):
            assert _parse_aishub_response(blank) == []

    def test_malformed_json_returns_empty(self):
        """Unparseable JSON parses to no rows instead of raising."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        broken = "not json {"
        assert _parse_aishub_response(broken) == []

    def test_unexpected_shape_returns_empty(self):
        """Defensive: valid JSON whose shape is not ``[header, records]``."""
        from services.fetchers.aishub_fallback import _parse_aishub_response

        odd_shapes = (
            {"unexpected": "object"},
            [],
            # Header-only with no records list:
            [{"ERROR": False, "RECORDS": 0}],
        )
        for shape in odd_shapes:
            assert _parse_aishub_response(json.dumps(shape)) == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Record normalization
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalize:
    """``_normalize_record``: field mapping, sentinel handling, rejection."""

    def test_full_record_normalized(self):
        """A fully populated AISHub row maps onto the internal field names."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {
            "MMSI": 366998410,
            "LATITUDE": 37.8,
            "LONGITUDE": -122.4,
            "COG": 280,
            "SOG": 12.5,
            "HEADING": 285,
            "NAME": "MV TESTSHIP",
            "CALLSIGN": "WDH7100",
            "DEST": "OAKLAND",
            "TYPE": 70,
            "IMO": 9111111,
        }
        normalized = _normalize_record(raw)

        assert normalized is not None
        expected_fields = (
            ("mmsi", 366998410),
            ("lat", 37.8),
            ("lng", -122.4),
            ("sog", 12.5),
            ("heading", 285),
            ("name", "MV TESTSHIP"),
            ("destination", "OAKLAND"),
            ("ais_type_code", 70),
        )
        for field, value in expected_fields:
            assert normalized[field] == value

    def test_speed_sentinel_sanitized(self):
        """SOG 102.3 is the "speed not available" value named by this test;
        it must be reported as 0 rather than as an implausibly fast ship."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {"MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5, "SOG": 102.3, "COG": 0}
        normalized = _normalize_record(raw)
        assert normalized["sog"] == 0.0

    def test_heading_sentinel_falls_back_to_cog(self):
        """HEADING 511 means "not available"; COG is used in its place."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {"MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5, "HEADING": 511, "COG": 280}
        normalized = _normalize_record(raw)
        assert normalized["heading"] == 280

    def test_missing_mmsi_rejected(self):
        """Rows with no MMSI, or MMSI 0, are dropped."""
        from services.fetchers.aishub_fallback import _normalize_record

        without_mmsi = {"LATITUDE": 0.5, "LONGITUDE": 0.5}
        zero_mmsi = {"MMSI": 0, "LATITUDE": 0.5, "LONGITUDE": 0.5}
        assert _normalize_record(without_mmsi) is None
        assert _normalize_record(zero_mmsi) is None

    def test_no_position_rejected(self):
        """Rows missing latitude, longitude, or both are dropped."""
        from services.fetchers.aishub_fallback import _normalize_record

        incomplete_rows = (
            {"MMSI": 1},
            {"MMSI": 1, "LATITUDE": 0.5},
            {"MMSI": 1, "LONGITUDE": 0.5},
        )
        for row in incomplete_rows:
            assert _normalize_record(row) is None

    def test_position_sentinels_rejected(self):
        """Latitude 91 / longitude 181 are the "no position" markers."""
        from services.fetchers.aishub_fallback import _normalize_record

        lat_sentinel = {"MMSI": 1, "LATITUDE": 91.0, "LONGITUDE": 0.0}
        lng_sentinel = {"MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 181.0}
        assert _normalize_record(lat_sentinel) is None
        assert _normalize_record(lng_sentinel) is None

    def test_out_of_range_rejected(self):
        """Coordinates beyond the valid lat/lng range are dropped."""
        from services.fetchers.aishub_fallback import _normalize_record

        bad_lat = {"MMSI": 1, "LATITUDE": 95.0, "LONGITUDE": 0.0}
        bad_lng = {"MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 200.0}
        assert _normalize_record(bad_lat) is None
        assert _normalize_record(bad_lng) is None

    def test_destination_at_sign_stripped(self):
        """Trailing ``@`` padding on DEST is removed so the UI does not
        render values like "OAKLAND@@@@@"."""
        from services.fetchers.aishub_fallback import _normalize_record

        raw = {"MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5, "DEST": "OAKLAND@@@"}
        normalized = _normalize_record(raw)
        assert normalized["destination"] == "OAKLAND"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Poll interval clamping
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestPollInterval:
    """``aishub_poll_interval_minutes``: default, override and clamping."""

    ENV_VAR = "AISHUB_POLL_INTERVAL_MINUTES"

    @staticmethod
    def _interval():
        """Import lazily (as every test here does) and read the interval."""
        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes

        return aishub_poll_interval_minutes()

    def test_default_is_twenty_minutes(self, monkeypatch):
        """With the variable unset the interval is 20 minutes."""
        monkeypatch.delenv(self.ENV_VAR, raising=False)
        assert self._interval() == 20

    def test_env_override_honored(self, monkeypatch):
        """An in-range value is used as given."""
        monkeypatch.setenv(self.ENV_VAR, "45")
        assert self._interval() == 45

    def test_clamp_lower_bound(self, monkeypatch):
        """Zero or negative values would hammer the upstream — clamp to 1."""
        for too_small in ("0", "-5"):
            monkeypatch.setenv(self.ENV_VAR, too_small)
            assert self._interval() == 1

    def test_clamp_upper_bound(self, monkeypatch):
        """A huge value would effectively silence the fallback — clamp to 360."""
        monkeypatch.setenv(self.ENV_VAR, "99999")
        assert self._interval() == 360

    def test_malformed_env_defaults(self, monkeypatch):
        """A non-numeric value falls back to the 20-minute default."""
        monkeypatch.setenv(self.ENV_VAR, "twenty")
        assert self._interval() == 20
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# End-to-end fetch + merge into _vessels store
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFetchAndMerge:
    """End-to-end: stubbed AISHub response → ``fetch_aishub_vessels`` →
    shared ``ais_stream._vessels`` store."""

    def _force_primary_disconnected(self):
        """Set ais_stream module state so the gate allows the fallback.

        Also empties ``_vessels`` so each test starts from a clean store.
        """
        from services import ais_stream as ais

        with ais._vessels_lock:
            # Far in the past → connected = false; spawn_count > 0 → primary
            # has at least tried so the gate engages.
            ais._last_msg_at = time.time() - 3600
            ais._proxy_spawn_count = 5
            ais._vessels.clear()

    @staticmethod
    def _stub_fetch(monkeypatch, module, status_code, text):
        """Make ``module.fetch_with_curl`` return a canned response.

        The response object exposes only ``status_code`` and ``text``, the
        two attributes the stubs in this class have always provided.
        """
        canned = type("CannedResponse", (), {"status_code": status_code, "text": text})
        monkeypatch.setattr(module, "fetch_with_curl", lambda *a, **kw: canned())

    def test_vessels_merged_with_source_tag(self, monkeypatch):
        """Happy path: AISHub returns 2 ships, both land in ``_vessels``
        with ``source: 'aishub'``."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        payload = json.dumps([
            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 2},
            [
                {
                    "MMSI": 111111111,
                    "LATITUDE": 40.0,
                    "LONGITUDE": -73.0,
                    "SOG": 12.0,
                    "COG": 270,
                    "HEADING": 275,
                    "NAME": "SHIP A",
                    "TYPE": 70,
                },
                {
                    "MMSI": 222222222,
                    "LATITUDE": 51.5,
                    "LONGITUDE": -0.1,
                    "SOG": 8.0,
                    "COG": 90,
                    "HEADING": 92,
                    "NAME": "SHIP B",
                    "TYPE": 60,
                },
            ],
        ])
        self._stub_fetch(monkeypatch, aishub_fallback, 200, payload)

        count = aishub_fallback.fetch_aishub_vessels()

        assert count == 2
        with ais._vessels_lock:
            v1 = ais._vessels.get(111111111)
            v2 = ais._vessels.get(222222222)
        assert v1 is not None
        assert v1["source"] == "aishub"
        assert v1["lat"] == 40.0
        assert v1["name"] == "SHIP A"
        assert v2 is not None
        assert v2["source"] == "aishub"
        assert v2["type"] == "passenger"  # AIS type 60 → passenger

    def test_does_not_overwrite_fresh_live_data(self, monkeypatch):
        """If the WebSocket pushed an update for an MMSI moments ago and the
        AISHub poll completes in that window, we should NOT clobber the
        fresher live data."""
        from services.fetchers import aishub_fallback
        from services import ais_stream as ais

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        # Pre-seed _vessels with a "very fresh" live record.
        fresh_ts = time.time()
        with ais._vessels_lock:
            ais._vessels[111111111] = {
                "mmsi": 111111111,
                "lat": 12.34,
                "lng": 56.78,
                "source": "aisstream",
                "_updated": fresh_ts,
            }

        # The AISHub row must carry a VALID position that differs from the
        # live one. An out-of-range value (e.g. 99.0/99.0) would be dropped
        # by the normalizer's range guard and the merge path would never
        # run, making this test pass for the wrong reason.
        payload = json.dumps([
            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 1},
            [
                {
                    "MMSI": 111111111,
                    "LATITUDE": 0.0,  # different from the live 12.34
                    "LONGITUDE": 0.0,
                    "NAME": "STALE",
                    "SOG": 0,
                    "COG": 0,
                    "TYPE": 0,
                },
            ],
        ])
        self._stub_fetch(monkeypatch, aishub_fallback, 200, payload)

        aishub_fallback.fetch_aishub_vessels()

        with ais._vessels_lock:
            v = ais._vessels.get(111111111)
        # Live data wins — the record is still there, untouched.
        assert v is not None
        assert v["lat"] == 12.34
        assert v["lng"] == 56.78
        assert v["source"] == "aisstream"

    def test_http_failure_returns_zero(self, monkeypatch):
        """A non-200 upstream answer yields a merged count of 0."""
        from services.fetchers import aishub_fallback

        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
        self._force_primary_disconnected()

        self._stub_fetch(monkeypatch, aishub_fallback, 503, "")

        assert aishub_fallback.fetch_aishub_vessels() == 0
|
||||||
@@ -0,0 +1,389 @@
|
|||||||
|
"""Issues #244, #245, #246 (tg12 external audit): carrier tracker
|
||||||
|
quality + provenance + freshness.
|
||||||
|
|
||||||
|
These tests pin the post-fix contract:
|
||||||
|
|
||||||
|
- **#244**: dated editorial snapshot positions no longer live in the
|
||||||
|
registry. They live in a one-shot seed file that is consumed once
|
||||||
|
on first-ever startup. After that, the runtime cache reflects only
|
||||||
|
what THIS install has actually observed.
|
||||||
|
|
||||||
|
- **#245**: headline-derived positions (centroid of a region keyword)
|
||||||
|
are stamped ``position_confidence = "approximate"`` so the UI can
|
||||||
|
render them with appropriate uncertainty.
|
||||||
|
|
||||||
|
- **#246**: freshness is a *labelling* decision, not an eviction
|
||||||
|
decision. Positions older than the configurable freshness window
|
||||||
|
flip from ``"recent"`` to ``"stale"`` but are NEVER replaced with
|
||||||
|
the registry default — that would teleport the carrier. The user
|
||||||
|
always sees the last position the system actually observed.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def fresh_tracker(tmp_path, monkeypatch):
    """Isolated ``carrier_tracker`` with seed/cache paths redirected to tmp.

    Yields the module so tests can call its functions. Module-level state
    is reset before the test and cleaned up after it so position caches
    don't leak across cases:

    * ``SEED_FILE`` / ``CACHE_FILE`` point into ``tmp_path`` (neither file
      is created here — tests write them as needed).
    * ``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` is removed from the env.
    * ``_carrier_positions`` and ``_cached_gdelt_articles`` are emptied
      before and after the test.
    * ``_last_gdelt_fetch_at`` is zeroed via ``monkeypatch`` so its prior
      value is restored at teardown rather than left at whatever the test
      wrote.
    """
    from services import carrier_tracker

    data_dir = tmp_path / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    monkeypatch.setattr(carrier_tracker, "SEED_FILE", data_dir / "carrier_seed.json")
    monkeypatch.setattr(carrier_tracker, "CACHE_FILE", tmp_path / "carrier_cache.json")
    monkeypatch.delenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", raising=False)

    def _clear_caches():
        carrier_tracker._carrier_positions.clear()
        carrier_tracker._cached_gdelt_articles.clear()

    # Reset module-level mutable state.
    _clear_caches()
    # raising=False mirrors the plain assignment this replaces: it works
    # whether or not the attribute already exists on the module.
    monkeypatch.setattr(carrier_tracker, "_last_gdelt_fetch_at", 0.0, raising=False)

    yield carrier_tracker

    # Clean up so subsequent tests start fresh.
    _clear_caches()
|
||||||
|
|
||||||
|
|
||||||
|
def _write_seed(path: Path, hull: str = "CVN-78", **overrides) -> None:
|
||||||
|
payload = {
|
||||||
|
"_meta": {
|
||||||
|
"as_of": "2026-03-09",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker",
|
||||||
|
"source_url": "https://news.usni.org/...",
|
||||||
|
"note": "test",
|
||||||
|
},
|
||||||
|
"carriers": {
|
||||||
|
hull: {
|
||||||
|
"lat": 18.0,
|
||||||
|
"lng": 39.5,
|
||||||
|
"heading": 0,
|
||||||
|
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||||
|
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||||
|
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||||
|
"position_source_at": "2026-03-09T00:00:00Z",
|
||||||
|
"position_confidence": "seed",
|
||||||
|
**overrides,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #244 — first-run seed bootstrap, never re-seeds after that
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSeedBootstrap:
    """#244: the seed file is consumed only when no cache exists yet."""

    def test_first_ever_startup_bootstraps_from_seed(self, fresh_tracker, tmp_path):
        """Seed present, cache absent → the seed populates the cache."""
        tracker = fresh_tracker
        _write_seed(tracker.SEED_FILE)
        # No cache exists yet.
        assert not tracker.CACHE_FILE.exists()

        positions = tracker._bootstrap_cache_if_missing()

        # The seed entry made it into the cache.
        assert "CVN-78" in positions
        ford = positions["CVN-78"]
        assert ford["lat"] == 18.0
        assert ford["position_confidence"] == "seed"
        # And the cache file is now on disk so subsequent runs skip the seed.
        assert tracker.CACHE_FILE.exists()

    def test_subsequent_startup_ignores_seed(self, fresh_tracker, tmp_path):
        """An existing cache wins over a seed file that is also present."""
        tracker = fresh_tracker
        # The cache says Persian Gulf; the seed file says Red Sea.
        observed = {
            "lat": 25.0,
            "lng": 55.0,
            "heading": 0,
            "desc": "Persian Gulf — operator-observed",
            "source": "Operator log",
            "source_url": "",
            "position_source_at": "2026-04-15T12:00:00Z",
            "position_confidence": "recent",
        }
        tracker.CACHE_FILE.write_text(json.dumps({"CVN-78": observed}))
        _write_seed(tracker.SEED_FILE)  # seed is present but should NOT be used

        positions = tracker._bootstrap_cache_if_missing()

        ford = positions["CVN-78"]
        assert ford["lat"] == 25.0
        assert ford["desc"] == "Persian Gulf — operator-observed"

    def test_no_seed_no_cache_falls_back_to_homeport(self, fresh_tracker):
        """Neither file exists → every hull gets its homeport default, so
        a carrier never disappears."""
        tracker = fresh_tracker
        assert not tracker.SEED_FILE.exists()
        assert not tracker.CACHE_FILE.exists()

        positions = tracker._bootstrap_cache_if_missing()

        registry = tracker.CARRIER_REGISTRY
        # Every registered carrier has SOMETHING.
        assert set(positions.keys()) == set(registry.keys())
        # All entries are labelled as homeport defaults.
        for hull, entry in positions.items():
            assert entry["position_confidence"] == "homeport_default"
            home = registry[hull]
            assert entry["lat"] == home["homeport_lat"]
            assert entry["lng"] == home["homeport_lng"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #244 — no editorial fallbacks live in the registry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRegistryShape:
    """#244: the registry holds homeports only, no dated editorial data."""

    def test_registry_has_no_dated_fallback_fields(self, fresh_tracker):
        """No registry entry may still carry a ``fallback_*`` field; those
        coordinates belong in the seed file."""
        forbidden = {"fallback_lat", "fallback_lng", "fallback_heading", "fallback_desc"}
        for hull, entry in fresh_tracker.CARRIER_REGISTRY.items():
            offending = set(entry.keys()) & forbidden
            assert not offending, f"{hull} still has dated registry fields: {offending}"

    def test_registry_keeps_homeport_for_every_hull(self, fresh_tracker):
        """Every hull keeps its homeport coordinates, name and wiki link."""
        registry = fresh_tracker.CARRIER_REGISTRY
        for hull in registry:
            entry = registry[hull]
            assert "homeport_lat" in entry, f"{hull} missing homeport_lat"
            assert "homeport_lng" in entry, f"{hull} missing homeport_lng"
            assert "name" in entry
            assert "wiki" in entry
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #246 — freshness labelling, NOT eviction
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFreshnessLabelling:
    """#246: age changes the confidence label, never the position."""

    # Fixed "current time" shared by the deterministic label tests.
    NOW = datetime(2026, 6, 1, tzinfo=timezone.utc)

    @classmethod
    def _observed(cls, days_ago):
        """Unlabelled entry whose source timestamp is ``days_ago`` before NOW."""
        stamp = (cls.NOW - timedelta(days=days_ago)).isoformat()
        return {"lat": 25.0, "lng": 55.0, "position_source_at": stamp}

    def test_recent_observation_labels_recent(self, fresh_tracker):
        """Three days old → ``"recent"``."""
        label = fresh_tracker._compute_position_confidence(
            self._observed(3), now=self.NOW
        )
        assert label == "recent"

    def test_aged_observation_flips_to_stale(self, fresh_tracker):
        """Thirty days old → ``"stale"``."""
        label = fresh_tracker._compute_position_confidence(
            self._observed(30), now=self.NOW
        )
        assert label == "stale"

    def test_seed_label_is_preserved_explicitly(self, fresh_tracker):
        """An explicit ``"seed"`` label wins even though the source is
        months old, so the UI can render the seed-specific badge instead
        of a generic "stale"."""
        seeded = {
            "lat": 18.0,
            "lng": 39.5,
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        label = fresh_tracker._compute_position_confidence(seeded, now=self.NOW)
        assert label == "seed"

    def test_homeport_default_label_is_preserved(self, fresh_tracker):
        """An explicit ``"homeport_default"`` label is returned as-is."""
        at_homeport = {
            "lat": 36.95,
            "lng": -76.32,
            "position_source_at": self.NOW.isoformat(),
            "position_confidence": "homeport_default",
        }
        label = fresh_tracker._compute_position_confidence(at_homeport, now=self.NOW)
        assert label == "homeport_default"

    def test_freshness_window_is_env_configurable(self, fresh_tracker, monkeypatch):
        """``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` widens the window."""
        twenty_days_old = self._observed(20)
        compute = fresh_tracker._compute_position_confidence

        # Default window = 14 days → 20-day-old entry is stale.
        assert compute(twenty_days_old, now=self.NOW) == "stale"
        # Stretch to 30 days → same entry is now "recent".
        monkeypatch.setenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "30")
        assert compute(twenty_days_old, now=self.NOW) == "recent"

    def test_aged_cache_entry_keeps_its_position_never_reverts(self, fresh_tracker):
        """Core regression test: a year-old cache entry must NOT be replaced
        with the seed or the homeport. The position shown is the last one
        observed; only the freshness label changes."""
        tracker = fresh_tracker
        a_year_ago = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
        cached = {
            "lat": 25.0,
            "lng": 55.0,
            "heading": 0,
            "desc": "Persian Gulf",
            "source": "GDELT News API",
            "source_url": "https://news.example/...",
            "position_source_at": a_year_ago,
            "position_confidence": "recent",  # was recent when written
        }
        tracker.CACHE_FILE.write_text(json.dumps({"CVN-78": cached}))

        positions = tracker._bootstrap_cache_if_missing()
        enriched = tracker._enrich_for_rendering("CVN-78", positions["CVN-78"])

        # The position is preserved exactly.
        assert enriched["lat"] == 25.0
        assert enriched["lng"] == 55.0
        # But the live label has flipped to stale.
        assert enriched["position_confidence"] == "stale"
        assert enriched["is_fallback"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #245 — approximate confidence for region-centroid positions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestApproximateConfidenceForNewsDerivedPositions:
    """#245: headline-derived positions are labelled ``"approximate"``."""

    def test_news_parsing_stamps_approximate_confidence(self, fresh_tracker):
        """A headline naming a carrier and a region yields an update tagged
        ``"approximate"`` and dated by the article's seen date."""
        headline = {
            "title": "USS Ford carrier deployed in Mediterranean for joint exercise",
            "url": "https://news.example/ford-mediterranean",
            "seendate": "20260415120000",
        }

        updates = fresh_tracker._parse_carrier_positions_from_news([headline])

        assert "CVN-78" in updates
        ford = updates["CVN-78"]
        assert ford["position_confidence"] == "approximate"
        # And the source_at is the article's seen date, not now().
        assert ford["position_source_at"].startswith("2026-04-15")

    def test_gdelt_seendate_parser_handles_well_formed_input(self, fresh_tracker):
        """``YYYYMMDDhhmmss`` converts to an ISO timestamp."""
        converted = fresh_tracker._gdelt_seendate_to_iso("20260415120000")
        assert converted is not None
        assert converted.startswith("2026-04-15T12:00:00")

    def test_gdelt_seendate_parser_returns_none_on_garbage(self, fresh_tracker):
        """Empty, non-date and truncated inputs all convert to ``None``."""
        to_iso = fresh_tracker._gdelt_seendate_to_iso
        for garbage in ("", "not-a-date", "2026"):
            assert to_iso(garbage) is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Full enrichment → public API shape
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnrichForRendering:
    """Pins the record shape returned by ``_enrich_for_rendering``."""

    def test_seed_entry_produces_expected_public_fields(self, fresh_tracker):
        """A seed entry keeps its UI fields and is reported as a fallback."""
        observed_at = "2026-03-09T00:00:00Z"
        seed_entry = dict(
            lat=18.0,
            lng=39.5,
            heading=0,
            desc="Red Sea (USNI Mar 9)",
            source="USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
            source_url="https://news.usni.org/category/fleet-tracker",
            position_source_at=observed_at,
            position_confidence="seed",
        )
        enriched = fresh_tracker._enrich_for_rendering("CVN-78", seed_entry)

        # Fields the UI already relied on.
        assert enriched["lat"] == 18.0
        assert enriched["lng"] == 39.5
        assert enriched["source"].startswith("USNI")
        assert enriched["last_osint_update"] == observed_at

        # Fields added for the audit.
        assert enriched["position_confidence"] == "seed"
        assert enriched["position_source_at"] == observed_at
        assert enriched["is_fallback"] is True

    def test_recent_observation_is_not_fallback(self, fresh_tracker):
        """A two-day-old ``approximate`` entry keeps its label and is not a fallback."""
        now = datetime.now(timezone.utc)
        two_days_ago = now - timedelta(days=2)
        recent_entry = dict(
            lat=25.0,
            lng=55.0,
            heading=0,
            desc="Persian Gulf",
            source="GDELT News API",
            source_url="https://news.example/...",
            position_source_at=two_days_ago.isoformat(),
            position_confidence="approximate",
        )
        enriched = fresh_tracker._enrich_for_rendering("CVN-78", recent_entry, now=now)
        assert enriched["position_confidence"] == "approximate"
        # "approximate" describes precision, not freshness: the UI treats it as
        # live data. is_fallback is reserved for the explicit fallback
        # categories (seed / stale / homeport_default).
        assert enriched["is_fallback"] is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Regression: existing frontend fields are preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestPublicResponseShapeBackwardCompat:
    """The frontend ShipPopup expects `estimated`, `source`, `source_url`,
    `last_osint_update`. The new fields are additive and existing fields
    keep their meaning so the UI does not need updating to keep working."""

    def test_get_carrier_positions_preserves_existing_keys(self, fresh_tracker):
        """``get_carrier_positions`` returns both the legacy and the new keys."""
        legacy_keys = (
            "name",
            "type",
            "lat",
            "lng",
            "country",
            "desc",
            "wiki",
            "estimated",
            "source",
            "source_url",
            "last_osint_update",
        )
        audit_keys = ("position_confidence", "position_source_at", "is_fallback")

        _write_seed(fresh_tracker.SEED_FILE)
        fresh_tracker._bootstrap_cache_if_missing()

        seeded_position = {
            "lat": 18.0,
            "lng": 39.5,
            "heading": 0,
            "desc": "Red Sea (seed)",
            "source": "Seed",
            "source_url": "",
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        # Write the in-memory table under the tracker's own lock.
        with fresh_tracker._positions_lock:
            fresh_tracker._carrier_positions.update({"CVN-78": seeded_position})

        out = fresh_tracker.get_carrier_positions()
        assert len(out) == 1
        c = out[0]

        # Old fields the frontend uses.
        for key in legacy_keys:
            assert key in c, f"missing legacy field {key!r}"
        # New fields.
        for key in audit_keys:
            assert key in c, f"missing audit-required field {key!r}"

        assert c["type"] == "carrier"
        assert c["estimated"] is True
|
||||||
@@ -89,6 +89,34 @@ import pytest
|
|||||||
# relay through the backend. 60/minute rate limit is not enough on
|
# relay through the backend. 60/minute rate limit is not enough on
|
||||||
# a streaming endpoint.
|
# a streaming endpoint.
|
||||||
("get", "/api/radio/openmhz/audio?url=https%3A%2F%2Fmedia.openmhz.com%2Faudio%2Fabc.mp3", None),
|
("get", "/api/radio/openmhz/audio?url=https%3A%2F%2Fmedia.openmhz.com%2Faudio%2Fabc.mp3", None),
|
||||||
|
# Issue #299 (tg12): /api/sentinel/token relays Copernicus CDSE
|
||||||
|
# OAuth token requests for caller-supplied client_id/secret.
|
||||||
|
# Anonymous access turns the backend into a free OAuth-mint relay.
|
||||||
|
(
|
||||||
|
"post",
|
||||||
|
"/api/sentinel/token",
|
||||||
|
None, # body sent via raw form-encoded data — None lets the
|
||||||
|
# remote_client wrapper send an empty body; the auth
|
||||||
|
# check fires before the form parser runs.
|
||||||
|
),
|
||||||
|
# Issue #300 (tg12): /api/sentinel/tile relays Sentinel Hub Process
|
||||||
|
# API tile fetches. Anonymous access is a bandwidth/quota relay
|
||||||
|
# for any caller's Copernicus account.
|
||||||
|
(
|
||||||
|
"post",
|
||||||
|
"/api/sentinel/tile",
|
||||||
|
{
|
||||||
|
"client_id": "ignored",
|
||||||
|
"client_secret": "ignored",
|
||||||
|
"preset": "TRUE-COLOR",
|
||||||
|
"date": "2026-01-01",
|
||||||
|
"z": 6, "x": 30, "y": 20,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
# Issue #301 (tg12): /api/sentinel2/search hits Planetary Computer
|
||||||
|
# STAC + Esri fallback. Anonymous access is a free external-search
|
||||||
|
# relay even though no caller credentials are involved.
|
||||||
|
("get", "/api/sentinel2/search?lat=0&lng=0", None),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_remote_control_surface_rejects_without_local_operator_or_admin(
|
def test_remote_control_surface_rejects_without_local_operator_or_admin(
|
||||||
|
|||||||
@@ -0,0 +1,270 @@
|
|||||||
|
"""Per-(sender, recipient) anti-spam cap on the DM relay.
|
||||||
|
|
||||||
|
The user-stated rule: a single sender can have at most N UNACKED messages
|
||||||
|
parked in a single recipient's mailbox at any one time (N=2 by default).
|
||||||
|
Once the recipient pulls a message, the sender's quota for that pair
|
||||||
|
frees up.
|
||||||
|
|
||||||
|
Network rule, not local rule
|
||||||
|
-----------------------------
|
||||||
|
The cap is enforced TWICE:
|
||||||
|
|
||||||
|
1. ``DMRelay.deposit(...)`` -- local check on the sender's own node.
|
||||||
|
Refuses to spool the (N+1)th message before it can be replicated.
|
||||||
|
|
||||||
|
2. ``DMRelay.accept_replica(...)`` -- replication-acceptance check on
|
||||||
|
every receiving peer. Refuses to accept an inbound replica that
|
||||||
|
would put the local mailbox over the cap, even if the originating
|
||||||
|
peer claims it had cap room.
|
||||||
|
|
||||||
|
The double enforcement matters because cap (1) is client-side -- a
|
||||||
|
hostile relay could patch it out and continue to spool extras locally.
|
||||||
|
Cap (2) means those extras can't propagate: every honest peer rejects
|
||||||
|
them on the way in. A recipient who polls from honest peers therefore
|
||||||
|
never sees more than N pending from any one sender, regardless of how
|
||||||
|
many spam attempts the sender's own relay accepted.
|
||||||
|
|
||||||
|
These tests pin both halves of the rule.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def relay():
    """Provide a new ``DMRelay`` with empty mailboxes, empty block list and zeroed stats."""
    from services.mesh.mesh_dm_relay import DMRelay

    fresh = DMRelay()
    # Clear the relay's stores so every test starts from empty state.
    for store in (fresh._mailboxes, fresh._blocks):
        store.clear()
    fresh._stats = {"messages_in_memory": 0}
    return fresh
|
||||||
|
|
||||||
|
|
||||||
|
def _deposit(
    relay,
    *,
    sender: str = "alice",
    recipient_token: str = "bob_mailbox_token_abc",
    ciphertext: str = "ciphertext-blob",
    msg_id: str = "",
):
    """Call ``relay.deposit`` for recipient ``bob`` with the ``shared`` delivery class.

    ``sender`` is passed as both ``sender_id`` and ``raw_sender_id``; the
    remaining keyword arguments are forwarded unchanged. Returns whatever
    ``relay.deposit`` returns.
    """
    deposit_kwargs = dict(
        sender_id=sender,
        raw_sender_id=sender,
        recipient_id="bob",
        ciphertext=ciphertext,
        msg_id=msg_id,
        delivery_class="shared",
        recipient_token=recipient_token,
    )
    return relay.deposit(**deposit_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Local cap on ``deposit``
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDepositCap:
    """Local per-(sender, recipient) cap enforced by ``DMRelay.deposit``."""

    def test_two_deposits_from_same_sender_succeed(self, relay):
        """Two deposits fit under the cap and get distinct message ids."""
        first = _deposit(relay)
        second = _deposit(relay)
        assert first["ok"] is True
        assert second["ok"] is True
        assert first["msg_id"] != second["msg_id"]

    def test_third_deposit_from_same_sender_rejected(self, relay):
        """The third pending deposit is refused with an explanatory detail."""
        for _ in range(2):
            _deposit(relay)
        rejected = _deposit(relay)
        assert rejected["ok"] is False
        detail = rejected["detail"].lower()
        assert "unread" in detail or "read your messages" in detail

    def test_different_senders_have_independent_quotas(self, relay):
        """Alice filling her quota does not consume Carol's."""
        for who in ("alice", "alice", "carol", "carol"):
            assert _deposit(relay, sender=who)["ok"] is True
        assert _deposit(relay, sender="carol")["ok"] is False

    def test_different_recipients_have_independent_quotas(self, relay):
        """The same sender gets a separate quota per recipient token."""
        for token in ("bob_token", "bob_token", "dave_token", "dave_token"):
            assert _deposit(relay, sender="alice", recipient_token=token)["ok"] is True

    def test_ack_frees_quota(self, relay):
        """Removing one pending message lets the sender deposit again."""
        acked = _deposit(relay)
        _deposit(relay)
        assert _deposit(relay)["ok"] is False

        # Simulate the ack by dropping the first message from the mailbox
        # and recomputing the in-memory counter.
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        remaining = []
        for message in relay._mailboxes[mailbox_key]:
            if message.msg_id != acked["msg_id"]:
                remaining.append(message)
        relay._mailboxes[mailbox_key] = remaining
        relay._stats["messages_in_memory"] = sum(
            len(box) for box in relay._mailboxes.values()
        )

        r3 = _deposit(relay)
        assert r3["ok"] is True, f"expected quota free after ack, got: {r3}"

    def test_cap_is_env_tunable(self, relay, monkeypatch):
        """With the limit patched to 1, the second deposit is refused."""
        import services.mesh.mesh_dm_relay as mdr

        def limit_of_one(self):
            return 1

        monkeypatch.setattr(mdr.DMRelay, "_per_sender_pending_limit", limit_of_one)

        assert _deposit(relay)["ok"] is True
        assert _deposit(relay)["ok"] is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Replication-acceptance cap (the half that makes this a network rule)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestAcceptReplicaCap:
|
||||||
|
def _envelope(self, *, msg_id: str, sender_block_ref: str, mailbox_key: str):
|
||||||
|
return {
|
||||||
|
"msg_id": msg_id,
|
||||||
|
"mailbox_key": mailbox_key,
|
||||||
|
"sender_block_ref": sender_block_ref,
|
||||||
|
"sender_id": "alice",
|
||||||
|
"sender_seal": "",
|
||||||
|
"ciphertext": f"ciphertext-{msg_id}",
|
||||||
|
"timestamp": time.time(),
|
||||||
|
"delivery_class": "shared",
|
||||||
|
"relay_salt": "",
|
||||||
|
"payload_format": "dm1",
|
||||||
|
"session_welcome": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_replica_accepted_under_cap(self, relay):
|
||||||
|
env = self._envelope(
|
||||||
|
msg_id="dm_replica_1",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key="mailbox_xyz",
|
||||||
|
)
|
||||||
|
result = relay.accept_replica(envelope=env)
|
||||||
|
assert result["ok"] is True
|
||||||
|
|
||||||
|
def test_replica_idempotent_on_duplicate_msg_id(self, relay):
|
||||||
|
mailbox_key = "mailbox_xyz"
|
||||||
|
env = self._envelope(
|
||||||
|
msg_id="dm_dup_1",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
)
|
||||||
|
r1 = relay.accept_replica(envelope=env)
|
||||||
|
r2 = relay.accept_replica(envelope=env)
|
||||||
|
assert r1["ok"] is True
|
||||||
|
assert r2["ok"] is True
|
||||||
|
assert r2.get("duplicate") is True
|
||||||
|
assert len(relay._mailboxes[mailbox_key]) == 1
|
||||||
|
|
||||||
|
def test_replica_rejected_when_local_count_already_at_cap(self, relay):
|
||||||
|
mailbox_key = "mailbox_xyz"
|
||||||
|
for i in (1, 2):
|
||||||
|
relay.accept_replica(envelope=self._envelope(
|
||||||
|
msg_id=f"dm_seeded_{i}",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
))
|
||||||
|
|
||||||
|
result = relay.accept_replica(envelope=self._envelope(
|
||||||
|
msg_id="dm_overcap_3",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
))
|
||||||
|
assert result["ok"] is False
|
||||||
|
assert result.get("cap_violation") is True
|
||||||
|
assert result.get("pending") == 2
|
||||||
|
assert result.get("limit") == 2
|
||||||
|
assert len(relay._mailboxes[mailbox_key]) == 2
|
||||||
|
|
||||||
|
def test_replica_from_different_sender_passes_when_one_is_at_cap(self, relay):
|
||||||
|
mailbox_key = "mailbox_xyz"
|
||||||
|
for i in (1, 2):
|
||||||
|
relay.accept_replica(envelope=self._envelope(
|
||||||
|
msg_id=f"dm_alice_{i}",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
))
|
||||||
|
assert relay.accept_replica(envelope=self._envelope(
|
||||||
|
msg_id="dm_alice_3",
|
||||||
|
sender_block_ref="alice_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
))["ok"] is False
|
||||||
|
assert relay.accept_replica(envelope=self._envelope(
|
||||||
|
msg_id="dm_carol_1",
|
||||||
|
sender_block_ref="carol_block_ref",
|
||||||
|
mailbox_key=mailbox_key,
|
||||||
|
))["ok"] is True
|
||||||
|
|
||||||
|
def test_replica_rejects_malformed_envelopes(self, relay):
|
||||||
|
for bad in (
|
||||||
|
{},
|
||||||
|
{"msg_id": "x"},
|
||||||
|
{"msg_id": "x", "mailbox_key": "y"},
|
||||||
|
"not an object at all",
|
||||||
|
):
|
||||||
|
result = relay.accept_replica(envelope=bad)
|
||||||
|
assert result["ok"] is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ``envelope_for_replication`` -- helper for the outbound replication path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnvelopeForReplication:
    """``envelope_for_replication`` — the outbound half of mailbox replication."""

    def test_returns_envelope_for_stored_message(self, relay):
        """A deposited message can be exported as a complete envelope."""
        deposited = _deposit(relay, ciphertext="hello-ciphertext")
        msg_id = deposited["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")

        env = relay.envelope_for_replication(mailbox_key=mailbox_key, msg_id=msg_id)
        assert env is not None
        assert env["msg_id"] == msg_id
        assert env["mailbox_key"] == mailbox_key
        assert env["ciphertext"] == "hello-ciphertext"
        assert env["delivery_class"] == "shared"
        required = ("msg_id", "mailbox_key", "sender_block_ref", "ciphertext")
        for k in required:
            assert env.get(k), f"envelope missing required field {k!r}"

    def test_returns_none_for_unknown_message(self, relay):
        """Unknown mailbox/message ids give ``None``."""
        missing = relay.envelope_for_replication(
            mailbox_key="never_existed",
            msg_id="never_existed",
        )
        assert missing is None

    def test_envelope_round_trips_through_accept_replica(self, relay):
        """An exported envelope is accepted and stored intact by a second relay."""
        from services.mesh.mesh_dm_relay import DMRelay

        # Second relay acting as the receiving peer.
        receiver_relay = DMRelay()
        receiver_relay._mailboxes.clear()
        receiver_relay._stats = {"messages_in_memory": 0}

        msg_id = _deposit(relay)["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        env = relay.envelope_for_replication(mailbox_key=mailbox_key, msg_id=msg_id)
        assert env is not None

        outcome = receiver_relay.accept_replica(envelope=env)
        assert outcome["ok"] is True
        stored = receiver_relay._mailboxes.get(mailbox_key, [])
        assert len(stored) == 1
        replica = stored[0]
        assert replica.msg_id == msg_id
        assert replica.ciphertext == "ciphertext-blob"
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""POST /api/mesh/dm/replicate-envelope — receiving side of cross-node DM
|
||||||
|
mailbox replication.
|
||||||
|
|
||||||
|
This is the endpoint that peer relays call when they want to hand off an
|
||||||
|
encrypted DM envelope to us (so the recipient can log into our node and
|
||||||
|
find their messages). It re-enforces the per-(sender, recipient) anti-spam
|
||||||
|
cap so hostile sender relays can't widen the cap by skipping the local
|
||||||
|
check on their own deposit path.
|
||||||
|
|
||||||
|
The endpoint:
|
||||||
|
|
||||||
|
* authenticates the caller via the existing per-peer HMAC pattern
|
||||||
|
(same one /api/mesh/infonet/peer-push and /api/mesh/gate/peer-push
|
||||||
|
use, introduced in #256 — ``X-Peer-Url`` + ``X-Peer-HMAC`` headers
|
||||||
|
keyed off ``resolve_peer_key_for_url``)
|
||||||
|
* rejects bodies > 64 KB (DM envelope size is bounded by
|
||||||
|
``MESH_DM_MAX_MSG_BYTES`` — 64KB ceiling has generous headroom)
|
||||||
|
* rejects requests without a valid peer HMAC with 403
|
||||||
|
* passes the envelope to ``DMRelay.accept_replica`` which enforces
|
||||||
|
the cap
|
||||||
|
|
||||||
|
This file pins the endpoint contract. The cap enforcement itself is
|
||||||
|
tested in ``test_dm_relay_per_sender_cap.py`` against the relay's
|
||||||
|
``accept_replica`` method directly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def remote_client():
    """Yield a synchronous POST client whose ASGI peer address is 1.2.3.4.

    Per the original fixture note, that address is never on the
    local-operator allowlist, so requests look like they come from an
    arbitrary remote caller. The client's event loop is closed after the test.
    """
    from main import app

    class _RemoteClient:
        """Drives ``httpx.AsyncClient`` over ASGI from synchronous test code."""

        def __init__(self):
            self._loop = asyncio.new_event_loop()
            self._transport = ASGITransport(app=app, client=("1.2.3.4", 12345))
            self._base = "http://1.2.3.4:8000"

        async def _post_async(self, url, kw):
            # A new AsyncClient per request, sharing the one transport.
            async with AsyncClient(transport=self._transport, base_url=self._base) as session:
                return await session.post(url, **kw)

        def post(self, url, **kw):
            return self._loop.run_until_complete(self._post_async(url, kw))

        def close(self):
            self._loop.close()

    client = _RemoteClient()
    yield client
    client.close()
|
||||||
|
|
||||||
|
|
||||||
|
class TestReplicateEndpointAuth:
|
||||||
|
def test_rejects_request_without_peer_hmac(self, remote_client):
|
||||||
|
"""A peer push that does NOT carry X-Peer-Url + X-Peer-HMAC
|
||||||
|
must be rejected with 403 before the envelope is ever passed
|
||||||
|
to the relay. Same gate the existing infonet/gate peer-push
|
||||||
|
endpoints enforce."""
|
||||||
|
payload = {
|
||||||
|
"envelope": {
|
||||||
|
"msg_id": "dm_unauth_1",
|
||||||
|
"mailbox_key": "mb",
|
||||||
|
"sender_block_ref": "sender",
|
||||||
|
"ciphertext": "x",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
r = remote_client.post(
|
||||||
|
"/api/mesh/dm/replicate-envelope",
|
||||||
|
json=payload,
|
||||||
|
)
|
||||||
|
assert r.status_code == 403
|
||||||
|
assert "peer HMAC" in r.text or "peer hmac" in r.text.lower()
|
||||||
|
|
||||||
|
def test_rejects_wrong_peer_hmac(self, remote_client, monkeypatch):
|
||||||
|
"""A request with a peer HMAC header keyed off the WRONG secret
|
||||||
|
is rejected. Confirms the HMAC is actually verified — a tampered
|
||||||
|
body or a key-substitution attack doesn't sneak through."""
|
||||||
|
# Plant a known peer secret. The request will sign with a
|
||||||
|
# DIFFERENT key, so verification must fail.
|
||||||
|
from services.config import get_settings
|
||||||
|
monkeypatch.setenv("MESH_PEER_PUSH_SECRET", "real-secret-32-chars-min-padding-padding")
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
body = json.dumps({
|
||||||
|
"envelope": {
|
||||||
|
"msg_id": "dm_wronghmac",
|
||||||
|
"mailbox_key": "mb",
|
||||||
|
"sender_block_ref": "sender",
|
||||||
|
"ciphertext": "x",
|
||||||
|
},
|
||||||
|
}).encode("utf-8")
|
||||||
|
wrong_hmac = hmac.new(b"wrong-key", body, hashlib.sha256).hexdigest()
|
||||||
|
r = remote_client.post(
|
||||||
|
"/api/mesh/dm/replicate-envelope",
|
||||||
|
content=body,
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"X-Peer-Url": "http://example-peer.onion:8000",
|
||||||
|
"X-Peer-HMAC": wrong_hmac,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert r.status_code == 403
|
||||||
|
|
||||||
|
def test_rejects_oversize_body(self, remote_client):
|
||||||
|
"""64 KB ceiling — anything bigger doesn't even get parsed.
|
||||||
|
Defends against memory amplification via giant ciphertexts."""
|
||||||
|
# 100 KB body is well over the 64 KB cap.
|
||||||
|
big = b"{" + b"x" * 100_000 + b"}"
|
||||||
|
r = remote_client.post(
|
||||||
|
"/api/mesh/dm/replicate-envelope",
|
||||||
|
content=big,
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Content-Length": str(len(big)),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert r.status_code in (400, 413), (
|
||||||
|
f"oversize body should be rejected with 400/413, got {r.status_code}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestReplicateEndpointRegistered:
    """Guards against the replicate-envelope route being dropped from the app."""

    def test_route_present_in_app(self):
        """Static check that the route is actually wired into the app.
        Catches a future refactor that drops the router include or
        deletes the endpoint by accident."""
        from main import app

        # Routes without a ``methods`` attribute (or with a falsy one)
        # contribute nothing to the set.
        registered = {
            (method, getattr(route, "path", None))
            for route in app.routes
            for method in (getattr(route, "methods", set()) or set())
        }

        assert ("POST", "/api/mesh/dm/replicate-envelope") in registered, (
            "POST /api/mesh/dm/replicate-envelope is not registered on the app"
        )
|
||||||
@@ -0,0 +1,196 @@
|
|||||||
|
"""Issue #250 (tg12): Docker bridge local-operator trust must be bound to
|
||||||
|
the frontend container's hostname, not the entire 172.16.0.0/12 range.
|
||||||
|
|
||||||
|
Previous behavior trusted ANY private-RFC1918 source IP on the bridge
|
||||||
|
when ``SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR=1``. On a shared
|
||||||
|
Docker host this granted local-operator privileges to any other
|
||||||
|
container that could route to the backend's bridge — far broader than
|
||||||
|
intended.
|
||||||
|
|
||||||
|
The fix narrows trust to source IPs that forward-resolve from one of the
|
||||||
|
configured frontend container hostnames (default: the compose service
|
||||||
|
name ``frontend`` plus the explicit ``container_name``
|
||||||
|
``shadowbroker-frontend``). Operators with renamed containers can list
|
||||||
|
the new names in ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS``.
|
||||||
|
|
||||||
|
These tests exercise the resolution helpers directly so that we don't
|
||||||
|
need a live Docker daemon to validate the contract.
|
||||||
|
"""
|
||||||
|
import socket
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _trusted_bridge_frontend_hostnames — env parsing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestTrustedHostnameParsing:
    """Parsing of ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS`` by
    ``_trusted_bridge_frontend_hostnames``."""

    ENV_VAR = "SHADOWBROKER_TRUSTED_FRONTEND_HOSTS"

    def _fn(self):
        """Import lazily and return the function under test."""
        from auth import _trusted_bridge_frontend_hostnames

        return _trusted_bridge_frontend_hostnames

    def _parse_with_env(self, raw_value):
        """Run the parser with the env var temporarily set to ``raw_value``."""
        with patch.dict("os.environ", {self.ENV_VAR: raw_value}):
            return self._fn()()

    def test_default_covers_compose_service_and_container_name(self):
        """With the variable unset, both bundled hostnames are returned."""
        import os

        # patch.dict restores the environment on exit, so the pop below
        # does not leak out of this test.
        with patch.dict("os.environ", {}, clear=False):
            os.environ.pop(self.ENV_VAR, None)
            assert self._fn()() == ["frontend", "shadowbroker-frontend"]

    def test_custom_list_via_env(self):
        """A comma-separated value replaces the defaults."""
        assert self._parse_with_env("my-ui,alt-frontend") == ["my-ui", "alt-frontend"]

    def test_whitespace_trimmed(self):
        """Whitespace around each hostname is stripped."""
        assert self._parse_with_env(" my-ui , alt-frontend ") == ["my-ui", "alt-frontend"]

    def test_empty_env_falls_back_to_default(self):
        """An explicitly empty variable parses to ``[]``.

        Despite the test name, the bundled defaults are NOT restored here:
        the empty string is parsed and yields no hostnames. Per the original
        comment, the caller fails closed from there.
        """
        assert self._parse_with_env("") == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _resolve_trusted_bridge_ips — DNS resolution with cache + fail-closed
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestResolveTrustedBridgeIps:
|
||||||
|
def setup_method(self):
|
||||||
|
# Reset the module-level cache before each test so prior tests
|
||||||
|
# don't bleed state across cases.
|
||||||
|
from auth import _DOCKER_BRIDGE_TRUST_CACHE
|
||||||
|
_DOCKER_BRIDGE_TRUST_CACHE["ips"] = frozenset()
|
||||||
|
_DOCKER_BRIDGE_TRUST_CACHE["expires"] = 0.0
|
||||||
|
|
||||||
|
def test_resolves_configured_hostnames(self):
|
||||||
|
from auth import _resolve_trusted_bridge_ips
|
||||||
|
|
||||||
|
def fake_gethostbyname_ex(host):
|
||||||
|
mapping = {
|
||||||
|
"frontend": ("frontend", [], ["172.18.0.3"]),
|
||||||
|
"shadowbroker-frontend": ("shadowbroker-frontend", [], ["172.18.0.3", "172.18.0.4"]),
|
||||||
|
}
|
||||||
|
if host not in mapping:
|
||||||
|
raise socket.gaierror("no such host")
|
||||||
|
return mapping[host]
|
||||||
|
|
||||||
|
with patch("socket.gethostbyname_ex", side_effect=fake_gethostbyname_ex):
|
||||||
|
ips = _resolve_trusted_bridge_ips()
|
||||||
|
assert ips == frozenset({"172.18.0.3", "172.18.0.4"})
|
||||||
|
|
||||||
|
def test_fail_closed_when_dns_returns_nothing(self):
|
||||||
|
from auth import _resolve_trusted_bridge_ips
|
||||||
|
|
||||||
|
def always_fail(host):
|
||||||
|
raise socket.gaierror("no resolver")
|
||||||
|
|
||||||
|
with patch("socket.gethostbyname_ex", side_effect=always_fail):
|
||||||
|
ips = _resolve_trusted_bridge_ips()
|
||||||
|
assert ips == frozenset()
|
||||||
|
|
||||||
|
def test_partial_resolution_is_kept(self):
|
||||||
|
"""If one hostname resolves and another fails, we keep the
|
||||||
|
successful one rather than discarding the whole set."""
|
||||||
|
from auth import _resolve_trusted_bridge_ips
|
||||||
|
|
||||||
|
def partial(host):
|
||||||
|
if host == "frontend":
|
||||||
|
return ("frontend", [], ["172.18.0.3"])
|
||||||
|
raise socket.gaierror("missing")
|
||||||
|
|
||||||
|
with patch("socket.gethostbyname_ex", side_effect=partial):
|
||||||
|
ips = _resolve_trusted_bridge_ips()
|
||||||
|
assert ips == frozenset({"172.18.0.3"})
|
||||||
|
|
||||||
|
def test_cache_short_circuits_repeated_dns_calls(self):
|
||||||
|
from auth import _resolve_trusted_bridge_ips
|
||||||
|
|
||||||
|
call_count = {"n": 0}
|
||||||
|
|
||||||
|
def counting(host):
|
||||||
|
call_count["n"] += 1
|
||||||
|
return ("frontend", [], ["172.18.0.3"])
|
||||||
|
|
||||||
|
with patch("socket.gethostbyname_ex", side_effect=counting):
|
||||||
|
_resolve_trusted_bridge_ips()
|
||||||
|
calls_after_first = call_count["n"]
|
||||||
|
_resolve_trusted_bridge_ips()
|
||||||
|
_resolve_trusted_bridge_ips()
|
||||||
|
# Second + third calls hit the cache, not the DNS stub.
|
||||||
|
assert call_count["n"] == calls_after_first
|
||||||
|
|
||||||
|
def test_cache_expires(self):
|
||||||
|
from auth import _resolve_trusted_bridge_ips, _DOCKER_BRIDGE_TRUST_CACHE
|
||||||
|
|
||||||
|
with patch("socket.gethostbyname_ex", return_value=("frontend", [], ["172.18.0.3"])):
|
||||||
|
_resolve_trusted_bridge_ips()
|
||||||
|
# Force expiry.
|
||||||
|
_DOCKER_BRIDGE_TRUST_CACHE["expires"] = 0.0
|
||||||
|
with patch("socket.gethostbyname_ex", return_value=("frontend", [], ["172.18.0.9"])) as stub:
|
||||||
|
ips = _resolve_trusted_bridge_ips()
|
||||||
|
assert stub.called
|
||||||
|
assert "172.18.0.9" in ips
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _is_docker_bridge_host — composite of the helpers above
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestIsDockerBridgeHost:
    """``_is_docker_bridge_host`` tested with the resolver stubbed out."""

    RESOLVER = "auth._resolve_trusted_bridge_ips"

    def setup_method(self):
        """Start each test with an empty, expired trust cache."""
        from auth import _DOCKER_BRIDGE_TRUST_CACHE

        _DOCKER_BRIDGE_TRUST_CACHE["ips"] = frozenset()
        _DOCKER_BRIDGE_TRUST_CACHE["expires"] = 0.0

    def test_trusts_resolved_frontend_ip(self):
        """An IP in the resolved frontend set is trusted."""
        from auth import _is_docker_bridge_host

        trusted = frozenset({"172.18.0.3"})
        with patch(self.RESOLVER, return_value=trusted):
            assert _is_docker_bridge_host("172.18.0.3") is True

    def test_rejects_arbitrary_bridge_ip(self):
        """A rogue container on the same bridge but at a different IP
        must NOT be trusted, even though it falls in 172.16.0.0/12."""
        from auth import _is_docker_bridge_host

        trusted = frozenset({"172.18.0.3"})
        with patch(self.RESOLVER, return_value=trusted):
            assert _is_docker_bridge_host("172.18.0.99") is False

    def test_rejects_public_ip_without_dns_work(self):
        """Public IPs skip DNS resolution entirely (perf + safety)."""
        from auth import _is_docker_bridge_host

        with patch(self.RESOLVER) as resolver_stub:
            assert _is_docker_bridge_host("8.8.8.8") is False
            resolver_stub.assert_not_called()

    def test_rejects_non_ip_input(self):
        """Empty strings and hostnames are never treated as bridge hosts."""
        from auth import _is_docker_bridge_host

        for candidate in ("", "not-an-ip", "frontend"):
            assert _is_docker_bridge_host(candidate) is False

    def test_fails_closed_when_dns_returns_empty(self):
        """If Docker DNS can't resolve any frontend hostname, the bridge
        is not trusted — even for IPs that would have been trusted under
        the old 172.16.0.0/12 blanket policy."""
        from auth import _is_docker_bridge_host

        with patch(self.RESOLVER, return_value=frozenset()):
            assert _is_docker_bridge_host("172.18.0.3") is False
|
||||||
@@ -0,0 +1,258 @@
|
|||||||
|
"""Cumulative fuel/CO2 tracking via per-aircraft observation timestamps.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Users want the running total of fuel burned per aircraft — not just the
|
||||||
|
rate. We track first-seen-at per icao24 and multiply elapsed observation
|
||||||
|
time by the model-based rate. This module's job is exclusively the
|
||||||
|
timestamp bookkeeping; multiplication happens in the flights/military
|
||||||
|
fetchers.
|
||||||
|
|
||||||
|
These tests pin:
|
||||||
|
|
||||||
|
* First sighting returns 0 (no airtime yet).
|
||||||
|
* Repeated sightings within ``REOPEN_GAP_S`` accumulate elapsed time.
|
||||||
|
* Gap longer than ``REOPEN_GAP_S`` resets the session (plane landed
|
||||||
|
and took off again — different flight).
|
||||||
|
* ``MAX_SESSION_SECONDS`` clamp protects against clock skew bugs.
|
||||||
|
* ``prune()`` drops stale entries.
|
||||||
|
* ``get_session_seconds`` reads without bumping last_seen.
|
||||||
|
* Empty / None icao input is a defensive no-op.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _reset_observations():
    """Reset the flight-observation tracker before and after every test
    in this module (autouse, so no test has to request it)."""
    from services.fetchers.flight_observations import _reset_for_tests

    _reset_for_tests()
    yield
    _reset_for_tests()
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecordObservation:
    """Behavior of ``record_observation``: session start, accumulation,
    reopen-gap reset, clamping, and input normalization."""

    def test_first_sighting_returns_zero(self):
        """The very first sighting of a hex has no elapsed airtime yet."""
        from services.fetchers import flight_observations as obs

        assert obs.record_observation("a12345", now=1000.0) == 0

    def test_repeated_sightings_accumulate(self):
        """ADS-B refreshes every ~minute in practice, so each observation
        is within ``REOPEN_GAP_S`` (15 min) of the last and we keep
        accumulating. Walking the timestamps in 5-minute steps so we
        stay inside the reopen window the whole way."""
        from services.fetchers.flight_observations import record_observation

        # Opens the session; first_seen_at stays at 1000.0 from here on.
        record_observation("a12345", now=1000.0)

        # (observation time, expected elapsed seconds since 1000.0)
        timeline = [
            (1060.0, 60),      # 1 minute later (within REOPEN_GAP_S)
            (1360.0, 360),     # 5-minute spaced refreshes from here...
            (1660.0, 660),
            (1960.0, 960),
            (2260.0, 1260),
            (2560.0, 1560),
            (2860.0, 1860),
            (3160.0, 2160),
            (3460.0, 2460),
            (3760.0, 2760),
            (4060.0, 3060),
            (4360.0, 3360),
            (4600.0, 3600),    # 1 hour after first sighting, 240s after 4360
        ]
        for stamp, expected in timeline:
            assert record_observation("a12345", now=stamp) == expected

    def test_gap_longer_than_reopen_resets_session(self):
        """If a hex hasn't been seen in ``REOPEN_GAP_S`` (15 min default),
        the next sighting is treated as a new flight — first_seen_at resets."""
        from services.fetchers import flight_observations as obs

        obs.record_observation("a12345", now=1000.0)
        obs.record_observation("a12345", now=1500.0)  # 500s later — within gap

        # Now 20 minutes of silence (1200s > 900s threshold) → session reset.
        after_silence = obs.record_observation("a12345", now=2700.0)
        assert after_silence == 0

        # And the next quick sighting starts accumulating from 2700 again.
        follow_up = obs.record_observation("a12345", now=2760.0)
        assert follow_up == 60

    def test_session_clamp(self):
        """Clock skew protection: when a hex has been continuously
        observed for longer than ``MAX_SESSION_SECONDS``, clamp.

        Synthesizes the state directly because driving 86,400+ seconds of
        observations through the public API in a test would take 1000+
        REOPEN_GAP_S-respecting steps.
        """
        from services.fetchers import flight_observations as obs

        # last_seen_at very recent so REOPEN_GAP_S branch does NOT fire,
        # but first_seen_at way in the past so the elapsed math overflows
        # MAX_SESSION_SECONDS. Clamp must kick in.
        big_now = float(obs.MAX_SESSION_SECONDS + 1_000_000)
        seeded_entry = {
            "first_seen_at": 0.0,
            "last_seen_at": big_now - 60,  # 60s ago — well inside gap window
        }
        with obs._lock:
            obs._observations["a12345"] = seeded_entry

        elapsed = obs.record_observation("a12345", now=big_now)
        assert elapsed == obs.MAX_SESSION_SECONDS, (
            f"elapsed must be clamped to MAX_SESSION_SECONDS; got {elapsed}"
        )

    def test_empty_input_returns_zero(self):
        """Empty, ``None`` and whitespace-only identifiers all yield 0."""
        from services.fetchers.flight_observations import record_observation

        assert record_observation("") == 0
        assert record_observation(None) == 0  # type: ignore[arg-type]
        assert record_observation("   ") == 0

    def test_case_insensitive_key(self):
        """ICAO24 hex codes are case-insensitive — adsb.lol lowercases
        them, OpenSky may not. Normalize so both refer to the same airframe."""
        from services.fetchers import flight_observations as obs

        obs.record_observation("A12345", now=1000.0)
        # Different case must hit the same entry.
        assert obs.record_observation("a12345", now=1060.0) == 60
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetSessionSeconds:
    """``get_session_seconds`` is a pure read: it reports elapsed time
    without touching the tracker's last-seen bookkeeping."""

    def test_read_only_does_not_bump(self):
        """Reading the session must not extend it."""
        from services.fetchers import flight_observations as obs

        obs.record_observation("a12345", now=1000.0)
        obs.record_observation("a12345", now=1060.0)  # bumps last_seen

        # Now read at t=2000. Without bumping, gap=2000-1060=940 > 900,
        # so a recording call would reset. But the read should NOT reset.
        seconds_at_2000 = obs.get_session_seconds("a12345", now=2000.0)
        assert seconds_at_2000 == 1000, (
            f"read should return 2000-1000=1000s; got {seconds_at_2000}"
        )

        # The next recording at t=2001 must still reset: the gap is
        # measured from the last *recording* (t=1060), i.e. 941s > 900s.
        # Had the read bumped last_seen to 2000, this would accumulate.
        after_read = obs.record_observation("a12345", now=2001.0)
        assert after_read == 0  # session reset

    def test_unknown_hex_returns_zero(self):
        """A hex that was never recorded reports zero seconds."""
        from services.fetchers import flight_observations as obs

        assert obs.get_session_seconds("nonexistent") == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestPrune:
    """``prune()`` removes stale tracker entries and reports how many."""

    def test_drops_stale_entries(self):
        """One fresh and one very old entry: only the old one is dropped."""
        from services.fetchers.flight_observations import (
            get_session_seconds,
            prune,
            record_observation,
        )

        record_observation("active", now=10_000.0)
        record_observation("stale", now=1.0)

        removed = prune(now=10_000.0)
        assert removed == 1

        # Active entry survives:
        assert get_session_seconds("active", now=10_001.0) == 1
        # Stale entry was dropped — next obs starts fresh:
        assert record_observation("stale", now=10_002.0) == 0

    def test_no_op_when_nothing_stale(self):
        """Entries seen 500s before the prune time are kept."""
        from services.fetchers.flight_observations import prune, record_observation

        for hex_code in ("hex1", "hex2"):
            record_observation(hex_code, now=1000.0)

        removed = prune(now=1500.0)
        assert removed == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: emissions enrichment in _classify_and_publish honors the
|
||||||
|
# cumulative tracker.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestEmissionsCumulativeIntegration:
    """End-to-end check that ``_classify_and_publish`` attaches cumulative
    fuel/CO2 figures derived from the observation tracker."""

    def _reset_store(self):
        """Empty every flight list in the shared store."""
        from services.fetchers._store import latest_data, _data_lock

        with _data_lock:
            for key in (
                "flights", "commercial_flights", "private_flights",
                "private_jets", "military_flights", "tracked_flights",
            ):
                latest_data[key] = []

    def _publish_c172(self, monkeypatch, hex_code):
        """Publish one synthetic Cessna 172 record and return the
        ``emissions`` dict of the single published flight.

        Asserts that exactly one flight was published and that it carries
        an emissions dict, so callers fail with a clear assertion rather
        than an IndexError/TypeError.
        """
        from services.fetchers import flights as flights_module
        from services.fetchers._store import latest_data, _data_lock

        self._reset_store()
        # Stub the route / aircraft-type lookups with inert lambdas.
        monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
        monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")

        flights_module._classify_and_publish([
            {
                "hex": hex_code,
                "flight": "JBU711",
                "r": "N1",
                "t": "C172",  # Cessna 172, 9 GPH
                "lat": 40.0,
                "lon": -100.0,
                "alt_baro": 3000,
                "gs": 100,
            }
        ])

        with _data_lock:
            published = list(latest_data.get("flights", []))
        assert len(published) == 1
        emi = published[0].get("emissions")
        assert emi is not None
        return emi

    def test_first_publish_zero_cumulative(self, monkeypatch):
        """On the first observation, cumulative values are 0 — but the
        rate fields and observed_seconds are still present in the dict."""
        emi = self._publish_c172(monkeypatch, "test001")

        assert emi["fuel_gph"] == 9
        assert emi["observed_seconds"] == 0
        assert emi["fuel_gallons_burned"] == 0.0
        assert emi["co2_kg_emitted"] == 0.0

    def test_second_publish_accumulates(self, monkeypatch):
        """Publishing the same hex a second time picks up real elapsed time
        and produces non-zero cumulative values."""
        import time as _time_real
        from services.fetchers import flight_observations as obs

        # Manually seed an observation 1 hour in the past so the next
        # publish picks up ~3600s elapsed. Read the clock once so both
        # timestamps are derived from the same instant.
        seeded_at = _time_real.time()
        with obs._lock:
            obs._observations["test002"] = {
                "first_seen_at": seeded_at - 3600,
                "last_seen_at": seeded_at - 60,
            }

        emi = self._publish_c172(monkeypatch, "test002")

        # Roughly 1 hour observed → 9 gal burned.
        assert 3500 <= emi["observed_seconds"] <= 3700
        assert 8.7 <= emi["fuel_gallons_burned"] <= 9.3
        # CO2 = 9 gph * 9.57 kg/gal = 86.1 kg/hr.
        assert 84 <= emi["co2_kg_emitted"] <= 88
|
||||||
@@ -0,0 +1,354 @@
|
|||||||
|
"""Per-flight source attribution.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Pre-fix, adsb.lol records (the primary source for most flights) carried
|
||||||
|
no source marker. OpenSky records got ``is_opensky: True`` and
|
||||||
|
supplementals got ``supplemental_source``, so any UI that wanted to show
|
||||||
|
which provider a flight came from saw OpenSky/airplanes.live records as
|
||||||
|
explicitly tagged and adsb.lol records as "unlabeled" — making it look
|
||||||
|
like adsb.lol wasn't even being used.
|
||||||
|
|
||||||
|
This caused user confusion ("only military planes have adsb.lol
|
||||||
|
telemetry") that was diagnostic noise, not a real bug. The actual fix:
|
||||||
|
stamp ``source`` at every fetch site so the downstream consumer can
|
||||||
|
attribute the provider with no guesswork.
|
||||||
|
|
||||||
|
These tests pin:
|
||||||
|
|
||||||
|
* adsb.lol regional records get ``source: "adsb.lol"`` at fetch time
|
||||||
|
(synthesized via the published flight dict).
|
||||||
|
* OpenSky records get ``source: "OpenSky"`` (alongside the existing
|
||||||
|
``is_opensky: True`` for backwards compat).
|
||||||
|
* Supplementals (airplanes.live, adsb.fi) flow through with their
|
||||||
|
``supplemental_source`` honored.
|
||||||
|
* The military fetcher tags ``source`` on military_flights and uavs.
|
||||||
|
* The published flight dict carries ``source`` so downstream code
|
||||||
|
can render attribution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _classify_and_publish — source field flows into published flight dict
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassifyAndPublishSource:
    """The ``source`` attribution on a raw aircraft record must reach the
    flight dict that ``_classify_and_publish`` writes to the store."""

    def _reset_store(self):
        """Clear store before each test so we get deterministic state."""
        from services.fetchers._store import latest_data, _data_lock

        with _data_lock:
            for key in (
                "flights", "commercial_flights", "private_flights",
                "private_jets", "military_flights", "tracked_flights",
            ):
                latest_data[key] = []
        return latest_data

    def _publish(self, monkeypatch, record):
        """Run one raw record through ``_classify_and_publish`` and return
        the single flight dict it published.

        Asserts that exactly one flight came out, so every test fails with
        a clear assertion (not an IndexError) if nothing was published.
        """
        from services.fetchers import flights as flights_module
        from services.fetchers._store import latest_data, _data_lock

        self._reset_store()

        # Patch route + type lookups so they don't try to hit the network.
        monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
        monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")

        flights_module._classify_and_publish([record])

        with _data_lock:
            published = list(latest_data.get("flights", []))
        assert len(published) == 1
        return published[0]

    def test_adsb_lol_record_tagged_in_published_flight(self, monkeypatch):
        """A raw adsb.lol record (carrying ``source: 'adsb.lol'`` from the
        fetch site) flows through ``_classify_and_publish`` and the
        published flight dict carries the same ``source`` field."""
        flight = self._publish(
            monkeypatch,
            {
                "hex": "ad7701",
                "flight": "JBU711",
                "r": "N967JT",
                "t": "A321",
                "lat": 40.0,
                "lon": -100.0,
                "alt_baro": 36000,
                "gs": 401.6,
                "nac_p": 9,
                "source": "adsb.lol",  # stamped at fetch site
            },
        )

        assert flight["source"] == "adsb.lol"
        # nac_p still flows through too — sanity check that adding source
        # didn't break the existing GPS jamming signal.
        assert flight["nac_p"] == 9

    def test_opensky_record_tagged_in_published_flight(self, monkeypatch):
        """OpenSky-sourced records carry ``source: 'OpenSky'`` (plus the
        existing ``is_opensky: True`` for back-compat)."""
        flight = self._publish(
            monkeypatch,
            {
                "hex": "a12345",
                "flight": "UAL100",
                "r": "N100UA",
                "t": "Unknown",
                "lat": 41.0,
                "lon": -87.0,
                "alt_baro": 35000,
                "gs": 450,
                # No nac_p — OpenSky doesn't carry it.
                "is_opensky": True,
                "source": "OpenSky",
            },
        )

        assert flight["source"] == "OpenSky"

    def test_supplemental_source_propagates(self, monkeypatch):
        """Supplemental records (airplanes.live, adsb.fi) have their
        legacy ``supplemental_source`` field promoted to the unified
        ``source`` field in the published dict — so consumers don't have
        to inspect two different keys."""
        flight = self._publish(
            monkeypatch,
            {
                "hex": "b22222",
                "flight": "DAL200",
                "r": "N200DL",
                "t": "B738",
                "lat": 42.0,
                "lon": -90.0,
                "alt_baro": 32000,
                "gs": 420,
                "supplemental_source": "airplanes.live",
                # No explicit "source" — should fall through to
                # supplemental_source.
            },
        )

        assert flight["source"] == "airplanes.live"

    def test_explicit_source_wins_over_supplemental_source(self, monkeypatch):
        """If both fields are present, explicit ``source`` wins (it's the
        newer canonical tag)."""
        flight = self._publish(
            monkeypatch,
            {
                "hex": "c33333",
                "flight": "AAL300",
                "r": "N300AA",
                "t": "A321",
                "lat": 33.0,
                "lon": -97.0,
                "alt_baro": 34000,
                "gs": 430,
                "source": "adsb.lol",
                "supplemental_source": "adsb.fi",
            },
        )

        assert flight["source"] == "adsb.lol"

    def test_untagged_record_defaults_to_adsb_lol(self, monkeypatch):
        """A record with neither ``source`` nor ``supplemental_source``
        (e.g. synthesized by a test, or a fetcher that hasn't been
        migrated yet) defaults to ``"adsb.lol"`` since that's been the
        primary source historically. Defensive default — better than
        empty string."""
        flight = self._publish(
            monkeypatch,
            {
                "hex": "d44444",
                "flight": "SWA400",
                "r": "N400SW",
                "t": "B737",
                "lat": 32.0,
                "lon": -110.0,
                "alt_baro": 30000,
                "gs": 410,
            },
        )

        assert flight["source"] == "adsb.lol"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# adsb.lol regional fetcher tags at fetch time
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestAdsbLolRegionalTagging:
    """``_fetch_adsb_lol_regions`` must tag every record it returns and
    must degrade to an empty list when the upstream call fails."""

    def test_fetch_region_stamps_source_on_each_aircraft(self, monkeypatch):
        """The wrapper around the adsb.lol regional endpoint stamps
        ``source: 'adsb.lol'`` on every record before returning, so the
        downstream merge step sees attribution survive even when the
        record gets reshuffled (e.g. dedupe-by-hex during OpenSky merge)."""
        from services.fetchers import flights as flights_module

        class _OkResponse:
            """HTTP 200 stand-in whose payload has 3 aircraft, none of
            which carry a ``source`` field."""

            status_code = 200

            def json(self):
                rows = (
                    ("a1", 40.0, -100.0, 8),
                    ("a2", 40.1, -100.1, 9),
                    ("a3", 40.2, -100.2, 10),
                )
                return {
                    "ac": [
                        {"hex": hex_code, "lat": lat, "lon": lon, "nac_p": nac_p}
                        for hex_code, lat, lon, nac_p in rows
                    ]
                }

        def _always_ok(*args, **kwargs):
            return _OkResponse()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _always_ok)

        results = flights_module._fetch_adsb_lol_regions()

        assert len(results) >= 3
        # Every aircraft we got back must be tagged.
        sources = set()
        for aircraft in results:
            sources.add(aircraft.get("source"))
        assert sources == {"adsb.lol"}, (
            f"adsb.lol regional fetcher must stamp source on every record; got: {sources}"
        )

    def test_fetch_region_failure_returns_empty_without_crashing(self, monkeypatch):
        """If adsb.lol returns non-200, the fetcher returns [] gracefully —
        downstream code already handles this. Sanity check that the source
        tagging doesn't introduce a new failure mode."""
        from services.fetchers import flights as flights_module

        class _ServerErrorResponse:
            """HTTP 500 stand-in with an empty JSON body."""

            status_code = 500

            def json(self):
                return {}

        def _always_failing(*args, **kwargs):
            return _ServerErrorResponse()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _always_failing)

        results = flights_module._fetch_adsb_lol_regions()

        assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Military fetcher tags source on output dicts
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMilitarySourceTagging:
    """The military fetcher must publish an explicit ``source`` on its
    output records."""

    def test_military_output_carries_source_field(self, monkeypatch):
        """Each entry in ``military_flights`` should carry a ``source``
        field. Pre-fix the only military attribution was inferring from
        which endpoint we hit; now it's explicit."""
        from services.fetchers import military as mil_module
        from services.fetchers import _store as store_module
        from services.fetchers._store import latest_data, _data_lock

        # Reset relevant store state.
        with _data_lock:
            for key in ("military_flights", "uavs", "tracked_flights"):
                latest_data[key] = []

        # Stub _store.is_any_active so the fetch doesn't early-return.
        # The military module imports the function inline at call time,
        # so we have to patch it on the _store module itself rather than
        # on the military module.
        monkeypatch.setattr(store_module, "is_any_active", lambda *_: True)

        class _Resp:
            """HTTP 200 stand-in returning a fresh ``{"ac": [...]}`` payload
            on every ``json()`` call."""

            status_code = 200

            def __init__(self, aircraft):
                self._aircraft = aircraft

            def json(self):
                return {"ac": [dict(entry) for entry in self._aircraft]}

        # One synthetic military aircraft from adsb.lol, none from
        # airplanes.live (or any other URL).
        synthetic_c130 = {
            "hex": "ae6c1d",
            "flight": "CRUSH52",
            "r": "170281",
            "t": "C30J",
            "lat": 47.594,
            "lon": -124.879,
            "alt_baro": 9025,
            "gs": 162.8,
            "track": 334.5,
            "nac_p": 10,
        }

        def _fake_fetch(url, *a, **kw):
            return _Resp([synthetic_c130] if "adsb.lol" in url else [])

        monkeypatch.setattr(mil_module, "fetch_with_curl", _fake_fetch)

        # Stubs for downstream enrichments that try to hit external state.
        enrichment_stubs = (
            ("enrich_with_plane_alert", lambda mf: None),
            ("_enrich_country", lambda hex_, flag: ("US", "USAF")),
            ("_classify_military_type", lambda t: "transport"),
            ("_classify_uav", lambda m, c: (False, "", "")),
            ("get_emissions_info", lambda model: None),
            ("_mark_fresh", lambda *keys: None),
        )
        for attr_name, stub in enrichment_stubs:
            monkeypatch.setattr(mil_module, attr_name, stub)

        mil_module.fetch_military_flights()

        with _data_lock:
            mil_published = list(latest_data.get("military_flights", []))

        assert len(mil_published) == 1
        assert mil_published[0]["source"] == "adsb.lol"
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
"""GDELT's ``data.gdeltproject.org`` is a CNAME to a Google Cloud Storage
|
||||||
|
bucket. GCS responds with the wildcard ``*.storage.googleapis.com``
|
||||||
|
certificate, which legitimately does NOT cover the GDELT custom
|
||||||
|
domain, so Python's TLS verification refuses the connection. Some
|
||||||
|
networks happen to route through a path where this works; many
|
||||||
|
(notably Docker Desktop's outbound NAT on local installs) do not.
|
||||||
|
|
||||||
|
The fix in ``services.geopolitics._gcs_direct_gdelt_url`` rewrites any
|
||||||
|
URL pointing at ``data.gdeltproject.org`` to its GCS-direct equivalent
|
||||||
|
(``storage.googleapis.com/data.gdeltproject.org/...``), where the
|
||||||
|
standard GCS certificate is genuinely valid. ``api.gdeltproject.org``
|
||||||
|
and every other host are left untouched.
|
||||||
|
|
||||||
|
These tests pin that behavior so a future refactor that drops the
|
||||||
|
helper or accidentally rewrites the wrong host gets a loud failure.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_data_gdeltproject_https():
    """An https URL on ``data.gdeltproject.org`` is rewritten to the
    GCS-direct host with the original host kept as the first path segment."""
    from services import geopolitics

    original = "https://data.gdeltproject.org/gdeltv2/lastupdate.txt"
    expected = "https://storage.googleapis.com/data.gdeltproject.org/gdeltv2/lastupdate.txt"

    assert geopolitics._gcs_direct_gdelt_url(original) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_data_gdeltproject_http():
    """GDELT's lastupdate.txt sometimes lists URLs with http:// — we
    rewrite those too (the downstream call upgrades them to https)."""
    from services import geopolitics

    original = "http://data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip"
    expected = (
        "http://storage.googleapis.com/data.gdeltproject.org"
        "/gdeltv2/20260301120000.export.CSV.zip"
    )

    # The scheme is preserved as-is by the rewrite itself.
    assert geopolitics._gcs_direct_gdelt_url(original) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_preserve_query_string_and_path():
    """Only the host changes; the deep path and the percent-encoded query
    string come through the rewrite untouched."""
    from services import geopolitics

    tail = "/some/deep/path?a=1&b=2&c=hello%20world"
    original = "https://data.gdeltproject.org" + tail
    expected = "https://storage.googleapis.com/data.gdeltproject.org" + tail

    rewritten = geopolitics._gcs_direct_gdelt_url(original)

    assert rewritten == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_touch_api_gdeltproject_org():
    """The API host is NOT a CNAME to GCS; rewriting it would break the
    actual GDELT API endpoint."""
    from services import geopolitics

    api_url = "https://api.gdeltproject.org/api/v2/doc/doc?query=carrier"
    result = geopolitics._gcs_direct_gdelt_url(api_url)

    assert result == api_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_touch_other_hosts():
    """URLs on unrelated hosts — including one already on
    ``storage.googleapis.com`` — pass through unchanged."""
    from services import geopolitics

    unrelated_urls = [
        "https://en.wikipedia.org/wiki/Boeing_747",
        "https://query.wikidata.org/sparql",
        "https://storage.googleapis.com/already-correct/path",
        "https://nominatim.openstreetmap.org/search",
    ]
    for candidate in unrelated_urls:
        result = geopolitics._gcs_direct_gdelt_url(candidate)
        assert result == candidate
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_partially_match_strings():
    """``data.gdeltproject.org`` is matched exactly; URLs that merely
    contain that substring elsewhere (in a query parameter, for example)
    are left alone. Otherwise we'd rewrite something like
    ``https://example.com/?ref=data.gdeltproject.org/x`` which is wrong."""
    from services.geopolitics import _gcs_direct_gdelt_url

    # The match requires ``://`` immediately before the host, so a host
    # like ``example-data.gdeltproject.org`` would also be left alone
    # (treated as a different host, which is correct).
    url = "https://example-data.gdeltproject.org/path"
    assert _gcs_direct_gdelt_url(url) == url

    # The query-parameter case named in the docstring: the GDELT host
    # appears after ``=``, not after ``://``, so it must not be rewritten.
    url_with_ref = "https://example.com/?ref=data.gdeltproject.org/x"
    assert _gcs_direct_gdelt_url(url_with_ref) == url_with_ref
|
||||||
@@ -0,0 +1,333 @@
|
|||||||
|
"""GPS jamming detection — nac_p=0 counted, lowered thresholds.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Pre-fix, the detector had three stacked filters that together meant the
|
||||||
|
``gps_jamming`` layer almost never lit up:
|
||||||
|
|
||||||
|
1. ``nac_p == 0`` aircraft were dropped on the theory that "0 = old
|
||||||
|
transponder." But modern Mode-S Enhanced Surveillance transponders
|
||||||
|
also fall back to ``nac_p == 0`` when they lose GPS lock entirely —
|
||||||
|
which is *exactly* the jamming signature we want to catch.
|
||||||
|
2. ``GPS_JAMMING_MIN_AIRCRAFT = 5`` per 1°x1° cell.
|
||||||
|
3. ``GPS_JAMMING_MIN_RATIO = 0.30`` adjusted ratio.
|
||||||
|
|
||||||
|
Combined with the existing ``-1`` noise cushion (``adjusted = degraded - 1``)
|
||||||
|
the bar to clear required dense, busy airspace — but jamming hotspots
|
||||||
|
(eastern Med, eastern Ukraine, Iran/Iraq) tend to have sparser traffic
|
||||||
|
precisely because pilots avoid them.
|
||||||
|
|
||||||
|
These tests pin the new behavior:
|
||||||
|
|
||||||
|
* ``nac_p == 0`` is now counted as degraded.
|
||||||
|
* ``nac_p == None`` (no field — typical for OpenSky records) is still
|
||||||
|
skipped — absence isn't evidence.
|
||||||
|
* Thresholds lowered to 3 aircraft / 0.20 ratio.
|
||||||
|
* Public function signature accepts overrides so callers / future
|
||||||
|
operators can re-tune without code edits.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# nac_p == 0 inclusion (the headline fix)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNacpZeroCounted:
|
||||||
|
def test_cell_dominated_by_nacp_zero_now_fires(self):
|
||||||
|
"""Three aircraft all reporting nac_p=0 in one cell, plus two
|
||||||
|
with valid GPS. Pre-fix the three nac_p=0 records were skipped
|
||||||
|
entirely (cell would have total=2, degraded=0, no zone). Post-fix
|
||||||
|
they count as degraded — this IS the jamming signature."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
# All in 1°x1° cell at int(lat)=40, int(lng)=-100
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 40.1, "lng": -100.1, "nac_p": 0},
|
||||||
|
{"hex": "a2", "lat": 40.5, "lng": -100.5, "nac_p": 0},
|
||||||
|
{"hex": "a3", "lat": 40.9, "lng": -100.9, "nac_p": 0},
|
||||||
|
{"hex": "b1", "lat": 40.2, "lng": -100.3, "nac_p": 9},
|
||||||
|
{"hex": "b2", "lat": 40.7, "lng": -100.7, "nac_p": 11},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# total=5, degraded=3, adjusted=2, ratio=0.40 > 0.20 → zone fires.
|
||||||
|
assert len(zones) == 1
|
||||||
|
assert zones[0]["degraded"] == 3
|
||||||
|
assert zones[0]["total"] == 5
|
||||||
|
assert zones[0]["ratio"] == 0.40
|
||||||
|
# Grid-cell center coords.
|
||||||
|
assert zones[0]["lat"] == 40.5
|
||||||
|
assert zones[0]["lng"] == -99.5
|
||||||
|
|
||||||
|
def test_nacp_zero_alone_clears_min_aircraft(self):
|
||||||
|
"""A cell with exactly 3 aircraft all reporting nac_p=0 must
|
||||||
|
fire under the new MIN_AIRCRAFT=3 + MIN_RATIO=0.20 regime."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 50.1, "lng": 30.1, "nac_p": 0},
|
||||||
|
{"hex": "a2", "lat": 50.5, "lng": 30.5, "nac_p": 0},
|
||||||
|
{"hex": "a3", "lat": 50.9, "lng": 30.9, "nac_p": 0},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# total=3, degraded=3, adjusted=2, ratio=0.667 > 0.20 → fires.
|
||||||
|
# severity is "medium" because 0.5 ≤ ratio < 0.75.
|
||||||
|
assert len(zones) == 1
|
||||||
|
assert zones[0]["severity"] == "medium"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# nac_p == None is still skipped (preserve OpenSky behavior)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNoneStillSkipped:
|
||||||
|
def test_none_records_dont_add_to_grid(self):
|
||||||
|
"""OpenSky's /states/all doesn't include nac_p, so its records
|
||||||
|
arrive with the field absent (``rf.get("nac_p") is None``). These
|
||||||
|
records must NOT count toward total — absence-of-data isn't
|
||||||
|
evidence of either jamming OR working GPS."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
# 3 jammed + 4 OpenSky-style (no nac_p). Pre-fix and post-fix
|
||||||
|
# handling of the None records is identical: they are always skipped.
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 40.1, "lng": -100.1, "nac_p": 0},
|
||||||
|
{"hex": "a2", "lat": 40.2, "lng": -100.2, "nac_p": 0},
|
||||||
|
{"hex": "a3", "lat": 40.3, "lng": -100.3, "nac_p": 0},
|
||||||
|
# OpenSky-style: no nac_p at all
|
||||||
|
{"hex": "o1", "lat": 40.4, "lng": -100.4},
|
||||||
|
{"hex": "o2", "lat": 40.5, "lng": -100.5},
|
||||||
|
{"hex": "o3", "lat": 40.6, "lng": -100.6},
|
||||||
|
{"hex": "o4", "lat": 40.7, "lng": -100.7},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# Only the 3 nac_p=0 records hit the grid. total=3, not 7.
|
||||||
|
assert len(zones) == 1
|
||||||
|
assert zones[0]["total"] == 3
|
||||||
|
assert zones[0]["degraded"] == 3
|
||||||
|
|
||||||
|
def test_explicit_none_skipped(self):
|
||||||
|
"""Same behavior when ``nac_p`` is present but set to None
|
||||||
|
(defensive — adsb.lol shouldn't do this, but downstream
|
||||||
|
normalizers might)."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 0.1, "lng": 0.1, "nac_p": None},
|
||||||
|
{"hex": "a2", "lat": 0.2, "lng": 0.2, "nac_p": None},
|
||||||
|
{"hex": "a3", "lat": 0.3, "lng": 0.3, "nac_p": None},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# No records counted → no zones.
|
||||||
|
assert zones == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Lowered MIN_AIRCRAFT (5 → 3)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMinAircraftLowered:
|
||||||
|
def test_three_aircraft_cell_now_qualifies(self):
|
||||||
|
"""Pre-fix MIN_AIRCRAFT=5 blocked sparse cells entirely. Post-fix
|
||||||
|
the bar is 3 aircraft per cell, which is realistic for the actual
|
||||||
|
jamming hotspots where traffic is thinner."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 33.1, "lng": 44.1, "nac_p": 3},
|
||||||
|
{"hex": "a2", "lat": 33.2, "lng": 44.2, "nac_p": 5},
|
||||||
|
{"hex": "a3", "lat": 33.3, "lng": 44.3, "nac_p": 7},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# total=3, degraded=3, adjusted=2, ratio=0.667 — fires under new
|
||||||
|
# rules, would have been blocked by MIN_AIRCRAFT=5 pre-fix.
|
||||||
|
assert len(zones) == 1
|
||||||
|
|
||||||
|
def test_two_aircraft_cell_still_blocked(self):
|
||||||
|
"""We didn't lower the bar to 2 — that would create too much
|
||||||
|
single-transponder noise. Two aircraft per cell still doesn't
|
||||||
|
qualify."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 33.1, "lng": 44.1, "nac_p": 3},
|
||||||
|
{"hex": "a2", "lat": 33.2, "lng": 44.2, "nac_p": 3},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
assert zones == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Lowered MIN_RATIO (0.30 → 0.20)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMinRatioLowered:
|
||||||
|
def test_ratio_between_old_and_new_threshold_fires(self):
|
||||||
|
"""Construct a cell whose ratio sits in the (0.20, 0.30] window:
|
||||||
|
fires under the new bar, would have been blocked pre-fix."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
# 10 aircraft, 4 degraded → adjusted=3, ratio=3/10=0.30.
|
||||||
|
# Pre-fix threshold was > 0.30 strict — would NOT fire.
|
||||||
|
# Post-fix threshold is > 0.20 — fires.
|
||||||
|
feed = (
|
||||||
|
[{"hex": f"d{i}", "lat": 40.1, "lng": -100.1, "nac_p": 3} for i in range(4)]
|
||||||
|
+ [{"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9} for i in range(6)]
|
||||||
|
)
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
assert len(zones) == 1
|
||||||
|
assert zones[0]["degraded"] == 4
|
||||||
|
assert zones[0]["total"] == 10
|
||||||
|
assert zones[0]["ratio"] == 0.30
|
||||||
|
|
||||||
|
def test_ratio_at_or_below_new_threshold_does_not_fire(self):
|
||||||
|
"""Ratio of exactly 0.20 must NOT fire (strict ``>`` comparison)."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
# 15 aircraft, 4 degraded → adjusted=3, ratio=3/15=0.20. Strictly
|
||||||
|
# not greater than 0.20, so doesn't qualify.
|
||||||
|
feed = (
|
||||||
|
[{"hex": f"d{i}", "lat": 40.1, "lng": -100.1, "nac_p": 3} for i in range(4)]
|
||||||
|
+ [{"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9} for i in range(11)]
|
||||||
|
)
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
assert zones == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pre-existing noise cushion (-1) preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNoiseCushionPreserved:
|
||||||
|
def test_single_quirky_transponder_doesnt_fire(self):
|
||||||
|
"""One degraded aircraft in a healthy cell shouldn't fire even
|
||||||
|
under the relaxed thresholds. The ``-1`` adjustment in the
|
||||||
|
detector exists for this reason."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = (
|
||||||
|
[{"hex": "d1", "lat": 40.1, "lng": -100.1, "nac_p": 3}]
|
||||||
|
+ [{"hex": f"c{i}", "lat": 40.5, "lng": -100.5, "nac_p": 9} for i in range(10)]
|
||||||
|
)
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
# total=11, degraded=1, adjusted=0 → cell short-circuits.
|
||||||
|
assert zones == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants pinned (catches accidental rollback)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConstantsPinned:
|
||||||
|
def test_min_aircraft_is_three(self):
|
||||||
|
from services.constants import GPS_JAMMING_MIN_AIRCRAFT
|
||||||
|
assert GPS_JAMMING_MIN_AIRCRAFT == 3, (
|
||||||
|
"MIN_AIRCRAFT must be 3; raising it back to 5 brings back the "
|
||||||
|
"'jamming never shows' bug."
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_min_ratio_is_0_20(self):
|
||||||
|
from services.constants import GPS_JAMMING_MIN_RATIO
|
||||||
|
assert GPS_JAMMING_MIN_RATIO == 0.20, (
|
||||||
|
"MIN_RATIO must be 0.20; raising it back to 0.30 brings back "
|
||||||
|
"the 'jamming never shows' bug."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Overrides honored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestOverridesHonored:
|
||||||
|
def test_overrides_supersede_constants(self):
|
||||||
|
"""The public signature accepts overrides so an operator can
|
||||||
|
re-tune at the call site (e.g. for a more aggressive setup in
|
||||||
|
an active conflict zone) without editing the module constants."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 40.1, "lng": -100.1, "nac_p": 3},
|
||||||
|
{"hex": "a2", "lat": 40.2, "lng": -100.2, "nac_p": 3},
|
||||||
|
]
|
||||||
|
|
||||||
|
# With defaults (min_aircraft=3) this is blocked. With override=2 it fires.
|
||||||
|
assert detect_gps_jamming_zones(feed) == []
|
||||||
|
zones = detect_gps_jamming_zones(feed, min_aircraft=2)
|
||||||
|
assert len(zones) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# lon vs lng compatibility
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestLonLngCompat:
|
||||||
|
def test_lon_key_accepted(self):
|
||||||
|
"""adsb.lol records arrive with ``lon`` (no g). The OpenSky merge
|
||||||
|
normalizes to ``lng`` but raw records flowing into the detector
|
||||||
|
may use either. Make sure both work."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
|
||||||
|
feed = [
|
||||||
|
{"hex": "a1", "lat": 40.1, "lon": -100.1, "nac_p": 0},
|
||||||
|
{"hex": "a2", "lat": 40.2, "lon": -100.2, "nac_p": 0},
|
||||||
|
{"hex": "a3", "lat": 40.3, "lon": -100.3, "nac_p": 0},
|
||||||
|
]
|
||||||
|
|
||||||
|
zones = detect_gps_jamming_zones(feed)
|
||||||
|
|
||||||
|
assert len(zones) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Empty / malformed inputs don't crash
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRobustness:
|
||||||
|
def test_empty_feed(self):
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
assert detect_gps_jamming_zones([]) == []
|
||||||
|
|
||||||
|
def test_none_feed(self):
|
||||||
|
"""The wrapper at the call site passes ``raw_flights_snapshot``
|
||||||
|
which could in principle be None on a startup race. Handle it."""
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
assert detect_gps_jamming_zones(None) == []
|
||||||
|
|
||||||
|
def test_records_missing_position_skipped(self):
|
||||||
|
from services.fetchers.flights import detect_gps_jamming_zones
|
||||||
|
feed = [
|
||||||
|
{"hex": "noloc", "nac_p": 0},
|
||||||
|
{"hex": "nolat", "lng": -100.0, "nac_p": 0},
|
||||||
|
{"hex": "nolng", "lat": 40.0, "nac_p": 0},
|
||||||
|
]
|
||||||
|
assert detect_gps_jamming_zones(feed) == []
|
||||||
@@ -0,0 +1,273 @@
|
|||||||
|
"""Tests for issue #288: viewport bbox filtering on /api/live-data/{fast,slow}.
|
||||||
|
|
||||||
|
Behaviour contract:
|
||||||
|
* Without s/w/n/e params, the response is byte-for-byte identical to the
|
||||||
|
pre-#288 implementation. (No filtering, no extra fields, no ETag change.)
|
||||||
|
* With s/w/n/e supplied, heavy/dense layers are filtered to that viewport
|
||||||
|
with a 20% padding box.
|
||||||
|
* Light reference layers (datacenters, military_bases, power_plants,
|
||||||
|
satellites, news, weather, …) are NEVER filtered, even when bounds are
|
||||||
|
supplied — panning must never reveal an "empty world" of infrastructure.
|
||||||
|
* World-scale bounds (lng_span >= 300 OR lat_span >= 120) short-circuit
|
||||||
|
filtering and share the global ETag.
|
||||||
|
* The ETag includes a 1°-quantized bbox so viewports in different 1° buckets never poison each
|
||||||
|
other's 304 cache.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── /api/live-data/fast ─────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestFastBboxFiltering:
|
||||||
|
def _seed_fast(self, monkeypatch):
|
||||||
|
"""Plant deterministic heavy + light fixtures across the globe."""
|
||||||
|
from services.fetchers import _store
|
||||||
|
|
||||||
|
# Heavy collections: dense across the world.
|
||||||
|
commercial = [
|
||||||
|
{"lat": -60.0, "lng": -120.0, "id": "f-sw"}, # south Pacific
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "f-ne"}, # eastern US
|
||||||
|
{"lat": 35.0, "lng": 100.0, "id": "f-asia"}, # Asia
|
||||||
|
]
|
||||||
|
ships = [
|
||||||
|
{"lat": -60.0, "lng": -120.0, "id": "s-sw"},
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "s-ne"},
|
||||||
|
]
|
||||||
|
cctv = [{"lat": 35.0, "lng": -75.0, "id": "c-1"}]
|
||||||
|
|
||||||
|
# Sigint heavy collection.
|
||||||
|
sigint = [
|
||||||
|
{"source": "meshtastic", "lat": 35.0, "lng": -75.0, "id": "sig-east"},
|
||||||
|
{"source": "meshtastic", "lat": 35.0, "lng": 100.0, "id": "sig-asia"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Light/reference layer — must NEVER be filtered.
|
||||||
|
satellites = [
|
||||||
|
{"lat": -60.0, "lng": -120.0, "id": "sat-sw"},
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "sat-ne"},
|
||||||
|
{"lat": 35.0, "lng": 100.0, "id": "sat-asia"},
|
||||||
|
]
|
||||||
|
|
||||||
|
monkeypatch.setitem(_store.latest_data, "commercial_flights", commercial)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "ships", ships)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "cctv", cctv)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "sigint", sigint)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "satellites", satellites)
|
||||||
|
# Ensure all layers are on so the response includes them.
|
||||||
|
for layer in (
|
||||||
|
"flights", "ships_military", "ships_cargo", "ships_civilian",
|
||||||
|
"ships_passenger", "ships_tracked_yachts", "cctv",
|
||||||
|
"sigint_meshtastic", "sigint_aprs", "satellites",
|
||||||
|
):
|
||||||
|
monkeypatch.setitem(_store.active_layers, layer, True)
|
||||||
|
|
||||||
|
def test_no_bbox_returns_world_data(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
r = client.get("/api/live-data/fast")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
# All heavy fixtures pass through unchanged.
|
||||||
|
assert len(data["commercial_flights"]) == 3
|
||||||
|
assert len(data["ships"]) == 2
|
||||||
|
assert len(data["sigint"]) == 2
|
||||||
|
# Light layer also full.
|
||||||
|
assert len(data["satellites"]) == 3
|
||||||
|
|
||||||
|
def test_bbox_filters_heavy_layers(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
# Box tightly around the eastern-US fixture (lat 35, lng -75).
|
||||||
|
# ±5° → after 20% padding inside _bbox_filter, ~±6° window.
|
||||||
|
r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
# Heavy layers: only the eastern-US fixture survives.
|
||||||
|
assert {f["id"] for f in data["commercial_flights"]} == {"f-ne"}
|
||||||
|
assert {s["id"] for s in data["ships"]} == {"s-ne"}
|
||||||
|
assert {c["id"] for c in data["cctv"]} == {"c-1"}
|
||||||
|
assert {s["id"] for s in data["sigint"]} == {"sig-east"}
|
||||||
|
|
||||||
|
def test_bbox_does_not_filter_light_layers(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
# Satellites are a reference layer — must NOT be bbox-filtered.
|
||||||
|
assert len(data["satellites"]) == 3
|
||||||
|
|
||||||
|
def test_world_scale_bbox_skips_filtering(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
# lng_span = 360 → treated as world-scale; same as no bbox.
|
||||||
|
r = client.get("/api/live-data/fast?s=-90&w=-180&n=90&e=180")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
assert len(data["commercial_flights"]) == 3
|
||||||
|
assert len(data["ships"]) == 2
|
||||||
|
|
||||||
|
def test_partial_bbox_is_treated_as_no_bbox(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
# Only three of four bounds → filtering must NOT engage.
|
||||||
|
r = client.get("/api/live-data/fast?s=30&w=-80&n=40")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
assert len(data["commercial_flights"]) == 3
|
||||||
|
|
||||||
|
def test_etag_changes_with_bbox(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
r_world = client.get("/api/live-data/fast")
|
||||||
|
r_local = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
|
||||||
|
assert r_world.status_code == 200
|
||||||
|
assert r_local.status_code == 200
|
||||||
|
etag_world = r_world.headers.get("etag")
|
||||||
|
etag_local = r_local.headers.get("etag")
|
||||||
|
assert etag_world and etag_local
|
||||||
|
assert etag_world != etag_local, (
|
||||||
|
"ETag must differ between world and regional bbox to prevent "
|
||||||
|
"304 cache poisoning across viewports"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_etag_stable_for_subdegree_pan(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
# Sub-degree pan should land in the same 1°-quantized bucket.
|
||||||
|
r_a = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
|
||||||
|
r_b = client.get("/api/live-data/fast?s=30.3&w=-79.8&n=39.7&e=-70.4")
|
||||||
|
assert r_a.headers.get("etag") == r_b.headers.get("etag")
|
||||||
|
|
||||||
|
def test_if_none_match_returns_304_for_same_bbox(self, client, monkeypatch):
|
||||||
|
self._seed_fast(monkeypatch)
|
||||||
|
r1 = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
|
||||||
|
etag = r1.headers.get("etag")
|
||||||
|
r2 = client.get(
|
||||||
|
"/api/live-data/fast?s=30&w=-80&n=40&e=-70",
|
||||||
|
headers={"If-None-Match": etag},
|
||||||
|
)
|
||||||
|
assert r2.status_code == 304
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── /api/live-data/slow ─────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestSlowBboxFiltering:
|
||||||
|
def _seed_slow(self, monkeypatch):
|
||||||
|
from services.fetchers import _store
|
||||||
|
|
||||||
|
# Heavy collections.
|
||||||
|
gdelt = [
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "g-east"},
|
||||||
|
{"lat": 35.0, "lng": 100.0, "id": "g-asia"},
|
||||||
|
]
|
||||||
|
firms_fires = [
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "fire-east"},
|
||||||
|
{"lat": -10.0, "lng": 120.0, "id": "fire-ido"},
|
||||||
|
]
|
||||||
|
# Light/reference layers — must always ship in full.
|
||||||
|
datacenters = [
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "dc-east"},
|
||||||
|
{"lat": 35.0, "lng": 100.0, "id": "dc-asia"},
|
||||||
|
{"lat": -10.0, "lng": 120.0, "id": "dc-ido"},
|
||||||
|
]
|
||||||
|
military_bases = [
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "mb-east"},
|
||||||
|
{"lat": -10.0, "lng": 120.0, "id": "mb-ido"},
|
||||||
|
]
|
||||||
|
power_plants = [
|
||||||
|
{"lat": 35.0, "lng": -75.0, "id": "pp-east"},
|
||||||
|
{"lat": 35.0, "lng": 100.0, "id": "pp-asia"},
|
||||||
|
]
|
||||||
|
|
||||||
|
monkeypatch.setitem(_store.latest_data, "gdelt", gdelt)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "firms_fires", firms_fires)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "datacenters", datacenters)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "military_bases", military_bases)
|
||||||
|
monkeypatch.setitem(_store.latest_data, "power_plants", power_plants)
|
||||||
|
for layer in (
|
||||||
|
"global_incidents", "firms", "datacenters", "military_bases", "power_plants",
|
||||||
|
):
|
||||||
|
monkeypatch.setitem(_store.active_layers, layer, True)
|
||||||
|
|
||||||
|
def test_no_bbox_returns_world_data(self, client, monkeypatch):
|
||||||
|
self._seed_slow(monkeypatch)
|
||||||
|
r = client.get("/api/live-data/slow")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
assert len(data["gdelt"]) == 2
|
||||||
|
assert len(data["firms_fires"]) == 2
|
||||||
|
assert len(data["datacenters"]) == 3
|
||||||
|
|
||||||
|
def test_bbox_filters_heavy_layers(self, client, monkeypatch):
|
||||||
|
self._seed_slow(monkeypatch)
|
||||||
|
r = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
assert {g["id"] for g in data["gdelt"]} == {"g-east"}
|
||||||
|
assert {f["id"] for f in data["firms_fires"]} == {"fire-east"}
|
||||||
|
|
||||||
|
def test_bbox_leaves_reference_layers_untouched(self, client, monkeypatch):
|
||||||
|
"""Datacenters, bases, and power plants are infrastructure overlays —
|
||||||
|
they must remain world-scale so panning never hides them."""
|
||||||
|
self._seed_slow(monkeypatch)
|
||||||
|
r = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
assert len(data["datacenters"]) == 3
|
||||||
|
assert len(data["military_bases"]) == 2
|
||||||
|
assert len(data["power_plants"]) == 2
|
||||||
|
|
||||||
|
def test_antimeridian_bbox(self, client, monkeypatch):
|
||||||
|
from services.fetchers import _store
|
||||||
|
# Box that straddles the antimeridian (Pacific): w=170, e=-170.
|
||||||
|
gdelt = [
|
||||||
|
{"lat": 0.0, "lng": 175.0, "id": "in-west"},
|
||||||
|
{"lat": 0.0, "lng": -175.0, "id": "in-east"},
|
||||||
|
{"lat": 0.0, "lng": 0.0, "id": "out-mid"},
|
||||||
|
]
|
||||||
|
monkeypatch.setitem(_store.latest_data, "gdelt", gdelt)
|
||||||
|
monkeypatch.setitem(_store.active_layers, "global_incidents", True)
|
||||||
|
r = client.get("/api/live-data/slow?s=-10&w=170&n=10&e=-170")
|
||||||
|
assert r.status_code == 200
|
||||||
|
data = r.json()
|
||||||
|
ids = {g["id"] for g in data["gdelt"]}
|
||||||
|
assert "in-west" in ids
|
||||||
|
assert "in-east" in ids
|
||||||
|
assert "out-mid" not in ids
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────── Direct helper coverage (defensive) ─────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestHelpers:
|
||||||
|
def test_has_full_bbox(self):
|
||||||
|
from routers.data import _has_full_bbox
|
||||||
|
assert _has_full_bbox(1, 2, 3, 4)
|
||||||
|
assert not _has_full_bbox(None, 2, 3, 4)
|
||||||
|
assert not _has_full_bbox(1, None, 3, 4)
|
||||||
|
assert not _has_full_bbox(1, 2, None, 4)
|
||||||
|
assert not _has_full_bbox(1, 2, 3, None)
|
||||||
|
|
||||||
|
def test_bbox_etag_suffix_quantizes(self):
|
||||||
|
from routers.data import _bbox_etag_suffix
|
||||||
|
a = _bbox_etag_suffix(30.1, -79.6, 39.9, -70.1)
|
||||||
|
b = _bbox_etag_suffix(30.4, -79.2, 39.4, -70.8)
|
||||||
|
assert a == b, "Sub-degree pan must collapse to the same ETag suffix"
|
||||||
|
assert a.startswith("|bbox=")
|
||||||
|
|
||||||
|
def test_bbox_etag_suffix_world_collapses(self):
|
||||||
|
from routers.data import _bbox_etag_suffix
|
||||||
|
# World-scale → empty suffix (shares the global ETag).
|
||||||
|
assert _bbox_etag_suffix(-90, -180, 90, 180) == ""
|
||||||
|
|
||||||
|
def test_bbox_etag_suffix_partial_is_empty(self):
|
||||||
|
from routers.data import _bbox_etag_suffix
|
||||||
|
assert _bbox_etag_suffix(None, -180, 90, 180) == ""
|
||||||
|
|
||||||
|
def test_apply_bbox_preserves_non_list_values(self):
|
||||||
|
from routers.data import _apply_bbox_to_payload, _FAST_BBOX_HEAVY_KEYS
|
||||||
|
payload = {
|
||||||
|
"commercial_flights": [{"lat": 35, "lng": -75, "id": "x"}],
|
||||||
|
"satellite_source": "tle", # not a list, must pass through
|
||||||
|
"sigint_totals": {"total": 1}, # dict — must pass through
|
||||||
|
}
|
||||||
|
out = _apply_bbox_to_payload(dict(payload), _FAST_BBOX_HEAVY_KEYS, 30, -80, 40, -70)
|
||||||
|
assert out["satellite_source"] == "tle"
|
||||||
|
assert out["sigint_totals"] == {"total": 1}
|
||||||
@@ -0,0 +1,208 @@
|
|||||||
|
"""Issue #239 (tg12): backend registers duplicate API routes in both
|
||||||
|
``main.py`` and router modules, so request behavior depends on the
|
||||||
|
order ``FastAPI`` happened to register them.
|
||||||
|
|
||||||
|
This test is the **CI guard** that locks in the invariant going forward.
|
||||||
|
It does NOT delete any existing duplicates — those are tolerated via an
|
||||||
|
explicit baseline file. What it DOES block is *new* duplicates appearing
|
||||||
|
later, which is what the audit was actually asking for: a way to stop
|
||||||
|
the drift before it gets worse.
|
||||||
|
|
||||||
|
Findings (empirically verified, see PR #286 description):
|
||||||
|
|
||||||
|
- ``main.app`` calls ``include_router(...)`` for every router at module
|
||||||
|
import time around line 3316.
|
||||||
|
- Every ``@app.get/post/put/...`` decorator inside ``main.py`` runs
|
||||||
|
*after* those include_router calls, so the router handler is the one
|
||||||
|
that actually serves requests. The duplicates in ``main.py`` are
|
||||||
|
dead code at the route-resolution layer.
|
||||||
|
- Behavior today is deterministic (router wins), but if someone later
|
||||||
|
adds a NEW route only in ``main.py``, or edits one copy of an
|
||||||
|
existing pair without the other, drift starts.
|
||||||
|
|
||||||
|
How this test works:
|
||||||
|
|
||||||
|
- Walks ``main.app.routes`` and records every ``(method, path)`` that
|
||||||
|
appears more than once, along with which modules registered each
|
||||||
|
copy.
|
||||||
|
- Compares that set against the baseline in
|
||||||
|
``backend/tests/data/duplicate_routes_baseline.json``.
|
||||||
|
- **Fails** if any duplicate appears that is NOT in the baseline
|
||||||
|
(or if the registering modules for an existing duplicate change).
|
||||||
|
- **Stays green** when duplicates are *removed* by genuinely deduping
|
||||||
|
the code. (The baseline is a ceiling, not a floor.)
|
||||||
|
|
||||||
|
To extend in the future:
|
||||||
|
|
||||||
|
- If you actually dedupe a route, leave the baseline alone — the test
|
||||||
|
still passes. Subsequent regenerations of the baseline (``python -m
|
||||||
|
scripts.regen_duplicate_routes_baseline`` or the snippet in this
|
||||||
|
test's docstring) will shrink it.
|
||||||
|
- If you legitimately need a new duplicate (you probably do not), add
|
||||||
|
it to the baseline AND explain why in the PR description so reviewers
|
||||||
|
can push back.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
BASELINE_PATH = (
|
||||||
|
Path(__file__).parent / "data" / "duplicate_routes_baseline.json"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _current_duplicates() -> dict[str, list[str]]:
|
||||||
|
"""Walk ``main.app.routes`` and return ``{'METHOD /path': [module, ...]}``
|
||||||
|
for every (method, path) registered more than once."""
|
||||||
|
import main
|
||||||
|
|
||||||
|
by_key: dict[str, list[str]] = defaultdict(list)
|
||||||
|
for route in main.app.routes:
|
||||||
|
path = getattr(route, "path", None)
|
||||||
|
methods = getattr(route, "methods", None)
|
||||||
|
endpoint = getattr(route, "endpoint", None)
|
||||||
|
if not path or not methods or endpoint is None:
|
||||||
|
continue
|
||||||
|
for method in methods:
|
||||||
|
if method in ("HEAD", "OPTIONS"):
|
||||||
|
continue
|
||||||
|
by_key[f"{method} {path}"].append(endpoint.__module__)
|
||||||
|
|
||||||
|
return {
|
||||||
|
key: sorted(modules) for key, modules in by_key.items() if len(modules) > 1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _load_baseline() -> dict[str, list[str]]:
|
||||||
|
if not BASELINE_PATH.exists():
|
||||||
|
return {}
|
||||||
|
raw = json.loads(BASELINE_PATH.read_text(encoding="utf-8"))
|
||||||
|
dups = raw.get("duplicates", {})
|
||||||
|
if not isinstance(dups, dict):
|
||||||
|
return {}
|
||||||
|
return {k: sorted(v) for k, v in dups.items()}
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_new_duplicate_route_registrations():
    """Fail when a duplicated (method, path) is missing from the baseline.

    A duplicate that is listed in the baseline but whose registering
    modules differ from the recorded ones is reported as well. The
    failure message lists every offending key and where the baseline
    file lives.
    """
    current = _current_duplicates()
    baseline = _load_baseline()

    offending = []
    for key in sorted(current):
        modules = current[key]
        if key in baseline:
            if modules != baseline[key]:
                offending.append(
                    f" ~ {key} (modules changed: was {baseline[key]}, now {modules})"
                )
        else:
            offending.append(
                f" + {key} (NEW duplicate; registered in: {modules})"
            )

    if not offending:
        return

    explanation = (
        "Issue #239 CI guard: detected duplicate route registrations "
        "that are NOT in the tolerated baseline.\n"
        "\n"
        "If you added a new @app.get/post/... in main.py for a path "
        "that a router module already serves, please move the handler "
        "into the router and delete the main.py copy — the router "
        "version wins on request routing anyway, so the main.py copy "
        "is dead code that just creates drift risk.\n"
        "\n"
        "Offending entries:\n"
    )
    # Show the baseline path relative to the directory three levels above the file.
    baseline_location = BASELINE_PATH.relative_to(BASELINE_PATH.parent.parent.parent)
    footer = "\n\n" "Baseline lives at " f"{baseline_location}."
    pytest.fail(explanation + "\n".join(offending) + footer)
|
||||||
|
|
||||||
|
|
||||||
|
def test_baseline_only_lists_real_duplicates():
    """Warn about baseline entries that are no longer duplicated.

    An entry that sits in the baseline but is absent from the current
    duplicate set means the baseline is stale. This check never fails:
    it only emits a warning naming up to five stale keys, leaving the
    blocking behaviour to the new-duplicate guard.
    """
    current = _current_duplicates()
    baseline = _load_baseline()

    stale = sorted(baseline.keys() - current.keys())
    if not stale:
        return

    # Housekeeping only, so a warning rather than a failure.
    import warnings

    preview = stale[:5]
    more = "..." if len(stale) > 5 else ""
    warnings.warn(
        f"duplicate_routes_baseline.json contains {len(stale)} entry/entries "
        "no longer present in app.routes — consider regenerating the baseline. "
        f"Stale: {preview}{more}",
        stacklevel=2,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_router_handler_is_the_one_that_serves():
    """Assert that no baseline duplicate has ``main`` as its first registrant.

    Walks ``main.app.routes`` in order and remembers, for each
    ``'METHOD /path'``, the module of the first endpoint seen. The test
    fails when a key listed in the baseline was first registered by the
    ``main`` module, because the tolerated duplicates are expected to be
    router-first.
    """
    import main

    first_owner: dict[str, str] = {}
    for route in main.app.routes:
        path = getattr(route, "path", None)
        methods = getattr(route, "methods", None)
        endpoint = getattr(route, "endpoint", None)
        if path and methods and endpoint is not None:
            for method in methods:
                if method in ("HEAD", "OPTIONS"):
                    continue
                key = f"{method} {path}"
                if key in first_owner:
                    continue
                first_owner[key] = endpoint.__module__

    # Only keys in the tolerated baseline matter here: those are the known
    # duplicates, and each is expected to be served by its router copy.
    baseline = _load_baseline()
    offenders = sorted(
        key
        for key, module_name in first_owner.items()
        if module_name == "main" and key in baseline
    )
    if not offenders:
        return

    intro = (
        "Issue #239 invariant broken: for at least one duplicated "
        "(method, path), main.py is now registered FIRST and is "
        "serving requests instead of the router copy. Audit rounds "
        "5 and 6 assumed the router handler wins.\n"
        "\n"
        "Affected entries:\n"
    )
    listing = "\n".join(f" {key}" for key in offenders)
    hint = (
        "\n\n"
        "Most likely cause: someone moved app.include_router(...) "
        "calls in main.py to after the @app.get decorators. Move "
        "them back to before the @app routes (currently around "
        "line 3316)."
    )
    pytest.fail(intro + listing + hint)
|
||||||
@@ -0,0 +1,334 @@
|
|||||||
|
"""Issue #302 (tg12): OpenClaw connect-info HMAC secret disclosure.
|
||||||
|
|
||||||
|
Before this change, ``GET /api/ai/connect-info?reveal=true`` returned the
|
||||||
|
full HMAC secret in the response body on every modal open AND the same
|
||||||
|
GET endpoint auto-bootstrapped (generated + persisted) the secret on a
|
||||||
|
mere read. Even gated to ``require_local_operator``, that put the full
|
||||||
|
secret into:
|
||||||
|
|
||||||
|
* browser visit history
|
||||||
|
* dev-tools network panel
|
||||||
|
* browser disk cache
|
||||||
|
* HAR exports
|
||||||
|
* screen captures / shoulder-surfing
|
||||||
|
|
||||||
|
Every single time the OpenClaw Connect modal opened.
|
||||||
|
|
||||||
|
After this change:
|
||||||
|
|
||||||
|
GET /api/ai/connect-info — always returns the MASKED
|
||||||
|
fingerprint. No ?reveal param.
|
||||||
|
No side effects (auto-bootstrap
|
||||||
|
gone).
|
||||||
|
POST /api/ai/connect-info/bootstrap — mints+persists the secret if
|
||||||
|
missing. Idempotent. Never
|
||||||
|
returns the full secret.
|
||||||
|
POST /api/ai/connect-info/reveal — returns the full secret with
|
||||||
|
strict Cache-Control: no-store
|
||||||
|
headers. POST so the body
|
||||||
|
doesn't land in URL history.
|
||||||
|
POST /api/ai/connect-info/regenerate — keeps the one-time-disclosure
|
||||||
|
for the new secret (regen IS a
|
||||||
|
deliberate destructive action).
|
||||||
|
Same no-store headers added.
|
||||||
|
|
||||||
|
These tests pin every property.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Loopback test client. ``require_local_operator`` resolves true for
|
||||||
|
# request.client.host == "127.0.0.1"; FastAPI's TestClient sets it to
|
||||||
|
# "testclient" which isn't on the allowlist. Use raw ASGITransport.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def loopback():
    """Yield a synchronous client whose requests reach the app from 127.0.0.1.

    Each request is sent through an ``ASGITransport`` bound to ``main.app``
    with the peer address set explicitly, driven on a private event loop
    that is closed at teardown.
    """
    from main import app

    class _Client:
        def __init__(self, peer_ip: str = "127.0.0.1"):
            self._loop = asyncio.new_event_loop()
            self._transport = ASGITransport(app=app, client=(peer_ip, 12345))
            self._base = f"http://{peer_ip}:8000"

        def _do(self, method: str, url: str, **kw):
            async def _send():
                session = AsyncClient(transport=self._transport, base_url=self._base)
                async with session as ac:
                    return await ac.request(method, url, **kw)

            return self._loop.run_until_complete(_send())

        def get(self, url, **kw):
            return self._do("GET", url, **kw)

        def post(self, url, **kw):
            return self._do("POST", url, **kw)

        def close(self):
            self._loop.close()

    client = _Client()
    yield client
    client.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def remote():
    """Yield a synchronous client whose requests reach the app from 1.2.3.4.

    Same shape as the loopback client, but the ``ASGITransport`` reports
    a non-loopback peer address. The private event loop is closed at
    teardown.
    """
    from main import app

    class _Client:
        def __init__(self):
            self._loop = asyncio.new_event_loop()
            self._transport = ASGITransport(app=app, client=("1.2.3.4", 12345))
            self._base = "http://1.2.3.4:8000"

        def _do(self, method: str, url: str, **kw):
            async def _send():
                session = AsyncClient(transport=self._transport, base_url=self._base)
                async with session as ac:
                    return await ac.request(method, url, **kw)

            return self._loop.run_until_complete(_send())

        def get(self, url, **kw):
            return self._do("GET", url, **kw)

        def post(self, url, **kw):
            return self._do("POST", url, **kw)

        def close(self):
            self._loop.close()

    client = _Client()
    yield client
    client.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def stub_env(monkeypatch):
    """Back the connect-info settings and env writes with an in-test dict.

    Two attributes are replaced for the duration of the test:

    * ``services.config.get_settings`` returns a stand-in settings object
      whose ``OPENCLAW_HMAC_SECRET`` (default ``""``) and
      ``OPENCLAW_ACCESS_TIER`` (default ``"restricted"``) are read from
      the dict on every access.
    * ``routers.ai_intel._write_env_value`` stores ``key -> value`` in
      that same dict.

    Yields the dict so a test can pre-seed values or check what was
    written.
    """
    import routers.ai_intel as ai_intel
    import services.config as config

    env: dict[str, str] = {}

    class _FakeSettings:
        @property
        def OPENCLAW_HMAC_SECRET(self) -> str:
            return env.get("OPENCLAW_HMAC_SECRET", "")

        @property
        def OPENCLAW_ACCESS_TIER(self) -> str:
            return env.get("OPENCLAW_ACCESS_TIER", "restricted")

    settings_stub = _FakeSettings()

    def _fake_get_settings():
        return settings_stub

    def _noop_cache_clear():
        return None

    # The replacement needs a ``cache_clear`` attribute like the real,
    # cached ``get_settings`` exposes; here it does nothing.
    _fake_get_settings.cache_clear = _noop_cache_clear  # type: ignore[attr-defined]

    def _fake_write_env_value(key: str, value: str) -> None:
        env[key] = value

    monkeypatch.setattr(config, "get_settings", _fake_get_settings)
    monkeypatch.setattr(ai_intel, "_write_env_value", _fake_write_env_value)

    yield env
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GET /api/ai/connect-info — always masked, no auto-bootstrap
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetConnectInfoMasking:
    """GET /api/ai/connect-info returns only a masked secret and writes nothing."""

    def test_returns_masked_when_secret_set(self, loopback, stub_env):
        """With a secret configured, only its masked form appears in the body."""
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        resp = loopback.get("/api/ai/connect-info")
        assert resp.status_code == 200
        payload = resp.json()
        # The raw response text must not contain the secret anywhere.
        assert secret not in resp.text, (
            "GET /api/ai/connect-info MUST NOT include the full HMAC "
            "secret. Response body contained the secret value."
        )
        assert payload["hmac_secret_set"] is True
        masked = payload["masked_hmac_secret"]
        assert masked.startswith("abcdef")
        assert masked.endswith("wxyz")
        assert "•" in masked
        # The old full-secret field must be absent.
        assert "hmac_secret" not in payload

    def test_no_auto_bootstrap_when_secret_missing(self, loopback, stub_env):
        """A GET with no secret configured reports it as unset and writes nothing."""
        resp = loopback.get("/api/ai/connect-info")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["hmac_secret_set"] is False
        assert payload["masked_hmac_secret"] == ""
        # The response advertises that nothing is generated automatically.
        assert payload["bootstrap_behavior"]["auto_generates_when_missing"] is False
        # No env write was recorded by the stub.
        assert "OPENCLAW_HMAC_SECRET" not in stub_env

    def test_no_reveal_query_param(self, loopback, stub_env):
        """``?reveal=true`` on the GET still yields 200 without the full secret."""
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        resp = loopback.get("/api/ai/connect-info?reveal=true")
        assert resp.status_code == 200
        assert secret not in resp.text, (
            "?reveal=true must be a no-op on GET — the full secret "
            "MUST NOT come back in the response body."
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# POST /api/ai/connect-info/bootstrap
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBootstrap:
    """POST /api/ai/connect-info/bootstrap mints a secret once and never echoes it."""

    def test_mints_when_missing(self, loopback, stub_env):
        """With no secret configured, bootstrap generates and stores one."""
        resp = loopback.post("/api/ai/connect-info/bootstrap")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["generated"] is True
        assert payload["hmac_secret_set"] is True
        # The full secret must not be returned inline.
        assert "hmac_secret" not in payload or not payload.get("hmac_secret")
        assert "•" in payload["masked_hmac_secret"]
        # The stubbed env writer received a value.
        assert stub_env.get("OPENCLAW_HMAC_SECRET")
        # The stored value does not appear in the raw response text either.
        assert stub_env["OPENCLAW_HMAC_SECRET"] not in resp.text

    def test_idempotent_when_already_set(self, loopback, stub_env):
        """With a secret already configured, bootstrap leaves it untouched."""
        existing = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = existing

        resp = loopback.post("/api/ai/connect-info/bootstrap")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["generated"] is False
        assert payload["hmac_secret_set"] is True
        # The seeded value is still what the stub holds.
        assert stub_env["OPENCLAW_HMAC_SECRET"] == existing
        # And it is not echoed back.
        assert existing not in resp.text
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# POST /api/ai/connect-info/reveal
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestReveal:
    """POST /api/ai/connect-info/reveal returns the secret with anti-caching headers."""

    def test_returns_full_secret_when_set(self, loopback, stub_env):
        """A configured secret is returned verbatim in ``hmac_secret``."""
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        resp = loopback.post("/api/ai/connect-info/reveal")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["hmac_secret"] == secret

    def test_strict_cache_control_headers(self, loopback, stub_env):
        """The reveal response carries no-store / no-cache, Pragma and Expires."""
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        resp = loopback.post("/api/ai/connect-info/reveal")
        cache_control = resp.headers.get("cache-control", "")
        assert "no-store" in cache_control, (
            f"reveal MUST set Cache-Control: no-store — got {cache_control!r}"
        )
        assert "no-cache" in cache_control
        # Legacy headers are checked too, for HTTP/1.0 caches.
        pragma = resp.headers.get("pragma", "")
        assert pragma.lower() == "no-cache"
        assert resp.headers.get("expires") == "0"

    def test_404_when_no_secret_configured(self, loopback, stub_env):
        """Without a secret, reveal answers 404 and the detail mentions bootstrap."""
        resp = loopback.post("/api/ai/connect-info/reveal")
        assert resp.status_code == 404
        # The error detail should steer the caller to the bootstrap endpoint.
        detail = resp.json().get("detail", "")
        assert "/bootstrap" in detail or "bootstrap" in detail.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# POST /api/ai/connect-info/regenerate — still returns the new secret
|
||||||
|
# inline (deliberate destructive action), but with no-store headers.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRegenerate:
    """POST /api/ai/connect-info/regenerate returns a fresh secret, uncached."""

    def test_returns_new_secret_with_no_store_headers(self, loopback, stub_env):
        """The returned secret differs from the seeded one and is sent with no-store headers."""
        # Seed a known value so the change is observable.
        previous = "oldold" + "0" * 38 + "1234"
        stub_env["OPENCLAW_HMAC_SECRET"] = previous

        resp = loopback.post("/api/ai/connect-info/regenerate")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["hmac_secret"]
        assert payload["hmac_secret"] != previous
        # The new secret is returned inline, so the response must forbid caching.
        cache_control = resp.headers.get("cache-control", "")
        assert "no-store" in cache_control and "no-cache" in cache_control
        pragma = resp.headers.get("pragma", "")
        assert pragma.lower() == "no-cache"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Auth-gate regression — every endpoint still rejects anonymous remote
|
||||||
|
# callers. This is the property we already enforce for the rest of the
|
||||||
|
# operator-only surface; adding the three new endpoints to the audit
|
||||||
|
# coverage prevents a future refactor from dropping the dependency.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnonymousRejection:
    """All four connect-info endpoints answer 403 to the non-loopback client."""

    @pytest.mark.parametrize(
        "method,path,body",
        [
            ("get", "/api/ai/connect-info", None),
            ("post", "/api/ai/connect-info/bootstrap", None),
            ("post", "/api/ai/connect-info/reveal", None),
            ("post", "/api/ai/connect-info/regenerate", None),
        ],
    )
    def test_remote_rejected(self, remote, method, path, body):
        """Send the request via the ``remote`` client and expect HTTP 403."""
        send = getattr(remote, method)
        # A JSON body is attached only when the parametrization supplies one.
        if body is not None:
            resp = send(path, json=body)
        else:
            resp = send(path)
        assert resp.status_code == 403, (
            f"{method.upper()} {path} must reject anonymous remote callers; "
            f"got {resp.status_code}"
        )
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
"""Issues #240 & #241 (tg12): oracle market/stake resolution endpoints
|
||||||
|
must require admin authentication.
|
||||||
|
|
||||||
|
Before the fix, ``POST /api/mesh/oracle/resolve`` and
|
||||||
|
``POST /api/mesh/oracle/resolve-stakes`` were decorated with
|
||||||
|
``@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)``. That decorator
|
||||||
|
only tags the route as not requiring a mesh signed-write envelope; it
|
||||||
|
does NOT enforce authorization. The rate limiter (5/minute) was the
|
||||||
|
only real gate, which is wrong for control-plane state mutations.
|
||||||
|
|
||||||
|
The fix adds ``dependencies=[Depends(require_admin)]`` to both routes.
|
||||||
|
These tests prove:
|
||||||
|
|
||||||
|
- Anonymous callers receive 403.
|
||||||
|
- A request bearing the configured admin key passes the auth gate.
|
||||||
|
- The underlying ledger mutator is not invoked on a 403.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
# Admin key used throughout this module: the tests patch
# ``auth._current_admin_key`` to return it and send it in the
# ``X-Admin-Key`` header when exercising the authorized path.
_ADMIN_KEY = "test-admin-key-for-oracle-resolve-fixture-32+"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client():
    """Yield a ``TestClient`` for ``main.app`` with the transport-tier check disabled.

    ``main._minimum_transport_tier`` is patched to return ``None`` while
    the test runs, so ``/api/mesh/*`` requests are not held back by the
    private-lane tier gate and can reach the route (and its admin
    dependency). Server-side exceptions are returned as responses rather
    than raised into the test.
    """
    import main

    tier_bypass = patch("main._minimum_transport_tier", return_value=None)
    with tier_bypass:
        yield TestClient(main.app, raise_server_exceptions=False)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_ledger():
    """Swap ``services.mesh.mesh_oracle.oracle_ledger`` for a ``MagicMock``.

    The mock's ``resolve_market``, ``resolve_market_stakes`` and
    ``resolve_expired_stakes`` return fixed empty results, so tests can
    count calls without touching persistent state. The module attribute
    is patched for the duration of the test and the mock is yielded.
    """
    ledger = MagicMock()
    ledger.configure_mock(
        **{
            "resolve_market.return_value": (0, 0),
            "resolve_market_stakes.return_value": {"winners": 0, "losers": 0},
            "resolve_expired_stakes.return_value": [],
        }
    )
    with patch("services.mesh.mesh_oracle.oracle_ledger", ledger):
        yield ledger
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /api/mesh/oracle/resolve — issue #240
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestOracleResolveAuthGate:
    """Admin-auth checks for ``POST /api/mesh/oracle/resolve`` (issue #240)."""

    def test_anonymous_caller_is_rejected(self, client, mock_ledger):
        """No ``X-Admin-Key`` header: 403, and the ledger is never touched."""
        payload = {"market_title": "test-market", "outcome": "Yes"}
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post("/api/mesh/oracle/resolve", json=payload)
        assert resp.status_code == 403
        # A rejected request must not reach either ledger mutator.
        assert mock_ledger.resolve_market.call_count == 0
        assert mock_ledger.resolve_market_stakes.call_count == 0

    def test_wrong_admin_key_rejected(self, client, mock_ledger):
        """A mismatching ``X-Admin-Key`` header: 403, ledger untouched."""
        payload = {"market_title": "test-market", "outcome": "Yes"}
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post(
                "/api/mesh/oracle/resolve",
                headers={"X-Admin-Key": "this-key-is-wrong"},
                json=payload,
            )
        assert resp.status_code == 403
        assert mock_ledger.resolve_market.call_count == 0

    def test_valid_admin_key_passes_auth_gate(self, client, mock_ledger):
        """The configured key is accepted and the mocked ledger is called once."""
        payload = {"market_title": "test-market", "outcome": "Yes"}
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post(
                "/api/mesh/oracle/resolve",
                headers={"X-Admin-Key": _ADMIN_KEY},
                json=payload,
            )
        # The request got past the gate, so the handler ran against the mock.
        assert resp.status_code == 200
        resolve_market = mock_ledger.resolve_market
        assert resolve_market.call_count == 1
        assert resolve_market.call_args[0] == ("test-market", "Yes")

    def test_admin_key_unset_blocks_in_production_posture(self, client, mock_ledger):
        """With an empty admin key and debug/insecure modes off, the request is still refused."""
        payload = {"market_title": "test-market", "outcome": "Yes"}
        with (
            patch("auth._current_admin_key", return_value=""),
            patch("auth._allow_insecure_admin", return_value=False),
            patch("auth._debug_mode_enabled", return_value=False),
            patch("auth._scoped_admin_tokens", return_value={}),
        ):
            resp = client.post("/api/mesh/oracle/resolve", json=payload)
        assert resp.status_code == 403
        assert mock_ledger.resolve_market.call_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /api/mesh/oracle/resolve-stakes — issue #241
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestOracleResolveStakesAuthGate:
    """Admin-auth checks for ``POST /api/mesh/oracle/resolve-stakes`` (issue #241)."""

    ENDPOINT = "/api/mesh/oracle/resolve-stakes"

    def test_anonymous_caller_is_rejected(self, client, mock_ledger):
        """No ``X-Admin-Key`` header: 403, and no stakes are resolved."""
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post(self.ENDPOINT)
        assert resp.status_code == 403
        assert mock_ledger.resolve_expired_stakes.call_count == 0

    def test_wrong_admin_key_rejected(self, client, mock_ledger):
        """A mismatching ``X-Admin-Key`` header: 403, ledger untouched."""
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post(self.ENDPOINT, headers={"X-Admin-Key": "nope"})
        assert resp.status_code == 403
        assert mock_ledger.resolve_expired_stakes.call_count == 0

    def test_valid_admin_key_passes_auth_gate(self, client, mock_ledger):
        """The configured key is accepted; the mocked ledger runs once and the body reports zero."""
        with patch("auth._current_admin_key", return_value=_ADMIN_KEY):
            resp = client.post(self.ENDPOINT, headers={"X-Admin-Key": _ADMIN_KEY})
        assert resp.status_code == 200
        assert mock_ledger.resolve_expired_stakes.call_count == 1
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["count"] == 0

    def test_admin_key_unset_blocks_in_production_posture(self, client, mock_ledger):
        """With an empty admin key and debug/insecure modes off, the request is still refused."""
        with (
            patch("auth._current_admin_key", return_value=""),
            patch("auth._allow_insecure_admin", return_value=False),
            patch("auth._debug_mode_enabled", return_value=False),
            patch("auth._scoped_admin_tokens", return_value={}),
        ):
            resp = client.post(self.ENDPOINT)
        assert resp.status_code == 403
        assert mock_ledger.resolve_expired_stakes.call_count == 0
|
||||||
@@ -87,16 +87,32 @@ class TestRequireLocalOperator:
|
|||||||
assert self._call_with_host("172.16.0.5") == 403
|
assert self._call_with_host("172.16.0.5") == 403
|
||||||
|
|
||||||
def test_docker_bridge_blocked_without_compose_opt_in(self):
|
def test_docker_bridge_blocked_without_compose_opt_in(self):
|
||||||
|
# Even if DNS would resolve the frontend hostname to this IP,
|
||||||
|
# the env opt-in is required.
|
||||||
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": ""}):
|
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": ""}):
|
||||||
assert self._call_with_host("172.18.0.3") == 403
|
with patch("auth._resolve_trusted_bridge_ips", return_value=frozenset({"172.18.0.3"})):
|
||||||
|
assert self._call_with_host("172.18.0.3") == 403
|
||||||
|
|
||||||
def test_docker_bridge_passes_with_compose_opt_in(self):
|
def test_docker_bridge_passes_with_compose_opt_in(self):
|
||||||
|
# Issue #250: opt-in alone is no longer sufficient — the source IP
|
||||||
|
# must also reverse-match a trusted frontend container hostname.
|
||||||
|
# Here we simulate Docker DNS resolving "frontend" to 172.18.0.3.
|
||||||
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": "1"}):
|
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": "1"}):
|
||||||
assert self._call_with_host("172.18.0.3") == 200
|
with patch("auth._resolve_trusted_bridge_ips", return_value=frozenset({"172.18.0.3"})):
|
||||||
|
assert self._call_with_host("172.18.0.3") == 200
|
||||||
|
|
||||||
|
def test_unknown_bridge_ip_blocked_even_with_compose_opt_in(self):
|
||||||
|
# Issue #250 core regression: a rogue container on the same bridge
|
||||||
|
# whose IP is NOT in the resolved frontend hostname set must NOT
|
||||||
|
# be trusted, even when the bridge opt-in flag is on.
|
||||||
|
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": "1"}):
|
||||||
|
with patch("auth._resolve_trusted_bridge_ips", return_value=frozenset({"172.18.0.3"})):
|
||||||
|
assert self._call_with_host("172.18.0.99") == 403
|
||||||
|
|
||||||
def test_lan_ip_still_blocked_with_compose_opt_in(self):
|
def test_lan_ip_still_blocked_with_compose_opt_in(self):
|
||||||
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": "1"}):
|
with patch.dict("os.environ", {"SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR": "1"}):
|
||||||
assert self._call_with_host("192.168.1.100") == 403
|
with patch("auth._resolve_trusted_bridge_ips", return_value=frozenset({"172.18.0.3"})):
|
||||||
|
assert self._call_with_host("192.168.1.100") == 403
|
||||||
|
|
||||||
def test_rfc1918_192168_blocked_without_key(self):
|
def test_rfc1918_192168_blocked_without_key(self):
|
||||||
assert self._call_with_host("192.168.1.100") == 403
|
assert self._call_with_host("192.168.1.100") == 403
|
||||||
|
|||||||
@@ -0,0 +1,281 @@
|
|||||||
|
"""Round 7a: per-install operator handle threads through every outbound
|
||||||
|
third-party API call.
|
||||||
|
|
||||||
|
Background: before this change every Shadowbroker install identified
|
||||||
|
itself to Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||||
|
weather.gov, NUFORC, etc. with a single project-wide ``Shadowbroker``
|
||||||
|
User-Agent. From the upstream's perspective, every install in the world
|
||||||
|
looked like one giant scraper. If one install misbehaved, the upstream's
|
||||||
|
only recourse was to block ``Shadowbroker`` as a whole, taking out every
|
||||||
|
other install.
|
||||||
|
|
||||||
|
Fix: each install gets a stable pseudonymous handle (auto-generated like
|
||||||
|
``operator-7f3a92`` or operator-overridden via ``OPERATOR_HANDLE``) that
|
||||||
|
gets embedded in the User-Agent for every outbound call. Upstreams can
|
||||||
|
now rate-limit / contact the specific operator instead of the project.
|
||||||
|
|
||||||
|
These tests pin:
|
||||||
|
|
||||||
|
1. The handle is auto-generated on first call if no override exists.
|
||||||
|
2. The handle survives process restart (persisted to disk).
|
||||||
|
3. ``OPERATOR_HANDLE`` env var override wins over the auto-gen handle.
|
||||||
|
4. The handle is sanitized (whitespace, special chars, length).
|
||||||
|
5. Every previously-MONSTER-UA call site now sends the per-operator UA.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def isolated_handle(tmp_path, monkeypatch):
    """Redirect the persistence path to tmp and reset caches between tests.

    Yields the ``services.network_utils`` module with its
    ``_OPERATOR_HANDLE_FILE`` pointed at a file under ``tmp_path`` and with
    ``OPERATOR_HANDLE`` removed from the environment. The operator-handle
    cache and the settings cache are cleared both before and after the test.
    """
    from services import network_utils

    handle_file = tmp_path / "operator_handle.json"
    monkeypatch.setattr(network_utils, "_OPERATOR_HANDLE_FILE", handle_file)
    network_utils._reset_operator_handle_cache_for_tests()
    monkeypatch.delenv("OPERATOR_HANDLE", raising=False)

    # Reset Settings cache so OPERATOR_HANDLE env changes are picked up.
    from services.config import get_settings

    get_settings.cache_clear()

    yield network_utils

    # Teardown: leave no cached handle / settings behind for the next test.
    network_utils._reset_operator_handle_cache_for_tests()
    get_settings.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core handle generation / persistence / override
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestOperatorHandleGeneration:
    """Pins auto-generation, persistence, env override and sanitization
    of the per-install operator handle."""

    def test_auto_generates_on_first_call(self, isolated_handle):
        h = isolated_handle.get_operator_handle()
        # Prefix is "operator-" (deliberately neutral; "shadow-" looked
        # exactly like a pattern abuse-detection systems would auto-block).
        assert h.startswith("operator-")
        assert len(h) == len("operator-") + 6
        # Hex suffix.
        suffix = h.split("-", 1)[1]
        int(suffix, 16)  # raises if not hex

    def test_persists_to_disk_so_handle_survives_restart(self, isolated_handle):
        first = isolated_handle.get_operator_handle()
        # Simulate process restart: clear in-memory cache, then ask again.
        isolated_handle._reset_operator_handle_cache_for_tests()
        second = isolated_handle.get_operator_handle()
        assert second == first
        # The file actually exists.
        assert isolated_handle._OPERATOR_HANDLE_FILE.exists()
        body = json.loads(isolated_handle._OPERATOR_HANDLE_FILE.read_text())
        assert body["handle"] == first

    def test_env_override_wins_over_auto_generated(self, isolated_handle, monkeypatch):
        # First call without env var auto-generates.
        auto = isolated_handle.get_operator_handle()
        assert auto.startswith("operator-")
        # Setting env var changes the resolved handle without touching the disk file.
        monkeypatch.setenv("OPERATOR_HANDLE", "alice")
        from services.config import get_settings

        get_settings.cache_clear()
        isolated_handle._reset_operator_handle_cache_for_tests()
        assert isolated_handle.get_operator_handle() == "alice"

    def test_handle_is_sanitized(self, isolated_handle, monkeypatch):
        # Sanitization tests run against the normalizer directly so the
        # empty-string case can be asserted independently of the env-var
        # resolution path (where empty means "use auto-gen", not "use
        # 'anonymous'").
        from services.network_utils import _normalize_handle

        cases = [
            ("Alice Smith", "alice-smith"),
            ("user@example.com", "user-example-com"),
            (" whitespace ", "whitespace"),
            ("UPPER-CASE", "upper-case"),
            ("multiple---dashes", "multiple-dashes"),
            ("/leading/slash", "leading-slash"),
            ("trailing-", "trailing"),
            ("", "anonymous"),
        ]
        for raw, expected in cases:
            got = _normalize_handle(raw)
            assert got == expected, f"{raw!r} -> {got!r}, expected {expected!r}"
            assert got == got.lower()
            for ch in got:
                assert ch.isalnum() or ch in "-_", f"unsafe char {ch!r} in {got!r}"
            assert "--" not in got

    def test_handle_is_length_capped(self, isolated_handle, monkeypatch):
        from services.config import get_settings

        monkeypatch.setenv("OPERATOR_HANDLE", "x" * 1000)
        get_settings.cache_clear()
        isolated_handle._reset_operator_handle_cache_for_tests()
        got = isolated_handle.get_operator_handle()
        assert len(got) <= 48
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# outbound_user_agent() builds the right header
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestOutboundUserAgentString:
    """Pins the pieces that ``outbound_user_agent()`` must embed in the
    header string: version prefix, operator handle, optional purpose and
    a contact path."""

    def test_includes_operator_handle(self, isolated_handle):
        ua = isolated_handle.outbound_user_agent()
        handle = isolated_handle.get_operator_handle()
        assert f"operator: {handle}" in ua

    def test_includes_purpose_when_provided(self, isolated_handle):
        ua = isolated_handle.outbound_user_agent("wikipedia")
        assert "purpose: wikipedia" in ua

    def test_includes_contact_path(self, isolated_handle):
        # Case-insensitive on purpose: only the presence of the project
        # URL fragments is pinned, not their capitalisation.
        ua = isolated_handle.outbound_user_agent()
        assert "github.com" in ua.lower()
        assert "shadowbroker" in ua.lower()

    def test_version_prefix(self, isolated_handle):
        ua = isolated_handle.outbound_user_agent()
        assert ua.startswith("Shadowbroker/")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Wikipedia / Wikidata — retroactive fix for PR #284's MONSTER pattern
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWikimediaCallsAreNowPerOperator:
    """Checks that the Wikidata and Wikipedia fetchers in
    ``services.region_dossier`` send the per-operator User-Agent.

    ``fetch_with_curl`` is replaced with a recorder so no network traffic
    happens; the tests only inspect the headers that were passed in.
    """

    def test_wikidata_call_uses_per_operator_ua(self, isolated_handle, monkeypatch):
        from services import region_dossier

        captured = []

        class _FakeResp:
            status_code = 200

            def json(self):
                return {"results": {"bindings": []}}

        def fake_fetch(url, **kwargs):
            # Record only the headers; a missing/None value becomes {}.
            captured.append(kwargs.get("headers") or {})
            return _FakeResp()

        monkeypatch.setattr(region_dossier, "fetch_with_curl", fake_fetch)
        region_dossier._fetch_wikidata_leader("Testlandia")

        assert captured, "Wikidata fetcher was not called"
        headers = captured[0]
        assert "User-Agent" in headers
        assert "Api-User-Agent" in headers
        handle = isolated_handle.get_operator_handle()
        for header_value in (headers["User-Agent"], headers["Api-User-Agent"]):
            assert f"operator: {handle}" in header_value, (
                f"Wikimedia UA must include the per-operator handle; got {header_value!r}"
            )

    def test_wikipedia_summary_uses_per_operator_ua(self, isolated_handle, monkeypatch):
        from services import region_dossier

        captured = []

        class _FakeResp:
            status_code = 200

            def json(self):
                return {
                    "type": "standard",
                    "description": "x",
                    "extract": "y",
                    "thumbnail": {"source": ""},
                }

        def fake_fetch(url, **kwargs):
            # Keep the URL too so non-Wikipedia requests can be filtered out.
            captured.append((url, kwargs.get("headers") or {}))
            return _FakeResp()

        monkeypatch.setattr(region_dossier, "fetch_with_curl", fake_fetch)
        region_dossier._fetch_local_wiki_summary("Paris", "France")

        wikipedia_hits = [c for c in captured if "wikipedia.org" in c[0]]
        assert wikipedia_hits, "Wikipedia summary fetch was not called"
        # The handle does not depend on the request, so resolve it once.
        handle = isolated_handle.get_operator_handle()
        for _url, headers in wikipedia_hits:
            assert f"operator: {handle}" in headers.get("User-Agent", "")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Generic round-7a regression guard
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNoMonsterUserAgentRemains:
    """The audit's underlying concern was that every Shadowbroker install
    looked like one entity. This test scans the codebase for the OLD
    aggregate identifier patterns and fails if a new one sneaks back in.

    We allow the strings to appear in:
    - comments (audit prose, change-log notes)
    - tests
    - .env.example (documentation)
    The test only fails if the string lives in actual outbound-request
    HEADER values without going through the per-operator helper.
    """

    BANNED_LITERALS = (
        "ShadowBroker-OSINT/1.0",
        "ShadowBroker-OSINT/0.9",
        "ShadowBroker-FeedIngester/1.0",
        "ShadowBroker/0.9.79 local Shodan connector",
        "ShadowBroker/0.9.79 Finnhub connector",
        "ShadowBroker/0.9.8 local Shodan connector",
        "ShadowBroker/0.9.8 Finnhub connector",
        "ShadowBroker/0.9.81 local Shodan connector",
        "ShadowBroker/0.9.81 Finnhub connector",
        "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)",
    )

    @staticmethod
    def _header_lines_with_literal(text, banned_literals):
        """Return ``(line_number, snippet)`` for every offending line in ``text``.

        A line offends when it contains one of ``banned_literals`` AND a
        quoted ``User-Agent`` key on that same line, and is not a ``#``
        comment line. Line numbers are 1-based; the snippet is the stripped
        line truncated to 120 characters. Results are ordered by literal
        first, then by line.
        """
        lines = text.splitlines()  # split once, reused for every literal
        hits = []
        for banned in banned_literals:
            if banned not in text:
                # Cheap whole-file pre-check before walking the lines.
                continue
            for lineno, line in enumerate(lines, 1):
                if banned not in line:
                    continue
                stripped = line.strip()
                # Comment lines are allowed to mention the old identifiers.
                if stripped.startswith("#"):
                    continue
                # Only a line that also carries a quoted User-Agent key
                # looks like a real header assignment.
                if '"User-Agent"' in line or "'User-Agent'" in line:
                    hits.append((lineno, stripped[:120]))
        return hits

    def test_no_banned_aggregate_user_agent_strings(self):
        # Parent of the directory holding this test file.
        backend_root = Path(__file__).parent.parent
        offenders = []
        for py in backend_root.rglob("*.py"):
            # Skip test files: they legitimately quote the banned strings.
            rel = py.relative_to(backend_root).as_posix()
            if rel.startswith("tests/"):
                continue
            text = py.read_text(encoding="utf-8", errors="ignore")
            for lineno, snippet in self._header_lines_with_literal(
                text, self.BANNED_LITERALS
            ):
                offenders.append(f"{rel}:{lineno}: {snippet}")
        assert not offenders, (
            "Round 7a regression: the following lines reintroduced an "
            "aggregate Shadowbroker User-Agent. Use "
            "outbound_user_agent('purpose') instead so the per-install "
            "operator handle is embedded.\n"
            + "\n".join(offenders)
        )
|
||||||
@@ -0,0 +1,366 @@
|
|||||||
|
"""Issue #256 (tg12): per-peer HMAC secrets must defeat cross-peer
|
||||||
|
impersonation.
|
||||||
|
|
||||||
|
Before the fix, ALL peer-push HMACs were derived from the single
|
||||||
|
fleet-shared ``MESH_PEER_PUSH_SECRET``. The receiver could only prove
|
||||||
|
"this request was signed by someone who knows the fleet secret" — not
|
||||||
|
which peer signed it. Any peer that knew the secret could compute the
|
||||||
|
expected HMAC for any other peer's URL and impersonate that peer.
|
||||||
|
|
||||||
|
The fix introduces ``MESH_PEER_SECRETS``, a per-peer URL-to-secret map.
|
||||||
|
When a peer URL appears there:
|
||||||
|
|
||||||
|
- Only the listed per-peer secret is accepted for that URL.
|
||||||
|
- The global ``MESH_PEER_PUSH_SECRET`` is ignored for that specific URL.
|
||||||
|
- A peer that knows only the global secret (or a different peer's
|
||||||
|
per-peer secret) cannot forge a request claiming to be that peer.
|
||||||
|
|
||||||
|
When a peer URL is NOT listed (the common case for single-peer installs
|
||||||
|
and for migration windows), the resolver falls back to the global
|
||||||
|
secret — preserving existing behavior with zero operator action.
|
||||||
|
|
||||||
|
These tests exercise ``resolve_peer_key_for_url`` directly so we cover
|
||||||
|
the security contract without spinning up a full mesh node.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _lookup_per_peer_secret — env parsing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestLookupPerPeerSecret:
    """Covers how ``_lookup_per_peer_secret`` parses the
    ``MESH_PEER_SECRETS`` environment variable."""

    def setup_method(self):
        # Invalidate the parser cache so each test sees its own env state.
        from services.mesh import mesh_crypto

        mesh_crypto._PEER_SECRETS_CACHE = {}
        mesh_crypto._PEER_SECRETS_CACHE_RAW = ""

    def test_returns_empty_when_env_unset(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.delenv("MESH_PEER_SECRETS", raising=False)
        assert _lookup_per_peer_secret("https://peer.example") == ""

    def test_returns_empty_when_env_blank(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv("MESH_PEER_SECRETS", "")
        assert _lookup_per_peer_secret("https://peer.example") == ""

    def test_returns_per_peer_secret_for_listed_url(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv(
            "MESH_PEER_SECRETS",
            "https://peer-a.example=secretA,https://peer-b.example=secretB",
        )
        assert _lookup_per_peer_secret("https://peer-a.example") == "secretA"
        assert _lookup_per_peer_secret("https://peer-b.example") == "secretB"

    def test_returns_empty_for_url_not_listed(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv(
            "MESH_PEER_SECRETS",
            "https://peer-a.example=secretA",
        )
        assert _lookup_per_peer_secret("https://other.example") == ""

    def test_url_is_normalized_before_lookup(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        # Configure with a trailing slash + uppercase host. Lookup with
        # plain lowercase host. Both should normalize to the same key.
        monkeypatch.setenv(
            "MESH_PEER_SECRETS",
            "https://Peer-A.Example/=secretA",
        )
        assert _lookup_per_peer_secret("https://peer-a.example") == "secretA"

    def test_whitespace_around_entries_is_stripped(self, monkeypatch):
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv(
            "MESH_PEER_SECRETS",
            " https://peer-a.example = secretA , https://peer-b.example=secretB ",
        )
        assert _lookup_per_peer_secret("https://peer-a.example") == "secretA"
        assert _lookup_per_peer_secret("https://peer-b.example") == "secretB"

    def test_malformed_entries_are_skipped_not_raised(self, monkeypatch):
        """A garbled MESH_PEER_SECRETS value must NOT crash the resolver.
        Bad entries are silently dropped; well-formed entries still work.
        This is the "fail-forward, not loud" rule — a typo in operator
        config should not take the whole backend down."""
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv(
            "MESH_PEER_SECRETS",
            "no_equals_sign,=missing_url,https://no.secret=,https://good.example=secretGood",
        )
        assert _lookup_per_peer_secret("https://good.example") == "secretGood"
        # The malformed ones produce no entry (and don't poison the cache).
        assert _lookup_per_peer_secret("https://no.secret") == ""

    def test_cache_invalidates_on_env_change(self, monkeypatch):
        """A test (or operator) updating MESH_PEER_SECRETS must see the
        new value immediately — no process restart required."""
        from services.mesh.mesh_crypto import _lookup_per_peer_secret

        monkeypatch.setenv("MESH_PEER_SECRETS", "https://a.example=first")
        assert _lookup_per_peer_secret("https://a.example") == "first"
        monkeypatch.setenv("MESH_PEER_SECRETS", "https://a.example=second")
        assert _lookup_per_peer_secret("https://a.example") == "second"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# resolve_peer_key_for_url — precedence + fallback
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolvePeerKeyForUrl:
|
||||||
|
def setup_method(self):
|
||||||
|
from services.mesh import mesh_crypto
|
||||||
|
|
||||||
|
mesh_crypto._PEER_SECRETS_CACHE = {}
|
||||||
|
mesh_crypto._PEER_SECRETS_CACHE_RAW = ""
|
||||||
|
|
||||||
|
def _fake_settings(self, global_secret: str):
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
s = MagicMock()
|
||||||
|
s.MESH_PEER_PUSH_SECRET = global_secret
|
||||||
|
return s
|
||||||
|
|
||||||
|
def test_falls_back_to_global_when_no_per_peer_entry(self, monkeypatch):
|
||||||
|
"""Single-peer installs: MESH_PEER_SECRETS empty, MESH_PEER_PUSH_SECRET
|
||||||
|
set — must keep working as before."""
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.delenv("MESH_PEER_SECRETS", raising=False)
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setattr(
|
||||||
|
"services.config.get_settings",
|
||||||
|
lambda: self._fake_settings("global-secret"),
|
||||||
|
)
|
||||||
|
key = resolve_peer_key_for_url("https://peer.example")
|
||||||
|
expected = _derive_peer_key("global-secret", "https://peer.example")
|
||||||
|
assert key == expected
|
||||||
|
assert len(key) == 32 # SHA-256 output
|
||||||
|
|
||||||
|
def test_per_peer_secret_takes_precedence_over_global(self, monkeypatch):
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setenv(
|
||||||
|
"MESH_PEER_SECRETS",
|
||||||
|
"https://peer-a.example=per-peer-a-secret",
|
||||||
|
)
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setattr(
|
||||||
|
"services.config.get_settings",
|
||||||
|
lambda: self._fake_settings("global-secret"),
|
||||||
|
)
|
||||||
|
key = resolve_peer_key_for_url("https://peer-a.example")
|
||||||
|
expected_per_peer = _derive_peer_key(
|
||||||
|
"per-peer-a-secret", "https://peer-a.example"
|
||||||
|
)
|
||||||
|
expected_global = _derive_peer_key("global-secret", "https://peer-a.example")
|
||||||
|
assert key == expected_per_peer
|
||||||
|
assert key != expected_global
|
||||||
|
|
||||||
|
def test_unlisted_peer_uses_global_during_migration(self, monkeypatch):
|
||||||
|
"""Partial migration: peer A is in MESH_PEER_SECRETS, peer B is
|
||||||
|
not yet. Peer B must keep working under the global secret."""
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setenv(
|
||||||
|
"MESH_PEER_SECRETS",
|
||||||
|
"https://peer-a.example=per-peer-a-secret",
|
||||||
|
)
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setattr(
|
||||||
|
"services.config.get_settings",
|
||||||
|
lambda: self._fake_settings("global-secret"),
|
||||||
|
)
|
||||||
|
key_a = resolve_peer_key_for_url("https://peer-a.example")
|
||||||
|
key_b = resolve_peer_key_for_url("https://peer-b.example")
|
||||||
|
expected_b = _derive_peer_key("global-secret", "https://peer-b.example")
|
||||||
|
assert key_b == expected_b
|
||||||
|
# Peer A's per-peer key must differ from peer B's global key
|
||||||
|
# (they're keyed by different secrets and different URLs).
|
||||||
|
assert key_a != key_b
|
||||||
|
|
||||||
|
def test_returns_empty_when_no_secret_available(self, monkeypatch):
|
||||||
|
from services.mesh.mesh_crypto import resolve_peer_key_for_url
|
||||||
|
|
||||||
|
monkeypatch.delenv("MESH_PEER_SECRETS", raising=False)
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setattr(
|
||||||
|
"services.config.get_settings",
|
||||||
|
lambda: self._fake_settings(""),
|
||||||
|
)
|
||||||
|
key = resolve_peer_key_for_url("https://peer.example")
|
||||||
|
assert key == b""
|
||||||
|
|
||||||
|
def test_returns_empty_when_url_is_unparseable(self, monkeypatch):
|
||||||
|
from services.mesh.mesh_crypto import resolve_peer_key_for_url
|
||||||
|
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setattr(
|
||||||
|
"services.config.get_settings",
|
||||||
|
lambda: self._fake_settings("global-secret"),
|
||||||
|
)
|
||||||
|
assert resolve_peer_key_for_url("") == b""
|
||||||
|
assert resolve_peer_key_for_url("not-a-url") == b""
|
||||||
|
assert resolve_peer_key_for_url(None) == b""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# The actual #256 attack: peer A cannot impersonate peer B
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCrossPeerImpersonationRefused:
|
||||||
|
"""The core regression: when MESH_PEER_SECRETS is configured, a peer
|
||||||
|
that knows ONLY the global secret (or a different peer's per-peer
|
||||||
|
secret) cannot produce a valid HMAC for another peer's URL."""
|
||||||
|
|
||||||
|
def setup_method(self):
|
||||||
|
from services.mesh import mesh_crypto
|
||||||
|
|
||||||
|
mesh_crypto._PEER_SECRETS_CACHE = {}
|
||||||
|
mesh_crypto._PEER_SECRETS_CACHE_RAW = ""
|
||||||
|
|
||||||
|
def _hmac(self, key: bytes, body: bytes) -> str:
|
||||||
|
return hmac.new(key, body, hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
|
def test_peer_a_global_secret_cannot_forge_peer_b_hmac(self, monkeypatch):
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
# Receiver has BOTH the global secret AND a per-peer secret for B.
|
||||||
|
monkeypatch.setenv(
|
||||||
|
"MESH_PEER_SECRETS",
|
||||||
|
"https://peer-b.example=per-peer-b-secret",
|
||||||
|
)
|
||||||
|
settings = MagicMock()
|
||||||
|
settings.MESH_PEER_PUSH_SECRET = "global-secret"
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"services.config.get_settings", lambda: settings
|
||||||
|
)
|
||||||
|
|
||||||
|
body = b'{"events": [{"id": 1}]}'
|
||||||
|
|
||||||
|
# Attacker (peer A) knows only the global secret. Tries to forge
|
||||||
|
# an HMAC claiming to be peer B.
|
||||||
|
attacker_key = _derive_peer_key("global-secret", "https://peer-b.example")
|
||||||
|
attacker_hmac = self._hmac(attacker_key, body)
|
||||||
|
|
||||||
|
# Receiver derives B's expected key from B's per-peer secret.
|
||||||
|
receiver_key = resolve_peer_key_for_url("https://peer-b.example")
|
||||||
|
expected_hmac = self._hmac(receiver_key, body)
|
||||||
|
|
||||||
|
# The forgery MUST NOT match.
|
||||||
|
assert attacker_hmac != expected_hmac
|
||||||
|
|
||||||
|
def test_peer_a_per_peer_secret_cannot_forge_peer_b_hmac(self, monkeypatch):
|
||||||
|
"""Even harder case: peer A has its OWN per-peer secret, but
|
||||||
|
still does not know peer B's per-peer secret, and so cannot
|
||||||
|
forge an HMAC for peer B."""
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
monkeypatch.setenv(
|
||||||
|
"MESH_PEER_SECRETS",
|
||||||
|
"https://peer-a.example=secretA,https://peer-b.example=secretB",
|
||||||
|
)
|
||||||
|
settings = MagicMock()
|
||||||
|
settings.MESH_PEER_PUSH_SECRET = ""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"services.config.get_settings", lambda: settings
|
||||||
|
)
|
||||||
|
|
||||||
|
body = b'{"events": [{"id": 99}]}'
|
||||||
|
|
||||||
|
# Attacker A tries to forge for B using its own secret (secretA).
|
||||||
|
attacker_key = _derive_peer_key("secretA", "https://peer-b.example")
|
||||||
|
attacker_hmac = self._hmac(attacker_key, body)
|
||||||
|
|
||||||
|
receiver_key = resolve_peer_key_for_url("https://peer-b.example")
|
||||||
|
expected_hmac = self._hmac(receiver_key, body)
|
||||||
|
|
||||||
|
assert attacker_hmac != expected_hmac
|
||||||
|
|
||||||
|
def test_legitimate_peer_b_request_verifies(self, monkeypatch):
|
||||||
|
"""Positive control: when peer B uses ITS per-peer secret and
|
||||||
|
claims to be itself, the receiver accepts the HMAC."""
|
||||||
|
from services.mesh.mesh_crypto import resolve_peer_key_for_url
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
monkeypatch.setenv(
|
||||||
|
"MESH_PEER_SECRETS",
|
||||||
|
"https://peer-b.example=secretB",
|
||||||
|
)
|
||||||
|
settings = MagicMock()
|
||||||
|
settings.MESH_PEER_PUSH_SECRET = ""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"services.config.get_settings", lambda: settings
|
||||||
|
)
|
||||||
|
|
||||||
|
body = b'{"events": [{"id": 7}]}'
|
||||||
|
|
||||||
|
# Peer B and the receiver both call resolve_peer_key_for_url.
|
||||||
|
sender_key = resolve_peer_key_for_url("https://peer-b.example")
|
||||||
|
receiver_key = resolve_peer_key_for_url("https://peer-b.example")
|
||||||
|
|
||||||
|
sender_hmac = self._hmac(sender_key, body)
|
||||||
|
expected_hmac = self._hmac(receiver_key, body)
|
||||||
|
|
||||||
|
assert sender_hmac == expected_hmac
|
||||||
|
|
||||||
|
def test_single_peer_install_zero_behavior_change(self, monkeypatch):
|
||||||
|
"""The "no UX hostility" guarantee: an install with the global
|
||||||
|
secret set and NO MESH_PEER_SECRETS entries must derive exactly
|
||||||
|
the same key as before this change."""
|
||||||
|
from services.mesh.mesh_crypto import (
|
||||||
|
resolve_peer_key_for_url,
|
||||||
|
_derive_peer_key,
|
||||||
|
)
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
monkeypatch.delenv("MESH_PEER_SECRETS", raising=False)
|
||||||
|
settings = MagicMock()
|
||||||
|
settings.MESH_PEER_PUSH_SECRET = "legacy-global-secret"
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"services.config.get_settings", lambda: settings
|
||||||
|
)
|
||||||
|
|
||||||
|
# The legacy derivation that every prior call site used.
|
||||||
|
legacy_key = _derive_peer_key("legacy-global-secret", "https://peer.example")
|
||||||
|
# The new resolver, with no per-peer entries configured.
|
||||||
|
new_key = resolve_peer_key_for_url("https://peer.example")
|
||||||
|
|
||||||
|
assert new_key == legacy_key
|
||||||
@@ -0,0 +1,186 @@
|
|||||||
|
"""Tests for issue #287: proxy-aware slowapi key function.
|
||||||
|
|
||||||
|
Contract:
|
||||||
|
* Untrusted peer → key is the peer IP (matches old get_remote_address).
|
||||||
|
* Trusted frontend peer with X-Forwarded-For → key is first XFF entry.
|
||||||
|
* Trusted frontend peer without X-Forwarded-For → key is the peer IP
|
||||||
|
(fail-soft: no behaviour change vs. before #287).
|
||||||
|
* XFF from an untrusted peer is IGNORED — there must be no way to
|
||||||
|
spoof another operator's bucket by sending XFF directly.
|
||||||
|
* The first XFF entry is used (not the last — that's the trusted
|
||||||
|
proxy talking to the backend, not the actual operator).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeClient:
|
||||||
|
def __init__(self, host: str):
|
||||||
|
self.host = host
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRequest:
|
||||||
|
"""Minimal slowapi-compatible request shim — has ``client`` and
|
||||||
|
``headers`` attributes, which is all the key_func touches."""
|
||||||
|
|
||||||
|
def __init__(self, client_host: str, headers: dict | None = None):
|
||||||
|
self.client = _FakeClient(client_host) if client_host is not None else None
|
||||||
|
self.headers = dict(headers or {})
|
||||||
|
# slowapi's get_remote_address also tries request.client; we
|
||||||
|
# exercise both branches via the same shim.
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── untrusted peers ──────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestUntrustedPeer:
    """When the immediate peer is not in the trusted bridge-IP set, the
    rate-limit key must be the peer IP and any X-Forwarded-For header
    must be ignored."""

    def test_direct_loopback_uses_client_host(self, monkeypatch):
        """Direct hit from 127.0.0.1 — no XFF — keys on the peer IP."""
        from limiter import shadowbroker_rate_limit_key

        # Make sure the trusted-frontend cache resolves to nothing relevant.
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset())
        req = _FakeRequest("127.0.0.1")
        assert shadowbroker_rate_limit_key(req) == "127.0.0.1"

    def test_xff_from_untrusted_peer_is_ignored(self, monkeypatch):
        """A random caller sending X-Forwarded-For must NOT steal another
        operator's bucket. The XFF is dropped on the floor."""
        from limiter import shadowbroker_rate_limit_key

        # Trusted set deliberately does NOT include 1.2.3.4.
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("1.2.3.4", {"X-Forwarded-For": "9.9.9.9"})
        # Falls back to the peer IP, not 9.9.9.9.
        assert shadowbroker_rate_limit_key(req) == "1.2.3.4"

    def test_unknown_host_with_xff_uses_peer_host(self, monkeypatch):
        """With an empty trusted set, a private-range peer sending XFF is
        still keyed on its own address."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset())
        req = _FakeRequest("10.0.0.5", {"X-Forwarded-For": "1.1.1.1"})
        assert shadowbroker_rate_limit_key(req) == "10.0.0.5"
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── trusted frontend peers ───────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestTrustedFrontendPeer:
    """Requests whose immediate peer is in the trusted-bridge set."""

    def test_trusted_peer_with_xff_uses_first_xff_entry(self, monkeypatch):
        """When the immediate peer is the trusted frontend container and
        XFF carries the operator's chain, we key on the operator."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "203.0.113.7"})
        assert shadowbroker_rate_limit_key(req) == "203.0.113.7"

    def test_first_xff_entry_picked_in_chain(self, monkeypatch):
        """`client, proxy1, proxy2` → we pick the client, not the proxies.
        Picking the last entry would mean every operator behind the same
        upstream gets bucketed together, which is the bug we're fixing."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest(
            "172.20.0.5",
            {"X-Forwarded-For": "203.0.113.7, 198.51.100.1, 10.0.0.1"},
        )
        assert shadowbroker_rate_limit_key(req) == "203.0.113.7"

    def test_trusted_peer_without_xff_falls_back_to_peer(self, monkeypatch):
        """If the trusted frontend forgot to forward XFF (legacy clients,
        broken deploys), don't crash — bucket on the bridge IP exactly
        like the pre-#287 behaviour."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", headers={})
        assert shadowbroker_rate_limit_key(req) == "172.20.0.5"

    def test_trusted_peer_with_empty_xff_falls_back(self, monkeypatch):
        """``X-Forwarded-For: , ,`` → no usable entries → falls back."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": " , , "})
        assert shadowbroker_rate_limit_key(req) == "172.20.0.5"

    def test_xff_header_case_insensitive(self, monkeypatch):
        """HTTP header names are case-insensitive — slowapi normalises
        but our shim doesn't, so we explicitly check both forms."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        # Exercise the canonical and the lower-case spelling, as the
        # docstring promises; the shim stores keys exactly as given.
        for header_name in ("X-Forwarded-For", "x-forwarded-for"):
            req = _FakeRequest("172.20.0.5", {header_name: "203.0.113.7"})
            assert shadowbroker_rate_limit_key(req) == "203.0.113.7", header_name
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── isolation guarantees ─────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsolation:
    """Bucket-isolation guarantees between operators and against spoofing."""

    def test_two_operators_behind_same_proxy_get_different_keys(self, monkeypatch):
        """The whole reason this fix exists — two operators behind the
        SAME proxy must end up in DIFFERENT buckets."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        op_a = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"})
        op_b = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.2"})
        key_a = shadowbroker_rate_limit_key(op_a)
        key_b = shadowbroker_rate_limit_key(op_b)
        assert key_a != key_b
        assert key_a == "10.1.1.1"
        assert key_b == "10.1.1.2"

    def test_no_xff_spoof_from_outside(self, monkeypatch):
        """If we ever expose the backend port directly to the internet,
        an attacker MUST NOT be able to steal another operator's bucket
        by sending their own XFF header."""
        from limiter import shadowbroker_rate_limit_key

        # Trusted set is the frontend container IP; the attacker is on a
        # different (untrusted) IP and tries to spoof a victim's IP.
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        attacker = _FakeRequest("203.0.113.66", {"X-Forwarded-For": "10.1.1.1"})
        victim_via_proxy = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"})
        attacker_key = shadowbroker_rate_limit_key(attacker)
        victim_key = shadowbroker_rate_limit_key(victim_via_proxy)
        assert attacker_key == "203.0.113.66"
        assert victim_key == "10.1.1.1"
        # The attacker burning their own bucket doesn't touch the victim's.
        assert attacker_key != victim_key

    def test_limiter_object_uses_proxy_aware_key(self):
        """Smoke check that the module-level Limiter exports the new key
        function rather than slowapi's default."""
        from limiter import limiter, shadowbroker_rate_limit_key

        # slowapi stores it as ._key_func; we don't want to depend on
        # that internal name, so just check the function is reachable.
        assert callable(shadowbroker_rate_limit_key)
        assert limiter is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── defensive corners ────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestDefensive:
    """Edge cases where the key function must degrade safely."""

    def test_no_client_object(self, monkeypatch):
        """Some upstream middleware paths (websocket, ASGI lifespan)
        produce requests with no ``client`` attribute — must not raise."""
        from limiter import shadowbroker_rate_limit_key

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset())

        class _NoClient:
            def __init__(self):
                self.client = None
                self.headers = {}

        # slowapi's get_remote_address returns "127.0.0.1" as a default
        # in this case, so we just ensure no exception escapes.
        result = shadowbroker_rate_limit_key(_NoClient())
        assert isinstance(result, str)

    def test_resolver_raises_is_treated_as_untrusted(self, monkeypatch):
        """If DNS blows up inside the trusted-bridge resolver, we MUST
        fall back to peer IP — never accept XFF blindly."""
        from limiter import shadowbroker_rate_limit_key

        def _explode():
            raise RuntimeError("DNS down")

        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", _explode)
        req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "9.9.9.9"})
        # XFF must be ignored when we can't confirm peer is trusted.
        assert shadowbroker_rate_limit_key(req) == "172.20.0.5"
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
"""Issues #218 / #219 (tg12): outbound Wikipedia + Wikidata calls must
|
||||||
|
identify ShadowBroker via the Wikimedia-recommended User-Agent /
|
||||||
|
Api-User-Agent headers.
|
||||||
|
|
||||||
|
Before this fix, ``backend/services/region_dossier.py`` called
|
||||||
|
``fetch_with_curl(url)`` with no explicit headers, falling back to the
|
||||||
|
generic project default UA. That sent a too-anonymous identifier to
|
||||||
|
Wikimedia. Per Wikimedia's policy
|
||||||
|
(https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy)
|
||||||
|
the API caller should send a stable, contactable identifier so Wikimedia
|
||||||
|
operators can rate-limit or reach the project.
|
||||||
|
|
||||||
|
This test does NOT make network calls. It patches ``fetch_with_curl``
|
||||||
|
and asserts the headers that get passed through.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_resp(payload: dict, status: int = 200) -> MagicMock:
|
||||||
|
r = MagicMock()
|
||||||
|
r.status_code = status
|
||||||
|
r.json.return_value = payload
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
def test_wikidata_call_passes_wikimedia_request_headers():
    """``_fetch_wikidata_leader`` must pass Wikimedia identification
    headers to ``fetch_with_curl``."""
    from services import region_dossier

    calls = []

    def fake_fetch(url, **kwargs):
        # Record only the headers kwarg; that is all this test inspects.
        calls.append(kwargs.get("headers"))
        return _fake_resp({"results": {"bindings": []}})

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_wikidata_leader("Testlandia")

    assert calls, "fetch_with_curl was not called"
    headers = calls[0] or {}
    assert "User-Agent" in headers
    assert "Api-User-Agent" in headers
    # Stable identifier should mention the project + a contact path.
    assert "Shadowbroker" in headers["Api-User-Agent"] or "ShadowBroker" in headers["Api-User-Agent"]
    assert "github.com" in headers["Api-User-Agent"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_wikipedia_summary_call_passes_wikimedia_request_headers():
    """Every Wikipedia call issued by ``_fetch_local_wiki_summary`` must
    carry the Wikimedia identification headers."""
    from services import region_dossier

    calls = []

    def fake_fetch(url, **kwargs):
        # Keep the URL too, so Wikipedia calls can be filtered out below.
        calls.append((url, kwargs.get("headers")))
        return _fake_resp(
            {
                "type": "standard",
                "description": "test desc",
                "extract": "test extract",
                "thumbnail": {"source": ""},
            }
        )

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_local_wiki_summary("Paris", "France")

    # At least one Wikipedia REST call was issued.
    wikipedia_calls = [c for c in calls if "wikipedia.org" in c[0]]
    assert wikipedia_calls, "no Wikipedia call was issued"
    for url, headers in wikipedia_calls:
        headers = headers or {}
        assert "User-Agent" in headers, f"missing User-Agent on {url}"
        assert "Api-User-Agent" in headers, f"missing Api-User-Agent on {url}"
        assert "github.com" in headers["Api-User-Agent"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_wikimedia_headers_helper_is_stable():
    """Regression guard: if someone removes the contact path or the
    per-operator handle from the Wikimedia headers, we want a loud
    test failure, not a silent ToS drift.

    Round 7a: the original ``_WIKIMEDIA_REQUEST_HEADERS`` constant was
    replaced with the ``_wikimedia_request_headers()`` function so the
    per-install operator handle is embedded at call time. This test
    pins both the project identifier AND the contact path AND the
    per-operator format.
    """
    from services.region_dossier import _wikimedia_request_headers

    headers = _wikimedia_request_headers()
    aua = headers.get("Api-User-Agent", "")
    ua = headers.get("User-Agent", "")
    # Both headers must satisfy the same four requirements.
    for h, label in ((ua, "User-Agent"), (aua, "Api-User-Agent")):
        assert "Shadowbroker" in h or "ShadowBroker" in h, f"{label} missing project id"
        assert "github.com" in h.lower(), f"{label} missing contact URL"
        assert "issues" in h.lower(), f"{label} missing /issues contact path"
        # Round 7a: must include the per-operator handle.
        assert "operator:" in h, f"{label} missing per-operator handle: {h!r}"
|
||||||
@@ -0,0 +1,263 @@
|
|||||||
|
"""Issues #243, #252, #253 (tg12): settings endpoints must not leak
|
||||||
|
operational posture to unauthenticated callers.
|
||||||
|
|
||||||
|
- **#243**: ``GET /api/settings/wormhole``, ``/api/settings/privacy-profile``,
|
||||||
|
and ``/api/settings/node`` were leaking transport choice, anonymous-mode
|
||||||
|
state, the named privacy profile, and node-participant state to any
|
||||||
|
unauthenticated caller. The fix tightens the redaction allowlists to
|
||||||
|
expose ONLY a bare "is this feature on?" boolean and gates node mode
|
||||||
|
behind authenticated reads.
|
||||||
|
|
||||||
|
- **#252**: ``GET /api/settings/news-feeds`` returned the operator's full
|
||||||
|
curated feed inventory (names + URLs) to anyone. Now gated on
|
||||||
|
local-operator.
|
||||||
|
|
||||||
|
- **#253**: ``GET /api/settings/timemachine`` returned whether archival
|
||||||
|
capture is enabled to anyone. Now gated on local-operator.
|
||||||
|
|
||||||
|
Auth model: ``require_local_operator`` allows loopback (Tauri shell),
|
||||||
|
the Docker bridge frontend container (via the hostname-bound trust from
|
||||||
|
PR #278), and any caller that presents the configured admin key.
|
||||||
|
Anonymous LAN or internet callers do NOT pass and either receive 403
|
||||||
|
(news-feeds, timemachine) or a redacted minimum (wormhole / node).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
# Admin key used by these tests: patched in as ``auth._current_admin_key``'s
# return value and sent by "authenticated" requests in ``X-Admin-Key``.
_ADMIN_KEY = "test-admin-key-for-round5-fixture-32+chars"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client():
    """TestClient with the private-lane transport middleware disabled.

    Same shape as the oracle resolve fixture — the mesh privacy
    middleware returns 202 for ``/api/settings/*`` under TestClient
    because Wormhole is not actually running. Patching out the tier
    requirement lets requests reach the route's auth gate.
    """
    import main

    # The patch must stay active for the whole test, so yield inside it.
    with patch("main._minimum_transport_tier", return_value=None):
        yield TestClient(main.app, raise_server_exceptions=False)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #243: Wormhole posture redaction
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWormholeSettingsRedaction:
    """``GET /api/settings/wormhole`` must NOT leak transport choice or
    anonymous-mode state to unauthenticated callers."""

    def _read_settings_payload(self):
        """Return a fresh settings dict with every posture field populated."""
        return {
            "enabled": True,
            "transport": "tor_arti",
            "anonymous_mode": True,
            "privacy_profile": "high",
            "socks_proxy": "socks5h://127.0.0.1:9050",
        }

    def test_anonymous_caller_sees_only_enabled_bool(self, client):
        """Without an admin key only the ``enabled`` flag is returned."""
        with (
            patch("main.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/wormhole")
        assert r.status_code == 200
        body = r.json()
        # Only the bare "is Wormhole on?" boolean is exposed publicly.
        assert "enabled" in body
        assert body["enabled"] is True
        # Posture fields the audit flagged must be absent.
        assert "transport" not in body
        assert "anonymous_mode" not in body
        assert "privacy_profile" not in body
        assert "socks_proxy" not in body

    def test_authenticated_caller_sees_full_state(self, client):
        """With the admin key the posture fields are returned."""
        with (
            patch("main.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/wormhole",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        # All fields visible when authenticated.
        assert body["enabled"] is True
        assert body["transport"] == "tor_arti"
        assert body["anonymous_mode"] is True
        assert body["privacy_profile"] == "high"
|
||||||
|
|
||||||
|
|
||||||
|
class TestPrivacyProfileRedaction:
    """``GET /api/settings/privacy-profile`` must NOT leak the named
    profile to unauthenticated callers (the profile name itself
    discloses operator intent)."""

    def _payload(self):
        """Return a fresh wormhole-settings dict with posture fields set."""
        return {
            "enabled": True,
            "transport": "tor_arti",
            "anonymous_mode": True,
            "privacy_profile": "high",
        }

    def test_anonymous_caller_sees_only_wormhole_enabled_bool(self, client):
        """Without an admin key only ``wormhole_enabled`` is returned."""
        with (
            patch("main.read_wormhole_settings", return_value=self._payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/privacy-profile")
        assert r.status_code == 200
        body = r.json()
        assert "wormhole_enabled" in body
        assert body["wormhole_enabled"] is True
        # The named profile, transport, and anonymous mode must NOT
        # leak to anonymous callers.
        assert "profile" not in body or body.get("profile") is None
        assert "transport" not in body
        assert "anonymous_mode" not in body

    def test_authenticated_caller_sees_named_profile_and_transport(self, client):
        """With the admin key the profile name and posture are returned."""
        with (
            patch("main.read_wormhole_settings", return_value=self._payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/privacy-profile",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        assert body["profile"] == "high"
        assert body["wormhole_enabled"] is True
        assert body["transport"] == "tor_arti"
        assert body["anonymous_mode"] is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestNodeSettingsRedaction:
    """``GET /api/settings/node`` must NOT disclose node_mode or
    node_enabled to anonymous callers."""

    def _node_data(self):
        """Return a stand-in node-settings dict with one marker field."""
        return {"some_node_field": "value"}

    def test_anonymous_caller_sees_empty_stub(self, client):
        """Without an admin key none of the node fields are returned."""
        with (
            patch("services.node_settings.read_node_settings", return_value=self._node_data()),
            patch("routers.admin._current_node_mode", return_value="participant"),
            patch("routers.admin._participant_node_enabled", return_value=True),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/node")
        assert r.status_code == 200
        body = r.json()
        # No posture fields.
        assert "node_mode" not in body
        assert "node_enabled" not in body
        assert "some_node_field" not in body

    def test_authenticated_caller_sees_full_node_state(self, client):
        """With the admin key the node mode, flag and stored fields appear."""
        with (
            patch("services.node_settings.read_node_settings", return_value=self._node_data()),
            patch("routers.admin._current_node_mode", return_value="participant"),
            patch("routers.admin._participant_node_enabled", return_value=True),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/node",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        assert body["node_mode"] == "participant"
        assert body["node_enabled"] is True
        assert body["some_node_field"] == "value"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #252: news-feeds auth gate
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNewsFeedsAuthGate:
    """``GET /api/settings/news-feeds`` is rejected for anonymous callers
    and returns the feed list when the admin key is presented."""

    def _fake_feeds(self):
        """Return a two-entry feed inventory used as the mocked config."""
        return [
            {"name": "Custom Internal", "url": "https://internal.example/rss", "weight": 5},
            {"name": "Default News", "url": "https://news.example/rss", "weight": 3},
        ]

    def test_anonymous_caller_rejected(self, client):
        """No admin key → 403, and the feed config is never read."""
        with (
            patch("services.news_feed_config.get_feeds", return_value=self._fake_feeds()) as get_feeds,
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/news-feeds")
        assert r.status_code == 403
        # Critically: the underlying config read must NOT have been
        # performed for a rejected caller, so nothing about the inventory
        # (e.g. its size, via response timing) can leak.
        assert get_feeds.call_count == 0

    def test_authenticated_caller_sees_full_feed_inventory(self, client):
        """Admin key → 200 with both feeds, names and URLs included."""
        with (
            patch("services.news_feed_config.get_feeds", return_value=self._fake_feeds()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/news-feeds",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        assert len(body) == 2
        assert body[0]["name"] == "Custom Internal"
        assert body[0]["url"] == "https://internal.example/rss"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# #253: timemachine auth gate
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimemachineAuthGate:
    """``GET /api/settings/timemachine`` is rejected for anonymous callers
    and returns the enabled state when the admin key is presented."""

    def test_anonymous_caller_rejected(self, client):
        """No admin key → 403."""
        node_data = {"timemachine_enabled": True}
        with (
            patch("services.node_settings.read_node_settings", return_value=node_data),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/timemachine")
        assert r.status_code == 403

    def test_authenticated_caller_sees_enabled_state(self, client):
        """Admin key → 200 with ``enabled`` and a ``storage_warning`` field."""
        node_data = {"timemachine_enabled": True}
        with (
            patch("services.node_settings.read_node_settings", return_value=node_data),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/timemachine",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        assert body["enabled"] is True
        assert "storage_warning" in body
|
||||||
@@ -0,0 +1,277 @@
|
|||||||
|
"""Issue #298 (tg12): Sentinel credentials must live server-side.
|
||||||
|
|
||||||
|
Before the fix, ``frontend/src/components/SettingsPanel.tsx`` stored
|
||||||
|
``client_id`` and ``client_secret`` in ``localStorage`` /
|
||||||
|
``sessionStorage`` via the privacy storage helper, and the proxy routes
|
||||||
|
in ``backend/routers/tools.py`` REQUIRED those values to come in the
|
||||||
|
request body. Any same-origin script (XSS, malicious extension,
|
||||||
|
dev-tools HAR export) had read access to real third-party Sentinel
|
||||||
|
credentials.
|
||||||
|
|
||||||
|
After the fix:
|
||||||
|
|
||||||
|
* ``SENTINEL_CLIENT_ID`` and ``SENTINEL_CLIENT_SECRET`` are entries
|
||||||
|
in the ``api_settings.API_REGISTRY`` and are persisted via the
|
||||||
|
existing ``/api/settings/api-keys`` flow (admin-gated, .env-backed,
|
||||||
|
never returned to the browser).
|
||||||
|
* The proxy routes prefer request-body values for back-compat but
|
||||||
|
fall back to ``os.environ.get("SENTINEL_CLIENT_ID")`` /
|
||||||
|
``os.environ.get("SENTINEL_CLIENT_SECRET")`` when the body omits
|
||||||
|
them. The dashboard's ``sentinelHub.ts`` no longer sends credentials
|
||||||
|
in the body — every request now hits the env path.
|
||||||
|
* When neither source has a value, the route returns a 400 with a
|
||||||
|
pointer to the API Keys panel rather than a curt "client_id and
|
||||||
|
client_secret required" message.
|
||||||
|
|
||||||
|
These tests cover the resolution order and the registry surface.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helper: import the routes module fresh per test so monkey-patched
|
||||||
|
# environment variables are picked up by the route's os.environ.get call.
|
||||||
|
# (The lookup is per-request, not at import time, so this isn't strictly
|
||||||
|
# required — but it makes the test layout obvious.)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def loopback_client():
    """ASGI client with peer IP 127.0.0.1 so the Sentinel routes' (post-#303)
    ``require_local_operator`` gate passes.

    Built without a context manager so the privacy-core lifespan check
    doesn't run in the test env.
    """
    import asyncio
    from httpx import ASGITransport, AsyncClient
    from main import app

    class _Loop:
        """Synchronous facade over ``AsyncClient`` on a private event loop."""

        def __init__(self):
            self._loop = asyncio.new_event_loop()
            # The ``client`` tuple sets the peer address the app sees.
            self._transport = ASGITransport(app=app, client=("127.0.0.1", 12345))
            self._base = "http://127.0.0.1:8000"

        def _do(self, method: str, url: str, **kw):
            """Issue one request and block until the response is available."""

            async def go():
                async with AsyncClient(transport=self._transport, base_url=self._base) as ac:
                    return await ac.request(method, url, **kw)

            return self._loop.run_until_complete(go())

        def get(self, url, **kw):
            return self._do("GET", url, **kw)

        def post(self, url, **kw):
            return self._do("POST", url, **kw)

        def put(self, url, **kw):
            return self._do("PUT", url, **kw)

        def close(self):
            self._loop.close()

    c = _Loop()
    yield c
    c.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# API_REGISTRY surface
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestApiRegistry:
    """The Sentinel credentials must be registered and writable via the
    API-keys settings flow."""

    def test_sentinel_keys_registered(self):
        """Both Sentinel keys must be entries in API_REGISTRY so the
        existing /api/settings/api-keys PUT flow can write them to .env."""
        from services.api_settings import API_REGISTRY, ALLOWED_ENV_KEYS

        ids = {row["id"] for row in API_REGISTRY}
        assert "sentinel_client_id" in ids
        assert "sentinel_client_secret" in ids

        # Critical: ALLOWED_ENV_KEYS is the gate on which .env keys the
        # API can mutate. If we forgot to add the env_key field on the
        # registry rows, callers couldn't actually save the values.
        assert "SENTINEL_CLIENT_ID" in ALLOWED_ENV_KEYS
        assert "SENTINEL_CLIENT_SECRET" in ALLOWED_ENV_KEYS

    def test_api_keys_put_accepts_sentinel_keys(self, loopback_client, monkeypatch, tmp_path):
        """End-to-end: PUT /api/settings/api-keys with SENTINEL_CLIENT_ID
        + SENTINEL_CLIENT_SECRET must persist to .env."""
        import services.api_settings as api_settings

        # Redirect both .env paths to tmp so the test doesn't mutate
        # the developer's real backend .env.
        tmp_env = tmp_path / ".env"
        monkeypatch.setattr(api_settings, "ENV_PATH", tmp_env)
        monkeypatch.setattr(api_settings, "OPERATOR_KEYS_ENV_PATH", tmp_path / "operator_api_keys.env")

        r = loopback_client.put(
            "/api/settings/api-keys",
            json={
                "SENTINEL_CLIENT_ID": "test-sentinel-id",
                "SENTINEL_CLIENT_SECRET": "test-sentinel-secret",
            },
        )
        assert r.status_code == 200, f"PUT failed: {r.text}"
        body = r.json()
        assert body.get("ok") is True

        # File on disk should now carry both keys.
        parsed = api_settings._parse_env_file(tmp_env)
        assert parsed.get("SENTINEL_CLIENT_ID") == "test-sentinel-id"
        assert parsed.get("SENTINEL_CLIENT_SECRET") == "test-sentinel-secret"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Credential resolution — body wins, env is fallback, neither is 400
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSentinelTokenCredResolution:
|
||||||
|
def test_env_fallback_when_body_empty(self, loopback_client, monkeypatch):
    """No body credentials → backend reads .env values."""
    monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
    monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")

    # Mock the upstream Copernicus call so we don't hit the network.
    # Capture what was sent so we can prove env values were used.
    captured: dict = {}
    fake_resp = MagicMock()
    fake_resp.status_code = 200
    fake_resp.content = b'{"access_token": "stub", "expires_in": 300}'

    def fake_post(url, *args, **kwargs):
        captured["url"] = url
        captured["data"] = kwargs.get("data", {})
        return fake_resp

    with patch("requests.post", side_effect=fake_post):
        r = loopback_client.post(
            "/api/sentinel/token",
            data={},  # ← deliberately empty body
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )

    assert r.status_code == 200
    # The forwarded creds must come from env, not from a stale cache
    # or fallback string.
    assert captured.get("data", {}).get("client_id") == "env-id"
    assert captured.get("data", {}).get("client_secret") == "env-secret"
|
||||||
|
|
||||||
|
def test_body_credentials_win_over_env(self, loopback_client, monkeypatch):
|
||||||
|
"""Body values (back-compat path) must win when both sources
|
||||||
|
are present. This preserves the pre-#298 behavior for any
|
||||||
|
legacy callers that still post credentials."""
|
||||||
|
monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
|
||||||
|
monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")
|
||||||
|
|
||||||
|
captured: dict = {}
|
||||||
|
fake_resp = MagicMock()
|
||||||
|
fake_resp.status_code = 200
|
||||||
|
fake_resp.content = b'{"access_token": "stub"}'
|
||||||
|
|
||||||
|
def fake_post(url, *args, **kwargs):
|
||||||
|
captured["data"] = kwargs.get("data", {})
|
||||||
|
return fake_resp
|
||||||
|
|
||||||
|
with patch("requests.post", side_effect=fake_post):
|
||||||
|
r = loopback_client.post(
|
||||||
|
"/api/sentinel/token",
|
||||||
|
data={"client_id": "body-id", "client_secret": "body-secret"},
|
||||||
|
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert r.status_code == 200
|
||||||
|
assert captured["data"]["client_id"] == "body-id"
|
||||||
|
assert captured["data"]["client_secret"] == "body-secret"
|
||||||
|
|
||||||
|
def test_400_when_neither_source_has_credentials(self, loopback_client, monkeypatch):
|
||||||
|
"""If body is empty AND env is empty, return 400 with a
|
||||||
|
friendly pointer to the API Keys panel — not a curt
|
||||||
|
"required" message and not a 500."""
|
||||||
|
monkeypatch.delenv("SENTINEL_CLIENT_ID", raising=False)
|
||||||
|
monkeypatch.delenv("SENTINEL_CLIENT_SECRET", raising=False)
|
||||||
|
|
||||||
|
# If the route ever calls requests.post here, the gate is broken
|
||||||
|
# — empty creds should never produce an outbound HTTP call.
|
||||||
|
fake = MagicMock(side_effect=AssertionError(
|
||||||
|
"requests.post should not be called when no credentials are configured"
|
||||||
|
))
|
||||||
|
with patch("requests.post", fake):
|
||||||
|
r = loopback_client.post(
|
||||||
|
"/api/sentinel/token",
|
||||||
|
data={},
|
||||||
|
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert r.status_code == 400
|
||||||
|
detail = r.json().get("detail", "")
|
||||||
|
# The pointer to the API Keys panel is what makes this non-hostile.
|
||||||
|
assert "API Keys panel" in detail or "SENTINEL_CLIENT_ID" in detail
|
||||||
|
assert fake.call_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestSentinelTileCredResolution:
    """Credential resolution for ``POST /api/sentinel/tile``."""

    _TILE_ROUTE = "/api/sentinel/tile"

    @staticmethod
    def _tile_request_without_credentials() -> dict:
        """Tile request body that deliberately omits client_id / client_secret."""
        return {
            "preset": "TRUE-COLOR",
            "date": "2026-01-01",
            "z": 6, "x": 30, "y": 20,
        }

    def test_env_fallback_when_body_omits_credentials(self, loopback_client, monkeypatch):
        """With no credentials in the body, the token mint uses env values."""
        monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
        monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")

        # Reply for the token-mint call.
        token_reply = MagicMock(status_code=200)
        token_reply.json.return_value = {"access_token": "stub", "expires_in": 300}

        # Reply for every other upstream call (the tile render).
        tile_reply = MagicMock(
            status_code=200,
            content=b"<png bytes>",
            headers={"content-type": "image/png"},
        )

        upstream_calls: list = []

        def route_upstream(url, *args, **kwargs):
            upstream_calls.append(
                {"url": url, "data": kwargs.get("data"), "json": kwargs.get("json")}
            )
            return token_reply if "openid-connect/token" in url else tile_reply

        with patch("requests.post", side_effect=route_upstream):
            response = loopback_client.post(
                self._TILE_ROUTE,
                json=self._tile_request_without_credentials(),
            )

        assert response.status_code == 200
        # Locate the token-mint call and check which credentials it carried.
        mint = next(
            call for call in upstream_calls if "openid-connect/token" in call["url"]
        )
        assert mint["data"]["client_id"] == "env-id"
        assert mint["data"]["client_secret"] == "env-secret"

    def test_400_when_neither_source_has_credentials(self, loopback_client, monkeypatch):
        """No body credentials and no env credentials: 400, zero egress."""
        monkeypatch.delenv("SENTINEL_CLIENT_ID", raising=False)
        monkeypatch.delenv("SENTINEL_CLIENT_SECRET", raising=False)

        # Raises if the route attempts any outbound call.
        tripwire = MagicMock(
            side_effect=AssertionError(
                "requests.post should not be called when no credentials are configured"
            )
        )
        with patch("requests.post", tripwire):
            response = loopback_client.post(
                self._TILE_ROUTE,
                json=self._tile_request_without_credentials(),
            )

        assert response.status_code == 400
        detail = response.json().get("detail", "")
        assert "API Keys panel" in detail or "SENTINEL_CLIENT_ID" in detail
        assert tripwire.call_count == 0
|
||||||
@@ -0,0 +1,231 @@
|
|||||||
|
"""Issues #299, #300, #301 (tg12): Sentinel proxy routes must require
|
||||||
|
local-operator auth.
|
||||||
|
|
||||||
|
Before the fix, three Sentinel proxy routes in ``backend/routers/tools.py``
|
||||||
|
were decorated only with ``@limiter.limit(...)`` — no
|
||||||
|
``Depends(require_local_operator)``:
|
||||||
|
|
||||||
|
* ``POST /api/sentinel/token`` — Copernicus CDSE OAuth relay for
|
||||||
|
caller-supplied client_id + client_secret. Anonymous access made the
|
||||||
|
backend a free OAuth-mint relay for any Sentinel account.
|
||||||
|
* ``POST /api/sentinel/tile`` — Sentinel Hub Process API relay.
|
||||||
|
Caller supplies their own credentials, backend mints a token if
|
||||||
|
needed and relays the PNG. Anonymous access was a bandwidth + quota
|
||||||
|
relay for any Copernicus account.
|
||||||
|
* ``GET /api/sentinel2/search`` — Planetary Computer STAC search with
|
||||||
|
Esri imagery fallback. No caller credentials are involved, but the
|
||||||
|
route is still an anonymous external-search relay.
|
||||||
|
|
||||||
|
The fix adds ``dependencies=[Depends(require_local_operator)]`` to each.
|
||||||
|
The parameterized regression in ``test_control_surface_auth.py`` covers
|
||||||
|
the basic 403 path. This file adds the harder property: when the auth
|
||||||
|
gate fires, **the underlying upstream HTTP call never happens** — no
|
||||||
|
outbound Copernicus token mint, no Sentinel Hub Process call, no
|
||||||
|
Planetary Computer STAC search. The egress-on-403 property is what
|
||||||
|
separates a real gate from a route that returns 403 *after* burning a
|
||||||
|
quota.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Remote client fixture — same shape as test_control_surface_auth.py, but
|
||||||
|
# inlined here so this file doesn't depend on the shared remote_client
|
||||||
|
# fixture order. Uses 1.2.3.4 as the peer IP so loopback auth bypass
|
||||||
|
# doesn't accidentally let the request through.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _PeerClient:
    """Synchronous wrapper around an httpx ASGI client with a chosen peer IP.

    The peer address is handed to ``ASGITransport`` so the app sees it as
    the connecting client, which lets tests drive the real
    ``require_local_operator`` check with either a loopback or a remote
    address. Each instance owns a private event loop; call ``close()``
    when done.
    """

    def __init__(self, peer_ip: str):
        # Imported lazily so the app is only loaded when a client is built.
        from main import app

        self._base = f"http://{peer_ip}:8000"
        self._transport = ASGITransport(app=app, client=(peer_ip, 12345))
        self._loop = asyncio.new_event_loop()

    async def _send(self, method: str, url: str, **kw):
        """Issue one request through a short-lived ``AsyncClient``."""
        async with AsyncClient(transport=self._transport, base_url=self._base) as session:
            return await session.request(method, url, **kw)

    def _request(self, method: str, url: str, **kw):
        """Run ``_send`` to completion on this client's own event loop."""
        return self._loop.run_until_complete(self._send(method, url, **kw))

    def get(self, url, **kw):
        """Send a GET request; keyword arguments go to httpx unchanged."""
        return self._request("GET", url, **kw)

    def post(self, url, **kw):
        """Send a POST request; keyword arguments go to httpx unchanged."""
        return self._request("POST", url, **kw)

    def close(self):
        """Close the private event loop."""
        self._loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def remote():
    """Yield a client whose peer IP is 1.2.3.4 (an untrusted remote caller),
    closing it once the test is done."""
    untrusted_peer = _PeerClient("1.2.3.4")
    yield untrusted_peer
    untrusted_peer.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def loopback():
    """Yield a client whose peer IP is 127.0.0.1 (the local operator),
    closing it once the test is done."""
    local_peer = _PeerClient("127.0.0.1")
    yield local_peer
    local_peer.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /api/sentinel/token — issue #299
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSentinelTokenAuthGate:
    """Auth gate on ``POST /api/sentinel/token`` (issue #299)."""

    _ROUTE = "/api/sentinel/token"
    _FORM = {"client_id": "anything", "client_secret": "anything"}

    def test_anonymous_caller_is_rejected(self, remote):
        """A non-loopback caller gets a 403."""
        response = remote.post(self._ROUTE, data=dict(self._FORM))
        assert response.status_code == 403

    def test_no_upstream_token_mint_on_403(self, remote):
        """When the gate fires, no outbound token request may be made.

        ``requests.post`` is replaced by a mock that raises if invoked, and
        its call count is asserted to be zero, so a gate that answers 403
        only *after* contacting the upstream is caught.
        """
        tripwire = MagicMock(
            side_effect=AssertionError(
                "requests.post was called despite auth-gate 403 — the gate is bypassable"
            )
        )
        with patch("requests.post", tripwire):
            response = remote.post(self._ROUTE, data=dict(self._FORM))

        assert response.status_code == 403
        assert tripwire.call_count == 0

    def test_loopback_caller_passes_auth(self, loopback):
        """A 127.0.0.1 caller must get past the gate.

        The upstream is stubbed with a 401 reply so no real network call is
        made. The exact status relayed by the route is not checked here;
        the only requirement is that it is not the gate's 403. The
        ``loopback`` fixture supplies an explicit 127.0.0.1 peer address.
        """
        upstream = MagicMock(status_code=401, content=b'{"error": "invalid_client"}')
        with patch("requests.post", return_value=upstream):
            response = loopback.post(self._ROUTE, data=dict(self._FORM))

        # Anything other than 403 shows the handler was reached.
        assert response.status_code != 403
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /api/sentinel/tile — issue #300
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSentinelTileAuthGate:
    """Auth gate on ``POST /api/sentinel/tile`` (issue #300)."""

    # Complete tile request, so a rejection can only come from the auth
    # gate and not from body validation.
    _VALID_BODY = dict(
        client_id="anything",
        client_secret="anything",
        preset="TRUE-COLOR",
        date="2026-01-01",
        z=6,
        x=30,
        y=20,
    )

    def test_anonymous_caller_is_rejected(self, remote):
        """A non-loopback caller gets a 403."""
        response = remote.post("/api/sentinel/tile", json=self._VALID_BODY)
        assert response.status_code == 403

    def test_no_upstream_call_on_403(self, remote):
        """A rejected request must not trigger any ``requests.post`` call
        (neither the token mint nor the tile render)."""
        tripwire = MagicMock(
            side_effect=AssertionError(
                "requests.post was called despite auth-gate 403 — gate bypassable"
            )
        )
        with patch("requests.post", tripwire):
            response = remote.post("/api/sentinel/tile", json=self._VALID_BODY)

        assert response.status_code == 403
        assert tripwire.call_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /api/sentinel2/search — issue #301
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSentinel2SearchAuthGate:
    """Auth gate on ``GET /api/sentinel2/search`` (issue #301)."""

    _SEARCH_URL = "/api/sentinel2/search?lat=0&lng=0"
    _SERVICE_FN = "services.sentinel_search.search_sentinel2_scene"

    def test_anonymous_caller_is_rejected(self, remote):
        """A non-loopback caller gets a 403."""
        assert remote.get(self._SEARCH_URL).status_code == 403

    def test_no_upstream_search_on_403(self, remote):
        """A rejected request must never reach the search service function."""
        # The service function is patched with a mock that raises if called;
        # it must stay uncalled when the request is rejected.
        tripwire = MagicMock(
            side_effect=AssertionError(
                "search_sentinel2_scene was called despite 403 — gate bypassable"
            )
        )
        with patch(self._SERVICE_FN, tripwire):
            response = remote.get(self._SEARCH_URL)

        assert response.status_code == 403
        assert tripwire.call_count == 0

    def test_loopback_caller_reaches_handler(self, loopback):
        """A 127.0.0.1 caller passes the gate and the handler calls the
        (stubbed) search function exactly once."""
        search_stub = MagicMock(return_value={"ok": True, "results": []})
        with patch(self._SERVICE_FN, search_stub):
            response = loopback.get(self._SEARCH_URL)

        assert response.status_code == 200
        assert search_stub.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
|
# Note: an earlier draft included a static dependency walker that
|
||||||
|
# inspected the FastAPI route table to assert require_local_operator
|
||||||
|
# was wired in. It was deleted because FastAPI's internal route
|
||||||
|
# representation varies across minor versions — the walker was brittle
|
||||||
|
# and the behavioral pair (anonymous → 403 with no upstream egress;
|
||||||
|
# loopback → handler reached) gives stronger end-to-end evidence than
|
||||||
|
# any structural check.
|
||||||
@@ -0,0 +1,222 @@
|
|||||||
|
"""Issue #251 (tg12): Tor bundle extraction must refuse symlink and
|
||||||
|
hardlink members.
|
||||||
|
|
||||||
|
The previous extractor checked ``member.name`` against path traversal
|
||||||
|
but never inspected ``member.linkname``. Python 3.11's ``tarfile``
|
||||||
|
honors symlinks during ``extractall()``, so a malicious archive could
|
||||||
|
ship a member named ``innocent.txt`` whose linkname points at an
|
||||||
|
arbitrary filesystem location. After extraction, reads of innocent.txt
|
||||||
|
dereference to that location; writes corrupt it.
|
||||||
|
|
||||||
|
The fix categorically refuses any link member during extraction.
|
||||||
|
Tor Expert Bundles never legitimately contain symlinks or hardlinks,
|
||||||
|
so this is non-disruptive for real updates and a hard stop for hostile
|
||||||
|
archives.
|
||||||
|
|
||||||
|
These tests build synthetic tar archives covering each refused case
|
||||||
|
and assert ``_extract_tor_bundle_safely`` rejects them.
|
||||||
|
"""
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
import tarfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from services.tor_hidden_service import _extract_tor_bundle_safely
|
||||||
|
|
||||||
|
|
||||||
|
def _build_archive(tmp_path: Path, members: list) -> Path:
|
||||||
|
"""Write a .tar.gz with the given (name, builder) pairs.
|
||||||
|
|
||||||
|
Each builder is called with the open tarfile and is responsible for
|
||||||
|
adding its member however it likes (regular file, symlink, etc.).
|
||||||
|
"""
|
||||||
|
archive = tmp_path / "test_bundle.tar.gz"
|
||||||
|
with tarfile.open(str(archive), "w:gz") as tar:
|
||||||
|
for name, builder in members:
|
||||||
|
builder(tar, name)
|
||||||
|
return archive
|
||||||
|
|
||||||
|
|
||||||
|
def _add_regular_file(tar: tarfile.TarFile, name: str, payload: bytes = b"hello") -> None:
|
||||||
|
info = tarfile.TarInfo(name)
|
||||||
|
info.size = len(payload)
|
||||||
|
info.mode = 0o644
|
||||||
|
info.type = tarfile.REGTYPE
|
||||||
|
tar.addfile(info, io.BytesIO(payload))
|
||||||
|
|
||||||
|
|
||||||
|
def _add_symlink(tar: tarfile.TarFile, name: str, linkname: str) -> None:
|
||||||
|
info = tarfile.TarInfo(name)
|
||||||
|
info.size = 0
|
||||||
|
info.type = tarfile.SYMTYPE
|
||||||
|
info.linkname = linkname
|
||||||
|
info.mode = 0o777
|
||||||
|
tar.addfile(info)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_hardlink(tar: tarfile.TarFile, name: str, linkname: str) -> None:
|
||||||
|
info = tarfile.TarInfo(name)
|
||||||
|
info.size = 0
|
||||||
|
info.type = tarfile.LNKTYPE
|
||||||
|
info.linkname = linkname
|
||||||
|
info.mode = 0o644
|
||||||
|
tar.addfile(info)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_fifo(tar: tarfile.TarFile, name: str) -> None:
|
||||||
|
info = tarfile.TarInfo(name)
|
||||||
|
info.type = tarfile.FIFOTYPE
|
||||||
|
info.mode = 0o644
|
||||||
|
tar.addfile(info)
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_archive_extracts_successfully(tmp_path):
    """An archive made only of regular files is accepted and unpacked."""
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    member_names = ("tor/tor.exe", "tor/data/geoip")

    def add_normal(tar, name):
        _add_regular_file(tar, name, b"clean content")

    archive = _build_archive(tmp_path, [(name, add_normal) for name in member_names])

    assert _extract_tor_bundle_safely(archive, install_dir) is True
    # Every member must exist as a file under the install directory.
    for relative in member_names:
        assert (install_dir / relative).is_file()
|
||||||
|
|
||||||
|
|
||||||
|
def test_symlink_member_is_rejected(tmp_path, caplog):
    """Issue #251 core regression: an archive with a symlink member is
    refused, nothing is created for it, and the refusal is logged."""
    import logging

    install_dir = tmp_path / "install"
    install_dir.mkdir()

    def add_hostile_link(tar, name):
        _add_symlink(tar, name, "/etc/passwd")

    archive = _build_archive(tmp_path, [("tor/innocent.txt", add_hostile_link)])

    with caplog.at_level(logging.ERROR):
        outcome = _extract_tor_bundle_safely(archive, install_dir)

    assert outcome is False
    # The link must not have been created on disk.
    assert not (install_dir / "tor" / "innocent.txt").exists()
    # The log must state the reason for the refusal.
    logged = [record.getMessage() for record in caplog.records]
    assert any("symlinks/hardlinks are not allowed" in line for line in logged)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hardlink_member_is_rejected(tmp_path):
    """Hardlinks are refused for the same reason as symlinks.

    The archive holds one regular file plus a hard link to it. The whole
    extraction must be refused, so neither member may appear on disk.
    """
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    archive = _build_archive(
        tmp_path,
        [
            ("tor/regular.txt", lambda t, n: _add_regular_file(t, n)),
            # Tar hard-link targets are resolved from the archive root, so
            # the link names the full member path. A bare "regular.txt"
            # would be a dangling link, not a link to the member above.
            ("tor/sneaky.txt", lambda t, n: _add_hardlink(t, n, "tor/regular.txt")),
        ],
    )
    assert _extract_tor_bundle_safely(archive, install_dir) is False
    # The whole extraction is refused even though only one member is bad.
    assert not (install_dir / "tor" / "regular.txt").exists()
    assert not (install_dir / "tor" / "sneaky.txt").exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_symlink_with_relative_target_still_rejected(tmp_path):
    """A symlink is refused even when its target is a relative path.

    Symlinks are rejected outright rather than judged by where they
    point: a Tor bundle has no legitimate use for them, and chained link
    redirections are hard for a path-resolution check to catch.
    """
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    def add_relative_link(tar, name):
        _add_symlink(tar, name, "tor/tor.exe")

    archive = _build_archive(tmp_path, [("tor/alias.txt", add_relative_link)])
    outcome = _extract_tor_bundle_safely(archive, install_dir)
    assert outcome is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_fifo_or_device_member_is_rejected(tmp_path):
    """A member that is neither a regular file nor a directory is refused.

    Only the FIFO case is built here.
    """
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    special_members = [("tor/weird.fifo", _add_fifo)]
    archive = _build_archive(tmp_path, special_members)

    outcome = _extract_tor_bundle_safely(archive, install_dir)
    assert outcome is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_path_traversal_member_is_rejected(tmp_path):
    """The path-traversal guard refuses a member named ``../../escape.txt``.

    Besides the ``False`` return value, the test checks that the member
    was not written to the location the traversal would reach. A refusal
    that is only reported after the file has already escaped the install
    directory is therefore caught too.
    """
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    def add_traversal(tar, name):
        _add_regular_file(tar, name)

    # ../../escape.txt resolves outside install_dir on most platforms.
    archive = _build_archive(
        tmp_path,
        [
            ("../../escape.txt", add_traversal),
        ],
    )
    assert _extract_tor_bundle_safely(archive, install_dir) is False

    # Where the member would land if it were extracted relative to
    # install_dir without any guard.
    escape_target = (install_dir / ".." / ".." / "escape.txt").resolve()
    assert not escape_target.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_malformed_tar_is_rejected(tmp_path):
    """A file that is not a tar archive yields ``False`` instead of an
    exception."""
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    not_an_archive = tmp_path / "not-a-tar.tar.gz"
    not_an_archive.write_bytes(b"this is not a tar archive at all")

    outcome = _extract_tor_bundle_safely(not_an_archive, install_dir)
    assert outcome is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_extraction_failure_does_not_leave_partial_state_referenced_to_caller(tmp_path):
    """A bundle mixing a good member with a hostile one returns ``False``.

    Only the return-value contract is pinned here: the caller relies on
    ``False`` to know it must refuse the bundle and clean up. Whether the
    good member was written before the failure is deliberately not
    asserted; that cleanup belongs to the caller, not to this helper.
    """
    install_dir = tmp_path / "install"
    install_dir.mkdir()

    def add_clean(tar, name):
        _add_regular_file(tar, name)

    def add_evil_link(tar, name):
        _add_symlink(tar, name, "/etc/passwd")

    # One acceptable file followed by a symlink.
    hostile_members = [
        ("tor/clean.txt", add_clean),
        ("tor/evil-link.txt", add_evil_link),
    ]
    archive = _build_archive(tmp_path, hostile_members)

    outcome = _extract_tor_bundle_safely(archive, install_dir)
    assert outcome is False
|
||||||
@@ -0,0 +1,252 @@
|
|||||||
|
"""HF NUFORC fallback honors the rolling cutoff window.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
The UAP sightings layer is sourced primarily from a live scrape of
|
||||||
|
nuforc.org. When that fails (Cloudflare 403, curl disabled on Windows,
|
||||||
|
wdtNonce regex stale, etc.) the code falls back to a static CSV mirror
|
||||||
|
hosted on Hugging Face at ``kcimc/NUFORC/nuforc_str.csv``.
|
||||||
|
|
||||||
|
The HF mirror is maintained by a third party and refreshed sporadically.
|
||||||
|
Pre-fix, the fallback parsed every row, sorted by ``occurred`` descending,
|
||||||
|
and took the top 250 — **with no date cutoff**. When the HF mirror is
|
||||||
|
stale (its "newest" rows are ~2-3 years old), users saw a map full of
|
||||||
|
2022-2023 sightings labeled as the "last 60 days" layer.
|
||||||
|
|
||||||
|
These tests pin the new behavior:
|
||||||
|
|
||||||
|
* Rows older than ``_NUFORC_RECENT_DAYS`` are dropped before the take-top-N.
|
||||||
|
* If the HF mirror has nothing in the window, the fallback returns ``[]``
|
||||||
|
and logs ERROR (don't silently serve stale data).
|
||||||
|
* ``fetch_uap_sightings`` records the failure when BOTH paths fail, so
|
||||||
|
the layer shows as broken in the health registry instead of "fresh".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime as real_datetime
|
||||||
|
|
||||||
|
|
||||||
|
class _FixedDateTime(real_datetime):
|
||||||
|
"""A datetime whose utcnow() returns a pinned value, for deterministic
|
||||||
|
cutoff math. Subclasses real datetime so existing operations still work."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def utcnow(cls):
|
||||||
|
return cls(2026, 5, 1, 12, 0, 0)
|
||||||
|
|
||||||
|
|
||||||
|
class _StubResponse:
|
||||||
|
status_code = 200
|
||||||
|
|
||||||
|
def __init__(self, text: str):
|
||||||
|
self.text = text
|
||||||
|
|
||||||
|
|
||||||
|
def _stub_geocode_cache(*_args, **_kwargs):
|
||||||
|
"""Pre-populated location cache so the fallback doesn't try to hit
|
||||||
|
Photon during the test."""
|
||||||
|
return {
|
||||||
|
"Denver, CO, USA": [39.7392, -104.9903],
|
||||||
|
"Seattle, WA, USA": [47.6062, -122.3321],
|
||||||
|
"Phoenix, AZ, USA": [33.4484, -112.0740],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_hf_fallback_drops_rows_older_than_60_days(monkeypatch):
    """Rows outside the rolling window are dropped by the HF fallback.

    The mirror holds one April 2026 row plus rows from 2023 and 2022.
    With "now" pinned to 2026-05-01, only the 2026 row may come back,
    and the geocoder must never be called.
    """
    from services.fetchers import earth_observation as eo

    header = "Sighting,Occurred,Location,Shape,Duration,Posted,Summary"
    rows = (
        '1,2026-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2026-04-16,"In-window sighting"',
        '2,2023-06-01 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2023-06-02,"Three years old"',
        '3,2022-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2022-01-16,"Even older"',
    )
    # Every line, including the last, ends with a newline.
    csv_text = "\n".join((header, *rows)) + "\n"

    # Records every location the fallback tries to geocode. Every test
    # location is already in the stub cache, so any entry here means a
    # row got further than it should have.
    geocode_calls: list[str] = []

    def _geocode_spy(location, city, state, country=""):
        geocode_calls.append(location)
        return None

    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
    monkeypatch.setattr(eo, "_geocode_uap_location", _geocode_spy)

    sightings = eo._build_uap_sightings_from_hf_mirror()

    ids = [s["id"] for s in sightings]
    assert ids == ["NUFORC-1"], f"only the 2026 row should survive: got {ids}"
    # No geocoding at all: stale rows are expected to be dropped before
    # the geocoding step, and the fresh row is served from the cache.
    assert geocode_calls == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_hf_fallback_returns_empty_when_mirror_is_fully_stale(monkeypatch, caplog):
    """A mirror with no row inside the window yields ``[]`` and an ERROR.

    All three rows predate the pinned "now" (2026-05-01) by years. The
    fallback must return an empty list and log an ERROR that reports how
    many stale rows were dropped.
    """
    from services.fetchers import earth_observation as eo

    header = "Sighting,Occurred,Location,Shape,Duration,Posted,Summary"
    rows = (
        '1,2023-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2023-04-16,"Old"',
        '2,2022-06-01 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2022-06-02,"Older"',
        '3,2021-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2021-01-16,"Ancient"',
    )
    csv_text = "\n".join((header, *rows)) + "\n"

    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
    monkeypatch.setattr(eo, "_geocode_uap_location", lambda *a, **kw: None)

    with caplog.at_level(logging.ERROR, logger="services.fetchers.earth_observation"):
        sightings = eo._build_uap_sightings_from_hf_mirror()

    assert sightings == []

    # The ERROR must name the stale-mirror condition and carry the count of
    # dropped rows, so it can be told apart from a failed download.
    messages = [record.getMessage() for record in caplog.records]
    relevant = [msg for msg in messages if "HF fallback yielded 0 rows" in msg]
    assert relevant, "expected loud ERROR when HF mirror is fully stale"
    assert any("dropped 3" in msg for msg in relevant)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hf_fallback_still_returns_data_when_some_rows_are_in_window(monkeypatch):
    """Mixed-age mirror: some rows in the window, some not.

    The fallback should return only the in-window rows and must not log the
    "HF fallback yielded 0 rows" ERROR that is reserved for a fully stale
    mirror. Both halves of that contract are asserted below.
    """
    from services.fetchers import earth_observation as eo

    csv_text = (
        "Sighting,Occurred,Location,Shape,Duration,Posted,Summary\n"
        '1,2026-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2026-04-16,"Fresh"\n'
        '2,2026-04-10 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2026-04-10,"Also fresh"\n'
        '3,2020-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2020-01-16,"Ancient"\n'
    )

    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
    monkeypatch.setattr(eo, "_geocode_uap_location", lambda *a, **kw: None)

    # Collect ERROR records with a plain handler so the test's fixture
    # signature stays unchanged.
    # NOTE(review): assumes the fetcher logs under this logger name, as the
    # fully-stale test in this module does — confirm against the module.
    error_records: list[logging.LogRecord] = []

    class _Collector(logging.Handler):
        def emit(self, record):
            error_records.append(record)

    eo_logger = logging.getLogger("services.fetchers.earth_observation")
    collector = _Collector(level=logging.ERROR)
    eo_logger.addHandler(collector)
    try:
        sightings = eo._build_uap_sightings_from_hf_mirror()
    finally:
        # Always detach so a failure here cannot leak the handler into
        # later tests.
        eo_logger.removeHandler(collector)

    ids = sorted(s["id"] for s in sightings)
    assert ids == ["NUFORC-1", "NUFORC-2"], f"only in-window rows should appear: got {ids}"

    doomsday = [
        record.getMessage()
        for record in error_records
        if "HF fallback yielded 0 rows" in record.getMessage()
    ]
    assert not doomsday, f"stale-mirror ERROR must not fire when rows are in window: {doomsday}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_uap_sightings_marks_failure_when_both_paths_empty(monkeypatch, caplog):
    """Live path raises AND the HF fallback is empty.

    In that situation ``fetch_uap_sightings`` must:

    * NOT mark the layer fresh (the pre-fix bug: it did, so the layer
      showed as healthy-but-empty for days)
    * call ``assert_canary("uap_sightings", 0)`` so the health registry
      surfaces the broken layer
    * log an ERROR with the live-path exception for debugging
    """
    from services.fetchers import earth_observation as eo
    from services.fetchers import _store

    def _layer_active(layer):
        return True

    def _no_cache(force_refresh=False):
        return None

    def _live_path_fails():
        raise RuntimeError("NUFORC live: zero rows pulled across 3 months")

    def _empty_fallback():
        return []

    monkeypatch.setattr(_store, "is_any_active", _layer_active)
    monkeypatch.setattr(eo, "_load_nuforc_sightings_cache", _no_cache)
    monkeypatch.setattr(eo, "_build_recent_uap_sightings", _live_path_fails)
    monkeypatch.setattr(eo, "_build_uap_sightings_from_hf_mirror", _empty_fallback)

    # Record every key the fetcher tries to mark fresh.
    marked: list[str] = []

    def _record_fresh(*keys):
        marked.extend(keys)

    monkeypatch.setattr(eo, "_mark_fresh", _record_fresh)

    # Record every canary assertion as a (key, int value) pair.
    canary_calls: list[tuple[str, int]] = []
    import services.slo as slo

    def _record_canary(key, value):
        canary_calls.append((key, int(value)))

    monkeypatch.setattr(slo, "assert_canary", _record_canary)

    with caplog.at_level(logging.ERROR, logger="services.fetchers.earth_observation"):
        eo.fetch_uap_sightings()

    assert marked == [], "broken layer must NOT be marked fresh"
    assert canary_calls == [("uap_sightings", 0)], (
        f"expected canary trip when both paths fail; got {canary_calls}"
    )

    # The error log must make clear that live failed AND the fallback was
    # empty (not the other way around).
    logged = [record.getMessage() for record in caplog.records]
    assert any("both live NUFORC and HF fallback" in message for message in logged)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_uap_sightings_succeeds_when_fallback_returns_data(monkeypatch):
    """Positive path: the live fetch fails but the fallback supplies rows.

    The layer is marked fresh and ``assert_canary`` is NOT tripped — the
    canary only fires when the layer ends up with zero data.
    """
    from services.fetchers import earth_observation as eo
    from services.fetchers import _store

    def _live_path_fails():
        raise RuntimeError("live down")

    fallback_rows = [{"id": "NUFORC-fb-1", "date_time": "2026-04-20", "lat": 0.0, "lng": 0.0}]

    monkeypatch.setattr(_store, "is_any_active", lambda layer: True)
    monkeypatch.setattr(eo, "_load_nuforc_sightings_cache", lambda force_refresh=False: None)
    monkeypatch.setattr(eo, "_build_recent_uap_sightings", _live_path_fails)
    monkeypatch.setattr(eo, "_build_uap_sightings_from_hf_mirror", lambda: fallback_rows)
    # Keep the test from writing a sightings cache.
    monkeypatch.setattr(eo, "_save_nuforc_sightings_cache", lambda s: None)

    marked: list[str] = []

    def _record_fresh(*keys):
        marked.extend(keys)

    monkeypatch.setattr(eo, "_mark_fresh", _record_fresh)

    canary_calls: list[tuple[str, int]] = []
    import services.slo as slo

    def _record_canary(key, value):
        canary_calls.append((key, int(value)))

    monkeypatch.setattr(slo, "assert_canary", _record_canary)

    eo.fetch_uap_sightings()

    assert marked == ["uap_sightings"]
    assert canary_calls == [], "canary should not trip when fallback supplies data"
|
||||||
|
|
||||||
|
|
||||||
|
def test_uap_scheduler_runs_weekly_not_daily():
    """The UAP layer's cron job must be weekly (Mondays at 12:00 UTC), not daily.

    Daily was the pre-fix default; weekly matches the layer's stated
    cadence (a rolling 60-day digest) and keeps load on nuforc.org light.
    Only the ``day_of_week`` directive is checked here, by scanning the
    scheduler module's source text.
    """
    from services import data_fetcher

    job_id = "uap_sightings_weekly"

    with open(data_fetcher.__file__, "r", encoding="utf-8") as handle:
        source = handle.read()

    # Anchor on the scheduler block by id, then assert the cron triggers.
    assert job_id in source, (
        "scheduler id should be uap_sightings_weekly (was uap_sightings_daily pre-fix)"
    )

    # Walk backwards from the first mention of the id to the add_job call
    # that owns it.
    id_pos = source.index(job_id)
    add_job_idx = source.rfind("add_job(", 0, id_pos)
    assert add_job_idx >= 0, "could not locate add_job block for UAP scheduler"

    # The block ends at the first ")" after the id.
    close_pos = source.find(")", id_pos)
    job_block = source[add_job_idx : close_pos + 1]

    # day_of_week is the difference between daily and weekly; if somebody
    # flips the job back to daily, this fires.
    assert 'day_of_week="mon"' in job_block, (
        f"expected day_of_week='mon' in UAP scheduler block:\n{job_block}"
    )
|
||||||
@@ -0,0 +1,338 @@
|
|||||||
|
"""Issue #231 — self-update SHA-256 verification.
|
||||||
|
|
||||||
|
Before this fix, ``_validate_zip_hash`` returned silently whenever the
|
||||||
|
``MESH_UPDATE_SHA256`` env var was unset (the default — nothing in the
|
||||||
|
install docs ever told operators to set it). That made the auto-updater
|
||||||
|
a supply-chain RCE on any compromise of the GitHub release pipeline.
|
||||||
|
|
||||||
|
The fix introduces a four-source verification chain:
|
||||||
|
|
||||||
|
1. ``MESH_UPDATE_SHA256`` env var (operator override, preserved)
|
||||||
|
2. ``SHA256SUMS.txt`` asset published alongside the release (primary)
|
||||||
|
3. Baked-in ``backend/data/release_digests.json`` (fallback)
|
||||||
|
4. HTTPS-only fallback with a loud warning (preserves auto-update during
|
||||||
|
transient outages so the user isn't stuck)
|
||||||
|
|
||||||
|
A mismatch from any source that DID respond is fatal. Only the "no
|
||||||
|
source reachable at all" case falls back to HTTPS-only.
|
||||||
|
"""
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from services import updater
|
||||||
|
from services.updater import (
|
||||||
|
_compute_sha256,
|
||||||
|
_fetch_sha256sums,
|
||||||
|
_load_baked_in_release_digests,
|
||||||
|
_validate_zip_hash,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def fake_archive(tmp_path):
    """Write a small stand-in "zip" and return ``(path, sha256_hex)``.

    The file is not a real archive; it only needs a known payload so its
    digest can be computed up front.
    """
    contents = b"this is not really a release archive"
    zip_path = tmp_path / "update.zip"
    zip_path.write_bytes(contents)

    hasher = hashlib.sha256(contents)
    return str(zip_path), hasher.hexdigest().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_baked_in_release_digests_file_loads():
    """The shipped release_digests.json parses and has a well-formed v0.9.79 entry."""
    release_tag = "v0.9.79"
    asset_name = "ShadowBroker_v0.9.79.zip"
    lowercase_hex = "0123456789abcdef"

    digests = _load_baked_in_release_digests()
    assert release_tag in digests

    assets = digests[release_tag]
    assert asset_name in assets

    # A SHA-256 hex digest is exactly 64 lowercase hex characters.
    digest = assets[asset_name]
    assert len(digest) == 64
    assert all(ch in lowercase_hex for ch in digest)
|
||||||
|
|
||||||
|
|
||||||
|
def test_baked_in_skips_comment_keys():
    """A top-level ``_comment`` key must not be reported as a release tag."""
    loaded = _load_baked_in_release_digests()
    assert "_comment" not in loaded
|
||||||
|
|
||||||
|
|
||||||
|
def test_compute_sha256_matches_known_value(fake_archive):
    """``_compute_sha256`` agrees with hashlib's digest of the fixture payload."""
    archive_path, known_digest = fake_archive
    computed = _compute_sha256(archive_path)
    assert computed == known_digest
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Source 1: MESH_UPDATE_SHA256 env override
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_override_matching_passes(fake_archive, monkeypatch):
    """Source 1: the operator pinned the exact digest via env, so validation passes."""
    archive_path, pinned_digest = fake_archive
    monkeypatch.setenv("MESH_UPDATE_SHA256", pinned_digest)

    # The returned note names the source that vouched for the archive.
    result_note = _validate_zip_hash(archive_path)
    assert "MESH_UPDATE_SHA256" in result_note
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_override_mismatch_fails_loudly(fake_archive, monkeypatch):
    """Source 1: the operator pinned a different digest, so validation is fatal."""
    archive_path, _unused_digest = fake_archive
    wrong_digest = "0" * 64
    monkeypatch.setenv("MESH_UPDATE_SHA256", wrong_digest)

    with pytest.raises(RuntimeError) as raised:
        _validate_zip_hash(archive_path)

    error_text = str(raised.value)
    assert "mismatch" in error_text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Source 2: SHA256SUMS.txt asset
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_sha256sums_matching_passes(fake_archive, monkeypatch):
    """Source 2: SHA256SUMS.txt lists the correct digest for the asset."""
    archive_path, good_digest = fake_archive
    asset = "ShadowBroker_v9.9.9.zip"

    # No env pin, so the SHA256SUMS source is the one consulted.
    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", lambda url: {asset: good_digest})

    release_info = {
        "asset_name": asset,
        "sha256sums_url": "https://example.test/SHA256SUMS.txt",
        "release_tag": "v9.9.9",
    }
    result_note = _validate_zip_hash(archive_path, **release_info)

    assert "SHA256SUMS.txt" in result_note
|
||||||
|
|
||||||
|
|
||||||
|
def test_sha256sums_mismatch_fails_loudly(fake_archive, monkeypatch):
    """Source 2: SHA256SUMS.txt lists a different digest, so the update is refused."""
    archive_path, _unused_digest = fake_archive
    asset = "ShadowBroker_v9.9.9.zip"

    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", lambda url: {asset: "0" * 64})

    release_info = {
        "asset_name": asset,
        "sha256sums_url": "https://example.test/SHA256SUMS.txt",
        "release_tag": "v9.9.9",
    }
    with pytest.raises(RuntimeError) as raised:
        _validate_zip_hash(archive_path, **release_info)

    # The error must name both the failure kind and the source that disagreed.
    error_text = str(raised.value)
    assert "mismatch" in error_text.lower()
    assert "SHA256SUMS" in error_text
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Source 3: baked-in digest list
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_baked_in_matching_passes(fake_archive, monkeypatch):
    """Source 3: SHA256SUMS yields nothing, but the baked-in list has the digest."""
    archive_path, good_digest = fake_archive

    def _no_published_sums(url):
        return {}

    def _baked_in_digests():
        return {"v9.9.9": {"ShadowBroker_v9.9.9.zip": good_digest}}

    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", _no_published_sums)
    monkeypatch.setattr(updater, "_load_baked_in_release_digests", _baked_in_digests)

    result_note = _validate_zip_hash(
        archive_path,
        asset_name="ShadowBroker_v9.9.9.zip",
        sha256sums_url="https://example.test/SHA256SUMS.txt",
        release_tag="v9.9.9",
    )

    assert "baked-in" in result_note
|
||||||
|
|
||||||
|
|
||||||
|
def test_baked_in_mismatch_fails_loudly(fake_archive, monkeypatch):
    """Source 3: the baked-in list holds a different digest, so the update is refused."""
    archive_path, _unused_digest = fake_archive

    def _no_published_sums(url):
        return {}

    def _baked_in_digests():
        return {"v9.9.9": {"ShadowBroker_v9.9.9.zip": "0" * 64}}

    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", _no_published_sums)
    monkeypatch.setattr(updater, "_load_baked_in_release_digests", _baked_in_digests)

    with pytest.raises(RuntimeError) as raised:
        _validate_zip_hash(
            archive_path,
            asset_name="ShadowBroker_v9.9.9.zip",
            sha256sums_url="",
            release_tag="v9.9.9",
        )

    error_text = str(raised.value)
    assert "mismatch" in error_text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Source 4: HTTPS-only fallback
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_https_only_fallback_when_no_source_available(fake_archive, monkeypatch, caplog):
    """Source 4: no digest source has an answer, so HTTPS-only is used with a warning.

    This keeps auto-update working through transient outages: an operator
    on a flaky network gets a degraded-but-functional update with a clear
    log message instead of a hostile error.
    """
    import logging

    archive_path, _unused_digest = fake_archive

    def _no_published_sums(url):
        return {}

    def _no_baked_in_digests():
        return {}

    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", _no_published_sums)
    monkeypatch.setattr(updater, "_load_baked_in_release_digests", _no_baked_in_digests)

    with caplog.at_level(logging.WARNING):
        result_note = _validate_zip_hash(
            archive_path,
            asset_name="ShadowBroker_v99.99.zip",
            sha256sums_url="",
            release_tag="v99.99",
        )

    assert "https-only" in result_note.lower()

    # The degraded path must be visible in the logs, not just in the note.
    logged = [record.getMessage() for record in caplog.records]
    assert any("fell back to HTTPS-only" in message for message in logged)
|
||||||
|
|
||||||
|
|
||||||
|
def test_https_only_fallback_when_release_tag_unknown(fake_archive, monkeypatch):
    """Source 4 also applies when the baked-in list lacks THIS release tag.

    Example: a brand-new release for which the local install has not yet
    seen a digest.
    """
    archive_path, _unused_digest = fake_archive

    def _no_published_sums(url):
        return {}

    def _digests_for_other_release():
        # Different tag and asset: nothing here matches the release under test.
        return {"v0.0.1": {"old.zip": "0" * 64}}

    monkeypatch.delenv("MESH_UPDATE_SHA256", raising=False)
    monkeypatch.setattr(updater, "_fetch_sha256sums", _no_published_sums)
    monkeypatch.setattr(updater, "_load_baked_in_release_digests", _digests_for_other_release)

    result_note = _validate_zip_hash(
        archive_path,
        asset_name="ShadowBroker_v99.99.zip",
        sha256sums_url="",
        release_tag="v99.99",
    )

    assert "https-only" in result_note.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Precedence (env > SHA256SUMS > baked-in > https-only)
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_override_beats_all_other_sources(fake_archive, monkeypatch):
    """With MESH_UPDATE_SHA256 set, no other digest source may be consulted.

    The other sources could return false positives or negatives, so they
    are replaced with stubs that fail the test as soon as they are called.
    """
    archive_path, pinned_digest = fake_archive
    monkeypatch.setenv("MESH_UPDATE_SHA256", pinned_digest)

    def _sums_must_not_run(url):
        raise AssertionError("SHA256SUMS source was queried despite env override")

    def _baked_in_must_not_run():
        raise AssertionError("Baked-in list was queried despite env override")

    tripwires = {
        "_fetch_sha256sums": _sums_must_not_run,
        "_load_baked_in_release_digests": _baked_in_must_not_run,
    }
    for attr_name, tripwire in tripwires.items():
        monkeypatch.setattr(updater, attr_name, tripwire)

    result_note = _validate_zip_hash(
        archive_path,
        asset_name="any.zip",
        sha256sums_url="https://example.test/SHA256SUMS.txt",
        release_tag="any",
    )

    assert "MESH_UPDATE_SHA256" in result_note
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# _fetch_sha256sums parser
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_sha256sums_parses_standard_format(monkeypatch):
    """Plain ``sha256sum`` output (``<digest> <filename>`` per line) is parsed."""
    zip_digest = "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47"
    msi_digest = "e0713c3cdda184cfbea750bfac0d62a35678fec00847e6476f2cac8e7e42046e"
    zip_name = "ShadowBroker_v0.9.79.zip"
    msi_name = "ShadowBroker_0.9.79_x64_en-US.msi"

    class _Resp:
        # Minimal stand-in for the HTTP response: a text body plus a
        # raise_for_status that never raises.
        text = f"{zip_digest} {zip_name}\n" + f"{msi_digest} {msi_name}\n"

        def raise_for_status(self):
            pass

    monkeypatch.setattr(updater.requests, "get", lambda url, timeout=15: _Resp())
    # Stub URL validation so the example.test URL is passed through as-is.
    monkeypatch.setattr(updater, "_validate_update_url", lambda url, **kw: url)

    parsed = _fetch_sha256sums("https://example.test/SHA256SUMS.txt")

    assert parsed[zip_name].startswith("f6877c1d")
    assert parsed[msi_name].startswith("e0713c3c")
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_sha256sums_handles_binary_marker(monkeypatch):
    """``sha256sum -b`` output (``<digest> *<filename>``) is keyed by the bare filename."""

    class _Resp:
        text = "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47 *ShadowBroker_v0.9.79.zip\n"

        def raise_for_status(self):
            pass

    def _fake_get(url, timeout=15):
        return _Resp()

    def _accept_any_url(url, **kw):
        return url

    monkeypatch.setattr(updater.requests, "get", _fake_get)
    monkeypatch.setattr(updater, "_validate_update_url", _accept_any_url)

    parsed = _fetch_sha256sums("https://example.test/SHA256SUMS.txt")

    assert "ShadowBroker_v0.9.79.zip" in parsed
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_sha256sums_skips_malformed_lines(monkeypatch):
    """Unparseable lines are dropped without aborting the whole parse."""
    body_lines = [
        "# comment line\n",
        "\n",
        "not-a-digest bogus.txt\n",
        "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47 good.zip\n",
    ]

    class _Resp:
        text = "".join(body_lines)

        def raise_for_status(self):
            pass

    def _fake_get(url, timeout=15):
        return _Resp()

    def _accept_any_url(url, **kw):
        return url

    monkeypatch.setattr(updater.requests, "get", _fake_get)
    monkeypatch.setattr(updater, "_validate_update_url", _accept_any_url)

    parsed = _fetch_sha256sums("https://example.test/SHA256SUMS.txt")

    # Only the well-formed line survives.
    assert "good.zip" in parsed
    assert "bogus.txt" not in parsed
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_sha256sums_handles_network_failure(monkeypatch):
    """A failed SHA256SUMS download yields ``{}`` rather than an exception.

    The caller can then fall through to the baked-in list or HTTPS-only.
    """
    import requests as _req

    def _connection_refused(url, timeout=15):
        raise _req.exceptions.ConnectionError("upstream down")

    def _accept_any_url(url, **kw):
        return url

    monkeypatch.setattr(updater.requests, "get", _connection_refused)
    monkeypatch.setattr(updater, "_validate_update_url", _accept_any_url)

    parsed = _fetch_sha256sums("https://example.test/SHA256SUMS.txt")

    assert parsed == {}
|
||||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@shadowbroker/desktop-shell",
|
"name": "@shadowbroker/desktop-shell",
|
||||||
"version": "0.9.79",
|
"version": "0.9.81",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@shadowbroker/desktop-shell",
|
"name": "@shadowbroker/desktop-shell",
|
||||||
"version": "0.9.79",
|
"version": "0.9.81",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"typescript": "^5.6.0"
|
"typescript": "^5.6.0"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@shadowbroker/desktop-shell",
|
"name": "@shadowbroker/desktop-shell",
|
||||||
"version": "0.9.79",
|
"version": "0.9.81",
|
||||||
"private": true,
|
"private": true,
|
||||||
"description": "ShadowBroker desktop shell packaging, runtime bridge, and release tooling",
|
"description": "ShadowBroker desktop shell packaging, runtime bridge, and release tooling",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
@@ -130,6 +130,45 @@ function stageBackendRuntime() {
|
|||||||
});
|
});
|
||||||
stagePrivacyCoreArtifact();
|
stagePrivacyCoreArtifact();
|
||||||
stageReleaseAttestation();
|
stageReleaseAttestation();
|
||||||
|
stageStartScripts();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Copy ``start.bat`` and ``start.sh`` from the repo root into the
 * staged backend-runtime/ so they sit next to ``privacy_core.dll``.
 *
 * Why: an MSI/EXE/AppImage user who wants to launch via the dev-style
 * scripts (because the desktop shell is failing, or they prefer the
 * browser frontend at localhost:3000) shouldn't have to clone the
 * source repo just to get the scripts. Having them inside the install
 * directory also means the bundled ``privacy_core.dll`` fallback in
 * those scripts resolves to the SAME directory as the script, which
 * is exactly the layout the v0.9.81 script update is looking for.
 *
 * Tracked from issue #319: users who fell back to start.bat from
 * their MSI install dir had to go fetch it from GitHub, then saw a
 * scary "install Rust" warning because the script didn't know where
 * the bundled DLL was. Bundling the script removes both problems.
 *
 * A script that is missing from the repo root is skipped with a
 * warning. A failed chmod on ``start.sh`` is also reported as a
 * warning and never aborts staging.
 */
function stageStartScripts() {
  const scripts = ['start.bat', 'start.sh'];
  for (const name of scripts) {
    const src = path.join(repoRoot, name);
    if (!fs.existsSync(src)) {
      console.warn(`backend-runtime staged without ${name} (not at repo root)`);
      continue;
    }
    const dst = path.join(outputDir, name);
    fs.copyFileSync(src, dst);
    // Make sure the .sh script is executable on POSIX systems.
    if (name.endsWith('.sh') && process.platform !== 'win32') {
      try {
        fs.chmodSync(dst, 0o755);
      } catch (err) {
        // Best-effort; not fatal on filesystems that don't honor chmod.
        // Still say so: a silently non-executable start.sh is hard to
        // diagnose once it is inside an installer.
        console.warn(`backend-runtime could not mark ${name} executable: ${err.message}`);
      }
    }
  }
}
|
||||||
|
|
||||||
function stagePrivacyCoreArtifact() {
|
function stagePrivacyCoreArtifact() {
|
||||||
|
|||||||
@@ -46,12 +46,18 @@ function prepareBuildTree() {
|
|||||||
const stagedLayoutPath = path.join(buildFrontendDir, 'src', 'app', 'layout.tsx');
|
const stagedLayoutPath = path.join(buildFrontendDir, 'src', 'app', 'layout.tsx');
|
||||||
if (fs.existsSync(stagedLayoutPath)) {
|
if (fs.existsSync(stagedLayoutPath)) {
|
||||||
const layoutSource = fs.readFileSync(stagedLayoutPath, 'utf8');
|
const layoutSource = fs.readFileSync(stagedLayoutPath, 'utf8');
|
||||||
|
// CRLF compatibility: on Windows checkouts without ``core.autocrlf=input``
|
||||||
|
// (the default) layout.tsx has CRLF line endings, but the original regexes
|
||||||
|
// only matched LF. The strip silently no-op'd, ``force-dynamic`` stayed,
|
||||||
|
// and Next's static-export refused to render ``/_not-found`` ("Page with
|
||||||
|
// `dynamic = \"force-dynamic\"` couldn't be exported"). Use ``\r?\n`` so
|
||||||
|
// the strip works regardless of line-ending normalization.
|
||||||
fs.writeFileSync(
|
fs.writeFileSync(
|
||||||
stagedLayoutPath,
|
stagedLayoutPath,
|
||||||
layoutSource
|
layoutSource
|
||||||
.replace(/\n\/\/ The dashboard is a live local runtime[\s\S]*?client polling ever hydrates\.\n/g, '\n')
|
.replace(/\r?\n\/\/ The dashboard is a live local runtime[\s\S]*?client polling ever hydrates\.\r?\n/g, '\n')
|
||||||
.replace(/\nexport const dynamic = ['"]force-dynamic['"];\n/g, '\n')
|
.replace(/\r?\nexport const dynamic = ['"]force-dynamic['"];\r?\n/g, '\n')
|
||||||
.replace(/\nexport const revalidate = 0;\n/g, '\n'),
|
.replace(/\r?\nexport const revalidate = 0;\r?\n/g, '\n'),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -4201,7 +4201,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shadowbroker-tauri-shell"
|
name = "shadowbroker-tauri-shell"
|
||||||
version = "0.9.79"
|
version = "0.9.81"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"axum",
|
"axum",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user