mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 12:58:11 +02:00
Compare commits
126 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 30aa30204f | |||
| fb97042c01 | |||
| 2616a6c9e3 | |||
| a930497e14 | |||
| 2dc1fcc778 | |||
| 896d1ae938 | |||
| 8dfa6a7199 | |||
| ef6b8ec181 | |||
| dcea325fba | |||
| 03b8053617 | |||
| 20807a2d62 | |||
| 79fbf9741b | |||
| a2f5d62926 | |||
| 5e0b2c037e | |||
| 69ef231e5a | |||
| 7a5f47ca9e | |||
| 5cd49542bf | |||
| f14d4feb6d | |||
| 19a8560a80 | |||
| 0d0e009867 | |||
| febcce9125 | |||
| 31ebcb5cd9 | |||
| b3fca3dc18 | |||
| 401f114e4f | |||
| 79b39e8985 | |||
| c3e38621fc | |||
| 9ef02dd06f | |||
| ba39d3b9aa | |||
| f91ddcf38b | |||
| 49151d8b9f | |||
| 767a2f6c00 | |||
| 2da739c9e8 | |||
| eca7f24e2c | |||
| 7bfaad17f0 | |||
| e3efcfd476 | |||
| 32b8421a1c | |||
| bc70cc3527 | |||
| 44e9b38ac2 | |||
| b01a69c172 | |||
| b041b5e97c | |||
| c54ea7fd9f | |||
| a3aa7b4dec | |||
| 19fb7f0b1e | |||
| 35cd4e4c71 | |||
| 31f79fd8e2 | |||
| fd7d6fa401 | |||
| 49621824b1 | |||
| 76750caa92 | |||
| c3ef9f4b9e | |||
| 5e6bb8511a | |||
| 0fee36e8f7 | |||
| e125467721 | |||
| 2b03b808ac | |||
| 2e14e75a0e | |||
| 084e563412 | |||
| 9ef6213284 | |||
| fb11e0881f | |||
| 7f96151e56 | |||
| d0299fc0a0 | |||
| 87ba70acd6 | |||
| bcc2d036b3 | |||
| 729ea78cb2 | |||
| 459178f283 | |||
| 8e27658157 | |||
| e36d1fc79c | |||
| d00c63abed | |||
| e3297e9bc0 | |||
| 9ae0b189ba | |||
| dd7706f17f | |||
| 30f0360ef8 | |||
| 421682c447 | |||
| 40734e310b | |||
| 71a9d9e144 | |||
| de27d119f9 | |||
| b8384d6d91 | |||
| 11ea345518 | |||
| 25a98a9869 | |||
| 2ce0e43ee5 | |||
| b86a258535 | |||
| 85636ce95c | |||
| 5ee4f8ecd7 | |||
| b8ac0fb9e7 | |||
| 8926e08009 | |||
| 585a08bbac | |||
| 6ffd54931c | |||
| a017ba86d6 | |||
| 9427935c7f | |||
| 63043b32b5 | |||
| 1e34fa53b2 | |||
| d69602be9e | |||
| ce9ba39cd2 | |||
| 3eafb622ed | |||
| eb5564ca0e | |||
| 20d2ccc52c | |||
| 0fc09c9011 | |||
| 707ca29220 | |||
| eb0288ee4e | |||
| 8d3c7a51b7 | |||
| fa18c032e2 | |||
| e1060193d0 | |||
| 08810f2537 | |||
| f5b9d14b48 | |||
| 9122d306cd | |||
| 03e5fc1363 | |||
| 447afe0b2b | |||
| d515aba450 | |||
| 3a8db7f9cd | |||
| f1cb1e860d | |||
| 38bcc976a4 | |||
| 77b4361ad6 | |||
| c5819d40d1 | |||
| 009574db81 | |||
| 281371e135 | |||
| 401268f22a | |||
| f830148e69 | |||
| 4068c31cfa | |||
| 50721816fa | |||
| 5dac844532 | |||
| 8884675845 | |||
| 58144d1b82 | |||
| da2a27f92a | |||
| f6f6176a12 | |||
| e6bea9dad3 | |||
| aebd5f0198 | |||
| 2f70b50f65 | |||
| 1b2ad5023d |
@@ -8,6 +8,18 @@ __pycache__/
|
||||
venv/
|
||||
.venv/
|
||||
.ruff_cache/
|
||||
local-artifacts/
|
||||
release-secrets/
|
||||
|
||||
# Never send local configuration or credentials into Docker builds
|
||||
.env
|
||||
.env.*
|
||||
**/.env
|
||||
**/.env.*
|
||||
*.pem
|
||||
*.key
|
||||
*.p12
|
||||
*.pfx
|
||||
|
||||
# privacy-core build caches (source is needed, artifacts are not)
|
||||
privacy-core/target/
|
||||
@@ -21,3 +33,24 @@ privacy-core/.codex-tmp/
|
||||
*.log
|
||||
extra/
|
||||
prototype/
|
||||
|
||||
# Runtime state generated by local backend runs
|
||||
backend/.pytest_cache/
|
||||
backend/.ruff_cache/
|
||||
backend/backend.egg-info/
|
||||
backend/build/
|
||||
backend/node_modules/
|
||||
backend/timemachine/
|
||||
backend/venv/
|
||||
backend/data/*cache*.json
|
||||
backend/data/**/*cache*.json
|
||||
backend/data/wormhole*.json
|
||||
backend/data/**/wormhole*.json
|
||||
backend/data/dm_*.json
|
||||
backend/data/**/dm_*.json
|
||||
backend/data/**/peer_store.json
|
||||
backend/data/**/node.json
|
||||
backend/data/*.log
|
||||
backend/data/**/*.log
|
||||
backend/data/*.key
|
||||
backend/data/**/*.key
|
||||
|
||||
@@ -38,6 +38,41 @@ ADMIN_KEY=
|
||||
# Leave blank to skip this optional enrichment.
|
||||
# NUFORC_MAPBOX_TOKEN=
|
||||
|
||||
# Optional startup-risk controls.
|
||||
# On Windows, external curl fallback and the Playwright LiveUAMap scraper are
|
||||
# disabled by default so blocked upstream feeds cannot interrupt start.bat.
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=false
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false
|
||||
# AIS starts by default when AIS_API_KEY is set. Set to 0/false to force-disable.
|
||||
# SHADOWBROKER_ENABLE_AIS_STREAM_PROXY=true
|
||||
# Minimum visible satellite catalog before forcing a CelesTrak refresh.
|
||||
# SHADOWBROKER_MIN_VISIBLE_SATELLITES=350
|
||||
# Upper bound for TLE fallback satellite search when CelesTrak is unreachable.
|
||||
# SHADOWBROKER_MAX_VISIBLE_SATELLITES=450
|
||||
# NUFORC fallback uses the Hugging Face mirror when live NUFORC is unavailable.
|
||||
# NUFORC_HF_FALLBACK_LIMIT=250
|
||||
# NUFORC_HF_GEOCODE_LIMIT=150
|
||||
|
||||
# First-paint cache age budgets. These let the map and Global Threat Intercept
|
||||
# paint from the last local snapshot while live feeds refresh in the background.
|
||||
# FAST_STARTUP_CACHE_MAX_AGE_S=21600
|
||||
# INTEL_STARTUP_CACHE_MAX_AGE_S=21600
|
||||
|
||||
# Docker resource tuning. The backend synthesizes large geospatial feeds; keep
|
||||
# this at 4G or higher on hosts that run AIS, OpenSky, CCTV, satellites, and
|
||||
# threat feeds together. Lower caps can cause Docker OOM restarts and empty
|
||||
# slow layers such as news, UAP sightings, military bases, and wastewater.
|
||||
# BACKEND_MEMORY_LIMIT=4G
|
||||
# SHADOWBROKER_FETCH_WORKERS=8
|
||||
# SHADOWBROKER_SLOW_FETCH_CONCURRENCY=4
|
||||
# SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY=2
|
||||
|
||||
# Infonet bootstrap/sync responsiveness. Defaults favor fast seed failure
|
||||
# detection so stale onion peers do not make the terminal look hung.
|
||||
# MESH_SYNC_TIMEOUT_S=5
|
||||
# MESH_SYNC_MAX_PEERS_PER_CYCLE=3
|
||||
# MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S=15
|
||||
|
||||
# Google Earth Engine for VIIRS night lights change detection (optional).
|
||||
# pip install earthengine-api
|
||||
# GEE_SERVICE_ACCOUNT_KEY=
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
# CODEOWNERS — assigns required reviewers for sensitive paths.
|
||||
# Format: <path glob> <user-or-team> [<user-or-team> ...]
|
||||
# See https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
|
||||
#
|
||||
# Owners listed here are auto-requested for review when matching files
|
||||
# change in a PR. If branch protection requires CODEOWNERS approval, the
|
||||
# PR cannot be merged until an owner approves.
|
||||
|
||||
# ── Internationalization / translations ──
|
||||
# Translation contributions are held to a stricter neutrality standard
|
||||
# than most code changes — see CONTRIBUTING.md "Translation contributions".
|
||||
# The i18n layer itself (no network calls, no telemetry, static JSON
|
||||
# bundled at build) is the structural guarantee that makes this safe;
|
||||
# changes to it need owner review.
|
||||
/frontend/src/i18n/ @BigBodyCobain
|
||||
|
||||
# ── Security-sensitive code paths ──
|
||||
/backend/auth.py @BigBodyCobain
|
||||
/backend/routers/wormhole.py @BigBodyCobain
|
||||
/backend/services/mesh/ @BigBodyCobain
|
||||
/backend/services/fetchers/ @BigBodyCobain
|
||||
|
||||
# ── CI / build / deploy infra ──
|
||||
/.github/workflows/ @BigBodyCobain
|
||||
/.gitlab-ci.yml @BigBodyCobain
|
||||
/docker-compose.yml @BigBodyCobain
|
||||
/docker-compose.gitlab.yml @BigBodyCobain
|
||||
/helm/ @BigBodyCobain
|
||||
|
||||
# ── This file and policy docs ──
|
||||
/.github/CODEOWNERS @BigBodyCobain
|
||||
/CONTRIBUTING.md @BigBodyCobain
|
||||
@@ -7,6 +7,28 @@ on:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
# CI flake mitigation:
|
||||
# ci.yml is triggered TWICE per PR on the same commit — once directly via
|
||||
# the `pull_request` trigger above ("Frontend Tests & Build" check) and once
|
||||
# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests &
|
||||
# Build" check). Both jobs land on the same Actions runner pool at the same
|
||||
# time and fight for CPU/RAM. Under contention, React's reconciliation in
|
||||
# `messagesViewFirstContact.test.tsx > removes an approved contact …`
|
||||
# overruns its 5s waitFor timeout — that's the single failure mode we've
|
||||
# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the
|
||||
# fd7d6fa push. Backend tests and every other frontend test pass under
|
||||
# the same conditions, which is what made this look random.
|
||||
#
|
||||
# Pinning a concurrency group on the SHA (PR head, or the pushed commit
|
||||
# for main) serializes the two invocations so neither starves the other.
|
||||
# We use cancel-in-progress: false so the second one queues instead of
|
||||
# cancelling — cancelling could leave the PR check stuck "Expected" if
|
||||
# only one of the two ever finishes. Total CI time grows by ~2 min in
|
||||
# exchange for deterministic outcomes.
|
||||
concurrency:
|
||||
group: ci-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
frontend:
|
||||
name: Frontend Tests & Build
|
||||
|
||||
+49
@@ -91,6 +91,24 @@ backend/data/*
|
||||
!backend/data/power_plants.json
|
||||
!backend/data/tracked_names.json
|
||||
!backend/data/yacht_alert_db.json
|
||||
# Issue #206: bundled KiwiSDR receiver directory used as last-resort
|
||||
# fallback when rx.linkfanel.net (HTTP-only upstream) is unreachable
|
||||
# or returns content that fails our integrity validation.
|
||||
!backend/data/kiwisdr_directory.json
|
||||
# Issue #201: pinned SHA-256 digests for known Tor Expert Bundle URLs.
|
||||
# Used as a second verification source when upstream .sha256sum fails.
|
||||
!backend/data/tor_bundle_digests.json
|
||||
# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream
|
||||
# Let's Encrypt renewal failures without disabling TLS validation entirely.
|
||||
!backend/data/aisstream_spki_pins.json
|
||||
# Issue #231: pinned SHA-256 digests for known release archives. Used by
|
||||
# the self-updater as a second-line integrity check when the release's
|
||||
# SHA256SUMS.txt asset can't be fetched.
|
||||
!backend/data/release_digests.json
|
||||
# Issue #244/#245/#246: one-shot carrier-position seed shipped with each
|
||||
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||
# after that the cache reflects this install's own GDELT observations.
|
||||
!backend/data/carrier_seed.json
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
@@ -173,6 +191,8 @@ backend/services/test_*.py
|
||||
# Local analysis & dev tools
|
||||
backend/analyze_xlsx.py
|
||||
backend/services/ais_cache.json
|
||||
graphify/
|
||||
graphify-out/
|
||||
|
||||
# ========================
|
||||
# Internal docs & brainstorming (never commit)
|
||||
@@ -241,3 +261,32 @@ backend/data/wormhole_stdout.log
|
||||
|
||||
# Compressed snapshot archives (can be 100 MB+)
|
||||
*.json.gz
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# AI assistant / coding-agent scratch
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Per-tool config + scratch directories. These are private to whichever
|
||||
# coding agent the operator happens to be using and have no business in
|
||||
# the repo. If a tool's instructions need to be canonical for the project,
|
||||
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||
# repo root.
|
||||
|
||||
# OpenAI Codex CLI
|
||||
.codex/
|
||||
.codex-app-schema/
|
||||
.codex-app-ts/
|
||||
|
||||
# Per-agent instruction files dropped at repo root by various tools.
|
||||
# These are operator-side preferences, not part of the project contract.
|
||||
AGENTS.md
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
.github/copilot-instructions.md
|
||||
|
||||
# Stale AI-generated test file that referenced fields that don't exist in
|
||||
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||
# ignored so it doesn't accidentally get committed if it shows up again
|
||||
# from a tool that's working off an out-of-date understanding of the
|
||||
# module. If a real test for that function is needed, write it under a
|
||||
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||
backend/tests/test_carrier_tracker_region_centers.py
|
||||
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
# GitLab CI/CD for Shadowbroker
|
||||
#
|
||||
# Mirror of .github/workflows/docker-publish.yml — keeps the GitLab install
|
||||
# path (image registry + source) at parity with GitHub so users who prefer
|
||||
# GitLab get the same experience.
|
||||
#
|
||||
# What this does on every push to main:
|
||||
# 1. Builds multi-arch (amd64 + arm64) Docker images for the backend and
|
||||
# frontend, pushes them to the project's GitLab Container Registry:
|
||||
# registry.gitlab.com/bigbodycobain/shadowbroker/backend:latest
|
||||
# registry.gitlab.com/bigbodycobain/shadowbroker/frontend:latest
|
||||
# Both also get a :$CI_COMMIT_SHORT_SHA tag for traceability.
|
||||
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
||||
# on GitLab) so the two sources stay in sync.
|
||||
#
|
||||
# Auth notes:
|
||||
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
||||
# automatically. No credentials need to be configured.
|
||||
# - The reverse mirror requires a GitHub personal access token stored
|
||||
# as the GitLab CI/CD variable GITHUB_MIRROR_TOKEN (Protected + Masked).
|
||||
# Scope: public_repo (or repo for private). If the variable isn't
|
||||
# set the mirror job is skipped — image builds still run.
|
||||
|
||||
stages:
|
||||
- build
|
||||
- mirror
|
||||
|
||||
variables:
|
||||
# Use the dind service for buildx multi-arch builds.
|
||||
DOCKER_HOST: tcp://docker:2376
|
||||
DOCKER_TLS_CERTDIR: "/certs"
|
||||
DOCKER_DRIVER: overlay2
|
||||
# QEMU is what lets a single x86 runner build arm64 images. dind doesn't
|
||||
# install it by default; we install via tonistiigi/binfmt below.
|
||||
BUILDX_VERSION: "v0.14.1"
|
||||
# Repository-relative paths.
|
||||
BACKEND_IMAGE: $CI_REGISTRY_IMAGE/backend
|
||||
FRONTEND_IMAGE: $CI_REGISTRY_IMAGE/frontend
|
||||
|
||||
# Shared template: bootstraps buildx + QEMU on the dind service so a single
|
||||
# runner can produce both amd64 and arm64 manifests in one push.
|
||||
.buildx-setup: &buildx-setup
|
||||
image: docker:24
|
||||
services:
|
||||
- name: docker:24-dind
|
||||
command: ["--tls=true"]
|
||||
before_script:
|
||||
- docker info
|
||||
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
||||
- docker run --privileged --rm tonistiigi/binfmt --install all
|
||||
- docker buildx create --use --name multiarch --driver docker-container
|
||||
|
||||
# ── Backend image ────────────────────────────────────────────────────────
|
||||
build-backend:
|
||||
<<: *buildx-setup
|
||||
stage: build
|
||||
script:
|
||||
- >
|
||||
docker buildx build
|
||||
--platform linux/amd64,linux/arm64
|
||||
--file backend/Dockerfile
|
||||
--tag $BACKEND_IMAGE:latest
|
||||
--tag $BACKEND_IMAGE:$CI_COMMIT_SHORT_SHA
|
||||
--push
|
||||
.
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"
|
||||
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||
changes:
|
||||
- backend/**/*
|
||||
- .gitlab-ci.yml
|
||||
|
||||
# ── Frontend image ───────────────────────────────────────────────────────
|
||||
build-frontend:
|
||||
<<: *buildx-setup
|
||||
stage: build
|
||||
script:
|
||||
- cd frontend
|
||||
- >
|
||||
docker buildx build
|
||||
--platform linux/amd64,linux/arm64
|
||||
--tag $FRONTEND_IMAGE:latest
|
||||
--tag $FRONTEND_IMAGE:$CI_COMMIT_SHORT_SHA
|
||||
--push
|
||||
.
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"
|
||||
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||
changes:
|
||||
- frontend/**/*
|
||||
- .gitlab-ci.yml
|
||||
|
||||
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker.
|
||||
# Fast-forward-only — if GitLab main and GitHub main have diverged, this
|
||||
# fails loudly rather than silently overwriting either side.
|
||||
#
|
||||
# Only runs if GITHUB_MIRROR_TOKEN is set as a CI/CD variable. See the
|
||||
# header comment of this file for setup instructions.
|
||||
mirror-to-github:
|
||||
stage: mirror
|
||||
image: alpine:3.20
|
||||
needs: []
|
||||
before_script:
|
||||
- apk add --no-cache git openssh-client ca-certificates
|
||||
script:
|
||||
- git config --global user.email "ci-mirror@gitlab.com"
|
||||
- git config --global user.name "GitLab CI Mirror"
|
||||
- >
|
||||
git clone --depth=50 --branch main
|
||||
"https://oauth2:${CI_JOB_TOKEN}@gitlab.com/${CI_PROJECT_PATH}.git"
|
||||
repo
|
||||
- cd repo
|
||||
- >
|
||||
git push
|
||||
"https://x-access-token:${GITHUB_MIRROR_TOKEN}@github.com/BigBodyCobain/Shadowbroker.git"
|
||||
"${CI_COMMIT_SHA}:refs/heads/main"
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_TOKEN
|
||||
@@ -0,0 +1,75 @@
|
||||
# Contributing to Shadowbroker
|
||||
|
||||
Thank you for taking the time to contribute. This document covers things specific to this project — for general open-source contribution etiquette, see the GitHub docs.
|
||||
|
||||
---
|
||||
|
||||
## Code contributions
|
||||
|
||||
1. Fork the repo on GitHub (`bigbodycobain/Shadowbroker`) or GitLab (`bigbodycobain/Shadowbroker` mirror).
|
||||
2. Make your changes on a feature branch.
|
||||
3. Run the local test suite:
|
||||
- Backend: `pytest backend/tests/`
|
||||
- Frontend: `cd frontend && npx vitest run`
|
||||
4. Open a Pull Request against `main`.
|
||||
|
||||
CI runs on every PR. If CI fails, that's blocking — please push fixes rather than asking for it to be merged anyway.
|
||||
|
||||
---
|
||||
|
||||
## Reporting security issues
|
||||
|
||||
Do **not** file security issues as public GitHub issues. Email the maintainer or use a private security advisory on GitHub. Issues or pull requests that publicly disclose an exploitable vulnerability without prior coordination will be rejected.
|
||||
|
||||
---
|
||||
|
||||
## Translation contributions
|
||||
|
||||
Shadowbroker supports UI localization (`frontend/src/i18n/`). Translation contributions are welcome but held to a stricter standard than most code changes, because translations can subtly reshape user perception in ways that are hard to spot during review. Read this section before submitting one.
|
||||
|
||||
### The neutrality requirement
|
||||
|
||||
**Translations must be technically faithful to the English source.** That means:
|
||||
|
||||
- Each `t('key')` entry should mean approximately the same thing in the target language as in English, modulo idiom.
|
||||
- Technical terms with established meanings (e.g. "GPS jamming," "military flight," "Tor," "onion routing," "encryption") should be translated using the corresponding established technical term in the target language — **not** softened, rebranded, or politically reframed.
|
||||
- The set of UI strings should be **the same** between languages. Don't omit features from one locale that are visible in another.
|
||||
|
||||
### What will get a translation PR rejected
|
||||
|
||||
Translation choices that align the project with the framing or terminology of state propaganda — from **any** country — will be rejected. This applies symmetrically:
|
||||
|
||||
| Country / source | Examples of substitutions we will reject |
|
||||
|---|---|
|
||||
| **PRC / CCP** | Calling Taiwan a "province" or "renegade province"; reframing protest layers as "riots"; using softened or euphemistic terms for surveillance, internment, or jamming when the source text is direct |
|
||||
| **Russia** | Calling the Ukraine war a "special military operation"; relabeling occupied territories as Russian; softening sanctions/jamming/disinfo terminology |
|
||||
| **United States / EU** | Reframing adversaries with editorial labels not in the source (e.g. inserting "regime" where the English says "government"); applying labels like "terrorist" or "rogue state" to entities the English source describes neutrally |
|
||||
| **Israel / Palestine / any active conflict** | Substituting one side's preferred terminology when the source uses the other side's or a neutral term |
|
||||
| **Any government** | Adding political slogans, omitting features that government finds inconvenient, or inserting terminology associated with a specific political faction |
|
||||
|
||||
The test is **"would a translator working strictly from the English source produce this rendering?"** If the answer requires assuming a political stance the source does not take, the substitution does not belong in the translation.
|
||||
|
||||
### How translation PRs are reviewed
|
||||
|
||||
Changes to `frontend/src/i18n/**` are owned by the maintainer (see `CODEOWNERS`) and require explicit approval. We will:
|
||||
|
||||
1. Diff the translation against the English source key-by-key.
|
||||
2. Spot-check a sample of entries with a native speaker of the target language when possible.
|
||||
3. Look for the patterns above.
|
||||
4. Look for suspicious additions to the i18n infrastructure itself (e.g. a remote translation fetcher, telemetry on language choice) — the i18n layer is supposed to be 100% client-side static JSON.
|
||||
|
||||
A PR that adds a new language is harder to review than one that fixes typos in an existing language. For new languages, please be patient and expect a real review window. For typo fixes, please describe each change in the PR body so the reviewer can verify intent.
|
||||
|
||||
### What about adding a new language?
|
||||
|
||||
We welcome new languages. The mechanical setup is documented in the header comment of `frontend/src/i18n/index.ts`. Beyond that:
|
||||
|
||||
- We are more likely to merge a new language quickly if at least one reviewer in the maintainer's network speaks it.
|
||||
- If you are the *only* speaker of the target language reading this repo, your translation is welcome but the merge timeline will be longer while a reviewer is found.
|
||||
- Partial translations are fine — the system falls back to English for any missing key.
|
||||
|
||||
---
|
||||
|
||||
## Anything else
|
||||
|
||||
If you have a question that isn't a security report, opening a GitHub Discussion or a draft PR with a question in the body is the fastest way to get a response. Direct emails are read but not always replied to promptly.
|
||||
@@ -11,15 +11,15 @@
|
||||
|
||||
|
||||
|
||||

|
||||
[](https://github.com/user-attachments/assets/248208ec-62f7-49d1-831d-4bd0a1fa6852)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**ShadowBroker** is a decentralized real-time, multi-domain OSINT dashboard that fuses 60+ live intelligence feeds into a single dark-ops map interface. Aircraft, ships, satellites, conflict zones, CCTV networks, GPS jamming, internet-connected devices, police scanners, mesh radio nodes, and breaking geopolitical events — all updating in real time on one screen as well as a obfuscated communications protocol and information exchange infrastructure.
|
||||
**ShadowBroker** is a decentralized intelligence platform that aggregates real-time, multi-domain OSINT telemetry from 60+ live intelligence feeds into a single dark-ops map interface. Aircraft, ships, satellites, conflict zones, CCTV networks, GPS jamming, internet-connected devices, police scanners, mesh radio nodes, and breaking geopolitical events — all updating in real time on one screen. It also provides an obfuscated communications protocol and information exchange infrastructure.
|
||||
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers, including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
|
||||
|
||||
Designed for analysts, researchers, radio operators, and anyone who wants to see what the world looks like when every public signal is on the same map.
|
||||
|
||||
@@ -38,10 +38,9 @@ ShadowBroker includes an optional Shodan connector for operator-supplied API acc
|
||||
|
||||
## Interesting Use Cases
|
||||
|
||||
* **Track Air Force One**, the private jets of billionaires and dictators, and every military tanker, ISR, and fighter broadcasting ADS-B. Air Force One and all of the accompanying Presidential/Vice Presidential planes are highlighted and monitored from the moment they leave the ground.
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 35+ data layers, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Communicate on the InfoNet testnet** — The first decentralized intelligence mesh built into an OSINT tool. Obfuscated messaging with gate personas, Dead Drop peer-to-peer exchange, and a built-in terminal CLI. No accounts, no signup. Privacy is not guaranteed yet — this is an experimental testnet — but the protocol is live and being hardened.
|
||||
* **Track Air Force One**, the private jets of billionaires and dictators, and every military tanker, ISR, and fighter broadcasting ADS-B — with automatic holding pattern detection when aircraft start circling
|
||||
* **Estimate where US aircraft carriers are** using automated GDELT news scraping — no other open tool does this
|
||||
* **Search internet-connected devices worldwide** via Shodan — cameras, SCADA systems, databases — plotted as a live overlay on the map
|
||||
* **Right-click anywhere on Earth** for a country dossier (head of state, population, languages), Wikipedia summary, and the latest Sentinel-2 satellite photo at 10m resolution
|
||||
* **Click a KiwiSDR node** and tune into live shortwave radio directly in the dashboard. Click a police scanner feed and eavesdrop in one click.
|
||||
* **Watch 11,000+ CCTV cameras** across 6 countries — London, NYC, California, Spain, Singapore, and more — streaming live on the map
|
||||
@@ -51,15 +50,19 @@ ShadowBroker includes an optional Shodan connector for operator-supplied API acc
|
||||
* **Follow earthquakes, volcanic eruptions, active wildfires** (NASA FIRMS), severe weather alerts, and air quality readings worldwide
|
||||
* **Map military bases, 35,000+ power plants**, 2,000+ data centers, and internet outage regions — cross-referenced automatically
|
||||
* **Connect to Meshtastic mesh radio nodes** and APRS amateur radio networks — visible on the map and integrated into Mesh Chat
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 35+ data layers, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Detect ground changes through cloud cover** with SAR (Synthetic Aperture Radar) — mm-scale ground deformation, flood extent, vegetation disturbance, and damage assessments from NASA OPERA and Copernicus EGMS. Define your own watch areas and get anomaly alerts. Free with a NASA Earthdata account.
|
||||
* **Switch visual modes** — DEFAULT, SATELLITE, FLIR (thermal), NVG (night vision), CRT (retro terminal) — via the STYLE button
|
||||
* **Track trains** across the US (Amtrak) and Europe (DigiTraffic) in real time
|
||||
* **Estimate where US aircraft carriers are** using automated GDELT news scraping — no other open tool does this
|
||||
* **Search internet-connected devices worldwide** via Shodan — cameras, SCADA systems, databases — plotted as a live overlay on the map
|
||||
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Quick Start (Docker)
|
||||
|
||||
### From GitHub (default — uses GHCR images)
|
||||
|
||||
```bash
|
||||
git clone https://github.com/bigbodycobain/Shadowbroker.git
|
||||
cd Shadowbroker
|
||||
@@ -67,9 +70,24 @@ docker compose pull
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### From GitLab (uses GitLab Container Registry)
|
||||
|
||||
```bash
|
||||
git clone https://gitlab.com/bigbodycobain/Shadowbroker.git
|
||||
cd Shadowbroker
|
||||
docker compose -f docker-compose.yml -f docker-compose.gitlab.yml pull
|
||||
docker compose -f docker-compose.yml -f docker-compose.gitlab.yml up -d
|
||||
```
|
||||
|
||||
Both paths build from the same source with equivalent CI pipelines and produce functionally equivalent containers (each registry's images are built separately, so digests may differ). Pick whichever ecosystem you already use.
|
||||
|
||||
Open `http://localhost:3000` to view the dashboard! *(Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/) or Docker Engine)*
|
||||
|
||||
> **Podman users:** Replace `docker compose` with `podman compose`, or use the `compose.sh` wrapper which auto-detects your engine. Force Podman with `./compose.sh --engine podman up -d`.
|
||||
> **Backend port already in use?** The browser only needs port `3000`, but the backend API is also published on host port `8000` for local diagnostics. If another app already uses `8000`, create or edit `.env` next to `docker-compose.yml` and set `BACKEND_PORT=8001`, then run `docker compose up -d`.
|
||||
|
||||
> **Blank news/UAP/bases/wastewater after several minutes?** Check for backend OOM restarts with `docker events --since 30m --filter container=shadowbroker-backend --filter event=oom`. The default compose file gives the backend 4GB; if your host has less memory, reduce enabled feeds or set `BACKEND_MEMORY_LIMIT=3G` and expect slower/heavier layers to warm more gradually.
|
||||
|
||||
> **Podman users:** Podman works, but `podman compose` is a wrapper and still needs a Compose provider installed. On Windows/WSL, if you see `looking up compose provider failed`, install `podman-compose` and run `podman-compose pull` followed by `podman-compose up -d` from inside the cloned `Shadowbroker` folder. On Linux/macOS/WSL shells you can also use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d`.
|
||||
|
||||
---
|
||||
|
||||
@@ -92,6 +110,8 @@ That's it. `pull` grabs the latest images, `up -d` restarts the containers.
|
||||
> docker compose pull
|
||||
> docker compose up -d
|
||||
> ```
|
||||
>
|
||||
> Podman users should run the equivalent provider command, for example `podman-compose pull` and `podman-compose up -d`, or use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d` from a bash-compatible shell.
|
||||
|
||||
### ⚠️ **Stuck on the old version?**
|
||||
|
||||
@@ -129,8 +149,13 @@ helm repo update
|
||||
|
||||
**2. Install the Chart:**
|
||||
```bash
|
||||
# Install from the local helm/chart directory
|
||||
# Default — pulls images from GHCR
|
||||
helm install shadowbroker ./helm/chart --create-namespace --namespace shadowbroker
|
||||
|
||||
# GitLab registry variant
|
||||
helm install shadowbroker ./helm/chart --create-namespace --namespace shadowbroker \
|
||||
-f helm/chart/values.yaml \
|
||||
-f helm/chart/values-gitlab.yaml
|
||||
```
|
||||
|
||||
**3. Key Features:**
|
||||
@@ -149,7 +174,7 @@ ShadowBroker v0.9.7 ships **InfoNet** (decentralized intelligence mesh + Soverei
|
||||
| Channel | Privacy Status | Details |
|
||||
|---|---|---|
|
||||
| **Meshtastic / APRS** | **PUBLIC** | RF radio transmissions are public and interceptable by design. |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden. |
|
||||
| **InfoNet Gate Chat** | **OBFUSCATED** | Messages are obfuscated with gate personas and canonical payload signing, but NOT end-to-end encrypted. Metadata is not hidden, even though the transport is designed to run over Tor and Reticulum (work in progress). |
|
||||
| **Dead Drop DMs** | **STRONGEST CURRENT LANE** | Token-based epoch mailbox with SAS word verification. Strongest lane in this build, but not yet confidently private. |
|
||||
| **Sovereign Shell governance** | **PUBLIC LEDGER** | Petitions, votes, upgrade hashes, and dispute stakes are signed events on a public hashchain. Pseudonymous via gate persona, but governance actions are intentionally observable. |
|
||||
| **Privacy primitives (RingCT / stealth / DEX)** | **NOT YET WIRED** | Locked Protocol contracts are in place, but the cryptographic scheme has not been chosen. The privacy-core Rust crate is the integration target for a future sprint. |
|
||||
@@ -174,7 +199,7 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Communication layer (since v0.9.6):**
|
||||
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **InfoNet Experimental Testnet** — A global, obfuscated message relay using Tor and Reticulum. Anyone running ShadowBroker can transmit and receive on the InfoNet. Messages pass through a Wormhole relay layer with gate personas, Ed25519 canonical payload signing, and transport obfuscation.
|
||||
* **Mesh Chat Panel** — Three-tab interface: **INFONET** (gate chat with obfuscated transport), **MESH** (Meshtastic radio integration), **DEAD DROP** (peer-to-peer message exchange with token-based epoch mailboxes — strongest current lane).
|
||||
* **Gate Persona System** — Pseudonymous identities with Ed25519 signing keys, prekey bundles, SAS word contact verification, and abuse reporting.
|
||||
* **Mesh Terminal** — Built-in CLI: `send`, `dm`, market commands, gate state inspection. Draggable panel, minimizes to the top bar. Type `help` to see all commands.
|
||||
@@ -559,25 +584,51 @@ Open `http://localhost:3000` to view the dashboard.
|
||||
> **Deploying publicly or on a LAN?** No configuration needed for most setups.
|
||||
> The frontend proxies all API calls through the Next.js server to `BACKEND_URL`,
|
||||
> which defaults to `http://backend:8000` (Docker internal networking).
|
||||
> Port 8000 does not need to be exposed externally.
|
||||
> Host port `8000` is only published for local API/debug access. If it conflicts
|
||||
> with another service, set `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL`
|
||||
> as `http://backend:8000` because that is the Docker-internal port.
|
||||
> The backend memory cap is controlled by `BACKEND_MEMORY_LIMIT` and defaults
|
||||
> to `4G`. If Docker reports OOM events, the backend will restart and slow
|
||||
> layers can look empty until they repopulate.
|
||||
>
|
||||
> If your backend runs on a **different host or port**, set `BACKEND_URL` at runtime — no rebuild required:
|
||||
>
|
||||
> ```bash
|
||||
> # Linux / macOS
|
||||
> BACKEND_URL=http://myserver.com:9096 docker-compose up -d
|
||||
> BACKEND_URL=http://myserver.com:9096 docker compose up -d
|
||||
>
|
||||
> # Podman (via compose.sh wrapper)
|
||||
> BACKEND_URL=http://192.168.1.50:9096 ./compose.sh up -d
|
||||
>
|
||||
> # Windows (PowerShell)
|
||||
> $env:BACKEND_URL="http://myserver.com:9096"; docker-compose up -d
|
||||
> $env:BACKEND_URL="http://myserver.com:9096"; docker compose up -d
|
||||
>
|
||||
> # Or add to a .env file next to docker-compose.yml:
|
||||
> # BACKEND_URL=http://myserver.com:9096
|
||||
> ```
|
||||
|
||||
**Podman users:** Replace `docker compose` with `podman compose`, or use the `compose.sh` wrapper which auto-detects your engine.
|
||||
**Podman users:** Do not pass the GitHub URL to `podman compose pull`; clone the repo first, `cd Shadowbroker`, then run compose from that folder. `podman compose` also requires a Compose provider. If Podman reports `looking up compose provider failed`, install one:
|
||||
|
||||
```bash
|
||||
# Linux / macOS / WSL
|
||||
python3 -m pip install --user podman-compose
|
||||
podman-compose pull
|
||||
podman-compose up -d
|
||||
```
|
||||
|
||||
```powershell
|
||||
# Windows PowerShell
|
||||
py -m pip install --user podman-compose
|
||||
podman-compose pull
|
||||
podman-compose up -d
|
||||
```
|
||||
|
||||
If you are in a bash-compatible shell, the included wrapper can auto-detect Docker or Podman:
|
||||
|
||||
```bash
|
||||
./compose.sh --engine podman pull
|
||||
./compose.sh --engine podman up -d
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -599,7 +650,7 @@ services:
|
||||
# image: registry.gitlab.com/bigbodycobain/shadowbroker/backend:latest
|
||||
container_name: shadowbroker-backend
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "${BACKEND_PORT:-8000}:8000"
|
||||
environment:
|
||||
- AIS_API_KEY=your_aisstream_key # Required — get one free at aisstream.io
|
||||
- OPENSKY_CLIENT_ID= # Optional — higher flight data rate limits
|
||||
@@ -629,7 +680,7 @@ volumes:
|
||||
backend_data:
|
||||
```
|
||||
|
||||
> **How it works:** The frontend container proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. The browser only ever talks to port 3000 — port 8000 does not need to be exposed externally.
|
||||
> **How it works:** The frontend container proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. The browser only ever talks to port 3000. The backend's host port is for local API/debug access and can be changed with `BACKEND_PORT=8001` without changing `BACKEND_URL`.
|
||||
>
|
||||
> `BACKEND_URL` is a plain runtime environment variable (not a build-time `NEXT_PUBLIC_*`), so you can change it in Portainer, Uncloud, or any compose editor without rebuilding the image. Set it to the address where your backend is reachable from inside the Docker network (e.g. `http://backend:8000`, `http://192.168.1.50:8000`).
|
||||
|
||||
@@ -932,8 +983,9 @@ Then confirm authenticated `GET /api/wormhole/status` or `GET /api/settings/worm
|
||||
| Variable | Where to set | Purpose |
|
||||
|---|---|---|
|
||||
| `BACKEND_URL` | `environment` in `docker-compose.yml`, or shell env | URL the Next.js server uses to proxy API calls to the backend. Defaults to `http://backend:8000`. **Runtime variable — no rebuild needed.** |
|
||||
| `BACKEND_PORT` | repo-root `.env` or shell env before `docker compose up` | Host port used to expose the backend API for local diagnostics. Defaults to `8000`; set `BACKEND_PORT=8001` if port 8000 is already in use. Does not change Docker-internal `BACKEND_URL`. |
|
||||
|
||||
**How it works:** The frontend proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. Browsers only talk to port 3000; port 8000 never needs to be exposed externally. For local dev without Docker, `BACKEND_URL` defaults to `http://localhost:8000`.
|
||||
**How it works:** The frontend proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. Browsers only talk to port 3000; the backend host port is only for local diagnostics. For local dev without Docker, `BACKEND_URL` defaults to `http://localhost:8000`.
|
||||
|
||||
---
|
||||
|
||||
@@ -943,7 +995,7 @@ ShadowBroker is built in the open. These people shipped real code:
|
||||
|
||||
| Who | What | PR |
|
||||
|-----|------|----|
|
||||
| [@Alienmajik](https://github.com/Alienmajik) | Raspberry Pi 5 support — ARM64 packaging, headless deployment notes, runtime tuning for Pi-class hardware | — |
|
||||
| [@Alienmajik](https://gitlab.com/Alienmajik) | Raspberry Pi 5 support — ARM64 packaging, headless deployment notes, runtime tuning for Pi-class hardware | — |
|
||||
| [@wa1id](https://github.com/wa1id) | CCTV ingestion fix — threaded SQLite, persistent DB, startup hydration, cluster clickability | #92 |
|
||||
| [@AlborzNazari](https://github.com/AlborzNazari) | Spain DGT + Madrid CCTV sources, STIX 2.1 threat intel export | #91 |
|
||||
| [@adust09](https://github.com/adust09) | Power plants layer, East Asia intel coverage (JSDF bases, ICAO enrichment, Taiwan news, military classification) | #71, #72, #76, #77, #87 |
|
||||
|
||||
+69
-6
@@ -11,6 +11,13 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
|
||||
# ── Optional ───────────────────────────────────────────────────
|
||||
|
||||
# AISHub REST fallback. Used when stream.aisstream.io is unreachable
|
||||
# (e.g. their cert expires or server goes offline). Free tier requires
|
||||
# registration at https://www.aishub.net/api. Poll cadence defaults to
|
||||
# 20 min to stay courteous; tunable via AISHUB_POLL_INTERVAL_MINUTES.
|
||||
# AISHUB_USERNAME=
|
||||
# AISHUB_POLL_INTERVAL_MINUTES=20
|
||||
|
||||
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
||||
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
||||
|
||||
@@ -24,8 +31,54 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)
|
||||
# Per-install operator handle. Round 7a: every outbound third-party API
|
||||
# call (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||
# weather.gov, NUFORC, etc.) includes this handle in the User-Agent so
|
||||
# upstreams can rate-limit / contact the specific install instead of
|
||||
# treating every Shadowbroker user as one entity.
|
||||
#
|
||||
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||
# Operators who want a meaningful handle (real name, org, GitHub login) can
|
||||
# set it here. Special characters are sanitized to dashes.
|
||||
# OPERATOR_HANDLE=
|
||||
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers. Operators
|
||||
# who run a public relay and want a completely custom UA can set this; it
|
||||
# bypasses the per-operator helper entirely. Most installs should leave it
|
||||
# unset and use OPERATOR_HANDLE instead.
|
||||
# SHADOWBROKER_USER_AGENT=
|
||||
|
||||
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||
# use the per-install handle (default) — set only if you have a registered
|
||||
# Nominatim relay identity.
|
||||
# NOMINATIM_USER_AGENT=
|
||||
|
||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||
# These data sources phone home to politically/commercially sensitive
|
||||
# upstreams. Disabled by default; set to "true" only if the operator
|
||||
# explicitly wants the node's IP to contact these services.
|
||||
#
|
||||
# CrowdThreat — backend.crowdthreat.world (paid threat-intel aggregator).
|
||||
# CROWDTHREAT_ENABLED=false
|
||||
#
|
||||
# EUvsDisinfo FIMI — euvsdisinfo.eu (EU disinformation tracker).
|
||||
# FIMI_ENABLED=false
|
||||
#
|
||||
# Polymarket + Kalshi — US political/election prediction markets.
|
||||
# PREDICTION_MARKETS_ENABLED=false
|
||||
#
|
||||
# Finnhub fallback / yfinance — financial market data.
|
||||
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
|
||||
# the unauthenticated yfinance fallback to call Yahoo Finance.
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP sightings — huggingface.co dataset download.
|
||||
# NUFORC_ENABLED=false
|
||||
#
|
||||
# News RSS aggregator — defaults ON. Set to "false" to disable all
|
||||
# configured news feeds (kill switch for the news layer).
|
||||
# NEWS_ENABLED=true
|
||||
|
||||
# LTA Singapore traffic cameras — leave blank to skip this data source.
|
||||
# LTA_ACCOUNT_KEY=
|
||||
@@ -54,14 +107,19 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# MESH_MQTT_INCLUDE_DEFAULT_ROOTS=true
|
||||
# MESH_MQTT_BROKER=mqtt.meshtastic.org
|
||||
# MESH_MQTT_PORT=1883
|
||||
# MESH_MQTT_USER=meshdev
|
||||
# MESH_MQTT_PASS=large4cats
|
||||
# Leave user/pass blank for the public Meshtastic broker default.
|
||||
# MESH_MQTT_USER=
|
||||
# MESH_MQTT_PASS=
|
||||
|
||||
# Optional Meshtastic node ID (e.g. "!abcd1234"). When set, included in the
|
||||
# User-Agent sent to meshtastic.liamcottle.net so the upstream service operator
|
||||
# can identify per-install traffic instead of aggregated "ShadowBroker" hits.
|
||||
# Leave blank to send a generic UA with the project contact email only.
|
||||
# Leave blank to send a generic UA. If you set MESHTASTIC_OPERATOR_CALLSIGN,
|
||||
# it is included in outbound headers to meshtastic.org by default so they
|
||||
# can rate-limit per-operator. Set MESHTASTIC_SEND_CALLSIGN_HEADER=false to
|
||||
# suppress the callsign while still using it locally (e.g. for APRS).
|
||||
# MESHTASTIC_OPERATOR_CALLSIGN=
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=true
|
||||
# MESH_MQTT_PSK= # hex-encoded, empty = default LongFast key
|
||||
|
||||
# ── Mesh / Reticulum (RNS) ─────────────────────────────────────
|
||||
@@ -127,7 +185,12 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# MESH_BOOTSTRAP_DISABLED=false
|
||||
# MESH_BOOTSTRAP_MANIFEST_PATH=data/bootstrap_peers.json
|
||||
# MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY=
|
||||
# MESH_DEFAULT_SYNC_PEERS=https://node.shadowbroker.info # bundled pull-only public seed for fresh installs
|
||||
# Infonet/Wormhole fails closed to onion/RNS by default. Only enable clearnet
|
||||
# sync for local relay development or an explicitly public testnet.
|
||||
# MESH_INFONET_ALLOW_CLEARNET_SYNC=false
|
||||
# MESH_BOOTSTRAP_SEED_PEERS=http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000
|
||||
# Add comma-separated http://*.onion peers as more private seed/relay nodes come online.
|
||||
# MESH_DEFAULT_SYNC_PEERS= # legacy alias; prefer MESH_BOOTSTRAP_SEED_PEERS
|
||||
# MESH_RELAY_PEERS= # comma-separated operator-trusted sync/push peers (empty by default)
|
||||
# MESH_PEER_PUSH_SECRET= # REQUIRED when relay/RNS peers are configured (min 16 chars, generate with: python -c "import secrets; print(secrets.token_urlsafe(32))")
|
||||
# MESH_SYNC_INTERVAL_S=300
|
||||
|
||||
+11
-1
@@ -22,10 +22,12 @@ FROM python:3.11-slim-bookworm
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install Node.js (for AIS WebSocket proxy) and curl (for network fallback)
|
||||
# Install Node.js (for AIS WebSocket proxy), curl (for network fallback), and
|
||||
# Tor (for Wormhole/remote-agent .onion transport).
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
tor \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y --no-install-recommends nodejs \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
@@ -49,6 +51,13 @@ RUN cd /workspace/backend && uv sync --frozen --no-dev \
|
||||
# Copy backend source code
|
||||
COPY backend/ .
|
||||
|
||||
# Preserve safe static data outside /app/data. The compose named volume mounted
|
||||
# at /app/data hides image-baked files on first run, so the entrypoint seeds
|
||||
# missing static JSON into fresh volumes before the backend starts.
|
||||
RUN mkdir -p /app/image-data \
|
||||
&& if [ -d /app/data ]; then cp -a /app/data/. /app/image-data/; fi \
|
||||
&& chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
# Install Node.js dependencies (ws module for AIS WebSocket proxy)
|
||||
COPY backend/package*.json ./
|
||||
RUN npm ci --omit=dev
|
||||
@@ -75,4 +84,5 @@ USER backenduser
|
||||
EXPOSE 8000
|
||||
|
||||
# Start FastAPI server
|
||||
ENTRYPOINT ["/app/docker-entrypoint.sh"]
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-keep-alive", "120"]
|
||||
|
||||
+234
-4
@@ -1,5 +1,37 @@
|
||||
// AIS Stream WebSocket proxy.
|
||||
//
|
||||
// Reads AIS_API_KEY from argv or env, opens a wss:// connection to
|
||||
// stream.aisstream.io, subscribes for vessel position reports inside the
|
||||
// active map bounding boxes, and pipes JSON messages to stdout for the
|
||||
// Python backend to ingest.
|
||||
//
|
||||
// Issue #258 — SPKI pinning fallback for upstream cert outages
|
||||
// -------------------------------------------------------------
|
||||
// AISStream uses Let's Encrypt and their renewal pipeline has been observed
|
||||
// to fail (cert expired on 2026-05-20). The naive fix the issue reporter
|
||||
// applied — passing { rejectUnauthorized: false } — turns off TLS validation
|
||||
// entirely, which lets any network attacker MITM the WebSocket and inject
|
||||
// fake ship positions onto the operator's map. Same class as the GDELT
|
||||
// plaintext-HTTP MITM issue (#199).
|
||||
//
|
||||
// Instead, when the normal TLS handshake fails with CERT_HAS_EXPIRED, we
|
||||
// do a custom TLS connection that ignores ONLY the expiry check, capture
|
||||
// the leaf certificate, and compare its public-key SPKI hash against a
|
||||
// pinned list (backend/data/aisstream_spki_pins.json). If the SPKI matches,
|
||||
// the upstream is still the genuine AISStream — just with an expired cert —
|
||||
// and we proceed in "degraded TLS" mode. If the SPKI does not match, we
|
||||
// refuse the connection and log loudly: an actual MITM is in progress.
|
||||
//
|
||||
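// The pinned SPKI survives certificate renewal only while AISStream keeps
|
||||
// reusing the same server key. Whether a Let's Encrypt renewal reuses the key
|
||||
// depends on the ACME client configuration (certbot, for example, needs
|
||||
|
||||
// --reuse-key). The pin list MUST be updated before AISStream rotates the
|
||||
|
||||
// pinned server key.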
|
||||
|
||||
const WebSocket = require('ws');
|
||||
const readline = require('readline');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const tls = require('tls');
|
||||
const crypto = require('crypto');
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
const API_KEY = args[0] || process.env.AIS_API_KEY;
|
||||
@@ -9,6 +41,135 @@ if (!API_KEY) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ── SPKI pin support (issue #258) ─────────────────────────────────────────
|
||||
|
||||
const AIS_HOST = 'stream.aisstream.io';
|
||||
const AIS_PORT = 443;
|
||||
const AIS_WS_URL = `wss://${AIS_HOST}/v0/stream`;
|
||||
|
||||
// Pin file is looked up in several layouts so the same JS works in:
|
||||
// - an operator-relocated layout (path given in the SHADOWBROKER_AIS_PINS
|
||||
// env var; tried first when set)
|
||||
// - the Docker backend image (<script dir>/data/aisstream_spki_pins.json)
|
||||
// - the Tauri desktop runtime (<script dir>/aisstream_spki_pins.json)
|
||||
const PIN_FILE_CANDIDATES = [
|
||||
process.env.SHADOWBROKER_AIS_PINS || '',
|
||||
path.join(__dirname, 'data', 'aisstream_spki_pins.json'),
|
||||
path.join(__dirname, 'aisstream_spki_pins.json'),
|
||||
].filter(Boolean);
|
||||
|
||||
// Embedded fallback. Used when no external pin file is reachable so the
|
||||
// SPKI fallback still works on minimal/portable installs. The external
|
||||
// file (when present) takes priority so operators can update pins without
|
||||
// needing a new build.
|
||||
const EMBEDDED_PINS = {
|
||||
[AIS_HOST]: [
|
||||
// Captured 2026-05-20 from AISStream's leaf cert (Let's Encrypt R12).
|
||||
// Replace when AISStream rotates server keys.
|
||||
'GJ10H0UPgLrO+2d3ZXROR/TXSVFXKUfRC3QEI2ibEg4=',
|
||||
],
|
||||
};
|
||||
|
||||
let aisDegradedMode = false; // surfaced via stdout status_query marker
|
||||
|
||||
/**
 * Load the SPKI pin list for AIS_HOST.
 *
 * Walks PIN_FILE_CANDIDATES in order and returns the pins from the first
 * file that yields at least one non-empty entry under the AIS_HOST key.
 * When no candidate yields pins, a copy of the embedded fallback list is
 * returned (possibly empty).
 *
 * @returns {string[]} Trimmed, non-empty pin strings.
 */
function loadSpkiPins() {
    for (const candidate of PIN_FILE_CANDIDATES) {
        let raw;
        try {
            raw = fs.readFileSync(candidate, 'utf-8');
        } catch (e) {
            // A missing file is expected: not every layout ships every
            // candidate path. Anything else (permissions, I/O) is worth
            // surfacing so the operator knows their pin file was skipped.
            if (e.code !== 'ENOENT' && e.code !== 'ENOTDIR') {
                console.error(
                    `[AIS Proxy] Could not read SPKI pin file ${candidate}: ${e.message}`
                );
            }
            continue;
        }

        let parsed;
        try {
            parsed = JSON.parse(raw);
        } catch (e) {
            // The file exists but is not valid JSON; say so instead of
            // silently falling through to another pin source.
            console.error(
                `[AIS Proxy] Ignoring malformed SPKI pin file ${candidate}: ${e.message}`
            );
            continue;
        }

        const pins = parsed && Array.isArray(parsed[AIS_HOST]) ? parsed[AIS_HOST] : [];
        // Trim BEFORE dropping empties so a whitespace-only entry cannot
        // survive as "" and make this file look like it carries a pin.
        const cleaned = pins
            .filter((p) => typeof p === 'string')
            .map((p) => p.trim())
            .filter((p) => p.length > 0);
        if (cleaned.length > 0) {
            return cleaned;
        }
    }

    const embedded = (EMBEDDED_PINS[AIS_HOST] || []).slice();
    if (embedded.length > 0) {
        console.error(
            '[AIS Proxy] No external SPKI pin file found; using embedded fallback. '
            + `(Set SHADOWBROKER_AIS_PINS or drop ${PIN_FILE_CANDIDATES[1]} to override.)`
        );
    }
    return embedded;
}
|
||||
|
||||
/**
 * Compute the pin hash for a peer certificate object.
 *
 * Hashes `peerCert.pubkey` with SHA-256 and returns the digest as base64.
 * The caller is expected to pass the object obtained from
 * tls.TLSSocket.getPeerCertificate(true).
 *
 * NOTE(review): this treats `pubkey` as the DER-encoded
 * SubjectPublicKeyInfo. Confirm that holds for every key type the upstream
 * may use (e.g. EC certificates) before relying on externally computed pins.
 *
 * @param {object|null} peerCert Peer certificate object, or a falsy value.
 * @returns {string|null} Base64 SHA-256 digest, or null when no key is present.
 */
function spkiHashFromPeerCert(peerCert) {
    const publicKey = peerCert ? peerCert.pubkey : null;
    if (!publicKey) {
        return null;
    }
    const hasher = crypto.createHash('sha256');
    hasher.update(publicKey);
    return hasher.digest('base64');
}
|
||||
|
||||
// Probe the upstream when normal TLS failed with CERT_HAS_EXPIRED. We open
|
||||
// a raw TLS connection with rejectUnauthorized=false ONLY to inspect the
|
||||
// leaf cert; we do NOT use this socket for the actual WebSocket traffic.
|
||||
// Returns { ok: true } if the leaf SPKI matches the pin list, { ok: false }
|
||||
// with a reason otherwise.
|
||||
/**
 * Probe AIS_HOST over raw TLS and compare the leaf certificate's key hash
 * against the configured pin list.
 *
 * The probe socket is opened with rejectUnauthorized=false purely so the
 * handshake completes and the leaf certificate can be inspected; it is
 * closed as soon as the certificate has been read and carries no
 * application data.
 *
 * @returns {Promise<{ok: true, hash: string} | {ok: false, reason: string}>}
 *   Always resolves (never rejects). `ok: true` means the leaf key hash is
 *   in the pin list.
 */
function verifyExpiredCertAgainstPins() {
    return new Promise((resolve) => {
        const fail = (reason) => resolve({ ok: false, reason });

        const pinned = loadSpkiPins();
        if (pinned.length === 0) {
            fail('no SPKI pins configured');
            return;
        }

        const probeOptions = {
            host: AIS_HOST,
            port: AIS_PORT,
            servername: AIS_HOST,
            // Let the handshake finish despite the expired cert; the
            // connection itself is never trusted for application data.
            rejectUnauthorized: false,
        };

        const probe = tls.connect(probeOptions, () => {
            const leaf = probe.getPeerCertificate(true);
            probe.end();

            if (!leaf || Object.keys(leaf).length === 0) {
                fail('no peer certificate returned');
                return;
            }
            // The CN is only compared when the certificate carries a subject.
            if (leaf.subject && leaf.subject.CN !== AIS_HOST) {
                fail(`cert CN mismatch (got ${leaf.subject.CN}, expected ${AIS_HOST})`);
                return;
            }

            const hash = spkiHashFromPeerCert(leaf);
            if (!hash) {
                fail('could not compute SPKI hash from peer cert');
                return;
            }
            if (!pinned.includes(hash)) {
                fail(`SPKI ${hash} not in pin list (possible MITM)`);
                return;
            }
            resolve({ ok: true, hash });
        });

        // Give up if the handshake stalls for 10 seconds.
        probe.setTimeout(10000, () => {
            probe.destroy();
            fail('TLS probe timeout');
        });
        probe.on('error', (err) => {
            fail(`TLS probe error: ${err.message}`);
        });
    });
}
|
||||
|
||||
// ── Subscription state ───────────────────────────────────────────────────
|
||||
|
||||
// Start with global coverage, until frontend updates it
|
||||
let currentBboxes = [[[-90, -180], [90, 180]]];
|
||||
let activeWs = null;
|
||||
@@ -42,14 +203,34 @@ rl.on('line', (line) => {
|
||||
currentBboxes = cmd.bboxes;
|
||||
if (activeWs) sendSub(activeWs); // Resend subscription (swap and replace)
|
||||
}
|
||||
if (cmd.type === "status_query") {
|
||||
// Allow the Python side to probe degraded-mode state by sending
|
||||
// {"type": "status_query"} on stdin. Reply on stdout as a marker.
|
||||
process.stdout.write(JSON.stringify({
|
||||
__ais_proxy_status: { degraded_tls: aisDegradedMode }
|
||||
}) + '\n');
|
||||
}
|
||||
} catch (e) {}
|
||||
});
|
||||
|
||||
function connect() {
|
||||
const ws = new WebSocket('wss://stream.aisstream.io/v0/stream');
|
||||
function attachWsHandlers(ws, { degraded } = { degraded: false }) {
|
||||
activeWs = ws;
|
||||
|
||||
ws.on('open', () => {
|
||||
if (degraded) {
|
||||
console.error(
|
||||
'[AIS Proxy] Connected in DEGRADED TLS MODE — upstream cert is expired '
|
||||
+ 'but SPKI matches the pinned key, so identity is still verified. '
|
||||
+ 'AISStream needs to renew their cert; until then MITM protection '
|
||||
+ 'depends only on the SPKI match. Watch backend logs for resolution.'
|
||||
);
|
||||
aisDegradedMode = true;
|
||||
} else {
|
||||
if (aisDegradedMode) {
|
||||
console.error('[AIS Proxy] Reconnected with full TLS validation — degraded mode cleared.');
|
||||
}
|
||||
aisDegradedMode = false;
|
||||
}
|
||||
sendSub(ws);
|
||||
});
|
||||
|
||||
@@ -61,14 +242,63 @@ function connect() {
|
||||
});
|
||||
|
||||
ws.on('error', (err) => {
|
||||
console.error("WebSocket Proxy Error:", err.message);
|
||||
console.error('WebSocket Proxy Error:', err.message);
|
||||
});
|
||||
|
||||
ws.on('close', () => {
|
||||
activeWs = null;
|
||||
console.error("WebSocket Proxy Closed. Reconnecting in 5s...");
|
||||
console.error('WebSocket Proxy Closed. Reconnecting in 5s...');
|
||||
setTimeout(connect, 5000);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Open the AISStream WebSocket.
 *
 * Path A (normal case): connect with full TLS validation and hand the
 * socket to attachWsHandlers, which logs errors and reconnects on close.
 *
 * Path B (expired upstream cert): when the handshake fails with
 * CERT_HAS_EXPIRED, verify the upstream key against the SPKI pin list and,
 * only if it matches, open a second socket in degraded TLS mode. Any other
 * error keeps the standard reconnect path so real problems are not masked.
 */
function connect() {
    const ws = new WebSocket(AIS_WS_URL);

    let openedOk = false;
    ws.on('open', () => { openedOk = true; });

    ws.on('error', async (err) => {
        if (openedOk || !err || err.code !== 'CERT_HAS_EXPIRED') {
            // attachWsHandlers already logs the error and its close handler
            // schedules the reconnect; logging here too duplicated the line.
            return;
        }

        // This listener was registered before attachWsHandlers' listeners,
        // so it runs first and, up to the first await, synchronously. The
        // 'close' event follows the handshake 'error'; drop the standard
        // close handler now so it does not ALSO schedule connect() in 5s
        // while the fallback below owns the retry. Otherwise every cycle
        // would stack another degraded socket and overwrite activeWs.
        ws.removeAllListeners('close');
        if (activeWs === ws) {
            activeWs = null;
        }

        console.error(
            '[AIS Proxy] Upstream certificate is expired. Verifying SPKI '
            + 'against pinned keys before deciding whether to proceed in '
            + 'degraded mode...'
        );
        const verdict = await verifyExpiredCertAgainstPins();
        if (!verdict.ok) {
            console.error(
                `[AIS Proxy] SPKI verification FAILED (${verdict.reason}). `
                + 'Refusing to connect — this would normally indicate an active '
                + 'MITM attack. If AISStream rotated their server key, update '
                + 'backend/data/aisstream_spki_pins.json with the new SPKI hash.'
            );
            // Schedule a retry — operator may have updated the pin file.
            setTimeout(connect, 60000);
            return;
        }

        console.error(
            `[AIS Proxy] SPKI ${verdict.hash} matches pinned key — `
            + 'identity is verified, proceeding in DEGRADED TLS mode.'
        );
        const degradedWs = new WebSocket(AIS_WS_URL, {
            rejectUnauthorized: false,
        });

        // The probe verified a DIFFERENT connection. Certificate validation
        // is disabled on this socket, so re-check the key on the connection
        // that will actually carry data, before 'open' fires and the
        // subscription is sent. Abort if it is not the key just verified.
        degradedWs.on('upgrade', (res) => {
            const tlsSocket = res && res.socket;
            const peer = tlsSocket && typeof tlsSocket.getPeerCertificate === 'function'
                ? tlsSocket.getPeerCertificate(true)
                : null;
            const liveHash = spkiHashFromPeerCert(peer);
            if (liveHash !== verdict.hash) {
                console.error(
                    `[AIS Proxy] Degraded connection presented SPKI ${liveHash}, `
                    + `expected ${verdict.hash} — aborting (possible MITM).`
                );
                degradedWs.terminate();
            }
        });

        attachWsHandlers(degradedWs, { degraded: true });
    });

    // Wire normal handlers — these apply unless the error handler above
    // takes over and replaces activeWs with the degraded socket.
    attachWsHandlers(ws, { degraded: false });
}
|
||||
|
||||
connect();
|
||||
|
||||
+125
-10
@@ -13,6 +13,7 @@ import hmac
|
||||
import asyncio
|
||||
import hmac as _hmac_mod
|
||||
import hashlib as _hashlib_mod
|
||||
import ipaddress
|
||||
import json as json_mod
|
||||
import logging
|
||||
import time
|
||||
@@ -44,6 +45,7 @@ from services.mesh.mesh_compatibility import (
|
||||
from services.mesh.mesh_crypto import (
|
||||
_derive_peer_key,
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
verify_signature,
|
||||
verify_node_binding,
|
||||
parse_public_key_algo,
|
||||
@@ -235,10 +237,111 @@ def _is_local_or_docker(host: str) -> bool:
|
||||
return host in {"127.0.0.1", "::1", "localhost"}
|
||||
|
||||
|
||||
def _docker_bridge_local_operator_enabled() -> bool:
    """Report whether Docker-bridge local-operator trust is switched on.

    Reads ``SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR`` and treats
    ``1``, ``true``, ``yes`` and ``on`` (case-insensitive, surrounding
    whitespace ignored) as enabled. Unset or any other value is disabled.
    """
    flag = os.environ.get("SHADOWBROKER_TRUST_DOCKER_BRIDGE_LOCAL_OPERATOR", "")
    normalized = str(flag).strip().lower()
    return normalized in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
# Issue #250 (tg12): the previous implementation returned True for any IP
|
||||
# in the entire 172.16.0.0/12 range. Anyone with `docker run` access on
|
||||
# the same daemon could spin up a container that automatically passed
|
||||
# local-operator auth. The fix narrows trust to ONLY connections whose
|
||||
# source IP matches the configured frontend container's hostname.
|
||||
#
|
||||
# Docker DNS resolves both the compose service name (``frontend``) and
|
||||
# the explicit ``container_name`` (``shadowbroker-frontend``) to the
|
||||
# frontend container's bridge IP. We forward-resolve both, cache the
|
||||
# result for 30s, and only trust connections from those exact IPs.
|
||||
#
|
||||
# Operators on shared Docker hosts get the benefit of the narrower
|
||||
# surface. Operators on single-user installs see no behavior change —
|
||||
# their frontend container still resolves and is still trusted.
|
||||
_DOCKER_BRIDGE_TRUST_CACHE: dict = {"ips": frozenset(), "expires": 0.0}
|
||||
_DOCKER_BRIDGE_TRUST_TTL = 30.0
|
||||
|
||||
|
||||
def _trusted_bridge_frontend_hostnames() -> list[str]:
    """Return the container hostnames treated as the trusted frontend.

    The list comes from the ``SHADOWBROKER_TRUSTED_FRONTEND_HOSTS``
    environment variable (comma-separated). When the variable is unset the
    default is ``frontend,shadowbroker-frontend``. Whitespace around each
    entry is stripped and empty entries are dropped.
    """
    configured = os.environ.get(
        "SHADOWBROKER_TRUSTED_FRONTEND_HOSTS",
        "frontend,shadowbroker-frontend",
    )
    hostnames = []
    for piece in str(configured).strip().split(","):
        name = piece.strip()
        if name:
            hostnames.append(name)
    return hostnames
|
||||
|
||||
|
||||
def _resolve_trusted_bridge_ips() -> frozenset[str]:
    """Return the IPs the trusted frontend hostnames currently resolve to.

    Results are kept in the module-level ``_DOCKER_BRIDGE_TRUST_CACHE`` and
    reused until the cached expiry time passes
    (``_DOCKER_BRIDGE_TRUST_TTL`` seconds after the last lookup), so DNS is
    not queried on every call.

    Hostnames that fail to resolve are skipped. If none resolve, the result
    is an empty frozenset (which is cached like any other result), so
    nothing is trusted.
    """
    import socket
    import time as _time

    checked_at = _time.time()
    if _DOCKER_BRIDGE_TRUST_CACHE["expires"] > checked_at:
        return _DOCKER_BRIDGE_TRUST_CACHE["ips"]

    found: set[str] = set()
    for name in _trusted_bridge_frontend_hostnames():
        try:
            # Third element of the result is the list of IPv4 addresses.
            addresses = socket.gethostbyname_ex(name)[2]
        except OSError:  # socket.gaierror is an OSError subclass
            continue
        found.update(addresses)

    snapshot = frozenset(found)
    _DOCKER_BRIDGE_TRUST_CACHE["ips"] = snapshot
    _DOCKER_BRIDGE_TRUST_CACHE["expires"] = checked_at + _DOCKER_BRIDGE_TRUST_TTL
    return snapshot
|
||||
|
||||
|
||||
def _is_docker_bridge_host(host: str) -> bool:
    """Return True only when ``host`` is one of the trusted frontend IPs.

    Args:
        host: Source address of the connection, as a string.

    Returns:
        ``False`` when ``host`` is not a valid IP literal or is not a
        private address; otherwise whether its canonical string form is in
        the set returned by ``_resolve_trusted_bridge_ips()``.

    An IPv4-mapped IPv6 address (``::ffff:a.b.c.d``, which is how an IPv4
    peer appears on a dual-stack listener) is unwrapped to its IPv4 form
    first, so it is classified and compared like the plain IPv4 address.
    """
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        return False

    # Unwrap ::ffff:a.b.c.d so the private-range check and the membership
    # test both operate on the embedded IPv4 address.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        ip = mapped

    # Public IPs are never our frontend container — skip DNS work for them.
    if not ip.is_private:
        return False
    return str(ip) in _resolve_trusted_bridge_ips()
|
||||
|
||||
|
||||
def _is_trusted_local_runtime_host(host: str) -> bool:
    """Decide whether ``host`` counts as a trusted local-runtime peer.

    A host accepted by ``_is_local_or_docker`` is always trusted. Any other
    host is trusted only when the Docker-bridge local-operator option is
    enabled and ``_is_docker_bridge_host`` accepts it.
    """
    if not _is_local_or_docker(host):
        return _docker_bridge_local_operator_enabled() and _is_docker_bridge_host(host)
    return True
|
||||
|
||||
|
||||
def require_local_operator(request: Request):
|
||||
"""Allow local tooling on loopback / Docker internal network, or a valid admin key."""
|
||||
host = (request.client.host or "").lower() if request.client else ""
|
||||
if _is_local_or_docker(host) or (_debug_mode_enabled() and host == "test"):
|
||||
if _is_trusted_local_runtime_host(host) or (_debug_mode_enabled() and host == "test"):
|
||||
return
|
||||
admin_key = _current_admin_key()
|
||||
presented = str(request.headers.get("X-Admin-Key", "") or "").strip()
|
||||
@@ -334,6 +437,8 @@ async def _verify_openclaw_hmac(request: Request) -> bool:
|
||||
# Bind request body: digest the raw bytes so any body tampering
|
||||
# invalidates the signature. Empty/absent bodies hash as sha256(b"").
|
||||
body_bytes = await request.body()
|
||||
# Keep the cached body available for downstream handlers that call request.json().
|
||||
request._body = body_bytes
|
||||
body_digest = _hashlib_mod.sha256(body_bytes).hexdigest()
|
||||
|
||||
# Compute expected signature: HMAC-SHA256(secret, METHOD|path|ts|nonce|body_digest)
|
||||
@@ -362,8 +467,8 @@ async def require_openclaw_or_local(request: Request):
|
||||
"""
|
||||
host = (request.client.host or "").lower() if request.client else ""
|
||||
|
||||
# 1. Local loopback — always allowed
|
||||
if _is_local_or_docker(host) or (_debug_mode_enabled() and host == "test"):
|
||||
# 1. Local runtime path — loopback, plus bundled Docker bridge when compose opts in
|
||||
if _is_trusted_local_runtime_host(host) or (_debug_mode_enabled() and host == "test"):
|
||||
return
|
||||
|
||||
# 2. Admin key — full trust
|
||||
@@ -402,7 +507,9 @@ async def require_openclaw_or_local(request: Request):
|
||||
# Startup validators
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_KNOWN_COMPROMISED_PEER_PUSH_SECRET = "Mv63UvLfwqOEVWeRBXjA8MtFl2nEkkhUlLYVHiX1Zzo"
|
||||
_KNOWN_COMPROMISED_PEER_PUSH_SECRET_SHA256 = (
|
||||
"be05bc75350d6e5d2e154e969c4dfc14bab1e48a9661c64ab7a331e0aa96aea7"
|
||||
)
|
||||
|
||||
|
||||
def _validate_admin_startup() -> None:
|
||||
@@ -492,7 +599,11 @@ def _validate_peer_push_secret() -> None:
|
||||
secret = os.environ.get("MESH_PEER_PUSH_SECRET", "").strip()
|
||||
|
||||
# Replace the known-compromised testnet default automatically
|
||||
if secret == _KNOWN_COMPROMISED_PEER_PUSH_SECRET:
|
||||
if (
|
||||
secret
|
||||
and _hashlib_mod.sha256(secret.encode("utf-8")).hexdigest()
|
||||
== _KNOWN_COMPROMISED_PEER_PUSH_SECRET_SHA256
|
||||
):
|
||||
logger.warning(
|
||||
"MESH_PEER_PUSH_SECRET was the publicly-known testnet default — "
|
||||
"auto-generating a secure replacement."
|
||||
@@ -1293,11 +1404,15 @@ def _peer_hmac_url_from_request(request: Request) -> str:
|
||||
|
||||
|
||||
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests."""
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
if not secret:
|
||||
return False
|
||||
"""Verify HMAC-SHA256 peer authentication on push requests.
|
||||
|
||||
Issue #256: ``resolve_peer_key_for_url`` looks up a per-peer secret
|
||||
in ``MESH_PEER_SECRETS`` first, then falls back to the global
|
||||
``MESH_PEER_PUSH_SECRET``. When a peer URL is listed in the per-peer
|
||||
map, only the listed secret is accepted for it — the global secret
|
||||
is ignored, so any peer that knows only the global secret cannot
|
||||
forge a request claiming to be that peer.
|
||||
"""
|
||||
provided = str(request.headers.get("x-peer-hmac", "") or "").strip()
|
||||
if not provided:
|
||||
return False
|
||||
@@ -1306,7 +1421,7 @@ def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
allowed_peers = set(authenticated_push_peer_urls())
|
||||
if not peer_url or peer_url not in allowed_peers:
|
||||
return False
|
||||
peer_key = _derive_peer_key(secret, peer_url)
|
||||
peer_key = resolve_peer_key_for_url(peer_url)
|
||||
if not peer_key:
|
||||
return False
|
||||
|
||||
|
||||
@@ -1,15 +1,5 @@
|
||||
{
|
||||
"feeds": [
|
||||
{
|
||||
"name": "Reuters",
|
||||
"url": "https://www.reutersagency.com/feed/?best-topics=world",
|
||||
"weight": 5
|
||||
},
|
||||
{
|
||||
"name": "AP News",
|
||||
"url": "https://rsshub.app/apnews/topics/world-news",
|
||||
"weight": 5
|
||||
},
|
||||
{
|
||||
"name": "NPR",
|
||||
"url": "https://feeds.npr.org/1004/rss.xml",
|
||||
@@ -99,6 +89,26 @@
|
||||
"name": "Japan Times",
|
||||
"url": "https://www.japantimes.co.jp/feed/",
|
||||
"weight": 3
|
||||
},
|
||||
{
|
||||
"name": "CSM",
|
||||
"url": "https://www.csmonitor.com/rss/world",
|
||||
"weight": 4
|
||||
},
|
||||
{
|
||||
"name": "PBS NewsHour",
|
||||
"url": "https://www.pbs.org/newshour/feeds/rss/world",
|
||||
"weight": 4
|
||||
},
|
||||
{
|
||||
"name": "France 24",
|
||||
"url": "https://www.france24.com/en/rss",
|
||||
"weight": 4
|
||||
},
|
||||
{
|
||||
"name": "DW",
|
||||
"url": "https://rss.dw.com/xml/rss-en-world",
|
||||
"weight": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"_comment": [
|
||||
"SPKI (Subject Public Key Info) pin list for stream.aisstream.io.",
|
||||
"",
|
||||
"Issue #258: AISStream's Let's Encrypt cert expired on 2026-05-20 due to an",
|
||||
"upstream renewal-pipeline failure. Disabling TLS verification entirely",
|
||||
"would let any network attacker MITM the AIS WebSocket and inject fake",
|
||||
"ship positions onto the operator's map (same class as #199 GDELT MITM).",
|
||||
"Instead we pin the leaf certificate's public-key SPKI hash: if normal",
|
||||
"TLS validation fails specifically with CERT_HAS_EXPIRED, ais_proxy.js",
|
||||
"re-checks the leaf cert's SPKI against this list. A match means the",
|
||||
"key is still the genuine AISStream key (Let's Encrypt renewals keep the",
|
||||
"same key unless rekey is requested), so we proceed in 'degraded TLS'",
|
||||
"mode. A mismatch means a real MITM attempt and we refuse the connection.",
|
||||
"",
|
||||
"Format: each entry is a SHA-256 hash of the DER-encoded SPKI bytes,",
|
||||
"encoded as standard base64 (matches the format produced by:",
|
||||
" openssl s_client -connect host:443 | \\",
|
||||
" openssl x509 -pubkey -noout | openssl pkey -pubin -outform DER | \\",
|
||||
" openssl dgst -sha256 -binary | openssl base64",
|
||||
").",
|
||||
"",
|
||||
"When AISStream rotates their server key (rare — Let's Encrypt renewals",
|
||||
"default to keeping the same key), capture the new SPKI and add it to",
|
||||
"this list BEFORE removing the old one. That way operators on the old",
|
||||
"code still validate against the previous key during the transition."
|
||||
],
|
||||
"stream.aisstream.io": [
|
||||
"GJ10H0UPgLrO+2d3ZXROR/TXSVFXKUfRC3QEI2ibEg4="
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,120 @@
|
||||
{
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026",
|
||||
"note": "One-shot bootstrap for first-run carrier positions. Once carrier_cache.json exists in the runtime data volume, this seed file is never read again. All subsequent updates come from GDELT (and any future sources) and are written to carrier_cache.json. A year from now, your runtime cache reflects whatever your install has observed since first launch — not these snapshot positions."
|
||||
},
|
||||
"carriers": {
|
||||
"CVN-68": {
|
||||
"lat": 47.5535,
|
||||
"lng": -122.6400,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-76": {
|
||||
"lat": 47.5580,
|
||||
"lng": -122.6360,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Decommissioning)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-69": {
|
||||
"lat": 36.9465,
|
||||
"lng": -76.3265,
|
||||
"heading": 0,
|
||||
"desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-78": {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-74": {
|
||||
"lat": 36.98,
|
||||
"lng": -76.43,
|
||||
"heading": 0,
|
||||
"desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-75": {
|
||||
"lat": 36.0,
|
||||
"lng": 15.0,
|
||||
"heading": 0,
|
||||
"desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-77": {
|
||||
"lat": 36.5,
|
||||
"lng": -74.0,
|
||||
"heading": 0,
|
||||
"desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-70": {
|
||||
"lat": 32.6840,
|
||||
"lng": -117.1290,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Homeport)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-71": {
|
||||
"lat": 32.6885,
|
||||
"lng": -117.1280,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-72": {
|
||||
"lat": 20.0,
|
||||
"lng": 64.0,
|
||||
"heading": 0,
|
||||
"desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-73": {
|
||||
"lat": 35.2830,
|
||||
"lng": 139.6700,
|
||||
"heading": 180,
|
||||
"desc": "Yokosuka, Japan (Forward deployed)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"_comment": [
|
||||
"Baked-in SHA-256 digests for known Shadowbroker release archives.",
|
||||
"",
|
||||
"Issue #231: the self-updater previously skipped integrity verification",
|
||||
"entirely whenever the MESH_UPDATE_SHA256 env var was unset (which is the",
|
||||
"default — nothing in the install docs tells operators to set it). That",
|
||||
"made the auto-update a supply-chain RCE on any compromise of the GitHub",
|
||||
"release pipeline.",
|
||||
"",
|
||||
"The fix uses a multi-source verification chain mirroring the Tor bundle",
|
||||
"digest approach in #201:",
|
||||
"",
|
||||
" 1. MESH_UPDATE_SHA256 env var (operator override, preserved)",
|
||||
" 2. SHA256SUMS.txt asset published alongside each release (primary —",
|
||||
" the maintainer's release process already publishes this)",
|
||||
" 3. This baked-in digest list (second line of defense for releases",
|
||||
" missing a SHA256SUMS asset, or when the asset can't be fetched)",
|
||||
" 4. HTTPS-only fallback with a loud warning (preserves auto-update",
|
||||
" flow during transient outages so users don't get stuck)",
|
||||
"",
|
||||
"Mismatch from a source that DID respond is fatal — the update is",
|
||||
"refused and the existing install keeps running. Only the 'no source",
|
||||
"reachable at all' case falls back to HTTPS-only.",
|
||||
"",
|
||||
"Format: each entry is keyed by release tag and maps asset filenames",
|
||||
"to their canonical SHA-256 digest (hex, lowercase). The updater",
|
||||
"compares the locally-computed digest of the downloaded asset against",
|
||||
"the value here.",
|
||||
"",
|
||||
"When the maintainer ships a new release, add its digests here BEFORE",
|
||||
"removing the old ones so operators on the old code still validate",
|
||||
"against the previous entries during the transition."
|
||||
],
|
||||
"v0.9.79": {
|
||||
"ShadowBroker_v0.9.79.zip": "f6877c1d66614525315ea82636ce9f7b41178332c4dbf90d27431a1ea1d9cd47",
|
||||
"ShadowBroker_0.9.79_x64-setup.exe": "f7b676ada45cac7da05868b0a353678c9ee700e3abcf456a7c0c038c36da446f",
|
||||
"ShadowBroker_0.9.79_x64_en-US.msi": "e0713c3cdda184cfbea750bfac0d62a35678fec00847e6476f2cac8e7e42046e"
|
||||
},
|
||||
"v0.9.8": {
|
||||
"ShadowBroker_v0.9.8.zip": "183bb5cd62b9b9349d95df5ef7696cb6ca810ab4b991fa9dab6f898af4c7a175",
|
||||
"ShadowBroker_0.9.8_x64-setup.exe": "94a0309862e9c81c92cdcbfea8eec9dbb97eef19ded82b26217b397defbc810c",
|
||||
"ShadowBroker_0.9.8_x64_en-US.msi": "fe22f9d51e4360d74c18a7250c2fbb9ed4fa4c7a884b3ac0d04a21115466386b"
|
||||
},
|
||||
"v0.9.81": {
|
||||
"ShadowBroker_v0.9.81.zip": "af8c87ccdece8fbb9aadc6be63cce10d3fcba74e6d87ef83289dda6d555fd270",
|
||||
"ShadowBroker_0.9.81_x64-setup.exe": "4e866fa0423c0c2470ed32f4809167a7815dc23ee7762b69e95681c1f3a28250",
|
||||
"ShadowBroker_0.9.81_x64_en-US.msi": "8977c9a1c54e1f0d030436be9c4e3d81d766cc0080699eb747649095f360c7ff"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"_comment": [
|
||||
"Pinned SHA-256 digests for the Tor Expert Bundle archives we know how to install.",
|
||||
"Used as the LAST-RESORT verification source when the upstream .sha256sum file is",
|
||||
"unreachable, MITM'd, or doesn't match what we downloaded. Issue #201.",
|
||||
"",
|
||||
"Each entry is keyed by the archive URL (so multiple platforms / versions",
|
||||
"can share this one file) and contains the canonical SHA-256 we trust.",
|
||||
"",
|
||||
"When the project tests a new Tor release, add its digest here in the same",
|
||||
"PR that bumps _TOR_EXPERT_BUNDLE_URLS. Old entries are kept indefinitely so",
|
||||
"users on older versions keep working — we only ever ADD here, never remove."
|
||||
],
|
||||
"https://dist.torproject.org/torbrowser/15.0.11/tor-expert-bundle-windows-x86_64-15.0.11.tar.gz": "PLACEHOLDER_REPLACE_BEFORE_RELEASE",
|
||||
"https://dist.torproject.org/torbrowser/15.0.8/tor-expert-bundle-windows-x86_64-15.0.8.tar.gz": "PLACEHOLDER_REPLACE_BEFORE_RELEASE"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/bin/sh
set -eu

# Docker named volumes hide files that were baked into /app/data at image build
# time. Seed safe, static data into a fresh volume so first-run Docker installs
# behave like source installs without bundling local runtime secrets.
if [ -d /app/image-data ]; then
    mkdir -p /app/data
    # Only regular files directly inside /app/image-data are considered, and
    # a file is copied only when nothing with that name exists in the volume.
    find /app/image-data -mindepth 1 -maxdepth 1 -type f | while IFS= read -r src; do
        dest="/app/data/$(basename "$src")"
        if [ ! -e "$dest" ]; then
            # Seeding is best-effort: a failed copy must not abort startup
            # under `set -e`, but it is reported instead of silently ignored.
            cp "$src" "$dest" || echo "entrypoint: warning: could not seed $dest from $src" >&2
        fi
    done
fi

# When no allow-listed digest is supplied, derive it from the bundled library
# (if present) and export it for the process started below.
if [ -z "${PRIVACY_CORE_ALLOWED_SHA256:-}" ] && [ -f /app/libprivacy_core.so ]; then
    PRIVACY_CORE_ALLOWED_SHA256="$(sha256sum /app/libprivacy_core.so | awk '{print $1}')"
    export PRIVACY_CORE_ALLOWED_SHA256
fi

# Replace this shell with the requested command.
exec "$@"
|
||||
+105
-1
@@ -1,4 +1,108 @@
|
||||
"""Rate-limit key function for slowapi.
|
||||
|
||||
Issue #287 (tg12): the previous implementation used
|
||||
``slowapi.util.get_remote_address`` which only ever returns
|
||||
``request.client.host``. Behind the bundled Next.js proxy (or any other
|
||||
reverse proxy), every connected operator's ``client.host`` is the
|
||||
frontend container's bridge IP. ``@limiter.limit("120/minute")`` then
|
||||
collapses into one shared bucket for everybody on the same backend —
|
||||
one heavy tab can starve every other operator on the node.
|
||||
|
||||
This module replaces that key function with one that:
|
||||
|
||||
* Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted
|
||||
frontend container (same allowlist used by the Docker bridge
|
||||
local-operator trust path — see ``backend/auth.py`` ``#250``).
|
||||
* Picks the FIRST entry in the XFF chain. That's the client end of
|
||||
the proxy chain, which is the operator we want to bucket on.
|
||||
* Falls back to ``request.client.host`` for any peer that isn't on
|
||||
the trusted-frontend allowlist. Direct hits, unrelated containers,
|
||||
and unknown hosts are bucketed exactly like before — there is no
|
||||
way for an untrusted caller to spoof XFF and steal another
|
||||
operator's rate-limit bucket.
|
||||
|
||||
Single-operator nodes are unaffected: the frontend resolves to one IP,
|
||||
that IP is on the trust list, the XFF header is read, and you get one
|
||||
bucket per operator (i.e. you).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
def _client_host(request: Any) -> str:
    """Return the immediate peer's host as a lowercase string.

    Yields ``""`` when the request has no ``client`` attribute, when it is
    ``None``, or when the client's ``host`` is missing or falsy.
    """
    peer = getattr(request, "client", None)
    if peer is not None:
        return (getattr(peer, "host", "") or "").lower()
    return ""
|
||||
|
||||
|
||||
def _first_forwarded_for(value: str) -> str:
    """Return the first non-empty entry of an ``X-Forwarded-For`` value.

    The de-facto header layout is ``client, proxy1, proxy2, …``, so the
    leading entry is the client end of the chain. Entries are split on
    commas and stripped; blank entries (seen in malformed headers) are
    skipped so the result is never keyed on an empty string by accident.
    The chosen entry is lowercased. Returns ``""`` when every entry is blank.
    """
    entries = (part.strip() for part in value.split(","))
    return next((entry.lower() for entry in entries if entry), "")
|
||||
|
||||
|
||||
def _is_trusted_frontend_peer(host: str) -> bool:
    """Report whether ``host`` is one of the resolved trusted-frontend IPs.

    The ``auth`` helper is imported inside the function so that this module
    can be loaded without importing ``auth`` up front. An empty ``host``, a
    failed import, or a failure while resolving the trusted set all produce
    ``False``.
    """
    if not host:
        return False
    try:
        from auth import _resolve_trusted_bridge_ips as resolve_ips

        allowed = resolve_ips()
    except Exception:  # pragma: no cover - defensive
        return False
    return host in allowed
|
||||
|
||||
|
||||
def shadowbroker_rate_limit_key(request: Any) -> str:
    """slowapi ``key_func`` that honours ``X-Forwarded-For`` for trusted peers only.

    * Immediate peer is a trusted frontend AND the request carries a
      non-blank ``X-Forwarded-For`` → the first entry of that header.
    * Immediate peer is a trusted frontend but the header is absent or
      blank → ``get_remote_address(request)``.
    * Any other peer → ``get_remote_address(request)``; the header is never
      consulted for peers that are not on the trusted-frontend list.
    """
    if _is_trusted_frontend_peer(_client_host(request)):
        headers = getattr(request, "headers", None)
        forwarded = None
        if headers is not None:
            # The second lookup covers header containers that are
            # case-sensitive (e.g. a plain dict).
            forwarded = headers.get("x-forwarded-for") or headers.get("X-Forwarded-For")
        # NOTE(review): the first XFF entry is whatever arrived at the
        # frontend. If the frontend appends to a client-supplied header
        # rather than replacing it, that entry is caller-controlled —
        # confirm how the bundled proxy populates X-Forwarded-For.
        operator = _first_forwarded_for(forwarded) if forwarded else ""
        if operator:
            return operator
    # Untrusted peer, or trusted peer without a usable header: defer to the
    # stock slowapi key function.
    return get_remote_address(request)
|
||||
|
||||
|
||||
limiter = Limiter(key_func=shadowbroker_rate_limit_key)
|
||||
|
||||
+983
-186
File diff suppressed because it is too large
Load Diff
+61
-14
@@ -96,9 +96,10 @@ def _participant_node_enabled() -> bool:
|
||||
def _node_runtime_snapshot() -> dict[str, Any]:
    """Build a point-in-time view of the node's runtime state.

    The snapshot is assembled while holding ``_NODE_RUNTIME_LOCK``. The
    bootstrap and push state dictionaries are shallow-copied, and the copied
    bootstrap state has its ``node_mode`` entry set to the current node mode
    so it agrees with the top-level ``node_mode`` value.

    Returns:
        A dict with the keys ``node_mode``, ``node_enabled``,
        ``private_transport_required``, ``bootstrap``, ``sync_runtime`` and
        ``push_runtime``.
    """
    with _NODE_RUNTIME_LOCK:
        node_mode = _current_node_mode()
        return {
            "node_mode": node_mode,
            "node_enabled": _participant_node_enabled(),
            "private_transport_required": _infonet_private_transport_required(),
            # Each key appears exactly once; a repeated key in a dict literal
            # silently discards the earlier value.
            "bootstrap": {**_NODE_BOOTSTRAP_STATE, "node_mode": node_mode},
            "sync_runtime": get_sync_state().to_dict(),
            "push_runtime": dict(_NODE_PUSH_STATE),
        }
|
||||
@@ -131,6 +132,30 @@ def _set_participant_node_enabled(enabled: bool) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _infonet_private_transport_required() -> bool:
    """Report whether Infonet sync is restricted to private transports.

    Private transport is required unless the ``MESH_INFONET_ALLOW_CLEARNET_SYNC``
    setting is truthy; a missing setting is treated as ``False``.
    """
    from services.config import get_settings

    allow_clearnet = getattr(get_settings(), "MESH_INFONET_ALLOW_CLEARNET_SYNC", False)
    return not bool(allow_clearnet)
|
||||
|
||||
|
||||
def _infonet_private_transport_error() -> str:
    """Return the fixed error text used when clearnet peers are rejected."""
    message = "private Infonet requires onion/RNS transport; no clearnet sync fallback"
    return message
|
||||
|
||||
|
||||
def _is_private_infonet_transport(transport: str) -> bool:
    """Return True when ``transport`` names a private transport.

    The value is coerced to ``str`` (a falsy value becomes ``""``), stripped
    and lowercased before being compared against ``onion`` and ``rns``.
    """
    normalized = str(transport or "").strip().lower()
    return normalized in ("onion", "rns")
|
||||
|
||||
|
||||
def _configured_bootstrap_seed_peer_urls() -> list[str]:
    """Return the configured bootstrap seed peers, parsed into a list.

    ``MESH_BOOTSTRAP_SEED_PEERS`` takes precedence; when it is empty the
    legacy ``MESH_DEFAULT_SYNC_PEERS`` value is used instead. The chosen
    string is handed to ``parse_configured_relay_peers``.
    """
    from services.config import get_settings
    from services.mesh.mesh_router import parse_configured_relay_peers

    settings = get_settings()
    candidates = [
        str(getattr(settings, name, "") or "").strip()
        for name in ("MESH_BOOTSTRAP_SEED_PEERS", "MESH_DEFAULT_SYNC_PEERS")
    ]
    # First non-empty setting wins; fall through to "" when both are empty.
    chosen = next((value for value in candidates if value), "")
    return parse_configured_relay_peers(chosen)
|
||||
|
||||
|
||||
def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_bootstrap_manifest import load_bootstrap_manifest_from_settings
|
||||
@@ -155,14 +180,17 @@ def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
except Exception:
|
||||
store = PeerStore(DEFAULT_PEER_STORE_PATH)
|
||||
|
||||
private_transport_required = _infonet_private_transport_required()
|
||||
operator_peers = configured_relay_peer_urls()
|
||||
default_sync_peers = parse_configured_relay_peers(
|
||||
str(getattr(get_settings(), "MESH_DEFAULT_SYNC_PEERS", "") or "")
|
||||
)
|
||||
bootstrap_seed_peers = _configured_bootstrap_seed_peer_urls()
|
||||
skipped_clearnet_peers = 0
|
||||
for peer_url in operator_peers:
|
||||
transport = peer_transport_kind(peer_url)
|
||||
if not transport:
|
||||
continue
|
||||
if private_transport_required and not _is_private_infonet_transport(transport):
|
||||
skipped_clearnet_peers += 1
|
||||
continue
|
||||
store.upsert(
|
||||
make_sync_peer_record(
|
||||
peer_url=peer_url,
|
||||
@@ -183,19 +211,22 @@ def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
)
|
||||
|
||||
operator_peer_set = set(operator_peers)
|
||||
for peer_url in default_sync_peers:
|
||||
for peer_url in bootstrap_seed_peers:
|
||||
if peer_url in operator_peer_set:
|
||||
continue
|
||||
transport = peer_transport_kind(peer_url)
|
||||
if not transport:
|
||||
continue
|
||||
if private_transport_required and not _is_private_infonet_transport(transport):
|
||||
skipped_clearnet_peers += 1
|
||||
continue
|
||||
store.upsert(
|
||||
make_bootstrap_peer_record(
|
||||
peer_url=peer_url,
|
||||
transport=transport,
|
||||
role="seed",
|
||||
label="ShadowBroker default seed",
|
||||
signer_id="shadowbroker-default",
|
||||
label="ShadowBroker bootstrap seed",
|
||||
signer_id="shadowbroker-bootstrap",
|
||||
now=timestamp,
|
||||
)
|
||||
)
|
||||
@@ -205,8 +236,8 @@ def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
transport=transport,
|
||||
role="seed",
|
||||
source="bundle",
|
||||
label="ShadowBroker default seed",
|
||||
signer_id="shadowbroker-default",
|
||||
label="ShadowBroker bootstrap seed",
|
||||
signer_id="shadowbroker-bootstrap",
|
||||
now=timestamp,
|
||||
)
|
||||
)
|
||||
@@ -220,6 +251,9 @@ def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
|
||||
if manifest is not None:
|
||||
for peer in manifest.peers:
|
||||
if private_transport_required and not _is_private_infonet_transport(peer.transport):
|
||||
skipped_clearnet_peers += 1
|
||||
continue
|
||||
store.upsert(
|
||||
make_bootstrap_peer_record(
|
||||
peer_url=peer.peer_url,
|
||||
@@ -242,17 +276,30 @@ def _refresh_node_peer_store(*, now: float | None = None) -> dict[str, Any]:
|
||||
)
|
||||
)
|
||||
|
||||
if private_transport_required and skipped_clearnet_peers and not bootstrap_error:
|
||||
bootstrap_error = _infonet_private_transport_error()
|
||||
|
||||
store.save()
|
||||
bootstrap_records = store.records_for_bucket("bootstrap")
|
||||
sync_records = store.records_for_bucket("sync")
|
||||
push_records = store.records_for_bucket("push")
|
||||
if private_transport_required:
|
||||
bootstrap_records = [record for record in bootstrap_records if _is_private_infonet_transport(record.transport)]
|
||||
sync_records = [record for record in sync_records if _is_private_infonet_transport(record.transport)]
|
||||
push_records = [record for record in push_records if _is_private_infonet_transport(record.transport)]
|
||||
snapshot = {
|
||||
"node_mode": mode,
|
||||
"private_transport_required": private_transport_required,
|
||||
"skipped_clearnet_peer_count": skipped_clearnet_peers,
|
||||
"manifest_loaded": manifest is not None,
|
||||
"manifest_signer_id": manifest.signer_id if manifest is not None else "",
|
||||
"manifest_valid_until": int(manifest.valid_until or 0) if manifest is not None else 0,
|
||||
"bootstrap_peer_count": len(store.records_for_bucket("bootstrap")),
|
||||
"sync_peer_count": len(store.records_for_bucket("sync")),
|
||||
"push_peer_count": len(store.records_for_bucket("push")),
|
||||
"bootstrap_peer_count": len(bootstrap_records),
|
||||
"sync_peer_count": len(sync_records),
|
||||
"push_peer_count": len(push_records),
|
||||
"operator_peer_count": len(operator_peers),
|
||||
"default_sync_peer_count": len(default_sync_peers),
|
||||
"bootstrap_seed_peer_count": len(bootstrap_seed_peers),
|
||||
"default_sync_peer_count": len(bootstrap_seed_peers),
|
||||
"last_bootstrap_error": bootstrap_error,
|
||||
}
|
||||
with _NODE_RUNTIME_LOCK:
|
||||
|
||||
@@ -7,26 +7,27 @@ py-modules = []
|
||||
|
||||
[project]
|
||||
name = "backend"
|
||||
version = "0.9.7"
|
||||
version = "0.9.81"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"apscheduler==3.10.3",
|
||||
"beautifulsoup4>=4.9.0",
|
||||
"cachetools==5.5.2",
|
||||
"cloudscraper==1.2.71",
|
||||
"cryptography>=41.0.0",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.115.12",
|
||||
"feedparser==6.0.10",
|
||||
"httpx==0.28.1",
|
||||
"playwright==1.50.0",
|
||||
"playwright==1.59.0",
|
||||
"playwright-stealth==1.0.6",
|
||||
"pydantic==2.11.1",
|
||||
"pydantic==2.13.3",
|
||||
"pydantic-settings==2.8.1",
|
||||
"pystac-client==0.8.6",
|
||||
"python-dotenv==1.2.2",
|
||||
"requests==2.31.0",
|
||||
"PySocks==1.7.1",
|
||||
"reverse-geocoder==1.5.1",
|
||||
"sgp4==2.23",
|
||||
"sgp4==2.25",
|
||||
"meshtastic>=2.5.0",
|
||||
"orjson>=3.10.0",
|
||||
"paho-mqtt>=1.6.0,<2.0.0",
|
||||
@@ -34,7 +35,7 @@ dependencies = [
|
||||
"slowapi==0.1.9",
|
||||
"vaderSentiment>=3.3.0",
|
||||
"uvicorn==0.34.0",
|
||||
"yfinance==0.2.54",
|
||||
"yfinance==1.3.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
@@ -42,7 +43,7 @@ dev = ["pytest>=8.3.4", "pytest-asyncio==0.25.0", "ruff>=0.9.0", "black>=24.0.0"
|
||||
|
||||
[tool.ruff.lint]
|
||||
# The current backend carries historical style debt in large legacy modules.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.7 release.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.81 release.
|
||||
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
||||
|
||||
[tool.black]
|
||||
|
||||
+164
-6
@@ -28,13 +28,46 @@ class TimeMachineToggle(BaseModel):
|
||||
enabled: bool
|
||||
|
||||
|
||||
@router.get("/api/settings/api-keys", dependencies=[Depends(require_admin)])
|
||||
class MeshtasticMqttUpdate(BaseModel):
    # Request-body model for a Meshtastic MQTT settings update. Every field
    # is optional and defaults to None.
    # NOTE(review): presumably a None field means "leave this setting
    # unchanged" — confirm against the handler that consumes this model.
    enabled: bool | None = None
    broker: str | None = None
    port: int | None = None
    username: str | None = None
    password: str | None = None
    psk: str | None = None
    include_default_roots: bool | None = None
    # NOTE(review): extra_roots / extra_topics are single strings, not
    # lists — the expected delimiter is not visible here; verify.
    extra_roots: str | None = None
    extra_topics: str | None = None
|
||||
|
||||
|
||||
@router.get("/api/settings/api-keys", dependencies=[Depends(require_local_operator)])
@limiter.limit("30/minute")
async def api_get_keys(request: Request):
    # Local-operator-gated read of the stored API key settings. `request` is
    # accepted because the rate limiter decorator needs it.
    from services import api_settings

    return api_settings.get_api_keys()
|
||||
|
||||
|
||||
@router.put("/api/settings/api-keys", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_save_keys(request: Request):
    # Local-operator-gated write of API key settings.
    #
    # The body must be a JSON object; keys and values are coerced to `str`
    # before being passed to `save_api_keys`. A successful result is returned
    # as-is; every failure is returned as a 400 JSON response.
    # NOTE(review): `str(v)` turns a JSON null into the literal "None" —
    # confirm whether `save_api_keys` expects that or nulls should be
    # rejected/blanked upstream.
    from services.api_settings import save_api_keys

    def _bad_request(payload: dict) -> Response:
        # Shared 400 response shape for every rejection path below.
        return Response(
            content=json_mod.dumps(payload),
            status_code=400,
            media_type="application/json",
        )

    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a malformed body is a client error, not a server fault.
        return _bad_request({"ok": False, "detail": "Request body is not valid JSON."})

    if not isinstance(body, dict):
        return _bad_request({"ok": False, "detail": "Expected a JSON object."})

    result = save_api_keys({str(k): str(v) for k, v in body.items()})
    if result.get("ok"):
        return result
    return _bad_request(result)
|
||||
|
||||
|
||||
@router.get("/api/settings/api-keys/meta")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_keys_meta(request: Request):
|
||||
@@ -49,9 +82,40 @@ async def api_get_keys_meta(request: Request):
|
||||
return get_env_path_info()
|
||||
|
||||
|
||||
@router.get("/api/settings/news-feeds")
|
||||
@router.get(
|
||||
"/api/settings/operator-handle",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("60/minute")
|
||||
async def api_get_operator_handle(request: Request):
|
||||
"""Round 7a: return the per-install operator handle so the frontend
|
||||
can include it in browser-direct third-party API calls (Wikipedia /
|
||||
Wikidata via lib/wikimediaClient). The handle is auto-generated on
|
||||
first use; operators can override it via the OPERATOR_HANDLE setting
|
||||
or the env var of the same name.
|
||||
|
||||
Gated on local-operator: legitimate browser usage goes through the
|
||||
Next.js proxy which auto-attaches the admin key; remote scanners get
|
||||
403. The handle itself isn't a secret (it's sent to every third-party
|
||||
API the operator touches), but admin-gating it matches the rest of
|
||||
the settings endpoints and follows least-privilege.
|
||||
"""
|
||||
from services.network_utils import get_operator_handle
|
||||
return {"handle": get_operator_handle()}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/news-feeds",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_news_feeds(request: Request):
|
||||
"""Issue #252 (tg12): the curated feed inventory is configuration
|
||||
state, not a public data feed. Gated on local-operator so the
|
||||
Tauri shell, the Docker bridge frontend, and any caller with an
|
||||
admin key all see the full list; anonymous LAN/internet callers
|
||||
can no longer enumerate operator source URLs.
|
||||
"""
|
||||
from services.news_feed_config import get_feeds
|
||||
return get_feeds()
|
||||
|
||||
@@ -85,9 +149,18 @@ async def api_reset_news_feeds(request: Request):
|
||||
@router.get("/api/settings/node")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_node_settings(request: Request):
|
||||
"""Issue #243 (tg12): node_mode and node_enabled are operational
|
||||
posture. Anonymous callers receive an empty stub; authenticated
|
||||
callers (local-operator or admin/scoped token) see the full
|
||||
state. See the canonical handler in backend/main.py for the full
|
||||
rationale.
|
||||
"""
|
||||
import asyncio
|
||||
from auth import _scoped_view_authenticated
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
if not _scoped_view_authenticated(request, "node"):
|
||||
return {}
|
||||
return {
|
||||
**data,
|
||||
"node_mode": _current_node_mode(),
|
||||
@@ -99,12 +172,97 @@ async def api_get_node_settings(request: Request):
|
||||
@limiter.limit("10/minute")
|
||||
async def api_set_node_settings(request: Request, body: NodeSettingsUpdate):
|
||||
_refresh_node_peer_store()
|
||||
return _set_participant_node_enabled(bool(body.enabled))
|
||||
if bool(body.enabled):
|
||||
try:
|
||||
from services.transport_lane_isolation import disable_public_mesh_lane
|
||||
|
||||
disable_public_mesh_lane(reason="private_node_enabled")
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to disable public Mesh while enabling private node: %s", exc)
|
||||
result = _set_participant_node_enabled(bool(body.enabled))
|
||||
if bool(body.enabled):
|
||||
try:
|
||||
import main as _main
|
||||
|
||||
_main._kick_public_sync_background("operator_enable")
|
||||
except Exception:
|
||||
logger.debug("Unable to kick Infonet sync after node enable", exc_info=True)
|
||||
return result
|
||||
|
||||
|
||||
@router.get("/api/settings/timemachine")
|
||||
def _meshtastic_runtime_snapshot() -> dict[str, Any]:
|
||||
from services.meshtastic_mqtt_settings import redacted_meshtastic_mqtt_settings
|
||||
from services.sigint_bridge import sigint_grid
|
||||
|
||||
return {
|
||||
**redacted_meshtastic_mqtt_settings(),
|
||||
"runtime": sigint_grid.mesh.status(),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/settings/meshtastic-mqtt", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_meshtastic_mqtt_settings(request: Request):
|
||||
return _meshtastic_runtime_snapshot()
|
||||
|
||||
|
||||
@router.put("/api/settings/meshtastic-mqtt", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_set_meshtastic_mqtt_settings(request: Request, body: MeshtasticMqttUpdate):
|
||||
from services.meshtastic_mqtt_settings import write_meshtastic_mqtt_settings
|
||||
from services.sigint_bridge import sigint_grid
|
||||
|
||||
updates = body.model_dump(exclude_unset=True)
|
||||
# Empty secret fields mean "keep existing"; explicit non-empty values replace.
|
||||
if updates.get("password") == "":
|
||||
updates.pop("password", None)
|
||||
if updates.get("psk") == "":
|
||||
updates.pop("psk", None)
|
||||
|
||||
enabled_requested = updates.get("enabled")
|
||||
settings = write_meshtastic_mqtt_settings(**updates)
|
||||
if isinstance(enabled_requested, bool):
|
||||
logger.info("Meshtastic MQTT settings update: enabled=%s", enabled_requested)
|
||||
|
||||
if enabled_requested is True:
|
||||
# Public MQTT and Wormhole are intentionally mutually exclusive lanes.
|
||||
try:
|
||||
from services.node_settings import write_node_settings
|
||||
from services.wormhole_settings import write_wormhole_settings
|
||||
from services.wormhole_supervisor import disconnect_wormhole
|
||||
|
||||
write_wormhole_settings(enabled=False)
|
||||
disconnect_wormhole(reason="public_mesh_enabled")
|
||||
write_node_settings(enabled=False)
|
||||
_set_participant_node_enabled(False)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to disable private mesh lane while enabling public mesh: %s", exc)
|
||||
|
||||
if bool(settings.get("enabled")):
|
||||
if sigint_grid.mesh.is_running():
|
||||
sigint_grid.mesh.stop()
|
||||
threading.Timer(1.0, sigint_grid.mesh.start).start()
|
||||
else:
|
||||
sigint_grid.mesh.start()
|
||||
else:
|
||||
sigint_grid.mesh.stop()
|
||||
|
||||
return _meshtastic_runtime_snapshot()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/timemachine",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_timemachine_settings(request: Request):
|
||||
"""Issue #253 (tg12): archival-capture posture is operationally
|
||||
sensitive — it tells a remote caller whether this deployment is
|
||||
retaining replayable historical surveillance data. Gated on
|
||||
local-operator so the Tauri shell and Docker bridge frontend
|
||||
still see the toggle state, but anonymous LAN/internet callers
|
||||
can no longer fingerprint Time Machine state.
|
||||
"""
|
||||
import asyncio
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
@@ -261,8 +419,8 @@ async def api_reset_all_agent_credentials(request: Request):
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"new_hmac_secret": new_secret,
|
||||
"detail": "All agent credentials have been reset. Reconfigure your agent with the new credentials.",
|
||||
"hmac_regenerated": True,
|
||||
"detail": "All agent credentials have been reset. Use the agent connection screen to generate or reveal replacement credentials.",
|
||||
**results,
|
||||
}
|
||||
|
||||
|
||||
+205
-46
@@ -18,6 +18,12 @@ from auth import require_local_operator, require_openclaw_or_local
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import latest_data as _latest_data
|
||||
|
||||
|
||||
|
||||
def _ai_intel_user_agent() -> str:
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("ai-intel")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
@@ -379,14 +385,13 @@ async def api_refresh_layer_feed(request: Request, layer_id: str):
|
||||
# Agent Actions endpoint — frontend polls this for UI commands from the agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/api/ai/agent-actions")
|
||||
@router.get("/api/ai/agent-actions", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("120/minute")
|
||||
async def get_agent_actions(request: Request):
|
||||
"""Frontend polls for pending agent display actions (destructive read).
|
||||
|
||||
No auth required — this only contains display directives (show image,
|
||||
fly to location), not sensitive data. The agent authenticates when
|
||||
pushing actions through the command channel.
|
||||
Local operator access is required because polling destructively drains
|
||||
the shared operator action queue.
|
||||
"""
|
||||
actions = pop_agent_actions()
|
||||
return {"ok": True, "actions": actions}
|
||||
@@ -448,7 +453,7 @@ async def ai_satellite_images(
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=10,
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (ai-intel)"},
|
||||
headers={"User-Agent": _ai_intel_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
features = resp.json().get("features", [])
|
||||
@@ -1585,7 +1590,7 @@ async def agent_tool_manifest(request: Request):
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.7",
|
||||
"version": "0.9.81",
|
||||
"access_tier": access_tier,
|
||||
"available_commands": available_commands,
|
||||
"transport": {
|
||||
@@ -2221,7 +2226,7 @@ async def api_capabilities(request: Request):
|
||||
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.7",
|
||||
"version": "0.9.81",
|
||||
"auth": {
|
||||
"method": "HMAC-SHA256",
|
||||
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
||||
@@ -2516,45 +2521,85 @@ async def api_capabilities(request: Request):
|
||||
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
||||
# These endpoints manage the HMAC secret itself, so they MUST require
|
||||
# local operator access to prevent privilege escalation.
|
||||
#
|
||||
# Issue #302 (tg12): pre-fix, GET /api/ai/connect-info had two problems:
|
||||
#
|
||||
# 1. ``?reveal=true`` made the full secret travel through every operator
|
||||
# page-load that opened the Connect modal. Even gated to
|
||||
# ``require_local_operator``, that put the secret into browser
|
||||
# history, dev-tools network panels, browser disk caches, HAR
|
||||
# exports, and screen captures. Every time the modal opened.
|
||||
#
|
||||
# 2. The same GET endpoint auto-bootstrapped (generated + persisted)
|
||||
# the secret on first read. Side effects on a GET are a footgun:
|
||||
# browser prefetchers, mirror tools, and casual curl-from-history
|
||||
# would all silently mint+persist a fresh secret. (Gated, but
|
||||
# still surprising — and noisy in the audit log.)
|
||||
#
|
||||
# Resolution:
|
||||
#
|
||||
# GET /api/ai/connect-info — always returns the MASKED
|
||||
# secret. No ?reveal param.
|
||||
# No auto-bootstrap; if the
|
||||
# secret is missing,
|
||||
# ``hmac_secret_set: false``
|
||||
# tells the frontend to call
|
||||
# /bootstrap.
|
||||
#
|
||||
# POST /api/ai/connect-info/bootstrap — NEW. Generates + persists the
|
||||
# secret if missing. Idempotent.
|
||||
# Returns metadata only, never
|
||||
# the full secret.
|
||||
#
|
||||
# POST /api/ai/connect-info/reveal — NEW. Returns the full secret in
|
||||
# the body with strict
|
||||
# ``Cache-Control: no-store,
|
||||
# no-cache, must-revalidate``
|
||||
# + ``Pragma: no-cache`` so
|
||||
# it does not land in browser
|
||||
# caches. POST means it does
|
||||
# not land in URL history.
|
||||
#
|
||||
# POST /api/ai/connect-info/regenerate — keeps existing one-time-reveal
|
||||
# behavior (regenerate IS a
|
||||
# deliberate destructive action
|
||||
# the operator triggered, so
|
||||
# displaying the new secret
|
||||
# once is the only path that
|
||||
# makes the operation useful).
|
||||
# Same no-store headers added.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
# Cache-Control headers that should accompany every response carrying the
|
||||
# full HMAC secret. Reused across the reveal + regenerate endpoints so a
|
||||
# future refactor that splits or renames them can't forget the headers.
|
||||
_NO_STORE_HEADERS = {
|
||||
"Cache-Control": "no-store, no-cache, must-revalidate, private",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0",
|
||||
}
|
||||
|
||||
The HMAC secret is masked by default. Pass ?reveal=true to see the full key.
|
||||
Private keys are NEVER returned.
|
||||
|
||||
def _mask_hmac_secret(secret: str) -> str:
|
||||
"""Return a fingerprint-style mask (first6 + bullets + last4) suitable
|
||||
for display in the UI before the operator clicks Reveal."""
|
||||
if not secret:
|
||||
return ""
|
||||
if len(secret) > 10:
|
||||
return secret[:6] + "••••••••" + secret[-4:]
|
||||
return "••••••••"
|
||||
|
||||
|
||||
def _connect_info_metadata(settings) -> dict:
|
||||
"""Return everything the Connect modal needs EXCEPT the secret itself.
|
||||
|
||||
Shared between GET /api/ai/connect-info (where the full secret is
|
||||
masked) and POST /api/ai/connect-info/bootstrap (where the operator
|
||||
just generated a secret but we don't return it inline — they have to
|
||||
call /reveal to see it).
|
||||
"""
|
||||
import os
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
|
||||
# Auto-generate if not set
|
||||
if not hmac_secret:
|
||||
hmac_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", hmac_secret)
|
||||
# Clear settings cache so next read picks up the new value
|
||||
get_settings.cache_clear()
|
||||
|
||||
masked = hmac_secret[:6] + "••••••••" + hmac_secret[-4:] if len(hmac_secret) > 10 else "••••••••"
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret if reveal else masked,
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": True,
|
||||
"auto_generated_this_call": not bool(settings.OPENCLAW_HMAC_SECRET or ""),
|
||||
"notes": [
|
||||
"If no HMAC secret exists yet, this endpoint bootstraps one and persists it to .env.",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
"access_tier": access_tier,
|
||||
"trust_model": {
|
||||
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
||||
@@ -2608,24 +2653,138 @@ async def get_connect_info(request: Request, reveal: bool = False):
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def regenerate_hmac_secret(request: Request):
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working."""
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
|
||||
The HMAC secret is always returned as a fingerprint mask
|
||||
(``first6 + bullets + last4``); the full value is only ever served by
|
||||
``POST /api/ai/connect-info/reveal`` (see #302). When the secret has
|
||||
not been bootstrapped yet, ``hmac_secret_set`` is false and the
|
||||
frontend should call ``POST /api/ai/connect-info/bootstrap``.
|
||||
|
||||
Private keys are NEVER returned.
|
||||
"""
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(hmac_secret),
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": False,
|
||||
"notes": [
|
||||
"Call POST /api/ai/connect-info/bootstrap to mint a secret on first use.",
|
||||
"Call POST /api/ai/connect-info/reveal to see the full secret (no-store).",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
**_connect_info_metadata(settings),
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/bootstrap", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def bootstrap_hmac_secret(request: Request):
|
||||
"""Mint and persist the OpenClaw HMAC secret if it isn't already set.
|
||||
|
||||
Idempotent: if a secret already exists, returns ``generated: false``
|
||||
and leaves the existing secret untouched. Never returns the secret
|
||||
value in the response body — the operator calls
|
||||
``POST /api/ai/connect-info/reveal`` to see it.
|
||||
"""
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
existing = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
if existing:
|
||||
return {
|
||||
"ok": True,
|
||||
"generated": False,
|
||||
"hmac_secret_set": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(existing),
|
||||
"detail": "HMAC secret already configured. Use /reveal to see it.",
|
||||
}
|
||||
|
||||
new_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
||||
get_settings.cache_clear()
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": new_secret,
|
||||
"detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
|
||||
"generated": True,
|
||||
"hmac_secret_set": True,
|
||||
"masked_hmac_secret": _mask_hmac_secret(new_secret),
|
||||
"detail": "HMAC secret generated. Call /reveal to copy it into your OpenClaw config.",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/reveal", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def reveal_hmac_secret(request: Request):
|
||||
"""Return the full HMAC secret in the response body.
|
||||
|
||||
POST (not GET) so the secret never lands in URL history, access logs,
|
||||
or browser visit history. Strict ``Cache-Control: no-store`` headers
|
||||
prevent intermediaries from persisting the response. Returns 404 if
|
||||
no secret has been bootstrapped — the frontend should call
|
||||
``POST /api/ai/connect-info/bootstrap`` first.
|
||||
"""
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
if not hmac_secret:
|
||||
raise HTTPException(
|
||||
404,
|
||||
"No HMAC secret configured. Call POST /api/ai/connect-info/bootstrap first.",
|
||||
)
|
||||
return JSONResponse(
|
||||
content={
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret,
|
||||
"masked_hmac_secret": _mask_hmac_secret(hmac_secret),
|
||||
},
|
||||
headers=_NO_STORE_HEADERS,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def regenerate_hmac_secret(request: Request):
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working.
|
||||
|
||||
Returns the new secret in the response body — this is the only
|
||||
operation where the full secret travels back through the response,
|
||||
because regenerating IS a deliberate destructive action the operator
|
||||
triggered and they need to see the new value once to update their
|
||||
OpenClaw configuration. Strict ``Cache-Control: no-store`` headers
|
||||
keep it from being persisted by browser caches, proxies, or HAR
|
||||
capture tooling.
|
||||
"""
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
new_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
|
||||
get_settings.cache_clear()
|
||||
|
||||
return JSONResponse(
|
||||
content={
|
||||
"ok": True,
|
||||
"hmac_secret": new_secret,
|
||||
"masked_hmac_secret": _mask_hmac_secret(new_secret),
|
||||
"detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
|
||||
},
|
||||
headers=_NO_STORE_HEADERS,
|
||||
)
|
||||
|
||||
|
||||
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def set_access_tier(request: Request, body: dict):
|
||||
|
||||
+132
-24
@@ -11,6 +11,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
_CCTV_PROXY_CONNECT_TIMEOUT_S = 2.0
|
||||
|
||||
_CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"s3-eu-west-1.amazonaws.com",
|
||||
"jamcams.tfl.gov.uk",
|
||||
@@ -46,13 +48,20 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"infocar.dgt.es",
|
||||
"informo.madrid.es",
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com",
|
||||
"www.lakecountypassage.com",
|
||||
"webcam.forkswa.com",
|
||||
"webcam.sunmountainlodge.com",
|
||||
"www.nps.gov",
|
||||
"home.lewiscounty.com",
|
||||
"www.seattle.gov",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _CCTVProxyProfile:
|
||||
name: str
|
||||
timeout: tuple = (5.0, 10.0)
|
||||
timeout: tuple = (_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0)
|
||||
cache_seconds: int = 30
|
||||
headers: dict = field(default_factory=dict)
|
||||
|
||||
@@ -80,74 +89,89 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
path = str(parsed.path or "").strip().lower()
|
||||
|
||||
if host in {"jamcams.tfl.gov.uk", "s3-eu-west-1.amazonaws.com"}:
|
||||
return _CCTVProxyProfile(name="tfl-jamcam", timeout=(5.0, 20.0), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="tfl-jamcam", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=15,
|
||||
headers={"Accept": "video/mp4,image/avif,image/webp,image/apng,image/*,*/*;q=0.8", "Referer": "https://tfl.gov.uk/"})
|
||||
if host == "images.data.gov.sg":
|
||||
return _CCTVProxyProfile(name="lta-singapore", timeout=(5.0, 10.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="lta-singapore", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 10.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
if host == "cctv.austinmobility.io":
|
||||
return _CCTVProxyProfile(name="austin-mobility", timeout=(5.0, 8.0), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="austin-mobility", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://data.mobility.austin.gov/", "Origin": "https://data.mobility.austin.gov"})
|
||||
if host == "webcams.nyctmc.org":
|
||||
return _CCTVProxyProfile(name="nyc-dot", timeout=(5.0, 10.0), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="nyc-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 10.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
if host in {"cwwp2.dot.ca.gov", "wzmedia.dot.ca.gov"}:
|
||||
return _CCTVProxyProfile(name="caltrans", timeout=(5.0, 15.0), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="caltrans", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=15,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,image/*,*/*;q=0.8",
|
||||
"Referer": "https://cwwp2.dot.ca.gov/"})
|
||||
if host in {"images.wsdot.wa.gov", "olypen.com", "flyykm.com", "cam.pangbornairport.com"}:
|
||||
return _CCTVProxyProfile(name="wsdot", timeout=(5.0, 12.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="wsdot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
if host in {"www.lakecountypassage.com", "webcam.forkswa.com", "webcam.sunmountainlodge.com", "home.lewiscounty.com", "www.seattle.gov"}:
|
||||
return _CCTVProxyProfile(name="regional-cctv-image", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 10.0), cache_seconds=45,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": f"https://{host}/"})
|
||||
if host == "www.nps.gov":
|
||||
return _CCTVProxyProfile(name="nps-webcam", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 10.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.nps.gov/"})
|
||||
if host in {"navigator-c2c.dot.ga.gov", "navigator-c2c.ga.gov", "navigator-csc.dot.ga.gov"}:
|
||||
read_timeout = 18.0 if "/snapshots/" in path else 12.0
|
||||
return _CCTVProxyProfile(name="gdot-snapshot", timeout=(5.0, read_timeout), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="gdot-snapshot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, read_timeout), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
if host == "511ga.org":
|
||||
return _CCTVProxyProfile(name="gdot-511ga-image", timeout=(5.0, 12.0), cache_seconds=15,
|
||||
return _CCTVProxyProfile(name="gdot-511ga-image", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511ga.org/cctv"})
|
||||
if host.startswith("vss") and host.endswith("dot.ga.gov"):
|
||||
return _CCTVProxyProfile(name="gdot-hls", timeout=(5.0, 20.0), cache_seconds=10,
|
||||
return _CCTVProxyProfile(name="gdot-hls", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=10,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
return _CCTVProxyProfile(name="illinois-dot", timeout=(5.0, 12.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="illinois-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
if host in {"mdotjboss.state.mi.us", "micamerasimages.net"}:
|
||||
return _CCTVProxyProfile(name="michigan-dot", timeout=(5.0, 12.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="michigan-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://mdotjboss.state.mi.us/"})
|
||||
if host in {"publicstreamer1.cotrip.org", "publicstreamer2.cotrip.org",
|
||||
"publicstreamer3.cotrip.org", "publicstreamer4.cotrip.org"}:
|
||||
return _CCTVProxyProfile(name="cotrip-hls", timeout=(5.0, 20.0), cache_seconds=10,
|
||||
return _CCTVProxyProfile(name="cotrip-hls", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=10,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "https://www.cotrip.org/"})
|
||||
if host == "cocam.carsprogram.org":
|
||||
return _CCTVProxyProfile(name="cotrip-preview", timeout=(5.0, 12.0), cache_seconds=20,
|
||||
return _CCTVProxyProfile(name="cotrip-preview", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=20,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.cotrip.org/"})
|
||||
if host in {"tripcheck.com", "www.tripcheck.com"}:
|
||||
return _CCTVProxyProfile(name="odot-tripcheck", timeout=(5.0, 12.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="odot-tripcheck", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
if host == "infocar.dgt.es":
|
||||
return _CCTVProxyProfile(name="dgt-spain", timeout=(5.0, 8.0), cache_seconds=60,
|
||||
return _CCTVProxyProfile(name="dgt-spain", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://infocar.dgt.es/"})
|
||||
if host == "informo.madrid.es":
|
||||
return _CCTVProxyProfile(name="madrid-city", timeout=(5.0, 12.0), cache_seconds=30,
|
||||
return _CCTVProxyProfile(name="madrid-city", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://informo.madrid.es/"})
|
||||
if host == "www.windy.com":
|
||||
return _CCTVProxyProfile(name="windy-webcams", timeout=(5.0, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
return _CCTVProxyProfile(name="generic-cctv", timeout=(5.0, 10.0), cache_seconds=30,
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(name="windy-webcams", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.windy.com/"})
|
||||
return _CCTVProxyProfile(name="generic-cctv", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=30,
|
||||
headers={"Accept": "*/*"})
|
||||
|
||||
|
||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
||||
headers = {"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)", **profile.headers}
|
||||
# Round 7a: per-install operator handle. Mozilla/5.0 prefix retained
|
||||
# because many CCTV endpoints sniff for a browser-like prefix.
|
||||
from services.network_utils import outbound_user_agent
|
||||
headers = {
|
||||
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||
**profile.headers,
|
||||
}
|
||||
range_header = request.headers.get("range")
|
||||
if range_header:
|
||||
headers["Range"] = range_header
|
||||
@@ -173,11 +197,68 @@ def _cctv_response_headers(resp, cache_seconds: int, include_length: bool = True
|
||||
return headers
|
||||
|
||||
|
||||
# Maximum number of redirects we'll follow on the CCTV upstream. Each hop is
|
||||
# re-validated against _cctv_host_allowed() before continuing, so this caps
|
||||
# the redirect-chain SSRF blast radius.
|
||||
_CCTV_MAX_REDIRECTS = 5
|
||||
|
||||
|
||||
def _fetch_cctv_upstream_response(request: Request, target_url: str, profile: _CCTVProxyProfile):
|
||||
"""Fetch an upstream CCTV URL, following redirects manually with host re-validation.
|
||||
|
||||
Why manual redirect following:
|
||||
The original code used ``allow_redirects=True``, which only validated
|
||||
the initial caller-supplied URL host against the allowlist. An attacker
|
||||
could submit an allowed host that 302-redirected to an internal address
|
||||
(e.g. ``http://localhost:8000/api/...`` or a private RFC1918 range),
|
||||
and the backend would dutifully follow and proxy the response — a
|
||||
classic open-redirect-to-SSRF chain.
|
||||
|
||||
With this loop, we re-run ``_cctv_host_allowed()`` on every hop's
|
||||
``Location`` header. A redirect to a host that isn't on the allowlist
|
||||
is rejected with 502 rather than silently followed.
|
||||
"""
|
||||
import requests as _req
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
headers = _cctv_upstream_headers(request, profile)
|
||||
current_url = target_url
|
||||
hops = 0
|
||||
try:
|
||||
resp = _req.get(target_url, timeout=profile.timeout, stream=True, allow_redirects=True, headers=headers)
|
||||
while True:
|
||||
resp = _req.get(
|
||||
current_url,
|
||||
timeout=profile.timeout,
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
headers=headers,
|
||||
)
|
||||
# Redirect handling — re-validate the next-hop host before following.
|
||||
if resp.is_redirect or resp.status_code in (301, 302, 303, 307, 308):
|
||||
location = resp.headers.get("Location", "")
|
||||
resp.close()
|
||||
if hops >= _CCTV_MAX_REDIRECTS:
|
||||
logger.warning(
|
||||
"CCTV upstream redirect chain exceeded limit [%s] %s",
|
||||
profile.name, target_url,
|
||||
)
|
||||
raise HTTPException(status_code=502, detail="Upstream redirect chain too long")
|
||||
if not location:
|
||||
raise HTTPException(status_code=502, detail="Upstream redirect missing Location")
|
||||
next_url = urljoin(current_url, location)
|
||||
next_parsed = urlparse(next_url)
|
||||
if next_parsed.scheme not in ("http", "https"):
|
||||
raise HTTPException(status_code=502, detail="Upstream redirect to non-HTTP scheme")
|
||||
if not _cctv_host_allowed(next_parsed.hostname):
|
||||
logger.warning(
|
||||
"CCTV upstream redirect to disallowed host [%s] %s -> %s",
|
||||
profile.name, current_url, next_url,
|
||||
)
|
||||
raise HTTPException(status_code=502, detail="Upstream redirect to disallowed host")
|
||||
current_url = next_url
|
||||
hops += 1
|
||||
continue
|
||||
break
|
||||
except _req.exceptions.Timeout as exc:
|
||||
logger.warning("CCTV upstream timeout [%s] %s", profile.name, target_url)
|
||||
raise HTTPException(status_code=504, detail="Upstream timeout") from exc
|
||||
@@ -221,13 +302,40 @@ def _rewrite_cctv_hls_playlist(base_url: str, body: str) -> str:
|
||||
return "\n".join(rewritten_lines) + ("\n" if body.endswith("\n") else "")
|
||||
|
||||
|
||||
def _infer_cctv_media_type_from_url(target_url: str, content_type: str) -> str:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
clean_type = str(content_type or "").split(";", 1)[0].strip().lower()
|
||||
if clean_type and clean_type not in {"application/octet-stream", "binary/octet-stream"}:
|
||||
return content_type
|
||||
path = str(urlparse(target_url).path or "").lower()
|
||||
if path.endswith((".jpg", ".jpeg")):
|
||||
return "image/jpeg"
|
||||
if path.endswith(".png"):
|
||||
return "image/png"
|
||||
if path.endswith(".webp"):
|
||||
return "image/webp"
|
||||
if path.endswith(".gif"):
|
||||
return "image/gif"
|
||||
if path.endswith(".mp4"):
|
||||
return "video/mp4"
|
||||
if path.endswith((".m3u8", ".m3u")):
|
||||
return "application/vnd.apple.mpegurl"
|
||||
if path.endswith((".mjpg", ".mjpeg")):
|
||||
return "multipart/x-mixed-replace"
|
||||
return content_type or "application/octet-stream"
|
||||
|
||||
|
||||
def _proxy_cctv_media_response(request: Request, target_url: str):
|
||||
from urllib.parse import urlparse
|
||||
from fastapi.responses import Response
|
||||
parsed = urlparse(target_url)
|
||||
profile = _cctv_proxy_profile_for_url(target_url)
|
||||
resp = _fetch_cctv_upstream_response(request, target_url, profile)
|
||||
content_type = resp.headers.get("Content-Type", "application/octet-stream")
|
||||
content_type = _infer_cctv_media_type_from_url(
|
||||
target_url,
|
||||
resp.headers.get("Content-Type", "application/octet-stream"),
|
||||
)
|
||||
is_hls_playlist = (
|
||||
".m3u8" in str(parsed.path or "").lower()
|
||||
or "mpegurl" in content_type.lower()
|
||||
|
||||
+297
-21
@@ -98,6 +98,88 @@ def _current_etag(prefix: str = "") -> str:
|
||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||
|
||||
|
||||
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||
# filtering. Light reference layers (datacenters, military_bases,
|
||||
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||
# in these sets — they ship world-scale even when bounds are supplied so
|
||||
# panning never reveals an "empty world" of static infrastructure.
|
||||
#
|
||||
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||
# is byte-for-byte identical to the pre-#288 behavior.
|
||||
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"commercial_flights",
|
||||
"military_flights",
|
||||
"private_flights",
|
||||
"private_jets",
|
||||
"tracked_flights",
|
||||
"ships",
|
||||
"cctv",
|
||||
"uavs",
|
||||
"liveuamap",
|
||||
"gps_jamming",
|
||||
"sigint",
|
||||
"trains",
|
||||
)
|
||||
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"gdelt",
|
||||
"firms_fires",
|
||||
"kiwisdr",
|
||||
"scanners",
|
||||
"psk_reporter",
|
||||
)
|
||||
|
||||
|
||||
def _has_full_bbox(s, w, n, e) -> bool:
|
||||
return None not in (s, w, n, e)
|
||||
|
||||
|
||||
def _bbox_etag_suffix(s, w, n, e) -> str:
    """Quantize bbox to 1° before mixing into the ETag.

    The 20% padding inside _bbox_filter already absorbs sub-degree pans;
    quantizing here means small mouse drags don't blow the ETag cache
    on the client. Full-world bounds collapse to a single suffix.

    Returns:
        ``"|bbox=S,W,N,E"`` with south/west floored and north/east ceiled
        to whole degrees, or ``""`` when any bound is missing, cannot be
        quantized, or the window is world-scale.
    """
    if not _has_full_bbox(s, w, n, e):
        return ""
    try:
        ss = math.floor(float(s))
        ww = math.floor(float(w))
        nn = math.ceil(float(n))
        ee = math.ceil(float(e))
    except (TypeError, ValueError, OverflowError):
        # float() rejects non-numeric input with TypeError/ValueError;
        # math.floor/ceil raise ValueError for NaN and OverflowError for
        # +/-inf. All of them mean "no usable bbox" rather than a 500.
        return ""
    # If the requested window covers basically the whole world, treat it as
    # "no bbox" for caching purposes so world-zoomed clients all hit the
    # same ETag and benefit from the existing 304 path.
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    if lng_span >= 300 or lat_span >= 120:
        return ""
    return f"|bbox={ss},{ww},{nn},{ee}"
|
||||
|
||||
|
||||
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
                           s: float, w: float, n: float, e: float) -> dict:
    """Restrict the heavy-key collections of *payload* to a viewport.

    *payload* is modified in place and also returned. Only keys listed in
    *heavy_keys* whose value is a non-empty list are replaced with the
    result of ``_bbox_filter``; every other entry is left alone. Padding
    and antimeridian handling are delegated to ``_bbox_filter``.
    """
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    # A world-scale window is left unfiltered: it saves the CPU and keeps
    # the response shaped exactly like a request without bounds.
    world_scale = lng_span >= 300 or lat_span >= 120
    if not world_scale:
        for layer_key in heavy_keys:
            layer_items = payload.get(layer_key)
            if isinstance(layer_items, list) and layer_items:
                payload[layer_key] = _bbox_filter(layer_items, s, w, n, e)
    return payload
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
@@ -185,11 +267,29 @@ def _bbox_spans(s, w, n, e) -> tuple:
|
||||
return lat_span, max(0.0, lng_span)
|
||||
|
||||
|
||||
def _downsample_points(items: list, max_items: int) -> list:
|
||||
if max_items <= 0 or len(items) <= max_items:
|
||||
def _cap_startup_items(items: list | None, max_items: int) -> list:
|
||||
if not items:
|
||||
return []
|
||||
if len(items) <= max_items:
|
||||
return items
|
||||
step = len(items) / float(max_items)
|
||||
return [items[min(len(items) - 1, int(i * step))] for i in range(max_items)]
|
||||
return items[:max_items]
|
||||
|
||||
|
||||
def _cap_fast_startup_payload(payload: dict) -> dict:
    """Return a shallow copy of *payload* trimmed for the first paint.

    Listed layers are capped with ``_cap_startup_items``, ``cctv`` is
    emptied outright, and ``startup_payload`` is set to True so the
    result is recognisable as the capped variant. The input dict itself
    is not modified.
    """
    # (layer key, item cap); a cap of None means "ship an empty list".
    # Order matters only for keys absent from *payload*, which are appended.
    startup_caps = (
        ("commercial_flights", 800),
        ("private_flights", 300),
        ("private_jets", 150),
        ("ships", 1500),
        ("cctv", None),
        ("sigint", 500),
        ("trains", 100),
    )
    capped = dict(payload)
    for layer_key, limit in startup_caps:
        if limit is None:
            capped[layer_key] = []
        else:
            capped[layer_key] = _cap_startup_items(capped.get(layer_key), limit)
    capped["startup_payload"] = True
    return capped
|
||||
|
||||
|
||||
def _cap_fast_dashboard_payload(payload: dict) -> dict:
|
||||
return payload
|
||||
|
||||
|
||||
def _world_and_continental_scale(has_bbox: bool, s, w, n, e) -> tuple:
|
||||
@@ -248,7 +348,7 @@ async def force_refresh(request: Request):
|
||||
return {"status": "refreshing in background"}
|
||||
|
||||
|
||||
@router.post("/api/ais/feed")
|
||||
@router.post("/api/ais/feed", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def ais_feed(request: Request):
|
||||
"""Accept AIS-catcher HTTP JSON feed (POST decoded AIS messages)."""
|
||||
@@ -264,6 +364,20 @@ async def ais_feed(request: Request):
|
||||
return {"status": "ok", "ingested": count}
|
||||
|
||||
|
||||
@router.get("/api/trail/flight/{icao24}")
@limiter.limit("120/minute")
async def get_selected_flight_trail(icao24: str, request: Request):  # noqa: ARG001
    """Return the trail recorded for the aircraft identified by *icao24*.

    ``request`` is not read by the handler itself (hence the ARG001
    waiver); presumably the rate limiter decorator needs it — TODO confirm.
    """
    from services.fetchers.flights import get_flight_trail

    trail = get_flight_trail(icao24)
    return {"id": icao24, "trail": trail}
|
||||
|
||||
|
||||
@router.get("/api/trail/ship/{mmsi}")
@limiter.limit("120/minute")
async def get_selected_ship_trail(mmsi: int, request: Request):  # noqa: ARG001
    """Return the trail recorded for the vessel identified by *mmsi*.

    ``request`` is not read by the handler itself (hence the ARG001
    waiver); presumably the rate limiter decorator needs it — TODO confirm.
    """
    from services.ais_stream import get_vessel_trail

    trail = get_vessel_trail(mmsi)
    return {"id": mmsi, "trail": trail}
|
||||
|
||||
|
||||
@router.post("/api/viewport")
|
||||
@limiter.limit("60/minute")
|
||||
async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001
|
||||
@@ -272,7 +386,7 @@ async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.post("/api/layers")
|
||||
@router.post("/api/layers", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def update_layers(update: LayerUpdate, request: Request):
|
||||
"""Receive frontend layer toggle state. Starts/stops streams accordingly."""
|
||||
@@ -303,11 +417,30 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
logger.info("AIS stream started (ship layer enabled)")
|
||||
from services.sigint_bridge import sigint_grid
|
||||
if old_mesh and not new_mesh:
|
||||
sigint_grid.mesh.stop()
|
||||
logger.info("Meshtastic MQTT bridge stopped (layer disabled)")
|
||||
try:
|
||||
from services.meshtastic_mqtt_settings import mqtt_bridge_enabled
|
||||
keep_chat_running = mqtt_bridge_enabled()
|
||||
except Exception:
|
||||
keep_chat_running = False
|
||||
if keep_chat_running:
|
||||
logger.info("Meshtastic map layer disabled; MQTT bridge kept running for MeshChat")
|
||||
else:
|
||||
sigint_grid.mesh.stop()
|
||||
logger.info("Meshtastic MQTT bridge stopped (layer disabled)")
|
||||
elif not old_mesh and new_mesh:
|
||||
sigint_grid.mesh.start()
|
||||
logger.info("Meshtastic MQTT bridge started (layer enabled)")
|
||||
try:
|
||||
from services.meshtastic_mqtt_settings import mqtt_bridge_enabled
|
||||
mqtt_enabled = mqtt_bridge_enabled()
|
||||
except Exception:
|
||||
mqtt_enabled = False
|
||||
if mqtt_enabled:
|
||||
sigint_grid.mesh.start()
|
||||
logger.info("Meshtastic MQTT bridge started (layer enabled)")
|
||||
else:
|
||||
logger.info(
|
||||
"Meshtastic layer enabled; MQTT bridge remains disabled "
|
||||
"(set MESH_MQTT_ENABLED=true to participate in the public broker)"
|
||||
)
|
||||
if old_aprs and not new_aprs:
|
||||
sigint_grid.aprs.stop()
|
||||
logger.info("APRS bridge stopped (layer disabled)")
|
||||
@@ -326,16 +459,116 @@ async def live_data(request: Request):
|
||||
return get_latest_data()
|
||||
|
||||
|
||||
@router.get("/api/bootstrap/critical")
@limiter.limit("180/minute")
async def bootstrap_critical(request: Request):
    """Cached first-paint payload for the dashboard.

    This endpoint is intentionally memory-only: no upstream calls, no refresh,
    and a bounded response. It exists so the map and threat feed can paint
    before slower panels and background enrichers finish warming up.

    A request whose ``If-None-Match`` equals the current ETag gets an
    empty 304 instead of the payload.
    """
    etag = _current_etag(prefix="bootstrap|critical|")
    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})

    from services.fetchers._store import (
        active_layers,
        get_latest_data_subset_refs,
        get_source_timestamps_snapshot,
    )

    snapshot = get_latest_data_subset_refs(
        "last_updated", "commercial_flights", "military_flights", "private_flights",
        "private_jets", "tracked_flights", "ships", "uavs", "liveuamap", "gps_jamming",
        "satellites", "satellite_source", "satellite_analysis", "sigint", "sigint_totals",
        "trains", "news", "gdelt", "airports", "threat_level", "trending_markets",
        "correlations", "fimi", "crowdthreat",
    )
    freshness = get_source_timestamps_snapshot()

    def layer_on(layer_key: str):
        # Layers missing from the toggle map count as enabled.
        return active_layers.get(layer_key, True)

    def capped(data_key: str, limit: int, enabled=True) -> list:
        # Disabled layers ship an empty list; enabled ones are capped.
        return _cap_startup_items((snapshot.get(data_key) or []) if enabled else [], limit)

    ship_layer_keys = (
        "ships_military", "ships_cargo", "ships_civilian", "ships_passenger", "ships_tracked_yachts")
    ships_enabled = any(layer_on(key) for key in ship_layer_keys)
    sigint_items = _filter_sigint_by_layers(snapshot.get("sigint") or [], active_layers)

    # Key order is kept stable so the serialized body layout does not change.
    payload = {
        "last_updated": snapshot.get("last_updated"),
        "commercial_flights": capped("commercial_flights", 800, layer_on("flights")),
        "military_flights": capped("military_flights", 300, layer_on("military")),
        "private_flights": capped("private_flights", 300, layer_on("private")),
        "private_jets": capped("private_jets", 150, layer_on("jets")),
        "tracked_flights": capped("tracked_flights", 250, layer_on("tracked")),
        "ships": capped("ships", 1500, ships_enabled),
        "uavs": capped("uavs", 100, layer_on("military")),
        "liveuamap": capped("liveuamap", 300, layer_on("global_incidents")),
        "gps_jamming": capped("gps_jamming", 200, layer_on("gps_jamming")),
        "satellites": capped("satellites", 250, layer_on("satellites")),
        "satellite_source": snapshot.get("satellite_source", "none"),
        "satellite_analysis": (snapshot.get("satellite_analysis") or {}) if layer_on("satellites") else {},
        "sigint": _cap_startup_items(
            sigint_items if (layer_on("sigint_meshtastic") or layer_on("sigint_aprs")) else [],
            500,
        ),
        "sigint_totals": _sigint_totals_for_items(sigint_items),
        "trains": capped("trains", 100, layer_on("trains")),
        "news": capped("news", 30),
        "gdelt": capped("gdelt", 300, layer_on("global_incidents")),
        "airports": capped("airports", 500),
        "threat_level": snapshot.get("threat_level"),
        "trending_markets": capped("trending_markets", 10),
        "correlations": capped("correlations", 50, layer_on("correlations")),
        "fimi": snapshot.get("fimi"),
        "crowdthreat": capped("crowdthreat", 150, layer_on("crowdthreat")),
        "freshness": freshness,
        "bootstrap_ready": True,
        "bootstrap_payload": True,
    }
    body = orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS)
    return Response(
        content=body,
        media_type="application/json",
        headers={"ETag": etag, "Cache-Control": "no-cache"},
    )
|
||||
|
||||
|
||||
@router.get("/api/live-data/fast")
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data_fast(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||
):
|
||||
etag = _current_etag(prefix="fast|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -371,6 +604,15 @@ async def live_data_fast(
|
||||
"trains": (d.get("trains") or []) if active_layers.get("trains", True) else [],
|
||||
"freshness": freshness,
|
||||
}
|
||||
if initial:
|
||||
payload = _cap_fast_startup_payload(payload)
|
||||
else:
|
||||
payload = _cap_fast_dashboard_payload(payload)
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
|
||||
@@ -379,12 +621,13 @@ async def live_data_fast(
|
||||
@limiter.limit("60/minute")
|
||||
async def live_data_slow(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
):
|
||||
etag = _current_etag(prefix="slow|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -438,6 +681,12 @@ async def live_data_slow(
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Static reference layers (datacenters, military bases,
|
||||
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||
# hides the infrastructure overlay the operator already has on screen.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
media_type="application/json",
|
||||
@@ -457,6 +706,23 @@ class OverflightRequest(BaseModel):
|
||||
hours: int = 24
|
||||
|
||||
|
||||
# Issue #202: compute_overflights() is O(catalog_size × timesteps), where
|
||||
# timesteps grows linearly with `hours`. An unbounded `hours` value is a
|
||||
# trivial CPU-exhaustion vector. We clamp silently rather than raising 422 —
|
||||
# the response shape is unchanged, callers asking for too many hours just
|
||||
# get a shorter window, which is friendlier than a hostile error.
|
||||
#
|
||||
# Override via OVERFLIGHTS_MAX_HOURS env var if you legitimately need a
|
||||
# longer window (e.g. a planning use case that wants a full week).
|
||||
def _overflight_max_hours() -> int:
|
||||
import os as _os
|
||||
try:
|
||||
raw = int(str(_os.environ.get("OVERFLIGHTS_MAX_HOURS", "72")).strip())
|
||||
except (TypeError, ValueError):
|
||||
raw = 72
|
||||
return max(1, raw)
|
||||
|
||||
|
||||
@router.post("/api/satellites/overflights")
|
||||
@limiter.limit("10/minute")
|
||||
async def satellite_overflights(request: Request, body: OverflightRequest):
|
||||
@@ -465,5 +731,15 @@ async def satellite_overflights(request: Request, body: OverflightRequest):
|
||||
if not gp_data:
|
||||
return JSONResponse({"total": 0, "by_mission": {}, "satellites": [], "error": "No GP data cached yet"})
|
||||
bbox = {"s": body.s, "w": body.w, "n": body.n, "e": body.e}
|
||||
result = compute_overflights(gp_data, bbox, hours=body.hours)
|
||||
|
||||
# Silent clamp — see comment on _overflight_max_hours().
|
||||
requested_hours = max(1, int(body.hours or 0))
|
||||
effective_hours = min(requested_hours, _overflight_max_hours())
|
||||
|
||||
result = compute_overflights(gp_data, bbox, hours=effective_hours)
|
||||
# If we clamped, surface the effective window in the response so the
|
||||
# caller can detect it if they care, without it being an error.
|
||||
if isinstance(result, dict) and effective_hours != requested_hours:
|
||||
result.setdefault("requested_hours", requested_hours)
|
||||
result.setdefault("effective_hours", effective_hours)
|
||||
return JSONResponse(result)
|
||||
|
||||
@@ -8,7 +8,7 @@ from services.data_fetcher import get_latest_data
|
||||
from services.schemas import HealthResponse
|
||||
import os
|
||||
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.7")
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.81")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -54,6 +54,37 @@ async def health_check(request: Request):
|
||||
top_status = "error"
|
||||
elif slo_summary.get("yellow", 0) > 0:
|
||||
top_status = "degraded"
|
||||
|
||||
# Issue #258: surface AIS proxy degraded TLS state so operators can see
|
||||
# when the SPKI-pinned fallback is in effect. The data plane keeps
|
||||
# flowing (this is by design — see ais_proxy.js comments) but observers
|
||||
# who care about MITM-protection posture deserve a visible signal.
|
||||
#
|
||||
# Plus connectivity health (added 2026-05-23 when stream.aisstream.io
|
||||
# went fully offline): ``connected`` tells the frontend whether ship
|
||||
# data is actually flowing. When false, a banner explains that ships
|
||||
# are unavailable due to an upstream outage — better than the user
|
||||
# silently seeing an empty ocean and assuming we broke something.
|
||||
ais_status: dict = {}
|
||||
try:
|
||||
from services.ais_stream import ais_proxy_status
|
||||
ais_status = ais_proxy_status() or {}
|
||||
except Exception:
|
||||
ais_status = {}
|
||||
if ais_status.get("degraded_tls") and top_status == "ok":
|
||||
# Don't override a worse top-level status if SLOs already failed,
|
||||
# but escalate ok -> degraded so the field surfaces in dashboards.
|
||||
top_status = "degraded"
|
||||
# AIS_API_KEY not configured is "feature off", not "system broken" —
|
||||
# so we only escalate when the operator opted into AIS (key set) AND
|
||||
# the stream is currently offline.
|
||||
if (
|
||||
os.environ.get("AIS_API_KEY")
|
||||
and ais_status.get("connected") is False
|
||||
and top_status == "ok"
|
||||
):
|
||||
top_status = "degraded"
|
||||
|
||||
return {
|
||||
"status": top_status,
|
||||
"version": _get_app_version(),
|
||||
@@ -76,6 +107,7 @@ async def health_check(request: Request):
|
||||
"uptime_seconds": round(_time_mod.time() - _get_start_time()),
|
||||
"slo": slo_statuses,
|
||||
"slo_summary": slo_summary,
|
||||
"ais_proxy": ais_status,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -223,11 +223,21 @@ async def oracle_markets_more(request: Request, category: str = "NEWS", offset:
|
||||
"has_more": offset + limit < len(cat_markets), "total": len(cat_markets)}
|
||||
|
||||
|
||||
@router.post("/api/mesh/oracle/resolve")
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve(request: Request):
|
||||
"""Resolve a prediction market."""
|
||||
"""Resolve a prediction market.
|
||||
|
||||
Issue #240 (tg12): requires admin authentication. The
|
||||
``mesh_write_exempt`` decorator below is **metadata only** — it tags
|
||||
the route as not requiring a mesh signed-write envelope, it does
|
||||
NOT itself enforce caller authorization. The ``Depends(require_admin)``
|
||||
on the route decorator is what actually gates access.
|
||||
"""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
body = await request.json()
|
||||
market_title = body.get("market_title", "")
|
||||
@@ -327,11 +337,18 @@ async def oracle_predictions(request: Request, node_id: str = ""):
|
||||
active_predictions, authenticated=_scoped_view_authenticated(request, "mesh.audit"))
|
||||
|
||||
|
||||
@router.post("/api/mesh/oracle/resolve-stakes")
|
||||
@router.post(
|
||||
"/api/mesh/oracle/resolve-stakes",
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
@limiter.limit("5/minute")
|
||||
@mesh_write_exempt(MeshWriteExemption.ADMIN_CONTROL)
|
||||
async def oracle_resolve_stakes(request: Request):
|
||||
"""Resolve all expired stake contests."""
|
||||
"""Resolve all expired stake contests.
|
||||
|
||||
Issue #241 (tg12): requires admin authentication. See the note on
|
||||
``oracle_resolve`` above — ``mesh_write_exempt`` is metadata only.
|
||||
"""
|
||||
from services.mesh.mesh_oracle import oracle_ledger
|
||||
resolutions = oracle_ledger.resolve_expired_stakes()
|
||||
return {"ok": True, "resolutions": resolutions, "count": len(resolutions)}
|
||||
|
||||
@@ -55,6 +55,12 @@ def _hydrate_gate_store_from_chain(events: list) -> int:
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list) -> int:
    """Forward *events* to ``main._hydrate_dm_relay_from_chain``.

    Returns that function's result coerced to ``int``. ``main`` is
    imported at call time rather than at module load — presumably to
    avoid a circular import; TODO confirm.
    """
    import main as _m

    hydrated = _m._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def infonet_peer_push(request: Request):
|
||||
@@ -82,9 +88,68 @@ async def infonet_peer_push(request: Request):
|
||||
return {"ok": True, "accepted": 0, "duplicates": 0, "rejected": []}
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@router.post("/api/mesh/dm/replicate-envelope")
@limiter.limit("60/minute")
async def dm_replicate_envelope(request: Request):
    """Accept a DM envelope replicated from a peer relay (cross-node mailbox).

    Companion endpoint to ``DMRelay.replicate_to_peers`` (outbound, in
    ``mesh_dm_relay.py``). The sender's relay POSTs an encrypted DM
    envelope here after a successful local ``deposit``; this endpoint
    re-enforces the per-(sender, recipient) anti-spam cap and stores
    the envelope in the local mailbox if accepted.

    The cap is the network rule: a hostile sender's relay can spool
    extras locally, but every honest peer enforces the cap on inbound
    replication. Recipient polling from any honest peer therefore
    never sees more than ``MESH_DM_PENDING_PER_SENDER_LIMIT`` pending
    from any one sender, no matter how many spam attempts were tried.

    Same HMAC auth pattern as ``infonet_peer_push`` and ``gate_peer_push``.

    Responses:
        413 — the declared or the actual body size exceeds 64 KB.
        403 — the peer HMAC is missing or invalid.
        400 — the body is not valid JSON.
        200 with ``{"ok": False, ...}`` — the body carries no ``envelope``
        object (including a JSON body that is not an object at all).
        Otherwise the result of ``dm_relay.accept_replica`` is returned.
    """
    # DM envelopes are bounded by MESH_DM_MAX_MSG_BYTES + envelope
    # overhead; 64 KB is a generous ceiling.
    max_body_bytes = 65_536

    def _too_large():
        return Response(
            content='{"ok":false,"detail":"Request body too large (max 64KB)"}',
            status_code=413, media_type="application/json",
        )

    # Cheap early rejection based on the declared size, before reading.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            if int(content_length) > max_body_bytes:
                return _too_large()
        except (ValueError, TypeError):
            # An unparseable header is ignored here; the real size is
            # checked right after the body has been read.
            pass
    body_bytes = await request.body()
    # The header can be absent (e.g. chunked transfer) or wrong, so the
    # ceiling is enforced on the bytes actually received as well.
    if len(body_bytes) > max_body_bytes:
        return _too_large()
    if not _verify_peer_push_hmac(request, body_bytes):
        return Response(
            content='{"ok":false,"detail":"Invalid or missing peer HMAC"}',
            status_code=403, media_type="application/json",
        )
    try:
        body = json_mod.loads(body_bytes or b"{}")
    except (ValueError, TypeError):
        return Response(
            content='{"ok":false,"detail":"Invalid JSON body"}',
            status_code=400, media_type="application/json",
        )
    # Valid JSON need not be an object (e.g. a list or a bare string);
    # treat that like a missing envelope instead of failing on .get().
    envelope = body.get("envelope") if isinstance(body, dict) else None
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}

    originating_peer = _peer_hmac_url_from_request(request) or ""

    from services.mesh.mesh_dm_relay import dm_relay
    result = dm_relay.accept_replica(
        envelope=envelope,
        originating_peer_url=originating_peer,
    )
    return result
|
||||
|
||||
|
||||
@router.post("/api/mesh/gate/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def gate_peer_push(request: Request):
|
||||
|
||||
+178
-16
@@ -65,6 +65,7 @@ from services.mesh.mesh_signed_events import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
_INFONET_SYNC_RATE_LIMIT = "600/minute"
|
||||
|
||||
|
||||
def _signed_body(request: Request) -> dict[str, Any]:
|
||||
@@ -263,6 +264,19 @@ def _redact_public_event(event: dict) -> dict:
|
||||
return _redact_vote_gate(_redact_key_rotate_payload(_redact_gate_metadata(event)))
|
||||
|
||||
|
||||
def _infonet_private_transport_required() -> bool:
    """Return ``main._infonet_private_transport_required()`` coerced to ``bool``.

    ``main`` is imported at call time rather than at module import.
    NOTE(review): presumably to sidestep a circular import — confirm.
    """
    import main as _main_module

    required = _main_module._infonet_private_transport_required()
    return bool(required)
|
||||
|
||||
|
||||
def _infonet_sync_response_events(events: list[dict], request=None) -> list[dict]:
    """Build the sync event surface for the current transport policy.

    Forwards ``events`` and ``request`` unchanged to
    ``main._infonet_sync_response_events`` and returns whatever it produces.
    """
    import main as _main_module

    build_surface = _main_module._infonet_sync_response_events
    return build_surface(events, request=request)
|
||||
|
||||
|
||||
def _trusted_gate_reply_to(event: dict) -> str:
|
||||
if not isinstance(event, dict):
|
||||
return ""
|
||||
@@ -574,6 +588,12 @@ def _hydrate_gate_store_from_chain(events: list[dict]) -> int:
|
||||
pass
|
||||
return count
|
||||
|
||||
|
||||
def _hydrate_dm_relay_from_chain(events: list[dict]) -> int:
    """Pass ``events`` to ``main._hydrate_dm_relay_from_chain``; return its result as ``int``."""
    import main as _main_module

    hydrated = _main_module._hydrate_dm_relay_from_chain(events)
    return int(hydrated)
|
||||
|
||||
# --- Safe type helpers ---
|
||||
|
||||
def _safe_int(val, default=0):
|
||||
@@ -721,9 +741,11 @@ async def mesh_send(request: Request):
|
||||
any_ok = any(r.ok for r in results)
|
||||
|
||||
# ─── Mirror to Meshtastic bridge feed ────────────────────────
|
||||
# The MQTT broker won't echo our own publishes back to our subscriber,
|
||||
# so inject successfully-sent messages into the bridge's deque directly.
|
||||
if any_ok and envelope.routed_via == "meshtastic":
|
||||
# The MQTT broker won't echo our own publishes back to our subscriber, so
|
||||
# inject successfully-sent channel broadcasts into the bridge directly.
|
||||
# Node-targeted packets must not appear in the public channel feed.
|
||||
is_direct_destination = MeshtasticTransport._parse_node_id(destination) is not None
|
||||
if any_ok and envelope.routed_via == "meshtastic" and not is_direct_destination:
|
||||
try:
|
||||
from services.sigint_bridge import sigint_grid
|
||||
|
||||
@@ -734,7 +756,7 @@ async def mesh_send(request: Request):
|
||||
bridge.messages.appendleft(
|
||||
{
|
||||
"from": MeshtasticTransport.mesh_address_for_sender(node_id),
|
||||
"to": destination if MeshtasticTransport._parse_node_id(destination) is not None else "broadcast",
|
||||
"to": "broadcast",
|
||||
"text": message,
|
||||
"region": credentials.get("mesh_region", "US"),
|
||||
"channel": body.get("channel", "LongFast"),
|
||||
@@ -750,6 +772,122 @@ async def mesh_send(request: Request):
|
||||
"event_id": "",
|
||||
"routed_via": envelope.routed_via,
|
||||
"route_reason": envelope.route_reason,
|
||||
"direct": is_direct_destination,
|
||||
"channel_echo": not is_direct_destination,
|
||||
"results": [r.to_dict() for r in results],
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/mesh/meshtastic/send", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
@mesh_write_exempt(MeshWriteExemption.LOCAL_OPERATOR_ONLY)
async def meshtastic_public_send(request: Request):
    """Local public-MQTT send path for standalone Meshtastic-style identities.

    Expects a JSON object with ``message`` (required) and ``sender_id`` (must
    parse as a Meshtastic node id), plus the optional fields ``destination``
    (default ``"broadcast"``), ``channel`` (default ``"LongFast"``),
    ``priority`` (default ``"normal"``), ``payload_type`` (default ``"text"``),
    ``mesh_region`` (default ``"US"``) and ``ephemeral``.

    Returns:
        ``{"ok": False, "detail": ...}`` when the body is not a JSON object,
        a required field is missing/invalid, the MQTT bridge is disabled, the
        message exceeds the byte limit for its payload type, or the sender is
        throttled. Otherwise a dict with ``ok``, ``message_id``, ``event_id``,
        ``routed_via``, ``route_reason``, ``direct``, ``channel_echo`` and the
        per-transport ``results``.
    """
    # A malformed or non-object body would otherwise crash on ``body.get``;
    # answer with the same ok/detail shape the other validation failures use.
    try:
        body = await request.json()
    except ValueError:
        return {"ok": False, "detail": "Invalid JSON body"}
    if not isinstance(body, dict):
        return {"ok": False, "detail": "Request body must be a JSON object"}

    destination = str(body.get("destination", "") or "").strip() or "broadcast"
    message = str(body.get("message", "") or "")
    sender_id = str(body.get("sender_id", "") or "").strip().lower()
    if not message:
        return {"ok": False, "detail": "Missing required field: message"}

    from services.mesh.mesh_router import (
        MeshEnvelope,
        MeshtasticTransport,
        Priority,
        TransportResult,
        mesh_router,
    )
    from services.meshtastic_mqtt_settings import mqtt_bridge_enabled

    if MeshtasticTransport._parse_node_id(sender_id) is None:
        return {"ok": False, "detail": "Missing or invalid public Meshtastic address"}
    if not mqtt_bridge_enabled():
        return {"ok": False, "detail": "Meshtastic MQTT bridge is disabled"}

    payload_bytes = len(message.encode("utf-8"))
    payload_type = str(body.get("payload_type", "text") or "text")
    max_bytes = _BYTE_LIMITS.get(payload_type, 200)
    if payload_bytes > max_bytes:
        return {
            "ok": False,
            "detail": f"Message too long ({payload_bytes} bytes). Maximum: {max_bytes} bytes for {payload_type} messages.",
        }

    priority_str = str(body.get("priority", "normal") or "normal").lower()
    throttle_ok, throttle_reason = _check_throttle(sender_id, priority_str, "meshtastic")
    if not throttle_ok:
        return {"ok": False, "detail": throttle_reason}

    priority_map = {
        "emergency": Priority.EMERGENCY,
        "high": Priority.HIGH,
        "normal": Priority.NORMAL,
        "low": Priority.LOW,
    }
    # Unknown priority strings fall back to NORMAL.
    priority = priority_map.get(priority_str, Priority.NORMAL)
    channel = str(body.get("channel", "LongFast") or "LongFast")
    mesh_region = str(body.get("mesh_region", "US") or "US")
    envelope = MeshEnvelope(
        sender_id=sender_id,
        destination=destination,
        channel=channel,
        priority=priority,
        payload=message,
        ephemeral=bool(body.get("ephemeral", False)),
        trust_tier="public_degraded",
    )

    # Derived once from the destination: a parseable node id means the packet
    # is node-targeted rather than a channel broadcast.
    is_direct_destination = MeshtasticTransport._parse_node_id(destination) is not None

    if not mesh_router.meshtastic.can_reach(envelope):
        results = [TransportResult(False, "meshtastic", "Message exceeds Meshtastic payload limit")]
    else:
        cb_ok, cb_reason = mesh_router.breakers["meshtastic"].check_and_record(envelope.priority)
        if not cb_ok:
            results = [TransportResult(False, "meshtastic", cb_reason)]
        else:
            envelope.route_reason = (
                "Local public Meshtastic MQTT path"
                if not is_direct_destination
                else "Local public Meshtastic direct node path"
            )
            credentials = {"mesh_region": mesh_region}
            result = mesh_router.meshtastic.send(envelope, credentials)
            if result.ok:
                envelope.routed_via = mesh_router.meshtastic.NAME
            results = [result]

    any_ok = any(r.ok for r in results)
    # Only successful channel broadcasts are mirrored into the bridge feed;
    # node-targeted sends are deliberately left out of it.
    if any_ok and envelope.routed_via == "meshtastic" and not is_direct_destination:
        try:
            from datetime import datetime, timezone

            from services.sigint_bridge import sigint_grid

            bridge = sigint_grid.mesh
            if bridge:
                # Same "naive UTC ISO string + Z" format as before, built
                # without the deprecated ``datetime.utcnow()``.
                utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
                record = {
                    "from": MeshtasticTransport.mesh_address_for_sender(sender_id),
                    "to": "broadcast",
                    "text": message,
                    "region": mesh_region,
                    "root": mesh_region,
                    "channel": channel,
                    "timestamp": utc_now.isoformat() + "Z",
                }
                append_text = getattr(bridge, "append_text_message", None)
                if callable(append_text):
                    append_text(record)
                else:
                    bridge.messages.appendleft(record)
        except Exception:
            # Mirroring is best-effort and must never fail the send, but leave
            # a trace instead of discarding the error silently.
            logger.debug("Meshtastic bridge mirror failed", exc_info=True)

    return {
        "ok": any_ok,
        "message_id": envelope.message_id,
        "event_id": "",
        "routed_via": envelope.routed_via,
        "route_reason": envelope.route_reason,
        "direct": is_direct_destination,
        "channel_echo": not is_direct_destination,
        "results": [r.to_dict() for r in results],
    }
|
||||
|
||||
@@ -848,6 +986,7 @@ async def mesh_messages(
|
||||
root: str = "",
|
||||
channel: str = "",
|
||||
limit: int = 30,
|
||||
include_direct: bool = False,
|
||||
):
|
||||
"""Get recent Meshtastic text messages from the MQTT bridge."""
|
||||
from services.sigint_bridge import sigint_grid
|
||||
@@ -869,6 +1008,12 @@ async def mesh_messages(
|
||||
msgs = [m for m in msgs if m.get("root", "").upper() == root_filter]
|
||||
if channel:
|
||||
msgs = [m for m in msgs if m.get("channel", "").lower() == channel.lower()]
|
||||
if not include_direct:
|
||||
msgs = [
|
||||
m
|
||||
for m in msgs
|
||||
if str(m.get("to") or "broadcast").strip().lower() in {"", "broadcast", "^all"}
|
||||
]
|
||||
return msgs[: min(limit, 100)]
|
||||
|
||||
|
||||
@@ -1342,25 +1487,37 @@ def _submit_gate_message_envelope(request: Request, gate_id: str, body: dict[str
|
||||
@router.get("/api/mesh/infonet/status")
|
||||
@limiter.limit("30/minute")
|
||||
async def infonet_status(request: Request, verify_signatures: bool = False):
|
||||
"""Get Infonet metadata — event counts, head hash, chain size."""
|
||||
"""Get Infonet metadata — event counts, head hash, chain size.
|
||||
|
||||
The ``verify_signatures`` query parameter is honored ONLY when the
|
||||
caller has authenticated via scoped auth or local-operator credentials.
|
||||
Verifying every signature in a long chain is O(n_events) work — letting
|
||||
anonymous callers trigger it is a DoS surface (issue #207). For
|
||||
anonymous callers we silently fall back to the cheap path; the response
|
||||
structure is identical so legitimate frontends see no behavior change.
|
||||
"""
|
||||
from services.mesh.mesh_hashchain import infonet
|
||||
from services.wormhole_supervisor import get_wormhole_state
|
||||
|
||||
# Silently downgrade for unauthenticated callers — no error surfaced.
|
||||
authenticated = _scoped_view_authenticated(request, "mesh.audit")
|
||||
effective_verify_signatures = bool(verify_signatures) and authenticated
|
||||
|
||||
info = infonet.get_info()
|
||||
valid, reason = infonet.validate_chain(verify_signatures=verify_signatures)
|
||||
valid, reason = infonet.validate_chain(verify_signatures=effective_verify_signatures)
|
||||
try:
|
||||
wormhole = get_wormhole_state()
|
||||
except Exception:
|
||||
wormhole = {"configured": False, "ready": False, "rns_ready": False}
|
||||
info["valid"] = valid
|
||||
info["validation"] = reason
|
||||
info["verify_signatures"] = verify_signatures
|
||||
info["verify_signatures"] = effective_verify_signatures
|
||||
info["private_lane_tier"] = _current_private_lane_tier(wormhole)
|
||||
info["private_lane_policy"] = _private_infonet_policy_snapshot()
|
||||
info.update(_node_runtime_snapshot())
|
||||
return _redact_private_lane_control_fields(
|
||||
info,
|
||||
authenticated=_scoped_view_authenticated(request, "mesh.audit"),
|
||||
authenticated=authenticated,
|
||||
)
|
||||
|
||||
|
||||
@@ -1394,7 +1551,7 @@ async def infonet_locator(request: Request, limit: int = Query(32, ge=4, le=128)
|
||||
|
||||
|
||||
@router.post("/api/mesh/infonet/sync")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
@mesh_write_exempt(MeshWriteExemption.PEER_GOSSIP)
|
||||
async def infonet_sync_post(
|
||||
request: Request,
|
||||
@@ -1447,8 +1604,7 @@ async def infonet_sync_post(
|
||||
elif matched_hash == GENESIS_HASH and len(locator) > 1:
|
||||
forked = True
|
||||
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
|
||||
response = {
|
||||
"events": events,
|
||||
@@ -1509,7 +1665,7 @@ async def mesh_rns_status(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/mesh/infonet/sync")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit(_INFONET_SYNC_RATE_LIMIT)
|
||||
async def infonet_sync(
|
||||
request: Request,
|
||||
after_hash: str = "",
|
||||
@@ -1547,8 +1703,7 @@ async def infonet_sync(
|
||||
)
|
||||
base = after_hash or GENESIS_HASH
|
||||
events = infonet.get_events_after(base, limit=limit)
|
||||
# Filter out legacy gate_message events — not part of the public sync surface.
|
||||
events = [_redact_public_event(e) for e in events if e.get("event_type") != "gate_message"]
|
||||
events = _infonet_sync_response_events(events, request=request)
|
||||
return {
|
||||
"events": events,
|
||||
"after_hash": base,
|
||||
@@ -1587,6 +1742,7 @@ async def infonet_ingest(request: Request):
|
||||
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
_hydrate_dm_relay_from_chain(events)
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@@ -2142,6 +2298,12 @@ async def infonet_event(request: Request, event_id: str):
|
||||
)
|
||||
return _strip_gate_for_access(evt, access)
|
||||
return {"ok": False, "detail": "Event not found"}
|
||||
if evt.get("event_type") == "dm_message":
|
||||
return await _private_plane_refusal_response(
|
||||
request,
|
||||
status_code=403,
|
||||
payload=_private_plane_access_denied_payload(),
|
||||
)
|
||||
if evt.get("event_type") == "gate_message":
|
||||
gate_id = str(evt.get("payload", {}).get("gate", "") or evt.get("gate", "") or "").strip()
|
||||
access = _verify_gate_access(request, gate_id) if gate_id else ""
|
||||
@@ -2166,7 +2328,7 @@ async def infonet_node_events(
|
||||
from services.mesh.mesh_hashchain import infonet
|
||||
|
||||
events = infonet.get_events_by_node(node_id, limit=limit)
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
events = _redact_public_node_history(
|
||||
events,
|
||||
@@ -2191,7 +2353,7 @@ async def infonet_events_by_type(
|
||||
else:
|
||||
events = list(reversed(infonet.events))
|
||||
events = events[offset : offset + limit]
|
||||
events = [e for e in events if e.get("event_type") != "gate_message"]
|
||||
events = [e for e in events if e.get("event_type") not in {"gate_message", "dm_message"}]
|
||||
events = [_redact_public_event(e) for e in infonet.decorate_events(events)]
|
||||
return {
|
||||
"events": events,
|
||||
|
||||
@@ -21,14 +21,30 @@ async def api_get_openmhz_systems(request: Request):
|
||||
return get_openmhz_systems()
|
||||
|
||||
|
||||
@router.get("/api/radio/openmhz/calls/{sys_name}")
|
||||
# Issue #213: rotating sys_name bypasses the 20s TTL cache and lets an
|
||||
# anonymous caller hammer api.openmhz.com through this proxy, risking an
|
||||
# IP-ban for the project. require_local_operator scopes this to the local
|
||||
# UI (which goes through the Next.js proxy with admin-key injection) and
|
||||
# scoped agent tokens.
|
||||
@router.get(
|
||||
"/api/radio/openmhz/calls/{sys_name}",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("60/minute")
|
||||
async def api_get_openmhz_calls(request: Request, sys_name: str):
|
||||
from services.radio_intercept import get_recent_openmhz_calls
|
||||
return get_recent_openmhz_calls(sys_name)
|
||||
|
||||
|
||||
@router.get("/api/radio/openmhz/audio")
|
||||
# Issue #214: this is a streaming bandwidth relay. An anonymous caller can
|
||||
# stream audio through the backend, saturating the operator's outbound
|
||||
# bandwidth. Scope to local operator; the legitimate browser UI still
|
||||
# works because relative /api/... paths go through the Next.js proxy
|
||||
# which injects the admin key automatically.
|
||||
@router.get(
|
||||
"/api/radio/openmhz/audio",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("120/minute")
|
||||
async def api_get_openmhz_audio(request: Request, url: str = Query(..., min_length=10)):
|
||||
from services.radio_intercept import openmhz_audio_response
|
||||
|
||||
@@ -21,7 +21,7 @@ async def oracle_region_intel(
|
||||
return get_region_oracle_intel(lat, lng, news_items)
|
||||
|
||||
|
||||
@router.get("/api/thermal/verify")
|
||||
@router.get("/api/thermal/verify", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def thermal_verify(
|
||||
request: Request,
|
||||
@@ -35,7 +35,7 @@ async def thermal_verify(
|
||||
return result
|
||||
|
||||
|
||||
@router.post("/api/sigint/transmit")
|
||||
@router.post("/api/sigint/transmit", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def sigint_transmit(request: Request):
|
||||
"""Send an APRS-IS message to a specific callsign. Requires ham radio credentials."""
|
||||
|
||||
+121
-13
@@ -85,7 +85,30 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
@router.get("/api/sentinel2/search")
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
# operator-only helpers — they MUST NOT be callable by anonymous remote
|
||||
# users, because:
|
||||
#
|
||||
# * /api/sentinel/token — caller supplies their own Sentinel client_id +
|
||||
# client_secret. Without operator gating, the backend becomes a free
|
||||
# anonymous OAuth-mint relay for any Copernicus account.
|
||||
# * /api/sentinel/tile — same shape as the token route but for tile
|
||||
# imagery. Without gating, the backend acts as an anonymous quota and
|
||||
# bandwidth relay for Sentinel Hub Process API calls.
|
||||
# * /api/sentinel2/search — hits the Planetary Computer STAC search API
|
||||
# and falls back to Esri imagery. No caller credentials are involved,
|
||||
# but the route is still an anonymous external-search relay. We gate
|
||||
# it the same way for consistency with the rest of the operator-only
|
||||
# helper surface.
|
||||
#
|
||||
# Gating is via require_local_operator (loopback / bridge / admin key),
|
||||
# matching the same allowlist already used by /api/region-dossier and
|
||||
# the other operator helpers further up this file. Single-operator nodes
|
||||
# see no behavior change — their dashboard already lives on loopback or
|
||||
# the trusted Docker bridge, so it still resolves.
|
||||
@router.get("/api/sentinel2/search", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
def api_sentinel2_search(
|
||||
request: Request,
|
||||
@@ -97,18 +120,60 @@ def api_sentinel2_search(
|
||||
return search_sentinel2_scene(lat, lng)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token")
|
||||
# Issue #298 (tg12): Sentinel credentials moved server-side
|
||||
# ---------------------------------------------------------------------------
|
||||
# Previously the frontend kept Copernicus CDSE client_id + client_secret in
|
||||
# browser localStorage / sessionStorage and forwarded them on every tile
|
||||
# request through this proxy. That exposed real third-party credentials to
|
||||
# any same-origin script (XSS, malicious browser extension, dev-tools HAR
|
||||
# export).
|
||||
#
|
||||
# Resolution order (first match wins):
|
||||
# 1. Request body — kept for back-compat. A small number of legacy
|
||||
# operator setups may still post credentials; we don't break them.
|
||||
# 2. Backend .env — SENTINEL_CLIENT_ID / SENTINEL_CLIENT_SECRET, managed
|
||||
# through the existing /api/settings/api-keys flow (admin-gated).
|
||||
#
|
||||
# The frontend in ``sentinelHub.ts`` no longer reads browser storage and no
|
||||
# longer forwards credentials — every dashboard request now lands in (2).
|
||||
# The require_local_operator gate (added in #303/PR #303) stays — both layers
|
||||
# are independent: the gate blocks anonymous callers, the env fallback lets
|
||||
# legitimate (gated) callers omit credentials from the body.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _resolve_sentinel_credentials(body_id: str, body_secret: str) -> tuple[str, str]:
    """Return ``(client_id, client_secret)`` for a Sentinel request.

    Each value is taken from the request body when it is a non-blank string,
    otherwise from the ``SENTINEL_CLIENT_ID`` / ``SENTINEL_CLIENT_SECRET``
    environment variables. Both sources are whitespace-stripped. An element is
    the empty string when neither source provides a value.

    Body values arrive from caller-controlled JSON / form data, so anything
    that is not a string (``None``, numbers, lists, ...) is treated as absent
    instead of raising ``AttributeError`` on ``.strip()``.
    """
    import os as _os

    def _pick(body_value: object, env_name: str) -> str:
        # Body wins when usable; the environment is the fallback.
        from_body = body_value.strip() if isinstance(body_value, str) else ""
        return from_body or (_os.environ.get(env_name, "") or "").strip()

    return (
        _pick(body_id, "SENTINEL_CLIENT_ID"),
        _pick(body_secret, "SENTINEL_CLIENT_SECRET"),
    )
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def api_sentinel_token(request: Request):
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block)."""
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block).
|
||||
|
||||
Credentials are resolved by ``_resolve_sentinel_credentials`` — body
|
||||
fields are honored for back-compat, otherwise the backend .env values
|
||||
populated through ``/api/settings/api-keys`` are used.
|
||||
"""
|
||||
import requests as req
|
||||
body = await request.body()
|
||||
from urllib.parse import parse_qs
|
||||
params = parse_qs(body.decode("utf-8"))
|
||||
client_id = params.get("client_id", [""])[0]
|
||||
client_secret = params.get("client_secret", [""])[0]
|
||||
body_id = params.get("client_id", [""])[0]
|
||||
body_secret = params.get("client_secret", [""])[0]
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(400, "client_id and client_secret required")
|
||||
# Friendly, non-hostile error — points the operator at the place
|
||||
# they configure other API keys instead of just saying "required".
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
||||
try:
|
||||
resp = await asyncio.to_thread(req.post, token_url,
|
||||
@@ -120,10 +185,39 @@ async def api_sentinel_token(request: Request):
|
||||
raise HTTPException(502, "Token request failed")
|
||||
|
||||
|
||||
_sh_token_cache: dict = {"token": None, "expiry": 0, "client_id": ""}
|
||||
# Cache key is an HMAC of (client_id, client_secret) — a caller cannot hit
|
||||
# this cache without knowing the same secret that originally populated it.
|
||||
# Without this binding, the lookup only checked client_id, so anyone who
|
||||
# knew a valid client_id could reuse another caller's cached token (and
|
||||
# burn their Copernicus quota / access tiles on their account).
|
||||
_sh_token_cache: dict = {"token": None, "expiry": 0, "credential_fp": ""}
|
||||
|
||||
|
||||
@router.post("/api/sentinel/tile")
|
||||
def _credential_fingerprint(client_id: str, client_secret: str) -> str:
    """Fingerprint a Sentinel credential pair for use as the token-cache key.

    The fingerprint is the hex HMAC-SHA256 of ``client_id``, a NUL byte and
    ``client_secret`` (UTF-8 encoded), keyed with the per-process random
    ``_SH_TOKEN_CACHE_HMAC_KEY``. Because the secret is part of the input, a
    caller who knows only the ``client_id`` cannot reproduce the value, and
    the raw secret itself never has to be kept as the cache key.
    """
    import hashlib
    import hmac

    material = f"{client_id}\x00{client_secret}".encode("utf-8")
    mac = hmac.new(_SH_TOKEN_CACHE_HMAC_KEY, material, digestmod=hashlib.sha256)
    return mac.hexdigest()


# HMAC key drawn fresh from the OS each time the backend process starts, so a
# fingerprint computed in one run is meaningless in the next.
import os as _os

_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/tile", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("300/minute")
|
||||
async def api_sentinel_tile(request: Request):
|
||||
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
||||
@@ -134,8 +228,11 @@ async def api_sentinel_tile(request: Request):
|
||||
except Exception:
|
||||
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
||||
|
||||
client_id = body.get("client_id", "")
|
||||
client_secret = body.get("client_secret", "")
|
||||
# Issue #298: same resolution order as /api/sentinel/token — body
|
||||
# values for back-compat, otherwise backend .env.
|
||||
body_id = body.get("client_id", "")
|
||||
body_secret = body.get("client_secret", "")
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
preset = body.get("preset", "TRUE-COLOR")
|
||||
date_str = body.get("date", "")
|
||||
z = body.get("z", 0)
|
||||
@@ -143,10 +240,21 @@ async def api_sentinel_tile(request: Request):
|
||||
y = body.get("y", 0)
|
||||
|
||||
if not client_id or not client_secret or not date_str:
|
||||
raise HTTPException(400, "client_id, client_secret, and date required")
|
||||
# Distinguish "no creds" from "no date" so the operator knows
|
||||
# what to fix. Same friendly pointer as the /token route.
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
raise HTTPException(400, "date required")
|
||||
|
||||
now = _time.time()
|
||||
if (_sh_token_cache["token"] and _sh_token_cache["client_id"] == client_id
|
||||
credential_fp = _credential_fingerprint(client_id, client_secret)
|
||||
if (_sh_token_cache["token"]
|
||||
and _sh_token_cache["credential_fp"] == credential_fp
|
||||
and now < _sh_token_cache["expiry"] - 30):
|
||||
token = _sh_token_cache["token"]
|
||||
else:
|
||||
@@ -161,7 +269,7 @@ async def api_sentinel_tile(request: Request):
|
||||
token = tdata["access_token"]
|
||||
_sh_token_cache["token"] = token
|
||||
_sh_token_cache["expiry"] = now + tdata.get("expires_in", 300)
|
||||
_sh_token_cache["client_id"] = client_id
|
||||
_sh_token_cache["credential_fp"] = credential_fp
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception:
|
||||
|
||||
+91
-323
@@ -78,6 +78,21 @@ export_wormhole_dm_invite = getattr(
|
||||
"export_wormhole_dm_invite",
|
||||
_wormhole_identity_unavailable,
|
||||
)
|
||||
list_prekey_lookup_handle_records_for_ui = getattr(
|
||||
_mesh_wormhole_identity,
|
||||
"list_prekey_lookup_handle_records_for_ui",
|
||||
_wormhole_identity_unavailable,
|
||||
)
|
||||
rename_prekey_lookup_handle = getattr(
|
||||
_mesh_wormhole_identity,
|
||||
"rename_prekey_lookup_handle",
|
||||
_wormhole_identity_unavailable,
|
||||
)
|
||||
revoke_prekey_lookup_handle = getattr(
|
||||
_mesh_wormhole_identity,
|
||||
"revoke_prekey_lookup_handle",
|
||||
_wormhole_identity_unavailable,
|
||||
)
|
||||
import_wormhole_dm_invite = getattr(
|
||||
_mesh_wormhole_identity,
|
||||
"import_wormhole_dm_invite",
|
||||
@@ -145,8 +160,13 @@ router = APIRouter()
|
||||
|
||||
# --- Constants ---
|
||||
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
||||
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||
# "is this on?" boolean. Transport choice, anonymous-mode state, and
|
||||
# the named privacy profile were all leaking actionable recon to
|
||||
# unauthenticated callers and are now gated behind authenticated reads.
|
||||
# See the matching block in backend/main.py for the full rationale.
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||
@@ -311,6 +331,10 @@ class WormholeDmInviteImportRequest(BaseModel):
|
||||
alias: str = ""
|
||||
|
||||
|
||||
# Request model with a single optional ``label`` string field.
# NOTE(review): presumably the body for renaming a DM invite / prekey lookup
# handle — confirm against the route that consumes it.
class WormholeDmInviteHandleUpdateRequest(BaseModel):
    label: str = ""  # empty string when the caller omits the field
|
||||
|
||||
|
||||
class WormholeDmSenderTokenRequest(BaseModel):
|
||||
recipient_id: str
|
||||
delivery_class: str
|
||||
@@ -477,6 +501,7 @@ def decrypt_wormhole_dm_envelope(
|
||||
remote_alias: str | None = None,
|
||||
session_welcome: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Delegate to main.py, which owns current MLS/alias/legacy gating behavior."""
|
||||
import main as _m
|
||||
|
||||
return _m.decrypt_wormhole_dm_envelope(
|
||||
@@ -489,71 +514,13 @@ def decrypt_wormhole_dm_envelope(
|
||||
session_welcome=session_welcome,
|
||||
)
|
||||
|
||||
resolved_local, resolved_remote = _resolve_dm_aliases(
|
||||
peer_id=peer_id,
|
||||
local_alias=local_alias,
|
||||
remote_alias=remote_alias,
|
||||
)
|
||||
normalized_format = str(payload_format or "dm1").strip().lower() or "dm1"
|
||||
if normalized_format != "mls1" and is_dm_locked_to_mls(resolved_local, resolved_remote):
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": "DM session is locked to MLS format",
|
||||
"required_format": "mls1",
|
||||
"current_format": normalized_format,
|
||||
}
|
||||
if normalized_format == "mls1":
|
||||
has_session = has_mls_dm_session(resolved_local, resolved_remote)
|
||||
if not has_session.get("ok"):
|
||||
return has_session
|
||||
if not has_session.get("exists"):
|
||||
ensured = ensure_mls_dm_session(resolved_local, resolved_remote, str(session_welcome or ""))
|
||||
if not ensured.get("ok"):
|
||||
return ensured
|
||||
decrypted = decrypt_mls_dm(
|
||||
resolved_local,
|
||||
resolved_remote,
|
||||
str(ciphertext or ""),
|
||||
str(nonce or ""),
|
||||
)
|
||||
if not decrypted.get("ok"):
|
||||
return decrypted
|
||||
return {
|
||||
"ok": True,
|
||||
"peer_id": str(peer_id or "").strip(),
|
||||
"local_alias": resolved_local,
|
||||
"remote_alias": resolved_remote,
|
||||
"plaintext": str(decrypted.get("plaintext", "") or ""),
|
||||
"format": "mls1",
|
||||
}
|
||||
|
||||
from services.wormhole_supervisor import get_transport_tier
|
||||
|
||||
current_tier = get_transport_tier()
|
||||
if str(current_tier or "").startswith("private_"):
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": "MLS format required in private transport mode — legacy DM decrypt blocked",
|
||||
}
|
||||
logger.warning("legacy dm decrypt path used")
|
||||
legacy = decrypt_wormhole_dm(peer_id=str(peer_id or ""), ciphertext=str(ciphertext or ""))
|
||||
if not legacy.get("ok"):
|
||||
return legacy
|
||||
return {
|
||||
"ok": True,
|
||||
"peer_id": str(peer_id or "").strip(),
|
||||
"local_alias": resolved_local,
|
||||
"remote_alias": resolved_remote,
|
||||
"plaintext": str(legacy.get("result", "") or ""),
|
||||
"format": "dm1",
|
||||
}
|
||||
|
||||
|
||||
|
||||
# --- Routes ---
|
||||
|
||||
@router.get("/api/settings/wormhole")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_get_wormhole_settings(request: Request):
|
||||
settings = await asyncio.to_thread(read_wormhole_settings)
|
||||
return _redact_wormhole_settings(settings, authenticated=_scoped_view_authenticated(request, "wormhole"))
|
||||
@@ -582,248 +549,9 @@ async def api_set_wormhole_settings(request: Request, body: WormholeUpdate):
|
||||
return {**updated, "requires_restart": False, "runtime": state}
|
||||
|
||||
|
||||
class PrivacyProfileUpdate(BaseModel):
|
||||
profile: str
|
||||
|
||||
|
||||
class WormholeSignRequest(BaseModel):
|
||||
event_type: str
|
||||
payload: dict
|
||||
sequence: int | None = None
|
||||
gate_id: str | None = None
|
||||
|
||||
|
||||
class WormholeSignRawRequest(BaseModel):
|
||||
message: str
|
||||
|
||||
|
||||
class WormholeDmEncryptRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
plaintext: str
|
||||
local_alias: str | None = None
|
||||
remote_alias: str | None = None
|
||||
remote_prekey_bundle: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WormholeDmComposeRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
plaintext: str
|
||||
local_alias: str | None = None
|
||||
remote_alias: str | None = None
|
||||
remote_prekey_bundle: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WormholeDmDecryptRequest(BaseModel):
|
||||
peer_id: str
|
||||
ciphertext: str
|
||||
format: str = "dm1"
|
||||
nonce: str = ""
|
||||
local_alias: str | None = None
|
||||
remote_alias: str | None = None
|
||||
session_welcome: str | None = None
|
||||
|
||||
|
||||
class WormholeDmResetRequest(BaseModel):
|
||||
peer_id: str | None = None
|
||||
|
||||
|
||||
class WormholeDmBootstrapEncryptRequest(BaseModel):
|
||||
peer_id: str
|
||||
plaintext: str
|
||||
|
||||
|
||||
class WormholeDmBootstrapDecryptRequest(BaseModel):
|
||||
sender_id: str = ""
|
||||
ciphertext: str
|
||||
|
||||
|
||||
class WormholeDmSenderTokenRequest(BaseModel):
|
||||
recipient_id: str
|
||||
delivery_class: str
|
||||
recipient_token: str = ""
|
||||
count: int = 1
|
||||
|
||||
|
||||
class WormholeOpenSealRequest(BaseModel):
|
||||
sender_seal: str
|
||||
candidate_dh_pub: str = ""
|
||||
recipient_id: str
|
||||
expected_msg_id: str
|
||||
|
||||
|
||||
class WormholeBuildSealRequest(BaseModel):
|
||||
recipient_id: str
|
||||
recipient_dh_pub: str = ""
|
||||
msg_id: str
|
||||
timestamp: int
|
||||
|
||||
|
||||
class WormholeDeadDropTokenRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
peer_ref: str = ""
|
||||
|
||||
|
||||
class WormholePairwiseAliasRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
|
||||
|
||||
class WormholePairwiseAliasRotateRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
grace_ms: int = 45_000
|
||||
|
||||
|
||||
class WormholeDeadDropContactsRequest(BaseModel):
|
||||
contacts: list[dict[str, Any]]
|
||||
limit: int = 24
|
||||
|
||||
|
||||
class WormholeSasRequest(BaseModel):
|
||||
peer_id: str
|
||||
peer_dh_pub: str = ""
|
||||
words: int = 8
|
||||
peer_ref: str = ""
|
||||
|
||||
|
||||
class WormholeGateRequest(BaseModel):
|
||||
gate_id: str
|
||||
rotate: bool = False
|
||||
|
||||
|
||||
class WormholeGatePersonaCreateRequest(BaseModel):
|
||||
gate_id: str
|
||||
label: str = ""
|
||||
|
||||
|
||||
class WormholeGatePersonaActivateRequest(BaseModel):
|
||||
gate_id: str
|
||||
persona_id: str
|
||||
|
||||
|
||||
class WormholeGateKeyGrantRequest(BaseModel):
|
||||
gate_id: str
|
||||
recipient_node_id: str
|
||||
recipient_dh_pub: str
|
||||
recipient_scope: str = "member"
|
||||
|
||||
|
||||
class WormholeGateComposeRequest(BaseModel):
|
||||
gate_id: str
|
||||
plaintext: str
|
||||
reply_to: str = ""
|
||||
compat_plaintext: bool = False
|
||||
|
||||
|
||||
class WormholeGateDecryptRequest(BaseModel):
|
||||
gate_id: str
|
||||
epoch: int = 0
|
||||
ciphertext: str
|
||||
nonce: str = ""
|
||||
sender_ref: str = ""
|
||||
format: str = "mls1"
|
||||
gate_envelope: str = ""
|
||||
envelope_hash: str = ""
|
||||
recovery_envelope: bool = False
|
||||
compat_decrypt: bool = False
|
||||
event_id: str = ""
|
||||
|
||||
|
||||
class WormholeGateDecryptBatchRequest(BaseModel):
|
||||
messages: list[WormholeGateDecryptRequest]
|
||||
|
||||
|
||||
class WormholeGateRotateRequest(BaseModel):
|
||||
gate_id: str
|
||||
reason: str = "manual_rotate"
|
||||
|
||||
def decrypt_wormhole_dm_envelope(
|
||||
*,
|
||||
peer_id: str,
|
||||
ciphertext: str,
|
||||
payload_format: str = "dm1",
|
||||
nonce: str = "",
|
||||
local_alias: str | None = None,
|
||||
remote_alias: str | None = None,
|
||||
session_welcome: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
import main as _m
|
||||
|
||||
return _m.decrypt_wormhole_dm_envelope(
|
||||
peer_id=peer_id,
|
||||
ciphertext=ciphertext,
|
||||
payload_format=payload_format,
|
||||
nonce=nonce,
|
||||
local_alias=local_alias,
|
||||
remote_alias=remote_alias,
|
||||
session_welcome=session_welcome,
|
||||
)
|
||||
|
||||
resolved_local, resolved_remote = _resolve_dm_aliases(
|
||||
peer_id=peer_id,
|
||||
local_alias=local_alias,
|
||||
remote_alias=remote_alias,
|
||||
)
|
||||
normalized_format = str(payload_format or "dm1").strip().lower() or "dm1"
|
||||
if normalized_format != "mls1" and is_dm_locked_to_mls(resolved_local, resolved_remote):
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": "DM session is locked to MLS format",
|
||||
"required_format": "mls1",
|
||||
"current_format": normalized_format,
|
||||
}
|
||||
if normalized_format == "mls1":
|
||||
has_session = has_mls_dm_session(resolved_local, resolved_remote)
|
||||
if not has_session.get("ok"):
|
||||
return has_session
|
||||
if not has_session.get("exists"):
|
||||
ensured = ensure_mls_dm_session(resolved_local, resolved_remote, str(session_welcome or ""))
|
||||
if not ensured.get("ok"):
|
||||
return ensured
|
||||
decrypted = decrypt_mls_dm(
|
||||
resolved_local,
|
||||
resolved_remote,
|
||||
str(ciphertext or ""),
|
||||
str(nonce or ""),
|
||||
)
|
||||
if not decrypted.get("ok"):
|
||||
return decrypted
|
||||
return {
|
||||
"ok": True,
|
||||
"peer_id": str(peer_id or "").strip(),
|
||||
"local_alias": resolved_local,
|
||||
"remote_alias": resolved_remote,
|
||||
"plaintext": str(decrypted.get("plaintext", "") or ""),
|
||||
"format": "mls1",
|
||||
}
|
||||
|
||||
from services.wormhole_supervisor import get_transport_tier
|
||||
|
||||
current_tier = get_transport_tier()
|
||||
if str(current_tier or "").startswith("private_"):
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": "MLS format required in private transport mode — legacy DM decrypt blocked",
|
||||
}
|
||||
logger.warning("legacy dm decrypt path used")
|
||||
legacy = decrypt_wormhole_dm(peer_id=str(peer_id or ""), ciphertext=str(ciphertext or ""))
|
||||
if not legacy.get("ok"):
|
||||
return legacy
|
||||
return {
|
||||
"ok": True,
|
||||
"peer_id": str(peer_id or "").strip(),
|
||||
"local_alias": resolved_local,
|
||||
"remote_alias": resolved_remote,
|
||||
"plaintext": str(legacy.get("result", "") or ""),
|
||||
"format": "dm1",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/settings/privacy-profile")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_get_privacy_profile(request: Request):
|
||||
data = await asyncio.to_thread(read_wormhole_settings)
|
||||
return _redact_privacy_profile_settings(
|
||||
@@ -833,7 +561,7 @@ async def api_get_privacy_profile(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/settings/wormhole-status")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_get_wormhole_status(request: Request):
|
||||
state = await asyncio.to_thread(get_wormhole_state)
|
||||
transport_tier = _current_private_lane_tier(state)
|
||||
@@ -869,21 +597,35 @@ async def api_get_wormhole_status(request: Request):
|
||||
@router.post("/api/wormhole/join", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_wormhole_join(request: Request):
|
||||
from services.config import get_settings
|
||||
|
||||
existing = read_wormhole_settings()
|
||||
updated = write_wormhole_settings(
|
||||
enabled=True,
|
||||
transport="direct",
|
||||
socks_proxy="",
|
||||
transport="tor_arti",
|
||||
socks_proxy=f"socks5h://127.0.0.1:{int(get_settings().MESH_ARTI_SOCKS_PORT or 9050)}",
|
||||
socks_dns=True,
|
||||
anonymous_mode=False,
|
||||
anonymous_mode=True,
|
||||
)
|
||||
transport_changed = (
|
||||
str(existing.get("transport", "direct")) != "direct"
|
||||
or str(existing.get("socks_proxy", "")) != ""
|
||||
str(existing.get("transport", "direct")) != "tor_arti"
|
||||
or str(existing.get("socks_proxy", "")) != str(updated.get("socks_proxy", ""))
|
||||
or bool(existing.get("socks_dns", True)) is not True
|
||||
or bool(existing.get("anonymous_mode", False)) is not False
|
||||
or bool(existing.get("anonymous_mode", False)) is not True
|
||||
or bool(existing.get("enabled", False)) is not True
|
||||
)
|
||||
tor_result: dict[str, Any] = {"ok": False, "detail": "not started"}
|
||||
try:
|
||||
import asyncio
|
||||
from routers.ai_intel import _write_env_value
|
||||
from services.tor_hidden_service import tor_service
|
||||
|
||||
tor_result = await asyncio.to_thread(tor_service.start)
|
||||
if tor_result.get("ok"):
|
||||
_write_env_value("MESH_ARTI_ENABLED", "true")
|
||||
get_settings.cache_clear()
|
||||
except Exception as exc:
|
||||
tor_result = {"ok": False, "detail": str(exc or type(exc).__name__)}
|
||||
bootstrap_wormhole_identity()
|
||||
bootstrap_wormhole_persona_state()
|
||||
state = (
|
||||
@@ -893,7 +635,7 @@ async def api_wormhole_join(request: Request):
|
||||
)
|
||||
|
||||
# Enable node participation so the sync/push workers connect to peers.
|
||||
# This is the voluntary opt-in — the node only joins the network when
|
||||
# This is the voluntary opt-in — the node only joins the network when
|
||||
# the user explicitly opens the Wormhole.
|
||||
from services.node_settings import write_node_settings
|
||||
|
||||
@@ -905,19 +647,19 @@ async def api_wormhole_join(request: Request):
|
||||
"identity": get_transport_identity(),
|
||||
"runtime": state,
|
||||
"settings": updated,
|
||||
"tor": tor_result,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/wormhole/leave", dependencies=[Depends(require_local_operator)])
|
||||
@router.post("/api/wormhole/leave")
|
||||
@limiter.limit("10/minute")
|
||||
async def api_wormhole_leave(request: Request):
|
||||
updated = write_wormhole_settings(enabled=False)
|
||||
state = disconnect_wormhole(reason="leave_wormhole")
|
||||
|
||||
# Disable node participation when the user leaves the Wormhole.
|
||||
from services.node_settings import write_node_settings
|
||||
|
||||
write_node_settings(enabled=False)
|
||||
# Leaving private DM mode must not disable Infonet participation. Infonet
|
||||
# sync has its own private transport warmup and can remain connected to
|
||||
# seed/peer nodes while MeshChat stays separately opt-in.
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
@@ -927,7 +669,7 @@ async def api_wormhole_leave(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/wormhole/identity", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_wormhole_identity(request: Request):
|
||||
try:
|
||||
bootstrap_wormhole_persona_state()
|
||||
@@ -956,7 +698,7 @@ async def api_wormhole_identity_bootstrap(request: Request):
|
||||
|
||||
|
||||
@router.get("/api/wormhole/dm/identity", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_wormhole_dm_identity(request: Request):
|
||||
try:
|
||||
bootstrap_wormhole_persona_state()
|
||||
@@ -968,11 +710,37 @@ async def api_wormhole_dm_identity(request: Request):
|
||||
|
||||
@router.get("/api/wormhole/dm/invite", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_dm_invite(request: Request):
|
||||
return export_wormhole_dm_invite()
|
||||
async def api_wormhole_dm_invite(
|
||||
request: Request,
|
||||
label: str = Query("", max_length=96),
|
||||
expires_in_s: int = Query(0, ge=0, le=2_592_000),
|
||||
):
|
||||
return export_wormhole_dm_invite(label=label, expires_in_s=expires_in_s)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/invite/import", dependencies=[Depends(require_admin)])
|
||||
@router.get("/api/wormhole/dm/invite/handles", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("240/minute")
|
||||
async def api_wormhole_dm_invite_handles(request: Request):
|
||||
return list_prekey_lookup_handle_records_for_ui()
|
||||
|
||||
|
||||
@router.patch("/api/wormhole/dm/invite/handles/{handle}", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def api_wormhole_dm_invite_handle_update(
|
||||
request: Request,
|
||||
handle: str,
|
||||
body: WormholeDmInviteHandleUpdateRequest,
|
||||
):
|
||||
return rename_prekey_lookup_handle(handle, str(body.label or "").strip())
|
||||
|
||||
|
||||
@router.delete("/api/wormhole/dm/invite/handles/{handle}", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_dm_invite_handle_revoke(request: Request, handle: str):
|
||||
return revoke_prekey_lookup_handle(handle)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/dm/invite/import", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_dm_invite_import(request: Request, body: WormholeDmInviteImportRequest):
|
||||
return import_wormhole_dm_invite(
|
||||
@@ -1191,7 +959,7 @@ async def api_wormhole_gate_message_sign_encrypted(
|
||||
return await _m.api_wormhole_gate_message_sign_encrypted(request, body)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/gate/message/post-encrypted", dependencies=[Depends(require_local_operator)])
|
||||
@router.post("/api/wormhole/gate/message/post-encrypted")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_wormhole_gate_message_post_encrypted(
|
||||
request: Request,
|
||||
@@ -1533,7 +1301,7 @@ class PrivateDeliveryActionRequest(BaseModel):
|
||||
|
||||
|
||||
@router.get("/api/wormhole/status")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_wormhole_status(request: Request):
|
||||
import main as _m
|
||||
|
||||
@@ -1576,7 +1344,7 @@ async def api_wormhole_private_delivery_action(
|
||||
|
||||
|
||||
@router.get("/api/wormhole/health")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("240/minute")
|
||||
async def api_wormhole_health(request: Request):
|
||||
state = get_wormhole_state()
|
||||
transport_tier = _current_private_lane_tier(state)
|
||||
@@ -1597,7 +1365,7 @@ async def api_wormhole_health(request: Request):
|
||||
return _redact_wormhole_status(full_state, authenticated=ok)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/connect", dependencies=[Depends(require_admin)])
|
||||
@router.post("/api/wormhole/connect", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_wormhole_connect(request: Request):
|
||||
settings = read_wormhole_settings()
|
||||
|
||||
@@ -20,7 +20,17 @@ OUT_PATH = Path(__file__).parent.parent / "data" / "power_plants.json"
|
||||
|
||||
def main() -> None:
|
||||
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
# Round 7a: release-time data refresher. Uses the per-operator UA if
|
||||
# available, otherwise a release-script-specific identifier. This
|
||||
# script is run by the maintainer at release time, NOT at runtime,
|
||||
# so an aggregate UA is acceptable; we still use the helper so the
|
||||
# behavior matches the rest of the project.
|
||||
try:
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua = outbound_user_agent("release-script-power-plants")
|
||||
except Exception:
|
||||
ua = "Shadowbroker/0.9 (release-script-power-plants; +https://github.com/BigBodyCobain/Shadowbroker/issues)"
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
|
||||
|
||||
@@ -17,6 +17,18 @@ AIS_WS_URL = "wss://stream.aisstream.io/v0/stream"
|
||||
API_KEY = os.environ.get("AIS_API_KEY", "")
|
||||
|
||||
|
||||
def _env_truthy(name: str) -> bool:
|
||||
return str(os.getenv(name, "")).strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def ais_stream_proxy_enabled() -> bool:
    """Report whether the external Node AIS proxy is allowed to start.

    ``SHADOWBROKER_ENABLE_AIS_STREAM_PROXY`` acts as an override: when it is
    unset or blank the proxy is allowed; otherwise the decision is whatever
    ``_env_truthy`` reports for that variable.
    """
    env_name = "SHADOWBROKER_ENABLE_AIS_STREAM_PROXY"
    configured = str(os.getenv(env_name, "")).strip().lower()
    if not configured:
        # Nothing configured: the proxy is enabled by default.
        return True
    return _env_truthy(env_name)
|
||||
|
||||
|
||||
# AIS vessel type code classification
|
||||
# See: https://coast.noaa.gov/data/marinecadastre/ais/VesselTypeCodes2018.pdf
|
||||
def classify_vessel(ais_type: int, mmsi: int) -> str:
|
||||
@@ -327,16 +339,117 @@ def get_country_from_mmsi(mmsi: int) -> str:
|
||||
|
||||
# Global vessel store: MMSI → vessel dict
|
||||
_vessels: dict[int, dict] = {}
|
||||
_vessel_trails: dict[int, dict] = {}
|
||||
_vessels_lock = threading.Lock()
|
||||
_ws_thread: threading.Thread | None = None
|
||||
_ws_running = False
|
||||
_proxy_process = None
|
||||
# Issue #258: latest status snapshot emitted by ais_proxy.js. Populated when
|
||||
# the proxy reports e.g. {"__ais_proxy_status": {"degraded_tls": true}} on
|
||||
# stdout, which it does when it falls back to the SPKI-pinned insecure-date
|
||||
# path during an upstream cert outage. Surfaced via ais_proxy_status() for
|
||||
# /api/health.
|
||||
_proxy_status: dict = {}
|
||||
# Upstream-connectivity telemetry (added when stream.aisstream.io went fully
|
||||
# offline on 2026-05-23). ``_last_msg_at`` is the unix timestamp of the most
|
||||
# recent vessel message received from the proxy. ``_proxy_spawn_count`` is
|
||||
# how many times we've started the node proxy; combined with no recent
|
||||
# messages it tells us the proxy is respawning in a tight loop because the
|
||||
# upstream is unreachable. Surfaced via ais_proxy_status() so the operator
|
||||
# can see "AIS is dead" instead of guessing whether it's their map filter,
|
||||
# their api key, or upstream.
|
||||
_last_msg_at: float = 0.0
|
||||
_proxy_spawn_count: int = 0
|
||||
_VESSEL_TRAIL_INTERVAL_S = 120
|
||||
_VESSEL_TRAIL_MAX_POINTS = 240
|
||||
|
||||
|
||||
# How stale "last vessel message" can be before we consider the stream
|
||||
# disconnected. AISStream typically pushes multiple messages/sec, so a 60s
|
||||
# gap means something's wrong upstream or in transit.
|
||||
_AIS_CONNECTED_FRESHNESS_S = 60
|
||||
|
||||
|
||||
def ais_proxy_status() -> dict:
    """Return a copy of the latest ais_proxy.js status plus connectivity health.

    Fields:
      * ``degraded_tls`` (bool, issue #258) — true when the proxy is using
        SPKI-pinned fallback because AISStream's cert expired. Only present
        once the proxy has reported a status marker containing it.
      * ``connected`` (bool) — true when we received a vessel message in
        the last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
      * ``last_msg_age_seconds`` (int | None) — seconds since the last
        vessel message; None if we've never received one.
      * ``proxy_spawn_count`` (int) — how many times we've spawned the
        node proxy. Sustained increases here without ``connected`` means
        we're respawning in a tight loop because upstream is dead.

    The three connectivity fields are always present, so the result is never
    empty: before any vessel message has been recorded it reports
    ``connected=False`` and ``last_msg_age_seconds=None``.
    """
    # Snapshot the shared state under the lock; derive the rest outside it.
    with _vessels_lock:
        status = dict(_proxy_status)
        last = _last_msg_at
        spawns = _proxy_spawn_count

    if last > 0:
        # Clamp at zero so a backwards wall-clock step never reports a
        # negative age.
        last_age = max(0, int(time.time() - last))
        status["last_msg_age_seconds"] = last_age
        status["connected"] = last_age <= _AIS_CONNECTED_FRESHNESS_S
    else:
        status["last_msg_age_seconds"] = None
        status["connected"] = False
    status["proxy_spawn_count"] = spawns
    return status
|
||||
|
||||
import os
|
||||
|
||||
CACHE_FILE = os.path.join(os.path.dirname(__file__), "ais_cache.json")
|
||||
|
||||
|
||||
def _record_vessel_trail_locked(mmsi: int, lat, lng, sog=0, now_ts: float | None = None) -> None:
    """Append a sampled AIS trail point. Caller must hold _vessels_lock.

    Args:
        mmsi: Vessel identifier; coerced with ``int()`` and used as the key
            into ``_vessel_trails``.
        lat: Latitude; anything ``float()`` accepts. Missing, non-numeric,
            non-finite, out-of-range or (0, 0) positions are ignored.
        lng: Longitude; validated together with ``lat``.
        sog: Speed over ground stored with the point. A missing, unparseable
            or non-finite value is recorded as ``0.0`` instead of raising.
        now_ts: Timestamp for the sample; a falsy value falls back to
            ``time.time()``.

    A point is stored as ``[lat, lng, sog, ts]``. It is appended only when at
    least ``_VESSEL_TRAIL_INTERVAL_S`` seconds have passed since the previous
    point and the rounded position differs from it. ``last_seen`` is
    refreshed for every valid position, and the trail is capped at
    ``_VESSEL_TRAIL_MAX_POINTS`` entries.
    """
    import math

    if lat is None or lng is None:
        return
    try:
        lat_f = float(lat)
        lng_f = float(lng)
    except (TypeError, ValueError):
        return
    # NaN compares False against every bound, so it would slip through the
    # range check below; reject non-finite coordinates explicitly.
    if not (math.isfinite(lat_f) and math.isfinite(lng_f)):
        return
    if abs(lat_f) > 90 or abs(lng_f) > 180 or (lat_f == 0 and lng_f == 0):
        return

    # A bad speed value must not abort recording of a valid position.
    try:
        sog_f = float(sog or 0)
    except (TypeError, ValueError):
        sog_f = 0.0
    if not math.isfinite(sog_f):
        sog_f = 0.0

    now = now_ts or time.time()
    trail_data = _vessel_trails.setdefault(int(mmsi), {"points": [], "last_seen": now})
    # Every valid report refreshes last_seen, whether or not it is sampled.
    trail_data["last_seen"] = now

    points = trail_data["points"]
    last_point_ts = points[-1][3] if points else 0
    if now - last_point_ts < _VESSEL_TRAIL_INTERVAL_S:
        return

    point = [round(lat_f, 5), round(lng_f, 5), round(sog_f, 1), round(now)]
    # Skip a sample whose rounded position equals the previous point.
    if points and points[-1][0] == point[0] and points[-1][1] == point[1]:
        return

    points.append(point)
    if len(points) > _VESSEL_TRAIL_MAX_POINTS:
        trail_data["points"] = points[-_VESSEL_TRAIL_MAX_POINTS:]
|
||||
|
||||
|
||||
def get_vessel_trail(mmsi: int) -> list:
    """Return a copy of the stored trail points for one vessel.

    ``mmsi`` is coerced with ``int()``; a value that cannot be coerced yields
    an empty list, as does a vessel with no recorded trail. Each point is
    copied into a new list while ``_vessels_lock`` is held.
    """
    try:
        vessel_key = int(mmsi)
    except (TypeError, ValueError):
        return []
    with _vessels_lock:
        entry = _vessel_trails.get(vessel_key, {})
        stored_points = entry.get("points", [])
        return list(map(list, stored_points))
|
||||
|
||||
|
||||
def _save_cache():
|
||||
"""Save vessel data to disk for persistence across restarts."""
|
||||
try:
|
||||
@@ -379,6 +492,7 @@ def prune_stale_vessels():
|
||||
stale_keys = [k for k, v in _vessels.items() if v.get("_updated", 0) < stale_cutoff]
|
||||
for k in stale_keys:
|
||||
del _vessels[k]
|
||||
_vessel_trails.pop(k, None)
|
||||
if stale_keys:
|
||||
logger.info(f"AIS pruned {len(stale_keys)} stale vessels")
|
||||
|
||||
@@ -447,6 +561,7 @@ def ingest_ais_catcher(msgs: list[dict]) -> int:
|
||||
heading = msg.get("heading", 511)
|
||||
vessel["heading"] = heading if heading != 511 else vessel.get("cog", 0)
|
||||
vessel["_updated"] = now
|
||||
_record_vessel_trail_locked(mmsi, lat, lon, vessel["sog"], now)
|
||||
if msg.get("shipname"):
|
||||
vessel["name"] = msg["shipname"].strip()
|
||||
count += 1
|
||||
@@ -496,6 +611,12 @@ def _ais_stream_loop():
|
||||
logger.info("Starting Node.js AIS Stream Proxy...")
|
||||
proxy_env = os.environ.copy()
|
||||
proxy_env["AIS_API_KEY"] = API_KEY
|
||||
popen_kwargs = {}
|
||||
if os.name == "nt":
|
||||
popen_kwargs["creationflags"] = (
|
||||
getattr(subprocess, "CREATE_NO_WINDOW", 0)
|
||||
| getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
|
||||
)
|
||||
process = subprocess.Popen(
|
||||
["node", proxy_script],
|
||||
stdin=subprocess.PIPE,
|
||||
@@ -504,9 +625,12 @@ def _ais_stream_loop():
|
||||
text=True,
|
||||
bufsize=1,
|
||||
env=proxy_env,
|
||||
**popen_kwargs,
|
||||
)
|
||||
global _proxy_spawn_count
|
||||
with _vessels_lock:
|
||||
_proxy_process = process
|
||||
_proxy_spawn_count += 1
|
||||
|
||||
# Drain stderr in a background thread to prevent deadlock
|
||||
import threading
|
||||
@@ -542,6 +666,18 @@ def _ais_stream_loop():
|
||||
logger.error(f"AIS Stream error: {data['error']}")
|
||||
continue
|
||||
|
||||
# Issue #258: ais_proxy.js emits status markers (e.g.
|
||||
# {"__ais_proxy_status": {"degraded_tls": true}}) when the
|
||||
# SPKI-pinned fallback is in use. We snapshot the latest
|
||||
# status so the backend can expose it on /api/health.
|
||||
if isinstance(data, dict) and "__ais_proxy_status" in data:
|
||||
status = data.get("__ais_proxy_status") or {}
|
||||
if isinstance(status, dict):
|
||||
with _vessels_lock:
|
||||
_proxy_status.clear()
|
||||
_proxy_status.update(status)
|
||||
continue
|
||||
|
||||
msg_type = data.get("MessageType", "")
|
||||
metadata = data.get("MetaData", {})
|
||||
message = data.get("Message", {})
|
||||
@@ -550,9 +686,15 @@ def _ais_stream_loop():
|
||||
if not mmsi:
|
||||
continue
|
||||
|
||||
# Telemetry: stamp the timestamp of the most recent real
|
||||
# vessel message. ais_proxy_status() reads this to decide
|
||||
# whether the stream is currently "connected" — i.e. has
|
||||
# any data flowed in the last 60s.
|
||||
global _last_msg_at
|
||||
with _vessels_lock:
|
||||
_last_msg_at = time.time()
|
||||
if mmsi not in _vessels:
|
||||
_vessels[mmsi] = {"_updated": time.time()}
|
||||
_vessels[mmsi] = {"_updated": _last_msg_at}
|
||||
vessel = _vessels[mmsi]
|
||||
|
||||
# Update position from PositionReport or StandardClassBPositionReport
|
||||
@@ -576,7 +718,9 @@ def _ais_stream_loop():
|
||||
vessel["cog"] = report.get("Cog", 0)
|
||||
heading = report.get("TrueHeading", 511)
|
||||
vessel["heading"] = heading if heading != 511 else report.get("Cog", 0)
|
||||
vessel["_updated"] = time.time()
|
||||
now_ts = time.time()
|
||||
vessel["_updated"] = now_ts
|
||||
_record_vessel_trail_locked(mmsi, lat, lng, vessel["sog"], now_ts)
|
||||
# Use metadata name if we don't have one yet
|
||||
if not vessel.get("name") or vessel["name"] == "UNKNOWN":
|
||||
vessel["name"] = (
|
||||
@@ -646,6 +790,22 @@ def _run_ais_loop():
|
||||
def start_ais_stream():
|
||||
"""Start the AIS WebSocket stream in a background thread."""
|
||||
global _ws_thread, _ws_running
|
||||
|
||||
# Always load cached vessel data first so the ships layer can paint even
|
||||
# when live streaming is disabled or the upstream is unavailable.
|
||||
_load_cache()
|
||||
|
||||
if not API_KEY:
|
||||
logger.info("AIS_API_KEY not set — ship tracking disabled. Set AIS_API_KEY to enable.")
|
||||
return
|
||||
|
||||
if not ais_stream_proxy_enabled():
|
||||
logger.info(
|
||||
"AIS live stream proxy disabled for this runtime; using cached AIS vessels. "
|
||||
"Set SHADOWBROKER_ENABLE_AIS_STREAM_PROXY=1 to opt in."
|
||||
)
|
||||
return
|
||||
|
||||
with _vessels_lock:
|
||||
if _ws_running:
|
||||
logger.info("AIS Stream already running")
|
||||
@@ -656,9 +816,6 @@ def start_ais_stream():
|
||||
logger.info("AIS Stream already running")
|
||||
return
|
||||
|
||||
# Load cached vessel data from disk
|
||||
_load_cache()
|
||||
|
||||
_ws_thread = threading.Thread(target=_run_ais_loop, daemon=True, name="ais-stream")
|
||||
_ws_thread.start()
|
||||
logger.info("AIS Stream background thread started")
|
||||
|
||||
@@ -4,12 +4,21 @@ Keys are stored in the backend .env file and loaded via python-dotenv.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# Path to the backend .env file
|
||||
ENV_PATH = Path(__file__).parent.parent / ".env"
|
||||
# Path to the example template that ships with the repo
|
||||
ENV_EXAMPLE_PATH = Path(__file__).parent.parent.parent / ".env.example"
|
||||
DATA_DIR = Path(os.environ.get("SB_DATA_DIR", str(Path(__file__).parent.parent / "data")))
|
||||
if not DATA_DIR.is_absolute():
|
||||
DATA_DIR = Path(__file__).parent.parent / DATA_DIR
|
||||
OPERATOR_KEYS_ENV_PATH = Path(
|
||||
os.environ.get("SHADOWBROKER_OPERATOR_KEYS_ENV", str(DATA_DIR / "operator_api_keys.env"))
|
||||
)
|
||||
_ENV_KEY_RE = re.compile(r"^[A-Z][A-Z0-9_]*$")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API Registry — every external service the dashboard depends on
|
||||
@@ -141,8 +150,112 @@ API_REGISTRY = [
|
||||
"url": "https://finnhub.io/register",
|
||||
"required": False,
|
||||
},
|
||||
# Issue #298 (tg12): Sentinel Hub / Copernicus Data Space Ecosystem
|
||||
# credentials were previously held in browser localStorage / sessionStorage
|
||||
# by the Settings panel. Moved server-side to the same .env-backed
|
||||
# store every other third-party API key lives in. The Sentinel proxy
|
||||
# routes (POST /api/sentinel/token, /tile) now fall back to these
|
||||
# env values when the request body omits credentials — see
|
||||
# backend/routers/tools.py for the resolution order.
|
||||
{
|
||||
"id": "sentinel_client_id",
|
||||
"env_key": "SENTINEL_CLIENT_ID",
|
||||
"name": "Sentinel Hub / Copernicus — Client ID",
|
||||
"description": "OAuth2 client ID for Copernicus Data Space Ecosystem (CDSE). Required for the Sentinel-2 imagery overlay and the right-click Sentinel-2 Intel Card. Sign in at dataspace.copernicus.eu and create OAuth credentials.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "sentinel_client_secret",
|
||||
"env_key": "SENTINEL_CLIENT_SECRET",
|
||||
"name": "Sentinel Hub / Copernicus — Client Secret",
|
||||
"description": "OAuth2 client secret paired with the Client ID above. Used by the backend to mint short-lived access tokens against the CDSE identity provider. Stored in the backend .env; never sent to the browser.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
|
||||
ALLOWED_ENV_KEYS = {
|
||||
str(api["env_key"])
|
||||
for api in API_REGISTRY
|
||||
if api.get("env_key")
|
||||
}
|
||||
|
||||
|
||||
def _parse_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=value`` env file into a dict.

    Blank lines, ``#`` comments, lines without ``=`` and keys that do not
    match ``_ENV_KEY_RE`` are skipped. A value wrapped in matching single or
    double quotes has the quotes removed. For double-quoted values the
    ``\\\\`` and ``\\"`` escapes are decoded, so values containing
    backslashes or double quotes read back unchanged from the
    ``KEY="escaped"`` form this module writes.

    Returns an empty dict when the file is missing, unreadable, or not valid
    UTF-8.
    """
    values: dict[str, str] = {}
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or non-UTF-8 file: treat as "nothing persisted".
        return values
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if not _ENV_KEY_RE.match(key):
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
            quote = value[0]
            value = value[1:-1]
            if quote == '"':
                # Reverse the \\ and \" escaping in one left-to-right pass.
                value = re.sub(r'\\([\\"])', r"\1", value)
        values[key] = value
    return values
|
||||
|
||||
|
||||
def _quote_env_value(value: str) -> str:
|
||||
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
|
||||
return f'"{escaped}"'
|
||||
|
||||
|
||||
def _write_env_values(path: Path, updates: dict[str, str]) -> None:
    """Merge *updates* into the env file at *path* via a temp-file replace.

    Existing lines are kept as-is, except assignments whose key appears in
    *updates*, which are rewritten in place as ``KEY="quoted value"``. Keys
    not already present are appended at the end. The result is written to a
    temporary file in the same directory and moved over *path* with
    ``os.replace``. On non-Windows platforms the file mode is set to 0600.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    existing = path.read_text(encoding="utf-8").splitlines() if path.exists() else []

    replaced: set[str] = set()
    output: list[str] = []
    for original_line in existing:
        candidate = original_line.strip()
        is_assignment = "=" in candidate and not candidate.startswith("#")
        name = candidate.split("=", 1)[0].strip() if is_assignment else None
        if name is not None and name in updates:
            output.append(f"{name}={_quote_env_value(updates[name])}")
            replaced.add(name)
        else:
            # Comments, blanks and unrelated assignments pass through untouched.
            output.append(original_line)
    output.extend(
        f"{name}={_quote_env_value(value)}"
        for name, value in updates.items()
        if name not in replaced
    )
    rendered = "\n".join(output).rstrip() + "\n"

    restrict_mode = os.name != "nt"
    fd, tmp_name = tempfile.mkstemp(dir=str(path.parent), prefix=f"{path.name}.tmp.", text=True)
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8", newline="\n") as handle:
            handle.write(rendered)
        if restrict_mode:
            os.chmod(tmp_path, 0o600)
        os.replace(tmp_path, path)
        if restrict_mode:
            os.chmod(path, 0o600)
    finally:
        # Best-effort cleanup when the replace did not consume the temp file.
        try:
            if tmp_path.exists():
                tmp_path.unlink()
        except OSError:
            pass
|
||||
|
||||
|
||||
def load_persisted_api_keys_into_environ() -> None:
    """Load persisted operator API keys if no process env value exists.

    Reads ``OPERATOR_KEYS_ENV_PATH`` through ``_parse_env_file`` and copies
    each non-empty value whose key is in ``ALLOWED_ENV_KEYS`` into
    ``os.environ``, but only when the process does not already have a
    non-empty value for that key.
    """
    persisted = _parse_env_file(OPERATOR_KEYS_ENV_PATH)
    for name, stored in persisted.items():
        if name not in ALLOWED_ENV_KEYS or not stored:
            continue
        if os.environ.get(name):
            # A value already set in the process environment wins.
            continue
        os.environ[name] = stored
|
||||
|
||||
|
||||
def get_env_path_info() -> dict:
|
||||
"""Return absolute paths for the backend .env and .env.example template.
|
||||
@@ -160,6 +273,10 @@ def get_env_path_info() -> dict:
|
||||
and (not env_path.exists() or os.access(env_path, os.W_OK)),
|
||||
"env_example_path": str(example_path),
|
||||
"env_example_path_exists": example_path.exists(),
|
||||
"operator_keys_env_path": str(OPERATOR_KEYS_ENV_PATH.resolve()),
|
||||
"operator_keys_env_path_exists": OPERATOR_KEYS_ENV_PATH.exists(),
|
||||
"operator_keys_env_path_writable": os.access(OPERATOR_KEYS_ENV_PATH.parent, os.W_OK)
|
||||
and (not OPERATOR_KEYS_ENV_PATH.exists() or os.access(OPERATOR_KEYS_ENV_PATH, os.W_OK)),
|
||||
}
|
||||
|
||||
|
||||
@@ -171,6 +288,7 @@ def get_api_keys():
|
||||
`is_set` to render a CONFIGURED / NOT CONFIGURED badge and the path
|
||||
info from `get_env_path_info()` to tell them where to put each key.
|
||||
"""
|
||||
load_persisted_api_keys_into_environ()
|
||||
result = []
|
||||
for api in API_REGISTRY:
|
||||
entry = {
|
||||
@@ -189,3 +307,57 @@ def get_api_keys():
|
||||
entry["is_set"] = bool(raw)
|
||||
result.append(entry)
|
||||
return result
|
||||
|
||||
|
||||
def save_api_keys(updates: dict[str, str]) -> dict:
    """Persist allowed API keys from a local operator request.

    Values are accepted write-only: the response includes only configured flags.

    Keys are upper-cased and stripped; anything not in ``ALLOWED_ENV_KEYS``
    or with an empty value is ignored. Accepted keys are written to
    ``OPERATOR_KEYS_ENV_PATH`` (errors propagate) and, best-effort, to
    ``ENV_PATH``, then exported into ``os.environ`` and pushed into the
    already-imported AIS / OpenSky clients and the settings cache.

    Returns:
        ``{"ok": False, "detail": ...}`` when nothing usable was supplied,
        otherwise ``{"ok": True, "updated", "keys", "env"}``.
    """
    accepted: dict[str, str] = {}
    for raw_name, raw_value in updates.items():
        name = str(raw_name or "").strip().upper()
        if name not in ALLOWED_ENV_KEYS:
            continue
        secret = str(raw_value or "").strip()
        if secret:
            accepted[name] = secret

    if not accepted:
        return {"ok": False, "detail": "No supported API keys were provided."}

    _write_env_values(OPERATOR_KEYS_ENV_PATH, accepted)
    try:
        _write_env_values(ENV_PATH, accepted)
    except OSError:
        # The persistent operator key file is the source of truth for Docker.
        pass

    os.environ.update(accepted)

    if "AIS_API_KEY" in accepted:
        # Best-effort hot reload of the AIS stream module's key.
        try:
            from services import ais_stream

            ais_stream.API_KEY = accepted["AIS_API_KEY"]
        except Exception:
            pass

    if {"OPENSKY_CLIENT_ID", "OPENSKY_CLIENT_SECRET"} & accepted.keys():
        # Best-effort: refresh credentials and drop the cached token.
        try:
            from services.fetchers import flights

            client = flights.opensky_client
            client.client_id = os.environ.get("OPENSKY_CLIENT_ID", "")
            client.client_secret = os.environ.get("OPENSKY_CLIENT_SECRET", "")
            client.token = None
            client.expires_at = 0
        except Exception:
            pass

    try:
        from services.config import get_settings

        get_settings.cache_clear()
    except Exception:
        pass

    response: dict = {"ok": True, "updated": sorted(accepted.keys())}
    response["keys"] = get_api_keys()
    response["env"] = get_env_path_info()
    return response
|
||||
|
||||
+407
-173
@@ -1,46 +1,90 @@
|
||||
"""
|
||||
Carrier Strike Group OSINT Tracker
|
||||
===================================
|
||||
Scrapes multiple OSINT sources to maintain current estimated positions
|
||||
for US Navy Carrier Strike Groups. Updates on startup + 00:00 & 12:00 UTC.
|
||||
Maintains estimated positions for US Navy Carrier Strike Groups with
|
||||
honest provenance and freshness signals.
|
||||
|
||||
Sources:
|
||||
1. GDELT News API — recent carrier movement headlines
|
||||
2. WikiVoyage / public port-call databases
|
||||
3. Fallback — last-known or static OSINT estimates
|
||||
Issues #244 / #245 / #246 (tg12 external audit):
|
||||
|
||||
The previous implementation baked a snapshot of USNI News Fleet &
|
||||
Marine Tracker positions (March 9, 2026) into the registry as
|
||||
``fallback_lat``/``fallback_lng`` and stamped ``updated = now()``
|
||||
every time the dossier was rendered. That presented stale editorial
|
||||
data as live state. It also persisted GDELT-derived positions to the
|
||||
on-disk cache with no freshness signal, so a single news mention from
|
||||
months ago could keep overriding the (already-stale) registry default
|
||||
indefinitely.
|
||||
|
||||
Architecture after this PR:
|
||||
|
||||
::
|
||||
|
||||
backend/data/carrier_seed.json read-only, shipped with image,
|
||||
used ONCE on first-ever startup
|
||||
to bootstrap carrier_cache.json.
|
||||
|
||||
backend/data/carrier_cache.json mutable, lives in the runtime data
|
||||
volume, written by every GDELT
|
||||
refresh + any future source.
|
||||
|
||||
Startup flow:
|
||||
|
||||
1. ``carrier_cache.json`` exists? → load it.
|
||||
2. Otherwise, copy ``carrier_seed.json`` → ``carrier_cache.json``,
|
||||
then load it. (This happens once, ever, per install.)
|
||||
3. Background: GDELT fetch runs. Any carrier mentioned in fresh news
|
||||
gets its entry replaced with the news-derived position.
|
||||
``position_source_at`` is set to the news article timestamp.
|
||||
|
||||
Freshness is a *labelling* decision, not an eviction decision:
|
||||
|
||||
- ``position_source_at`` within the configurable freshness window
|
||||
(default 14 days) → ``position_confidence = "recent"``.
|
||||
- Older than that → ``position_confidence = "stale"``.
|
||||
- Bootstrapped from the seed file (never updated) → ``"seed"``.
|
||||
- No cache entry at all (e.g. a carrier added to the registry after
|
||||
first install) → carrier renders at its homeport with
|
||||
``"homeport_default"``.
|
||||
|
||||
Carriers are never hidden, never teleported, never disappeared. The
|
||||
position the user sees is always the last position the system actually
|
||||
observed, with an honest "as-of" timestamp the UI can render however
|
||||
it likes. A year from now, the runtime cache reflects whatever this
|
||||
install has observed via GDELT — not the seed snapshot.
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import random
|
||||
from datetime import datetime, timezone
|
||||
import shutil
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Carrier registry: hull number → metadata + fallback position
|
||||
# Carrier registry: hull number → identity only.
|
||||
#
|
||||
# Issue #244 (tg12): the previous registry carried hard-coded
|
||||
# ``fallback_lat``/``fallback_lng`` that were dated editorial
|
||||
# snapshots from a 2026-03-09 article. Those fields are DELETED. The
|
||||
# registry is now identity + homeport only; positions are sourced
|
||||
# exclusively from carrier_cache.json (and via that, from the
|
||||
# bootstrap seed or live OSINT).
|
||||
# -----------------------------------------------------------------
|
||||
CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
|
||||
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
|
||||
# --- Bremerton, WA (Naval Base Kitsap) ---
|
||||
# Distinct pier positions along Sinclair Inlet so carriers don't stack
|
||||
"CVN-68": {
|
||||
"name": "USS Nimitz (CVN-68)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5535,
|
||||
"homeport_lng": -122.6400,
|
||||
"fallback_lat": 47.5535,
|
||||
"fallback_lng": -122.6400,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Maintenance)",
|
||||
},
|
||||
"CVN-76": {
|
||||
"name": "USS Ronald Reagan (CVN-76)",
|
||||
@@ -48,23 +92,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5580,
|
||||
"homeport_lng": -122.6360,
|
||||
"fallback_lat": 47.5580,
|
||||
"fallback_lng": -122.6360,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Decommissioning)",
|
||||
},
|
||||
# --- Norfolk, VA (Naval Station Norfolk) ---
|
||||
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
|
||||
"CVN-69": {
|
||||
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9465,
|
||||
"homeport_lng": -76.3265,
|
||||
"fallback_lat": 36.9465,
|
||||
"fallback_lng": -76.3265,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
},
|
||||
"CVN-78": {
|
||||
"name": "USS Gerald R. Ford (CVN-78)",
|
||||
@@ -72,10 +107,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9505,
|
||||
"homeport_lng": -76.3250,
|
||||
"fallback_lat": 18.0,
|
||||
"fallback_lng": 39.5,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
"CVN-74": {
|
||||
"name": "USS John C. Stennis (CVN-74)",
|
||||
@@ -83,10 +114,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9540,
|
||||
"homeport_lng": -76.3235,
|
||||
"fallback_lat": 36.98,
|
||||
"fallback_lng": -76.43,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
},
|
||||
"CVN-75": {
|
||||
"name": "USS Harry S. Truman (CVN-75)",
|
||||
@@ -94,10 +121,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9580,
|
||||
"homeport_lng": -76.3220,
|
||||
"fallback_lat": 36.0,
|
||||
"fallback_lng": 15.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
},
|
||||
"CVN-77": {
|
||||
"name": "USS George H.W. Bush (CVN-77)",
|
||||
@@ -105,23 +128,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9620,
|
||||
"homeport_lng": -76.3210,
|
||||
"fallback_lat": 36.5,
|
||||
"fallback_lng": -74.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
},
|
||||
# --- San Diego, CA (Naval Base San Diego) ---
|
||||
# Carrier piers along the east shore of San Diego Bay, spread N-S
|
||||
"CVN-70": {
|
||||
"name": "USS Carl Vinson (CVN-70)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6840,
|
||||
"homeport_lng": -117.1290,
|
||||
"fallback_lat": 32.6840,
|
||||
"fallback_lng": -117.1290,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Homeport)",
|
||||
},
|
||||
"CVN-71": {
|
||||
"name": "USS Theodore Roosevelt (CVN-71)",
|
||||
@@ -129,10 +143,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6885,
|
||||
"homeport_lng": -117.1280,
|
||||
"fallback_lat": 32.6885,
|
||||
"fallback_lng": -117.1280,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Maintenance)",
|
||||
},
|
||||
"CVN-72": {
|
||||
"name": "USS Abraham Lincoln (CVN-72)",
|
||||
@@ -140,10 +150,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6925,
|
||||
"homeport_lng": -117.1275,
|
||||
"fallback_lat": 20.0,
|
||||
"fallback_lng": 64.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
# --- Yokosuka, Japan (CFAY) ---
|
||||
"CVN-73": {
|
||||
@@ -152,16 +158,18 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Yokosuka, Japan",
|
||||
"homeport_lat": 35.2830,
|
||||
"homeport_lng": 139.6700,
|
||||
"fallback_lat": 35.2830,
|
||||
"fallback_lng": 139.6700,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "Yokosuka, Japan (Forward deployed)",
|
||||
},
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Region → approximate center coordinates
|
||||
# Used to map textual geographic descriptions to lat/lng
|
||||
# Region → approximate center coordinates.
|
||||
#
|
||||
# Issue #245 (tg12): converting a region name straight into precise
|
||||
# map coordinates is false precision. We still use this table to
|
||||
# infer a coarse position from a headline mention, but the resulting
|
||||
# carrier object is now stamped ``position_confidence = "approximate"``
|
||||
# so the UI can render an uncertainty radius / dimmed icon. The
|
||||
# centroid is a best-effort midpoint of the named body of water.
|
||||
# -----------------------------------------------------------------
|
||||
REGION_COORDS: Dict[str, tuple] = {
|
||||
# Oceans & Seas
|
||||
@@ -220,9 +228,39 @@ REGION_COORDS: Dict[str, tuple] = {
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Cache file for persisting positions between restarts
|
||||
# Files
|
||||
# -----------------------------------------------------------------
|
||||
CACHE_FILE = Path(__file__).parent.parent / "carrier_cache.json"
|
||||
#
|
||||
# The seed lives in the read-only image data dir (it ships with each
|
||||
# release). The cache lives in the same data dir but is written at
|
||||
# runtime; under Docker compose this dir is volume-mounted so the
|
||||
# cache persists across container restarts, which is the whole point
|
||||
# of the seed-then-observe model — the user's runtime observations
|
||||
# survive image upgrades.
|
||||
SEED_FILE = Path(__file__).parent.parent / "data" / "carrier_seed.json"
|
||||
CACHE_FILE = Path(__file__).parent.parent / "data" / "carrier_cache.json"
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Freshness window for position_confidence labeling. Issue #246 (tg12):
|
||||
# previously persisted cache entries had no freshness signal at all.
|
||||
# After this change, the position itself is preserved (we never lose
|
||||
# what was last observed) but the confidence label flips from
|
||||
# "recent" to "stale" once the underlying source is older than this
|
||||
# window. Operator-overridable via env var.
|
||||
# -----------------------------------------------------------------
|
||||
_DEFAULT_FRESHNESS_WINDOW_DAYS = 14
|
||||
|
||||
|
||||
def _freshness_window_days() -> int:
    """Return the freshness window, in days, used for confidence labelling.

    Reads ``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` from the environment.
    A missing, blank, non-integer, zero or negative value yields
    ``_DEFAULT_FRESHNESS_WINDOW_DAYS``.
    """
    configured = str(os.environ.get("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "") or "").strip()
    if configured:
        try:
            days = int(configured)
        except (TypeError, ValueError):
            days = 0  # treated like any other non-positive override
        if days > 0:
            return days
    return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
|
||||
|
||||
_carrier_positions: Dict[str, dict] = {}
|
||||
_positions_lock = threading.Lock()
|
||||
@@ -234,25 +272,159 @@ _GDELT_REQUEST_DELAY_SECONDS = 1.25
|
||||
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string with a UTC offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _parse_iso(ts: str) -> Optional[datetime]:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    Args:
        ts: Timestamp text; ``Z`` is accepted as a UTC designator.

    Returns:
        The parsed datetime (naive inputs are assumed to be UTC), or
        ``None`` when *ts* is empty or cannot be parsed.
    """
    if not ts:
        return None
    try:
        # Python's fromisoformat accepts +00:00 but not 'Z' until 3.11.
        parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (TypeError, ValueError):
        return None
    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _compute_position_confidence(entry: dict, *, now: Optional[datetime] = None) -> str:
    """Return the public confidence label for a carrier cache entry.

    Rules, in order:
      - a stored ``"seed"`` or ``"homeport_default"`` label is returned
        as-is and never ages;
      - a stored ``"approximate"`` label stays ``"approximate"`` unless
        its ``position_source_at`` parses and is older than the freshness
        window, in which case it becomes ``"stale_approximate"``;
      - any other entry is ``"recent"`` when ``position_source_at`` parses
        and is within the window, otherwise ``"stale"``.

    Args:
        entry: Cache entry; reads ``position_confidence`` and
            ``position_source_at``.
        now: Reference time for the age check; defaults to the current
            UTC time.
    """
    label = str(entry.get("position_confidence", "") or "").strip()

    # Provenance labels that were never timestamped against a real
    # observation are passed through untouched.
    if label in ("seed", "homeport_default"):
        return label

    is_approximate = label == "approximate"
    observed_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
    if observed_at is None:
        # Undated: approximate keeps its label, everything else is stale.
        return label if is_approximate else "stale"

    age = (now or datetime.now(timezone.utc)) - observed_at
    expired = age > timedelta(days=_freshness_window_days())
    if is_approximate:
        return "stale_approximate" if expired else label
    return "stale" if expired else "recent"
|
||||
|
||||
|
||||
def _load_seed() -> Dict[str, dict]:
    """Load the read-only seed file shipped with the image.

    Returns:
        The ``"carriers"`` mapping from ``SEED_FILE`` (hull to entry, no
        wrapper). An absent, unreadable or malformed seed yields an empty
        dict.
    """
    try:
        if not SEED_FILE.exists():
            logger.info("Carrier seed file not present at %s; first-run will fall back to homeport defaults", SEED_FILE)
            return {}
        payload = json.loads(SEED_FILE.read_text(encoding="utf-8"))
        entries = payload.get("carriers", {}) if isinstance(payload, dict) else {}
        if isinstance(entries, dict):
            logger.info("Carrier seed loaded: %d entries from %s", len(entries), SEED_FILE)
            return entries
        # "carriers" was present but is not a mapping: treat as no seed.
        return {}
    except (IOError, OSError, json.JSONDecodeError, ValueError) as exc:
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, exc)
        return {}
|
||||
|
||||
|
||||
def _load_cache() -> Dict[str, dict]:
    """Load the mutable cache (last-known positions persisted between restarts).

    Returns:
        The parsed contents of ``CACHE_FILE`` when it exists and holds a
        JSON object; otherwise an empty dict. Read and parse errors are
        logged as warnings and also yield an empty dict.
    """
    try:
        if CACHE_FILE.exists():
            # Explicit UTF-8 so the result does not depend on the platform locale.
            data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
            # Only a JSON object is a usable hull -> entry mapping; anything
            # else (list, string, null) falls through to the empty default.
            if isinstance(data, dict):
                logger.info("Carrier cache loaded: %d carriers from %s", len(data), CACHE_FILE)
                return data
    except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
        logger.warning("Failed to load carrier cache: %s", e)
    return {}
|
||||
|
||||
|
||||
def _save_cache(positions: Dict[str, dict]) -> None:
    """Persist the mutable cache with an atomic write (temp file + rename).

    The JSON is written to a sibling ``<name>.tmp`` file and then moved
    over ``CACHE_FILE`` with ``os.replace``, so a crash mid-write cannot
    leave the cache truncated. I/O failures are logged as warnings and not
    raised.

    Args:
        positions: Hull to cache-entry mapping to serialise.
    """
    try:
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        tmp = CACHE_FILE.with_suffix(CACHE_FILE.suffix + ".tmp")
        tmp.write_text(json.dumps(positions, indent=2), encoding="utf-8")
        # os.replace overwrites an existing destination on every platform.
        os.replace(tmp, CACHE_FILE)
        logger.info("Carrier cache saved: %d carriers", len(positions))
    except (IOError, OSError) as e:
        logger.warning("Failed to save carrier cache: %s", e)
|
||||
|
||||
|
||||
def _homeport_entry_for(hull: str) -> Optional[dict]:
    """Build a homeport-default cache entry for *hull*.

    Returns:
        An entry placed at the registry homeport coordinates, labelled
        ``"homeport_default"`` and stamped with the current UTC time, or
        ``None`` when *hull* is not in ``CARRIER_REGISTRY``.
    """
    registry_row = CARRIER_REGISTRY.get(hull)
    if not registry_row:
        return None

    entry: dict = {}
    entry["lat"] = registry_row["homeport_lat"]
    entry["lng"] = registry_row["homeport_lng"]
    entry["heading"] = 0
    entry["desc"] = f"{registry_row['homeport']} (no observations yet)"
    entry["source"] = f"Homeport default ({registry_row['homeport']})"
    entry["source_url"] = registry_row.get("wiki", "")
    entry["position_source_at"] = _now_iso()
    entry["position_confidence"] = "homeport_default"
    return entry
|
||||
|
||||
|
||||
def _bootstrap_cache_if_missing() -> Dict[str, dict]:
    """Return the cache contents, creating the cache file first if absent.

    - ``CACHE_FILE`` exists: its contents are loaded and returned.
    - Otherwise, a non-empty seed is saved as the first cache and a copy
      of it is returned.
    - With no cache and no seed, a homeport-default entry is built for
      every hull in ``CARRIER_REGISTRY``, saved when non-empty, and
      returned.

    Deleting the cache file therefore triggers the same bootstrap again on
    the next call.
    """
    if CACHE_FILE.exists():
        return _load_cache()

    seed = _load_seed()
    if seed:
        # Persist the seed as the first cache so subsequent runs skip this branch.
        _save_cache(seed)
        logger.info("Carrier cache bootstrapped from seed (first-ever startup)")
        return dict(seed)

    # No seed file either: fall back to one homeport entry per known hull.
    candidates = ((hull, _homeport_entry_for(hull)) for hull in CARRIER_REGISTRY)
    homeports: Dict[str, dict] = {
        hull: entry for hull, entry in candidates if entry is not None
    }
    if homeports:
        _save_cache(homeports)
    return homeports
|
||||
|
||||
|
||||
def _match_region(text: str) -> Optional[tuple]:
|
||||
@@ -270,10 +442,8 @@ def _match_carrier(text: str) -> Optional[str]:
|
||||
for hull, info in CARRIER_REGISTRY.items():
|
||||
hull_check = hull.lower().replace("-", "")
|
||||
name_parts = info["name"].lower()
|
||||
# Match hull number (e.g., "CVN-78", "CVN78")
|
||||
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
||||
return hull
|
||||
# Match ship name (e.g., "Ford", "Eisenhower", "Vinson")
|
||||
ship_name = name_parts.split("(")[0].strip()
|
||||
last_name = ship_name.split()[-1] if ship_name else ""
|
||||
if last_name and len(last_name) > 3 and last_name in text_lower:
|
||||
@@ -323,8 +493,9 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
articles = data.get("articles", [])
|
||||
for art in articles:
|
||||
title = art.get("title", "")
|
||||
url = art.get("url", "")
|
||||
results.append({"title": title, "url": url})
|
||||
article_url = art.get("url", "")
|
||||
article_at = art.get("seendate") or art.get("date") or ""
|
||||
results.append({"title": title, "url": article_url, "seendate": article_at})
|
||||
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
||||
logger.debug(f"GDELT search failed for '{term}': {e}")
|
||||
continue
|
||||
@@ -340,108 +511,175 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
return results
|
||||
|
||||
|
||||
def _gdelt_seendate_to_iso(seendate: str) -> Optional[str]:
    """Convert a GDELT ``seendate`` (UTC) to an ISO 8601 string.

    Accepted shapes:
      - ``YYYYMMDDhhmmss`` (all digits),
      - ``YYYYMMDDThhmmssZ`` (the ``T``/``Z`` separated form),
      - ``YYYYMMDD`` or any shorter-than-14-digit stamp, which is treated
        as midnight of that date.

    Args:
        seendate: Raw timestamp text from the article record.

    Returns:
        The timestamp as an ISO 8601 string with a ``+00:00`` offset, or
        ``None`` when the input is empty or unparseable.
    """
    raw = (seendate or "").strip().upper()
    # Normalise the T/Z separated form down to bare digits so both shapes
    # share one parsing path. Only a trailing Z and a T directly after the
    # 8-digit date are removed; any other stray character still fails the
    # digit check below.
    if raw.endswith("Z"):
        raw = raw[:-1]
    if len(raw) > 8 and raw[8] == "T":
        raw = raw[:8] + raw[9:]
    if len(raw) < 8 or not raw.isdigit():
        return None
    stamp = raw[:14] if len(raw) >= 14 else raw[:8] + "000000"
    try:
        parsed = datetime.strptime(stamp, "%Y%m%d%H%M%S")
    except (TypeError, ValueError):
        return None
    return parsed.replace(tzinfo=timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
    """Parse carrier positions from news article titles.

    For each article whose title matches both a carrier (``_match_carrier``)
    and a region (``_match_region``), an entry is produced for that hull.
    Only the first matching article per hull is used.

    Issue #245 (tg12): the position is a region centroid, which is coarse,
    so every entry is stamped ``position_confidence = "approximate"``.
    Issue #244: ``position_source_at`` is the article's ``seendate``
    converted to ISO 8601, falling back to the current time only when that
    date cannot be parsed.

    Args:
        articles: Dicts with ``title`` and optionally ``url`` / ``seendate``.

    Returns:
        Mapping of hull number to a new cache entry.
    """
    updates: Dict[str, dict] = {}

    for article in articles:
        title = article.get("title", "")

        hull = _match_carrier(title)
        if not hull:
            continue

        coords = _match_region(title)
        if not coords:
            continue

        # First match wins (the source comments state GDELT returns newest
        # first per term).
        if hull in updates:
            continue

        iso_at = _gdelt_seendate_to_iso(str(article.get("seendate", ""))) or _now_iso()
        updates[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": title[:100],
            "source": "GDELT News API (headline region match — approximate)",
            "source_url": article.get("url", "https://api.gdeltproject.org"),
            "position_source_at": iso_at,
            # Headline-to-centroid match is explicitly approximate.
            "position_confidence": "approximate",
        }
        logger.info(
            "Carrier update: %s → %s (from: %s)",
            CARRIER_REGISTRY[hull]["name"],
            coords,
            title[:80],
        )

    return updates
|
||||
|
||||
|
||||
def _load_carrier_fallbacks() -> Dict[str, dict]:
|
||||
"""Build carrier positions from static fallbacks + disk cache (instant, no network)."""
|
||||
positions: Dict[str, dict] = {}
|
||||
for hull, info in CARRIER_REGISTRY.items():
|
||||
positions[hull] = {
|
||||
"name": info["name"],
|
||||
"lat": info["fallback_lat"],
|
||||
"lng": info["fallback_lng"],
|
||||
"heading": info["fallback_heading"],
|
||||
"desc": info["fallback_desc"],
|
||||
"wiki": info["wiki"],
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"updated": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
# Overlay cached positions from previous runs (may have GDELT data)
|
||||
cached = _load_cache()
|
||||
for hull, cached_pos in cached.items():
|
||||
if hull in positions:
|
||||
if cached_pos.get("source", "").startswith("GDELT") or cached_pos.get(
|
||||
"source", ""
|
||||
).startswith("News"):
|
||||
positions[hull].update(
|
||||
{
|
||||
"lat": cached_pos["lat"],
|
||||
"lng": cached_pos["lng"],
|
||||
"desc": cached_pos.get("desc", positions[hull]["desc"]),
|
||||
"source": cached_pos.get("source", "Cached OSINT"),
|
||||
"updated": cached_pos.get("updated", ""),
|
||||
}
|
||||
)
|
||||
return positions
|
||||
def _enrich_for_rendering(hull: str, entry: dict, *, now: Optional[datetime] = None) -> dict:
    """Build the public-facing carrier object from a persisted cache entry.

    The persisted *entry* is not modified. Identity fields (``name``,
    ``wiki``) fall back to ``CARRIER_REGISTRY`` when the entry lacks them,
    and the confidence label is recomputed at call time.

    Args:
        hull: Hull number used for the registry lookup and as the
            last-resort name.
        entry: Cache entry; must contain ``lat`` and ``lng``.
        now: Reference time forwarded to the confidence computation.

    Returns:
        A new dict with position, provenance and computed confidence fields.
    """
    registry_row = CARRIER_REGISTRY.get(hull, {})
    label = _compute_position_confidence(entry, now=now)
    observed_at = entry.get("position_source_at", "")

    rendered = {
        "name": entry.get("name", registry_row.get("name", hull)),
        "lat": entry["lat"],
        "lng": entry["lng"],
        "heading": entry.get("heading", 0),
        "desc": entry.get("desc", ""),
        "wiki": entry.get("wiki", registry_row.get("wiki", "")),
        "source": entry.get("source", "OSINT estimated position"),
        "source_url": entry.get("source_url", ""),
        "position_source_at": observed_at,
        "position_confidence": label,
    }
    # Kept for backward compatibility with the existing frontend popup;
    # carries the source's observed time rather than the render time.
    rendered["last_osint_update"] = observed_at
    # Convenience boolean for the UI: true when the position is not a
    # live OSINT observation.
    rendered["is_fallback"] = label in ("seed", "stale", "stale_approximate", "homeport_default")
    return rendered
|
||||
|
||||
|
||||
def update_carrier_positions():
|
||||
"""Main update function — called on startup and every 12h.
|
||||
def update_carrier_positions() -> None:
|
||||
"""Refresh carrier positions.
|
||||
|
||||
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
|
||||
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
|
||||
Phase 1 (instant): publish whatever's in carrier_cache.json (or
|
||||
bootstrap from seed on first-ever run), so the map has carriers
|
||||
immediately.
|
||||
|
||||
Phase 2 (slow): query GDELT and replace position entries for any
|
||||
carrier mentioned in fresh news. Persist back to cache.
|
||||
"""
|
||||
global _last_update
|
||||
|
||||
# --- Phase 1: instant fallback + cache ---
|
||||
positions = _load_carrier_fallbacks()
|
||||
# --- Phase 1: instant cache (bootstrap from seed on first-ever run) ---
|
||||
positions = _bootstrap_cache_if_missing()
|
||||
|
||||
# Ensure every registered hull has SOMETHING in the cache. A hull
|
||||
# the seed didn't cover (e.g. added after install) renders at its
|
||||
# homeport with "homeport_default" confidence.
|
||||
for hull in CARRIER_REGISTRY:
|
||||
if hull not in positions:
|
||||
entry = _homeport_entry_for(hull)
|
||||
if entry is not None:
|
||||
positions[hull] = entry
|
||||
|
||||
with _positions_lock:
|
||||
# Only overwrite if positions are currently empty (first startup).
|
||||
# If we already have data from a previous cycle, keep it while GDELT runs.
|
||||
if not _carrier_positions:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)"
|
||||
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
)
|
||||
|
||||
# --- Phase 2: slow GDELT enrichment ---
|
||||
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||
#
|
||||
# USNI publishes a weekly editorial tracker with each carrier's
|
||||
# actual operating area, parsed from explicit prose like
|
||||
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
# These positions are tagged ``position_confidence: "recent"`` because
|
||||
# they reflect actual reporting, not headline-keyword centroids.
|
||||
# USNI updates are preferred over GDELT — they're authoritative on
|
||||
# US Navy positions where GDELT is just article-title text mining.
|
||||
try:
|
||||
from services.fetchers.usni_fleet_tracker import (
|
||||
fetch_latest_fleet_tracker_positions,
|
||||
)
|
||||
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||
for hull, pos in usni_positions.items():
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier USNI update: %s → %s",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
pos.get("desc", ""),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||
|
||||
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||
#
|
||||
# Used only to backfill carriers USNI didn't mention this week. The
|
||||
# position is stamped ``approximate`` so the UI knows it's a
|
||||
# headline-centroid match (Issue #245).
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
if hull in positions:
|
||||
positions[hull].update(pos)
|
||||
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
|
||||
# Only overwrite if the existing entry is NOT a recent USNI
|
||||
# observation. A "recent" USNI position is higher-confidence
|
||||
# than a GDELT headline-centroid match — don't let GDELT
|
||||
# demote a real position to an approximate one.
|
||||
existing = positions.get(hull, {})
|
||||
existing_conf = _compute_position_confidence(existing)
|
||||
if existing_conf == "recent":
|
||||
continue
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier OSINT: updated %s from GDELT news",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
)
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"GDELT carrier fetch failed: {e}")
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
|
||||
# Save and update the global state with enriched positions
|
||||
with _positions_lock:
|
||||
_carrier_positions.clear()
|
||||
_carrier_positions.update(positions)
|
||||
@@ -449,21 +687,15 @@ def update_carrier_positions():
|
||||
|
||||
_save_cache(positions)
|
||||
|
||||
sources = {}
|
||||
for p in positions.values():
|
||||
src = p.get("source", "unknown")
|
||||
sources[src] = sources.get(src, 0) + 1
|
||||
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
|
||||
confidences: Dict[str, int] = {}
|
||||
for entry in positions.values():
|
||||
label = _compute_position_confidence(entry)
|
||||
confidences[label] = confidences.get(label, 0) + 1
|
||||
logger.info("Carrier tracker: %d carriers updated. Confidence: %s", len(positions), confidences)
|
||||
|
||||
|
||||
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
"""Offset carriers that share identical coordinates so they don't stack.
|
||||
|
||||
At port: offset along the pier axis (~500m / 0.004° apart).
|
||||
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
|
||||
so they're visibly separate but clearly operating together.
|
||||
"""
|
||||
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
|
||||
"""Offset carriers that share identical coordinates so they don't stack."""
|
||||
from collections import defaultdict
|
||||
|
||||
groups: dict[str, list[int]] = defaultdict(list)
|
||||
@@ -475,7 +707,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
if len(indices) < 2:
|
||||
continue
|
||||
n = len(indices)
|
||||
# Determine if this is a port (near a homeport) or at sea
|
||||
sample = result[indices[0]]
|
||||
at_port = any(
|
||||
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
||||
@@ -484,7 +715,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
)
|
||||
|
||||
if at_port:
|
||||
# Use each carrier's distinct homeport pier coordinates
|
||||
for idx in indices:
|
||||
carrier = result[idx]
|
||||
hull = None
|
||||
@@ -497,8 +727,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
carrier["lat"] = info["homeport_lat"]
|
||||
carrier["lng"] = info["homeport_lng"]
|
||||
else:
|
||||
# At sea: spread in a line perpendicular to travel (~0.08° apart)
|
||||
spacing = 0.08 # ~9km — close enough to see they're together
|
||||
spacing = 0.08
|
||||
start_offset = -(n - 1) * spacing / 2
|
||||
for j, idx in enumerate(indices):
|
||||
result[idx]["lng"] += start_offset + j * spacing
|
||||
@@ -507,36 +736,44 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
|
||||
|
||||
def get_carrier_positions() -> List[dict]:
    """Return current carrier positions for the data pipeline.

    Each entry has the full provenance + freshness fields; the UI can
    decide how to render them. Carriers are never hidden — only
    labeled.

    Returns:
        One dict per hull in ``_carrier_positions``, passed through
        ``_deconflict_positions`` before being returned.
    """
    # One timestamp for the whole snapshot so every entry is enriched
    # against the same "now".
    now = datetime.now(timezone.utc)
    with _positions_lock:
        result: List[dict] = []
        for hull, entry in _carrier_positions.items():
            enriched = _enrich_for_rendering(hull, entry, now=now)
            result.append(
                {
                    "name": enriched["name"],
                    "type": "carrier",
                    "lat": enriched["lat"],
                    "lng": enriched["lng"],
                    "heading": None,  # OSINT cannot determine true heading.
                    "sog": 0,
                    "cog": 0,
                    "country": "United States",
                    "desc": enriched["desc"],
                    "wiki": enriched["wiki"],
                    "estimated": True,
                    "source": enriched["source"],
                    "source_url": enriched["source_url"],
                    "last_osint_update": enriched["last_osint_update"],
                    # New fields (additive — existing UI continues to work):
                    "position_source_at": enriched["position_source_at"],
                    "position_confidence": enriched["position_confidence"],
                    "is_fallback": enriched["is_fallback"],
                }
            )
    # NOTE(review): the result list holds freshly built dicts, so the
    # de-confliction pass is run after the lock is released — confirm this
    # matches the intended lock scope.
    return _deconflict_positions(result)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily.
|
||||
# -----------------------------------------------------------------
|
||||
_scheduler_thread: Optional[threading.Thread] = None
|
||||
_scheduler_stop = threading.Event()
|
||||
@@ -544,7 +781,6 @@ _scheduler_stop = threading.Event()
|
||||
|
||||
def _scheduler_loop():
|
||||
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
||||
# Initial update on startup
|
||||
try:
|
||||
update_carrier_positions()
|
||||
except Exception as e:
|
||||
@@ -552,7 +788,6 @@ def _scheduler_loop():
|
||||
|
||||
while not _scheduler_stop.is_set():
|
||||
now = datetime.now(timezone.utc)
|
||||
# Next target: 00:00 or 12:00 UTC, whichever is sooner
|
||||
hour = now.hour
|
||||
if hour < 12:
|
||||
next_hour = 12
|
||||
@@ -561,18 +796,17 @@ def _scheduler_loop():
|
||||
|
||||
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
||||
if next_hour == 24:
|
||||
from datetime import timedelta
|
||||
|
||||
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
wait_seconds = (next_run - now).total_seconds()
|
||||
logger.info(
|
||||
f"Carrier tracker: next update at {next_run.isoformat()} ({wait_seconds/3600:.1f}h)"
|
||||
"Carrier tracker: next update at %s (%.1fh)",
|
||||
next_run.isoformat(),
|
||||
wait_seconds / 3600,
|
||||
)
|
||||
|
||||
# Wait until next scheduled time, or until stop event
|
||||
if _scheduler_stop.wait(timeout=wait_seconds):
|
||||
break # Stop event was set
|
||||
break
|
||||
|
||||
try:
|
||||
update_carrier_positions()
|
||||
|
||||
@@ -987,7 +987,7 @@ _KML_NS = {"kml": "http://www.opengis.net/kml/2.2"}
|
||||
|
||||
def _find_kml_element(element, tag):
|
||||
"""Find first descendant matching tag, ignoring XML namespace prefix."""
|
||||
import xml.etree.ElementTree as ET
|
||||
import defusedxml.ElementTree as ET
|
||||
el = element.find(f".//{tag}")
|
||||
if el is not None:
|
||||
return el
|
||||
@@ -1015,7 +1015,7 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import xml.etree.ElementTree as ET
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
|
||||
@@ -32,17 +32,34 @@ class Settings(BaseSettings):
|
||||
MESH_ARTI_ENABLED: bool = False
|
||||
MESH_ARTI_SOCKS_PORT: int = 9050
|
||||
MESH_RELAY_PEERS: str = ""
|
||||
MESH_DEFAULT_SYNC_PEERS: str = "https://node.shadowbroker.info"
|
||||
MESH_PUBLIC_PEER_URL: str = ""
|
||||
# Bootstrap seeds are discovery hints, not authoritative network roots.
|
||||
# Nodes promote healthy discovered peers from the store/manifest over time.
|
||||
MESH_BOOTSTRAP_SEED_PEERS: str = "http://gqpbunqbgtkcqilvclm3xrkt3zowjyl3s62kkktvojgvxzizamvbrqid.onion:8000"
|
||||
# Legacy name kept for older compose/.env files.
|
||||
MESH_DEFAULT_SYNC_PEERS: str = ""
|
||||
# Infonet/Wormhole must fail closed to private transports by default.
|
||||
# Set true only for local relay development or explicitly public testnets.
|
||||
MESH_INFONET_ALLOW_CLEARNET_SYNC: bool = False
|
||||
MESH_BOOTSTRAP_DISABLED: bool = False
|
||||
MESH_BOOTSTRAP_MANIFEST_PATH: str = "data/bootstrap_peers.json"
|
||||
MESH_BOOTSTRAP_SIGNER_PUBLIC_KEY: str = ""
|
||||
MESH_NODE_MODE: str = "participant"
|
||||
MESH_SYNC_INTERVAL_S: int = 300
|
||||
MESH_SYNC_FAILURE_BACKOFF_S: int = 60
|
||||
MESH_SYNC_TIMEOUT_S: int = 5
|
||||
MESH_SYNC_MAX_PEERS_PER_CYCLE: int = 3
|
||||
MESH_RELAY_PUSH_TIMEOUT_S: int = 10
|
||||
MESH_RELAY_MAX_FAILURES: int = 3
|
||||
MESH_RELAY_FAILURE_COOLDOWN_S: int = 120
|
||||
MESH_BOOTSTRAP_SEED_FAILURE_COOLDOWN_S: int = 15
|
||||
MESH_PEER_PUSH_SECRET: str = ""
|
||||
# Issue #256 (tg12): optional per-peer HMAC secret map. Comma-separated
|
||||
# `url=secret` pairs. When a peer URL appears here, only that per-peer
|
||||
# secret is accepted for it — the global MESH_PEER_PUSH_SECRET above is
|
||||
# ignored for that specific URL. Single-peer installs and unmigrated
|
||||
# multi-peer installs leave this empty and behavior is unchanged.
|
||||
MESH_PEER_SECRETS: str = ""
|
||||
MESH_RNS_APP_NAME: str = "shadowbroker"
|
||||
MESH_RNS_ASPECT: str = "infonet"
|
||||
MESH_RNS_IDENTITY_PATH: str = ""
|
||||
@@ -100,6 +117,21 @@ class Settings(BaseSettings):
|
||||
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
||||
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
||||
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
||||
# Anti-spam: cap on distinct UNACKED messages a single sender can have
|
||||
# parked in a single recipient's mailbox at any one time. Once the
|
||||
# recipient pulls (acks) a message, the sender's quota for that pair
|
||||
# frees up. Default 2 — a sender who wants to deliver more must wait
|
||||
# for the recipient to actually read the prior messages.
|
||||
#
|
||||
# This cap is enforced TWICE: once on the local deposit path (the
|
||||
# sender's own node refuses to spool the 3rd message) AND once on
|
||||
# the replication-acceptance path (honest peer relays refuse to
|
||||
# accept inbound replicas that would put them over the cap). The
|
||||
# double enforcement makes the rule a NETWORK rule — patching out
|
||||
# the local check on a hostile sender's relay doesn't let extras
|
||||
# propagate, because every honest peer enforces the same cap on
|
||||
# inbound replication.
|
||||
MESH_DM_PENDING_PER_SENDER_LIMIT: int = 2
|
||||
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
||||
@@ -210,6 +242,7 @@ class Settings(BaseSettings):
|
||||
MESH_ALLOW_RAW_SECURE_STORAGE_FALLBACK: bool = False
|
||||
MESH_ACK_RAW_FALLBACK_AT_OWN_RISK: bool = False
|
||||
MESH_SECURE_STORAGE_SECRET: str = ""
|
||||
MESH_SECURE_STORAGE_SECRET_FILE: str = ""
|
||||
MESH_PRIVATE_LOG_TTL_S: int = 900
|
||||
# Sprint 1 rollout: restored DM boot probes stay disabled by default until
|
||||
# the architect reviews false positives from the observe-only path.
|
||||
@@ -278,6 +311,19 @@ class Settings(BaseSettings):
|
||||
# service operator can identify per-install traffic instead of a generic
|
||||
# "ShadowBroker" aggregate.
|
||||
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
||||
# Per-install operator handle used in the User-Agent for EVERY third-party
|
||||
# API the backend calls (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz,
|
||||
# Broadcastify, weather.gov, NUFORC, etc.). The default is empty, in which
|
||||
# case backend/services/network_utils.py auto-generates a stable
|
||||
# pseudonymous handle like "operator-7f3a92" on first use and caches it.
|
||||
# Operators who want to identify themselves with a real handle can set
|
||||
# this; operators who want to stay pseudonymous can leave it empty.
|
||||
#
|
||||
# The handle is sent ONLY to public third-party APIs. It is NEVER mixed
|
||||
# into mesh / Wormhole / Infonet identity (those have their own crypto
|
||||
# identity layer; conflating the two would leak public attribution into
|
||||
# private mesh state).
|
||||
OPERATOR_HANDLE: str = ""
|
||||
|
||||
# SAR (Synthetic Aperture Radar) data layer
|
||||
# Mode A — free catalog metadata, no account, default-on
|
||||
@@ -302,6 +348,11 @@ class Settings(BaseSettings):
|
||||
|
||||
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` instance, memoised by ``lru_cache``.

    Before constructing ``Settings`` a best-effort call to
    ``load_persisted_api_keys_into_environ`` is made (presumably so that
    persisted API keys are visible through the environment — verify in
    ``services.api_settings``). Any failure there is non-fatal: it is
    logged at debug level and settings are still built.

    Returns:
        The ``Settings`` object; repeated calls return the cached instance.
    """
    try:
        from services.api_settings import load_persisted_api_keys_into_environ

        load_persisted_api_keys_into_environ()
    except Exception as exc:
        # Deliberately best-effort, but leave a trace instead of swallowing
        # the failure silently. Imported locally so this module gains no new
        # top-level dependency.
        import logging

        logging.getLogger(__name__).debug(
            "Persisted API key preload skipped: %s", exc
        )
    return Settings()
|
||||
|
||||
|
||||
|
||||
@@ -11,8 +11,13 @@ DEFAULT_TRAIL_TTL_S = 300 # 5 min - trail TTL for non-tracked flights
|
||||
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
|
||||
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
|
||||
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
|
||||
GPS_JAMMING_MIN_RATIO = 0.30 # 30% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 5 # Min aircraft in grid cell for statistical significance
|
||||
# Tuned 2026-05: previously 0.30 / 5 aircraft which — combined with the
|
||||
# -1 noise cushion in the detector AND the pre-fix nac_p==0 filter that
|
||||
# discarded jamming victims — meant the layer almost never lit up.
|
||||
# Lowering the bar so genuine jamming zones with sparser ADS-B coverage
|
||||
# clear (eastern Med, Russia/Ukraine border, Iran/Iraq).
|
||||
GPS_JAMMING_MIN_RATIO = 0.20 # 20% degraded aircraft to flag zone
|
||||
GPS_JAMMING_MIN_AIRCRAFT = 3 # Min aircraft in grid cell for statistical significance
|
||||
|
||||
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
|
||||
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
|
||||
|
||||
@@ -19,6 +19,7 @@ import concurrent.futures
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
@@ -105,7 +106,7 @@ _SLOW_FETCH_S = float(os.environ.get("FETCH_SLOW_THRESHOLD_S", "5"))
|
||||
# Hard wall-clock limit per individual fetch task. A task that exceeds this
|
||||
# is treated as a failure so it cannot block an entire fetch tier indefinitely.
|
||||
_TASK_HARD_TIMEOUT_S = float(os.environ.get("FETCH_TASK_TIMEOUT_S", "120"))
|
||||
_FAST_STARTUP_CACHE_MAX_AGE_S = float(os.environ.get("FAST_STARTUP_CACHE_MAX_AGE_S", "300"))
|
||||
_FAST_STARTUP_CACHE_MAX_AGE_S = float(os.environ.get("FAST_STARTUP_CACHE_MAX_AGE_S", "21600"))
|
||||
_FAST_STARTUP_CACHE_PATH = Path(__file__).resolve().parents[1] / "data" / "fast_startup_cache.json"
|
||||
_FAST_STARTUP_CACHE_KEYS = (
|
||||
"commercial_flights",
|
||||
@@ -123,10 +124,32 @@ _FAST_STARTUP_CACHE_KEYS = (
|
||||
"sigint_totals",
|
||||
"trains",
|
||||
)
|
||||
_INTEL_STARTUP_CACHE_MAX_AGE_S = float(os.environ.get("INTEL_STARTUP_CACHE_MAX_AGE_S", "21600"))
|
||||
_INTEL_STARTUP_CACHE_PATH = Path(__file__).resolve().parents[1] / "data" / "intel_startup_cache.json"
|
||||
_INTEL_STARTUP_CACHE_KEYS = (
|
||||
"news",
|
||||
"gdelt",
|
||||
"liveuamap",
|
||||
"threat_level",
|
||||
"trending_markets",
|
||||
"correlations",
|
||||
"fimi",
|
||||
"crowdthreat",
|
||||
"uap_sightings",
|
||||
"military_bases",
|
||||
"wastewater",
|
||||
)
|
||||
_STARTUP_PRIORITY_TIMEOUT_S = float(os.environ.get("SHADOWBROKER_STARTUP_PRIORITY_TIMEOUT_S", "18"))
|
||||
_STARTUP_HEAVY_REFRESH_DELAY_S = float(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_REFRESH_DELAY_S", "90"))
|
||||
_STARTUP_HEAVY_REFRESH_STARTED = False
|
||||
_STARTUP_HEAVY_REFRESH_LOCK = threading.Lock()
|
||||
_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_FETCH_WORKERS", "8"))
|
||||
_SLOW_FETCH_CONCURRENCY = int(os.environ.get("SHADOWBROKER_SLOW_FETCH_CONCURRENCY", "4"))
|
||||
_STARTUP_HEAVY_CONCURRENCY = int(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY", "2"))
|
||||
|
||||
# Shared thread pool — reused across all fetch cycles instead of creating/destroying per tick
|
||||
_SHARED_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=20, thread_name_prefix="fetch"
|
||||
max_workers=max(2, _FETCH_WORKERS), thread_name_prefix="fetch"
|
||||
)
|
||||
|
||||
|
||||
@@ -140,6 +163,14 @@ def _cache_json_safe(value):
|
||||
return value
|
||||
|
||||
|
||||
def _has_cache_value(value) -> bool:
|
||||
if value is None:
|
||||
return False
|
||||
if isinstance(value, (list, tuple, dict, set)):
|
||||
return bool(value)
|
||||
return True
|
||||
|
||||
|
||||
def _load_fast_startup_cache_if_available() -> bool:
|
||||
"""Seed moving layers from a recent disk cache while live fetches warm up."""
|
||||
if _FAST_STARTUP_CACHE_MAX_AGE_S <= 0 or not _FAST_STARTUP_CACHE_PATH.exists():
|
||||
@@ -184,10 +215,15 @@ def _save_fast_startup_cache() -> None:
|
||||
"""Persist recent moving layers for the next cold start."""
|
||||
try:
|
||||
with _data_lock:
|
||||
layers = {
|
||||
key: latest_data.get(key)
|
||||
for key in _FAST_STARTUP_CACHE_KEYS
|
||||
if _has_cache_value(latest_data.get(key))
|
||||
}
|
||||
payload = {
|
||||
"cached_at": time.time(),
|
||||
"last_updated": latest_data.get("last_updated"),
|
||||
"layers": {key: latest_data.get(key) for key in _FAST_STARTUP_CACHE_KEYS},
|
||||
"layers": layers,
|
||||
"freshness": {
|
||||
key: source_timestamps.get(key)
|
||||
for key in _FAST_STARTUP_CACHE_KEYS
|
||||
@@ -204,14 +240,106 @@ def _save_fast_startup_cache() -> None:
|
||||
logger.debug("Fast startup cache save skipped: %s", e)
|
||||
|
||||
|
||||
def _load_intel_startup_cache_if_available() -> bool:
    """Restore intel-panel layers from the on-disk snapshot, if usable.

    Returns:
        ``True`` when at least one key from ``_INTEL_STARTUP_CACHE_KEYS``
        was copied into ``latest_data`` (the data version is bumped in that
        case). ``False`` when caching is disabled (max age <= 0), the file
        does not exist, the snapshot has no positive ``cached_at`` or is
        older than the max age, no known layer is present, or any error
        occurs while reading (logged as a non-fatal warning).

    Note:
        Freshness timestamps and ``last_updated`` from the snapshot are
        written before the "nothing restored" check, so they are applied
        even when the function ends up returning ``False`` for that reason.
    """
    if _INTEL_STARTUP_CACHE_MAX_AGE_S <= 0 or not _INTEL_STARTUP_CACHE_PATH.exists():
        return False
    try:
        with _INTEL_STARTUP_CACHE_PATH.open("r", encoding="utf-8") as cache_file:
            snapshot = json.load(cache_file)

        written_at = float(snapshot.get("cached_at") or 0)
        age_s = time.time() - written_at
        if written_at <= 0 or age_s > _INTEL_STARTUP_CACHE_MAX_AGE_S:
            logger.info("Skipping stale intel startup cache (age %.1fs)", age_s)
            return False

        cached_layers = snapshot.get("layers") or {}
        cached_freshness = snapshot.get("freshness") or {}
        restored: list[str] = []
        with _data_lock:
            # Only whitelisted layer keys are copied into the live store.
            for layer_key in _INTEL_STARTUP_CACHE_KEYS:
                if layer_key not in cached_layers:
                    continue
                latest_data[layer_key] = cached_layers[layer_key]
                restored.append(layer_key)
            for source_key, stamp in cached_freshness.items():
                source_timestamps[str(source_key)] = stamp
            snapshot_updated = snapshot.get("last_updated")
            if snapshot_updated:
                latest_data["last_updated"] = snapshot_updated

        if restored:
            from services.fetchers._store import bump_data_version

            bump_data_version()
            logger.info(
                "Loaded intel startup cache for %d layers (age %.1fs) so Global Threat Intercept can paint early",
                len(restored),
                age_s,
            )
            return True
        return False
    except Exception as exc:
        logger.warning("Intel startup cache load failed (non-fatal): %s", exc)
        return False
|
||||
|
||||
|
||||
def _save_intel_startup_cache() -> None:
    """Write the current intel-panel layers to disk for the next cold start.

    Only keys from ``_INTEL_STARTUP_CACHE_KEYS`` whose current value passes
    ``_has_cache_value`` are stored, together with any truthy source
    timestamps for those keys. The snapshot is written to a ``.tmp`` sibling
    and then moved over the real cache path. Any failure is logged at debug
    level and otherwise ignored.
    """
    try:
        with _data_lock:
            populated_layers = {}
            for layer_key in _INTEL_STARTUP_CACHE_KEYS:
                layer_value = latest_data.get(layer_key)
                if _has_cache_value(layer_value):
                    populated_layers[layer_key] = layer_value
            snapshot = {
                "cached_at": time.time(),
                "last_updated": latest_data.get("last_updated"),
                "layers": populated_layers,
                "freshness": {
                    source_key: source_timestamps.get(source_key)
                    for source_key in _INTEL_STARTUP_CACHE_KEYS
                    if source_timestamps.get(source_key)
                },
            }

        serialisable = _cache_json_safe(snapshot)
        cache_dir = _INTEL_STARTUP_CACHE_PATH.parent
        cache_dir.mkdir(parents=True, exist_ok=True)

        # Write to a temporary file first, then swap it into place.
        staging_path = _INTEL_STARTUP_CACHE_PATH.with_suffix(".tmp")
        with staging_path.open("w", encoding="utf-8") as out:
            json.dump(serialisable, out, separators=(",", ":"))
        staging_path.replace(_INTEL_STARTUP_CACHE_PATH)
    except Exception as exc:
        logger.debug("Intel startup cache save skipped: %s", exc)
|
||||
|
||||
|
||||
def seed_startup_caches() -> None:
    """Run the three disk-cache loaders used for first paint, in order.

    Order: Meshtastic map cache, fast (moving-layer) startup cache, intel
    startup cache. Their return values are ignored. Intended to avoid any
    remote calls — NOTE(review): that holds only as long as each loader
    stays disk-only; confirm for ``load_meshtastic_cache_if_available``.
    """
    for seed_from_disk in (
        load_meshtastic_cache_if_available,
        _load_fast_startup_cache_if_available,
        _load_intel_startup_cache_if_available,
    ):
        seed_from_disk()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler & Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
    """Run tasks concurrently and log any exceptions (do not fail silently).

    Tasks are submitted to ``_SHARED_EXECUTOR`` in batches of at most
    ``max_concurrency``; each batch is handed to ``_drain_task_futures``
    before the next batch is submitted.

    Args:
        label: Tier name used in log messages. When ``max_concurrency`` is
            not given, a label starting with ``"slow-tier"`` selects
            ``_SLOW_FETCH_CONCURRENCY`` and one starting with
            ``"startup-heavy"`` selects ``_STARTUP_HEAVY_CONCURRENCY``;
            any other label runs all tasks in a single batch.
        funcs: Zero-argument callables to run. An empty list is a no-op.
        max_concurrency: Optional explicit batch size. Values below 1 are
            raised to 1 and values above ``len(funcs)`` are capped.
    """
    if not funcs:
        return
    if max_concurrency is None:
        if label.startswith("slow-tier"):
            max_concurrency = _SLOW_FETCH_CONCURRENCY
        elif label.startswith("startup-heavy"):
            max_concurrency = _STARTUP_HEAVY_CONCURRENCY
        else:
            max_concurrency = len(funcs)
    max_concurrency = max(1, min(max_concurrency, len(funcs)))

    remaining_funcs = list(funcs)
    while remaining_funcs:
        batch, remaining_funcs = (
            remaining_funcs[:max_concurrency],
            remaining_funcs[max_concurrency:],
        )
        futures = {
            # Fall back to repr() for callables without __name__
            # (e.g. functools.partial) instead of raising AttributeError.
            _SHARED_EXECUTOR.submit(func): (
                getattr(func, "__name__", repr(func)),
                time.perf_counter(),
            )
            for func in batch
        }
        _drain_task_futures(label, futures)
|
||||
|
||||
|
||||
def _drain_task_futures(label: str, futures: dict):
|
||||
# Iterate directly so future.result(timeout=...) is the blocking call.
|
||||
# as_completed() blocks inside __next__() waiting for completion — the timeout
|
||||
# on result() would never be reached for a hanging task under that pattern.
|
||||
@@ -262,7 +390,6 @@ def update_fast_data():
|
||||
fetch_satellites,
|
||||
fetch_sigint,
|
||||
fetch_trains,
|
||||
fetch_tinygs,
|
||||
]
|
||||
_run_tasks("fast-tier", fast_funcs)
|
||||
with _data_lock:
|
||||
@@ -289,6 +416,7 @@ def update_slow_data():
|
||||
fetch_cctv,
|
||||
fetch_kiwisdr,
|
||||
fetch_satnogs,
|
||||
fetch_tinygs,
|
||||
fetch_frontlines,
|
||||
fetch_datacenters,
|
||||
fetch_military_bases,
|
||||
@@ -313,9 +441,76 @@ def update_slow_data():
|
||||
logger.error("Correlation engine failed: %s", e)
|
||||
from services.fetchers._store import bump_data_version
|
||||
bump_data_version()
|
||||
_save_intel_startup_cache()
|
||||
logger.info("Slow-tier update complete.")
|
||||
|
||||
|
||||
def _record_fetch_success(label: str, name: str, start: float) -> None:
    """Report a completed fetch task to the health tracker.

    Args:
        label: Tier name used as the prefix of the slow-task warning.
        name: Task name passed to ``record_success`` and used in the log.
        start: ``time.perf_counter()`` reading taken when the task was
            submitted; the elapsed time is measured from it.

    A warning is logged when the elapsed time exceeds ``_SLOW_FETCH_S``.
    """
    duration = time.perf_counter() - start
    from services.fetch_health import record_success

    record_success(name, duration_s=duration)
    if duration > _SLOW_FETCH_S:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("%s task slow: %s took %.2fs", label, name, duration)
|
||||
|
||||
|
||||
def _record_fetch_failure(label: str, name: str, start: float, error: Exception) -> None:
    """Report a failed fetch task to the health tracker and log its traceback.

    Args:
        label: Tier name used as the prefix of the error log line.
        name: Task name passed to ``record_failure`` and used in the log.
        start: ``time.perf_counter()`` reading taken when the task was
            submitted; the elapsed time is measured from it.
        error: The exception the task raised.
    """
    duration = time.perf_counter() - start
    from services.fetch_health import record_failure

    record_failure(name, error=error, duration_s=duration)
    # Attach the traceback of the exception that was passed in, rather than
    # relying on this function being called from inside an ``except`` block.
    logger.error("%s task failed: %s", label, name, exc_info=error)
|
||||
|
||||
|
||||
def _load_cctv_cache_for_startup() -> None:
    """Call ``fetch_cctv`` once for first paint, never letting it raise.

    Any exception is logged as a non-fatal warning and suppressed.

    NOTE(review): the intent is to load already-cached CCTV rows without
    running remote ingestors; that depends on what ``fetch_cctv`` does,
    which is not visible here — confirm.
    """
    try:
        fetch_cctv()
    except Exception as exc:
        logger.warning("Startup CCTV cache load failed (non-fatal): %s", exc)
|
||||
|
||||
|
||||
def _run_delayed_startup_heavy_refresh() -> None:
    """Sleep for the configured delay, then run the heavy startup tasks.

    When ``_STARTUP_HEAVY_REFRESH_DELAY_S`` is positive the calling thread
    sleeps for that many seconds first. The tasks are then run through
    ``_run_tasks`` under the ``"startup-heavy"`` label, which is the label
    that selects the startup-heavy concurrency limit there.
    """
    delay_s = _STARTUP_HEAVY_REFRESH_DELAY_S
    if delay_s > 0:
        logger.info(
            "Startup heavy synthesis delayed %.0fs so the dashboard can finish first paint",
            delay_s,
        )
        time.sleep(delay_s)

    logger.info("Startup heavy synthesis beginning (slow feeds, enrichment, daily products)...")
    heavy_tasks = [
        update_slow_data,
        fetch_volcanoes,
        fetch_viirs_change_nodes,
        fetch_unusual_whales,
        fetch_fimi,
        fetch_uap_sightings,
        fetch_wastewater,
        fetch_sar_catalog,
        fetch_sar_products,
    ]
    _run_tasks("startup-heavy", heavy_tasks)
    logger.info("Startup heavy synthesis complete.")
|
||||
|
||||
|
||||
def _schedule_delayed_startup_heavy_refresh() -> None:
    """Start the heavy-refresh background thread at most once per process.

    A negative ``_STARTUP_HEAVY_REFRESH_DELAY_S`` disables the refresh
    entirely. Otherwise the first caller flips
    ``_STARTUP_HEAVY_REFRESH_STARTED`` under its lock and launches a daemon
    thread running ``_run_delayed_startup_heavy_refresh``; later callers
    return without doing anything.
    """
    global _STARTUP_HEAVY_REFRESH_STARTED

    if _STARTUP_HEAVY_REFRESH_DELAY_S < 0:
        logger.info("Startup heavy synthesis disabled by SHADOWBROKER_STARTUP_HEAVY_REFRESH_DELAY_S")
        return

    # Test-and-set under the lock so only one caller ever wins.
    with _STARTUP_HEAVY_REFRESH_LOCK:
        already_started = _STARTUP_HEAVY_REFRESH_STARTED
        _STARTUP_HEAVY_REFRESH_STARTED = True
    if already_started:
        return

    worker = threading.Thread(
        target=_run_delayed_startup_heavy_refresh,
        name="startup-heavy-refresh",
        daemon=True,
    )
    worker.start()
|
||||
|
||||
|
||||
def update_all_data(*, startup_mode: bool = False):
|
||||
"""Full refresh.
|
||||
|
||||
@@ -324,50 +519,79 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
"""
|
||||
logger.info("Full data update starting (parallel)...")
|
||||
# Preload Meshtastic map cache immediately (instant, from disk)
|
||||
load_meshtastic_cache_if_available()
|
||||
_load_fast_startup_cache_if_available()
|
||||
seed_startup_caches()
|
||||
with _data_lock:
|
||||
meshtastic_seeded = bool(latest_data.get("meshtastic_map_nodes"))
|
||||
futures = {
|
||||
_SHARED_EXECUTOR.submit(fetch_airports): ("fetch_airports", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(update_fast_data): ("update_fast_data", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(update_slow_data): ("update_slow_data", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_volcanoes): ("fetch_volcanoes", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_viirs_change_nodes): ("fetch_viirs_change_nodes", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_unusual_whales): ("fetch_unusual_whales", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_fimi): ("fetch_fimi", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_gdelt): ("fetch_gdelt", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(update_liveuamap): ("update_liveuamap", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_uap_sightings): ("fetch_uap_sightings", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_wastewater): ("fetch_wastewater", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_crowdthreat): ("fetch_crowdthreat", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_sar_catalog): ("fetch_sar_catalog", time.perf_counter()),
|
||||
_SHARED_EXECUTOR.submit(fetch_sar_products): ("fetch_sar_products", time.perf_counter()),
|
||||
}
|
||||
if startup_mode:
|
||||
_load_cctv_cache_for_startup()
|
||||
priority_funcs = [
|
||||
fetch_airports,
|
||||
update_fast_data,
|
||||
fetch_news,
|
||||
fetch_gdelt,
|
||||
fetch_crowdthreat,
|
||||
fetch_firms_fires,
|
||||
fetch_weather_alerts,
|
||||
]
|
||||
if not meshtastic_seeded:
|
||||
priority_funcs.append(fetch_meshtastic_nodes)
|
||||
else:
|
||||
logger.info(
|
||||
"Startup preload: Meshtastic cache already loaded, deferring remote map refresh to scheduled cadence"
|
||||
)
|
||||
logger.info("Startup priority preload starting (%d tasks)...", len(priority_funcs))
|
||||
cycle_start = time.perf_counter()
|
||||
futures = {
|
||||
_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter())
|
||||
for func in priority_funcs
|
||||
}
|
||||
for future, (name, start) in futures.items():
|
||||
remaining = _STARTUP_PRIORITY_TIMEOUT_S - (time.perf_counter() - cycle_start)
|
||||
if remaining <= 0:
|
||||
logger.info("Startup priority budget reached; %s will continue in background", name)
|
||||
continue
|
||||
try:
|
||||
future.result(timeout=remaining)
|
||||
_record_fetch_success("startup-priority", name, start)
|
||||
except concurrent.futures.TimeoutError:
|
||||
logger.info(
|
||||
"Startup priority task still warming after %.1fs: %s",
|
||||
time.perf_counter() - start,
|
||||
name,
|
||||
)
|
||||
except Exception as e:
|
||||
_record_fetch_failure("startup-priority", name, start, e)
|
||||
logger.info("Startup preload: deferring Playwright Liveuamap scraper to scheduled cadence")
|
||||
_save_intel_startup_cache()
|
||||
_schedule_delayed_startup_heavy_refresh()
|
||||
logger.info("Startup priority preload complete; slow synthesis is warming in background.")
|
||||
return
|
||||
refresh_funcs = [
|
||||
fetch_airports,
|
||||
update_fast_data,
|
||||
update_slow_data,
|
||||
fetch_volcanoes,
|
||||
fetch_viirs_change_nodes,
|
||||
fetch_unusual_whales,
|
||||
fetch_fimi,
|
||||
fetch_gdelt,
|
||||
fetch_uap_sightings,
|
||||
fetch_wastewater,
|
||||
fetch_crowdthreat,
|
||||
fetch_sar_catalog,
|
||||
fetch_sar_products,
|
||||
]
|
||||
if not startup_mode or not meshtastic_seeded:
|
||||
futures[_SHARED_EXECUTOR.submit(fetch_meshtastic_nodes)] = (
|
||||
"fetch_meshtastic_nodes",
|
||||
time.perf_counter(),
|
||||
)
|
||||
refresh_funcs.append(fetch_meshtastic_nodes)
|
||||
else:
|
||||
logger.info(
|
||||
"Startup preload: Meshtastic cache already loaded, deferring remote map refresh to scheduled cadence"
|
||||
)
|
||||
for future, (name, start) in futures.items():
|
||||
try:
|
||||
future.result(timeout=_TASK_HARD_TIMEOUT_S)
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_success
|
||||
|
||||
record_success(name, duration_s=duration)
|
||||
if duration > _SLOW_FETCH_S:
|
||||
logger.warning(f"full-refresh task slow: {name} took {duration:.2f}s")
|
||||
except Exception as e:
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_failure
|
||||
|
||||
record_failure(name, error=e, duration_s=duration)
|
||||
logger.exception(f"full-refresh task failed: {name}")
|
||||
if not startup_mode:
|
||||
refresh_funcs.append(update_liveuamap)
|
||||
else:
|
||||
logger.info("Startup preload: deferring Playwright Liveuamap scraper to scheduled cadence")
|
||||
_run_tasks("full-refresh", refresh_funcs, max_concurrency=_STARTUP_HEAVY_CONCURRENCY)
|
||||
# Run CCTV ingest immediately so cameras are available on first request
|
||||
# (the scheduled job also runs every 10 min for ongoing refresh).
|
||||
if startup_mode:
|
||||
@@ -408,7 +632,7 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
|
||||
|
||||
_scheduler = None
|
||||
_STARTUP_CCTV_INGEST_DELAY_S = 30
|
||||
_STARTUP_CCTV_INGEST_DELAY_S = int(os.environ.get("SHADOWBROKER_STARTUP_CCTV_INGEST_DELAY_S", "180"))
|
||||
_FINANCIAL_REFRESH_MINUTES = 30
|
||||
|
||||
|
||||
@@ -553,6 +777,39 @@ def start_scheduler():
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# Flight observation pruning — drops icao24 → first_seen_at entries we
|
||||
# haven't seen in an hour. Same cadence as AIS prune for symmetry; the
|
||||
# per-tick scan is O(in-flight aircraft) so it's cheap.
|
||||
from services.fetchers.flight_observations import prune as _prune_flight_observations
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(_prune_flight_observations, "prune_flight_observations"),
|
||||
"interval",
|
||||
minutes=5,
|
||||
id="flight_observation_prune",
|
||||
max_instances=1,
|
||||
misfire_grace_time=60,
|
||||
)
|
||||
|
||||
# AISHub REST fallback — slow polling when the AISStream WebSocket
|
||||
# primary is offline. Configurable interval via
|
||||
# AISHUB_POLL_INTERVAL_MINUTES env (default 20 min). Operator must
|
||||
# set AISHUB_USERNAME to opt in. The fetcher is gated internally on
|
||||
# the primary being disconnected, so this job is cheap when the
|
||||
# WebSocket is healthy (early-returns after a status check).
|
||||
from services.fetchers.aishub_fallback import (
|
||||
aishub_poll_interval_minutes,
|
||||
fetch_aishub_vessels,
|
||||
)
|
||||
_aishub_interval = aishub_poll_interval_minutes()
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_aishub_vessels, "fetch_aishub_vessels"),
|
||||
"interval",
|
||||
minutes=_aishub_interval,
|
||||
id="aishub_fallback",
|
||||
max_instances=1,
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
|
||||
# days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
|
||||
# and broken upstream). Airline schedules change on a quarterly cycle,
|
||||
@@ -736,16 +993,19 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# UAP sightings (NUFORC) — daily at 12:00 UTC
|
||||
# UAP sightings (NUFORC) — weekly on Mondays at 12:00 UTC. The layer is a
|
||||
# rolling last-60-days digest; refreshing once a week is enough cadence
|
||||
# for human-readable map exploration and keeps load on nuforc.org light.
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(
|
||||
lambda: fetch_uap_sightings(force_refresh=True),
|
||||
"fetch_uap_sightings",
|
||||
),
|
||||
"cron",
|
||||
day_of_week="mon",
|
||||
hour=12,
|
||||
minute=0,
|
||||
id="uap_sightings_daily",
|
||||
id="uap_sightings_weekly",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
|
||||
@@ -16,8 +16,15 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _feed_ingester_user_agent() -> str:
    """Return the User-Agent header value for operator-curated feed requests.

    Round 7a: delegates to ``outbound_user_agent`` with the
    ``"feed-ingester"`` tag so requests carry per-install attribution.
    """
    purpose = "feed-ingester"
    return outbound_user_agent(purpose)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -157,7 +164,7 @@ def _fetch_layer_feed(layer: dict[str, Any]) -> None:
|
||||
resp = requests.get(
|
||||
feed_url,
|
||||
timeout=_FETCH_TIMEOUT,
|
||||
headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
|
||||
headers={"User-Agent": _feed_ingester_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -318,7 +318,7 @@ active_layers: dict[str, bool] = {
|
||||
"uap_sightings": True,
|
||||
"wastewater": True,
|
||||
"ai_intel": True,
|
||||
"crowdthreat": True,
|
||||
"crowdthreat": False,
|
||||
"sar": True,
|
||||
}
|
||||
|
||||
|
||||
@@ -16,11 +16,18 @@ import csv
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any
|
||||
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _aircraft_db_user_agent() -> str:
    """Return the User-Agent header value for aircraft-database downloads.

    Round 7a: ``outbound_user_agent`` is imported lazily, at call time, so
    the per-install operator handle is included in the result.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("aircraft-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUCKET_LIST_URL = (
|
||||
@@ -31,11 +38,7 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_LIST_TIMEOUT_S = 30
|
||||
_DOWNLOAD_TIMEOUT_S = 600
|
||||
_USER_AGENT = (
|
||||
"ShadowBroker-OSINT/0.9.7 "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"contact: bigbodycobain@gmail.com)"
|
||||
)
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_aircraft_by_hex: dict[str, dict[str, str]] = {}
|
||||
@@ -48,7 +51,7 @@ def _latest_snapshot_key() -> str:
|
||||
response = requests.get(
|
||||
_BUCKET_LIST_URL,
|
||||
timeout=_LIST_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.text)
|
||||
@@ -75,7 +78,7 @@ def _stream_csv_index(url: str) -> dict[str, dict[str, str]]:
|
||||
url,
|
||||
timeout=_DOWNLOAD_TIMEOUT_S,
|
||||
stream=True,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
line_iter = (
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
"""AISHub REST fallback for ship tracking when AISStream is unreachable.
|
||||
|
||||
Background
|
||||
----------
|
||||
On 2026-05-23 ``stream.aisstream.io`` (the primary live AIS WebSocket feed)
|
||||
went fully offline. Backend's only ship signal vanished. This module polls
|
||||
``data.aishub.net``'s free REST API on a slow cadence (default 20 min) when
|
||||
the WebSocket primary is disconnected, so the ships layer doesn't go fully
|
||||
dark during upstream outages.
|
||||
|
||||
Why 20 minutes
|
||||
--------------
|
||||
AISHub's free tier is rate-limited and explicitly asks consumers to be
|
||||
courteous. 20 minutes is well inside their limits, gives ships time to
|
||||
move enough to look "alive" on the map, and won't drain their service.
|
||||
Configurable via the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (clamped to
|
||||
[1, 360]).
|
||||
|
||||
Why slow vs primary
|
||||
-------------------
|
||||
This is degraded mode, not a replacement. A ship at 20 knots moves about
|
||||
6 nautical miles in 20 minutes — visible on the map but coarser than the
|
||||
real-time WebSocket signal. When AISStream comes back online, the
|
||||
WebSocket data will overwrite these records via the same ``_vessels``
|
||||
dict and ``source`` will flip from ``"aishub"`` back to upstream-live.
|
||||
|
||||
Opt-in
|
||||
------
|
||||
Operator must set ``AISHUB_USERNAME`` (free registration at
|
||||
https://www.aishub.net/api). If unset, this fetcher is a no-op.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
AISHUB_URL = "https://data.aishub.net/ws.php"
|
||||
|
||||
|
||||
def aishub_username() -> str:
    """Return ``AISHUB_USERNAME`` from the environment, stripped ("" if unset)."""
    configured = os.environ.get("AISHUB_USERNAME", "")
    return str(configured).strip()
|
||||
|
||||
|
||||
def aishub_fallback_enabled() -> bool:
    """Report whether the operator has opted into the AISHub fallback.

    The opt-in signal is simply a non-empty ``AISHUB_USERNAME`` (obtained by
    registering with AISHub); there is no separate enable flag.
    """
    username = aishub_username()
    return len(username) > 0
|
||||
|
||||
|
||||
def aishub_poll_interval_minutes() -> int:
    """Return the AISHub polling interval in minutes.

    Reads ``AISHUB_POLL_INTERVAL_MINUTES`` (default 20). A value that does not
    parse as an integer falls back to 20, and the result is clamped to
    [1, 360] so a hostile or misconfigured env var can neither hammer the
    upstream nor stretch the interval beyond six hours.
    """
    lower, upper, fallback = 1, 360, 20
    text = str(os.environ.get("AISHUB_POLL_INTERVAL_MINUTES", "20")).strip()
    try:
        minutes = int(text)
    except (TypeError, ValueError):
        minutes = fallback
    if minutes < lower:
        return lower
    if minutes > upper:
        return upper
    return minutes
|
||||
|
||||
|
||||
def _should_run_fallback() -> bool:
    """Decide whether this poll cycle should query AISHub.

    Returns False when:
      * AISHub isn't configured (``aishub_fallback_enabled()`` is false), or
      * ``ais_proxy_status()`` reports ``connected`` as exactly ``True`` —
        fresher WebSocket data is already flowing and must not be stomped.

    Returns True in every other case, including when ``services.ais_stream``
    cannot be imported or the status call raises. So an install that sets
    AISHUB_USERNAME but whose primary never reports connected still gets
    AISHub data on its own slow cadence.
    """
    if not aishub_fallback_enabled():
        return False
    try:
        from services.ais_stream import ais_proxy_status

        primary = ais_proxy_status() or {}
    except Exception:
        # ais_stream not importable (or the status probe failed)? Still try AISHub.
        return True
    primary_connected = primary.get("connected") is True
    return not primary_connected
|
||||
|
||||
|
||||
def _parse_aishub_response(payload: str) -> list[dict]:
|
||||
"""Parse the AISHub JSON response into a list of vessel records.
|
||||
|
||||
Successful response shape::
|
||||
|
||||
[
|
||||
{"ERROR": false, "USERNAME": "...", "FORMAT": "1", "RECORDS": N},
|
||||
[{"MMSI": ..., "LATITUDE": ..., "LONGITUDE": ..., ...}, ...]
|
||||
]
|
||||
|
||||
Error response shape::
|
||||
|
||||
[{"ERROR": true, "ERROR_MESSAGE": "..."}]
|
||||
|
||||
Empty payload (e.g. silent rate-limit drop) returns ``[]``.
|
||||
"""
|
||||
if not payload or not payload.strip():
|
||||
return []
|
||||
try:
|
||||
data = json.loads(payload)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("AISHub: response is not JSON: %s", e)
|
||||
return []
|
||||
if not isinstance(data, list) or not data:
|
||||
return []
|
||||
header = data[0] if isinstance(data[0], dict) else {}
|
||||
if header.get("ERROR") is True:
|
||||
logger.warning(
|
||||
"AISHub: upstream error: %s",
|
||||
header.get("ERROR_MESSAGE", "<unspecified>"),
|
||||
)
|
||||
return []
|
||||
if len(data) < 2 or not isinstance(data[1], list):
|
||||
return []
|
||||
return [row for row in data[1] if isinstance(row, dict)]
|
||||
|
||||
|
||||
def _normalize_record(row: dict) -> dict | None:
|
||||
"""Map an AISHub vessel record to our internal vessel schema.
|
||||
|
||||
Returns None when the record can't be used (no MMSI, bad position,
|
||||
sentinel "not available" lat/lng).
|
||||
"""
|
||||
try:
|
||||
mmsi = int(row.get("MMSI") or 0)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if not mmsi:
|
||||
return None
|
||||
try:
|
||||
lat = float(row.get("LATITUDE"))
|
||||
lng = float(row.get("LONGITUDE"))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
# AIS uses 91/181 as "no position available" sentinels.
|
||||
if abs(lat) > 90 or abs(lng) > 180:
|
||||
return None
|
||||
if lat == 91.0 or lng == 181.0:
|
||||
return None
|
||||
# SOG raw 102.3 is "speed not available"; sanitize to 0.
|
||||
try:
|
||||
sog_raw = float(row.get("SOG") or 0)
|
||||
except (TypeError, ValueError):
|
||||
sog_raw = 0.0
|
||||
sog = 0.0 if sog_raw >= 102.2 else sog_raw
|
||||
try:
|
||||
cog = float(row.get("COG") or 0)
|
||||
except (TypeError, ValueError):
|
||||
cog = 0.0
|
||||
try:
|
||||
heading_raw = int(row.get("HEADING") or 511)
|
||||
except (TypeError, ValueError):
|
||||
heading_raw = 511
|
||||
# AIS heading sentinel 511 = "not available" — fall back to COG.
|
||||
heading = heading_raw if heading_raw != 511 else cog
|
||||
try:
|
||||
ais_type = int(row.get("TYPE") or 0)
|
||||
except (TypeError, ValueError):
|
||||
ais_type = 0
|
||||
return {
|
||||
"mmsi": mmsi,
|
||||
"lat": lat,
|
||||
"lng": lng,
|
||||
"sog": sog,
|
||||
"cog": cog,
|
||||
"heading": heading,
|
||||
"name": str(row.get("NAME") or "").strip() or "UNKNOWN",
|
||||
"callsign": str(row.get("CALLSIGN") or "").strip(),
|
||||
"destination": str(row.get("DEST") or "").strip().replace("@", "") or "",
|
||||
"imo": int(row.get("IMO") or 0),
|
||||
"ais_type_code": ais_type,
|
||||
}
|
||||
|
||||
|
||||
def fetch_aishub_vessels() -> int:
    """Poll AISHub and merge vessels into the shared ``_vessels`` store.

    Returns the number of vessels updated (0 on skip, error, or no data).
    Designed to be called by the APScheduler tier — see ``data_fetcher.py``
    for the interval job that wraps this.

    Fetch and HTTP failures are logged and reported as 0 rather than raised.
    """
    if not _should_run_fallback():
        logger.debug("AISHub fallback skipped: primary connected or not configured")
        return 0

    # Build the query with urlencode so an operator-supplied username that
    # contains reserved characters (space, '&', '#', '+', ...) cannot corrupt
    # the URL or smuggle in extra query parameters. Imported at function
    # scope so the module's import block does not need to change.
    from urllib.parse import urlencode

    query = urlencode(
        {
            "username": aishub_username(),
            "format": "1",
            "output": "json",
            "compress": "0",
        }
    )
    url = f"{AISHUB_URL}?{query}"

    try:
        response = fetch_with_curl(url, timeout=30)
    except Exception as e:
        logger.warning("AISHub fetch failed: %s", e)
        return 0

    if not response or response.status_code != 200:
        logger.warning(
            "AISHub HTTP %s",
            getattr(response, "status_code", "None"),
        )
        return 0

    rows = _parse_aishub_response(getattr(response, "text", "") or "")
    if not rows:
        return 0

    # Inline imports to avoid a circular dependency at module load time
    # (ais_stream imports lots of things and is loaded by main.py).
    from services.ais_stream import (
        _vessels,
        _vessels_lock,
        _record_vessel_trail_locked,
        classify_vessel,
        get_country_from_mmsi,
    )

    now = time.time()
    count = 0
    with _vessels_lock:
        for row in rows:
            normalized = _normalize_record(row)
            if normalized is None:
                continue
            mmsi = normalized["mmsi"]
            vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
            # Don't overwrite fresher live data: if the WebSocket pushed an
            # update for this MMSI more recently than now-1s (race during
            # the brief reconnection window) keep the live one.
            last = float(vessel.get("_updated") or 0)
            if last > now - 1:
                continue
            vessel.update(
                {
                    "lat": normalized["lat"],
                    "lng": normalized["lng"],
                    "sog": normalized["sog"],
                    "cog": normalized["cog"],
                    "heading": normalized["heading"],
                    "_updated": now,
                    "source": "aishub",
                }
            )
            # Static fields are only overwritten when AISHub actually supplied
            # a value, so existing richer data is not blanked out.
            if normalized["name"] and normalized["name"] != "UNKNOWN":
                vessel["name"] = normalized["name"]
            if normalized["callsign"]:
                vessel["callsign"] = normalized["callsign"]
            if normalized["destination"]:
                vessel["destination"] = normalized["destination"]
            if normalized["imo"]:
                vessel["imo"] = normalized["imo"]
            if normalized["ais_type_code"]:
                vessel["ais_type_code"] = normalized["ais_type_code"]
                vessel["type"] = classify_vessel(normalized["ais_type_code"], mmsi)
            if not vessel.get("country"):
                vessel["country"] = get_country_from_mmsi(mmsi)
            _record_vessel_trail_locked(
                mmsi,
                normalized["lat"],
                normalized["lng"],
                normalized["sog"],
                now,
            )
            count += 1

    if count:
        logger.info(
            "AISHub fallback: merged %d vessels (poll interval %d min)",
            count,
            aishub_poll_interval_minutes(),
        )
    return count
|
||||
@@ -7,6 +7,7 @@ No API key required — the /threats endpoint is unauthenticated.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh, is_any_active
|
||||
@@ -16,6 +17,16 @@ logger = logging.getLogger("services.data_fetcher")
|
||||
|
||||
_CT_BASE = "https://backend.crowdthreat.world"
|
||||
|
||||
|
||||
def crowdthreat_fetch_enabled() -> bool:
    """Report whether the operator explicitly opted into CrowdThreat pulls.

    True only when ``CROWDTHREAT_ENABLED`` (trimmed, case-insensitive) is one
    of ``1``, ``true``, ``yes`` or ``on``; unset or anything else is False.
    """
    flag = str(os.environ.get("CROWDTHREAT_ENABLED", "")).strip().lower()
    return flag in ("1", "true", "yes", "on")
|
||||
|
||||
# CrowdThreat category_id → icon ID used on the MapLibre layer
|
||||
_CATEGORY_ICON = {
|
||||
1: "ct-security", # Security & Conflict (red)
|
||||
@@ -43,6 +54,12 @@ _CATEGORY_COLOUR = {
|
||||
@with_retry(max_retries=2, base_delay=5)
|
||||
def fetch_crowdthreat():
|
||||
"""Fetch verified threat reports from CrowdThreat public API."""
|
||||
if not crowdthreat_fetch_enabled():
|
||||
logger.debug("CrowdThreat fetch skipped; set CROWDTHREAT_ENABLED=true to opt in")
|
||||
with _data_lock:
|
||||
latest_data["crowdthreat"] = []
|
||||
_mark_fresh("crowdthreat")
|
||||
return
|
||||
if not is_any_active("crowdthreat"):
|
||||
return
|
||||
|
||||
|
||||
@@ -15,7 +15,11 @@ import time
|
||||
import heapq
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import (
|
||||
external_curl_fallback_enabled,
|
||||
fetch_with_curl,
|
||||
outbound_user_agent,
|
||||
)
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.nuforc_enrichment import enrich_sighting
|
||||
from services.fetchers.retry import with_retry
|
||||
@@ -279,9 +283,13 @@ def fetch_weather_alerts():
|
||||
return
|
||||
alerts = []
|
||||
try:
|
||||
# weather.gov requires a User-Agent per their API policy. Round 7a:
|
||||
# send the per-install operator handle so they can rate-limit per
|
||||
# operator instead of treating "Shadowbroker" as one entity.
|
||||
from services.network_utils import outbound_user_agent
|
||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||
headers = {
|
||||
"User-Agent": "(ShadowBroker OSINT Dashboard, github.com/BigBodyCobain/Shadowbroker)",
|
||||
"User-Agent": outbound_user_agent("weather-gov"),
|
||||
"Accept": "application/geo+json",
|
||||
}
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
@@ -685,6 +693,8 @@ _NUFORC_TOKEN = os.environ.get("NUFORC_MAPBOX_TOKEN", "").strip()
|
||||
_NUFORC_RADIUS_M = 200_000 # 200 km query radius
|
||||
_NUFORC_LIMIT = 50 # max features per tilequery call
|
||||
_NUFORC_RECENT_DAYS = int(os.environ.get("NUFORC_RECENT_DAYS", "60"))
|
||||
_NUFORC_HF_FALLBACK_LIMIT = max(25, int(os.environ.get("NUFORC_HF_FALLBACK_LIMIT", "250")))
|
||||
_NUFORC_HF_GEOCODE_LIMIT = max(25, int(os.environ.get("NUFORC_HF_GEOCODE_LIMIT", "150")))
|
||||
_NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1")))
|
||||
# Photon (Komoot) is more lenient than Nominatim — ~200ms per query in
|
||||
# practice, so a 0.3s spacing keeps us well under any soft throttle while
|
||||
@@ -707,7 +717,12 @@ _NUFORC_LIVE_NONCE_RE = re.compile(
|
||||
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
||||
)
|
||||
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
||||
_NUFORC_LIVE_USER_AGENT = "Mozilla/5.0 (ShadowBroker-OSINT NUFORC-fetcher)"
|
||||
# Round 7a: NUFORC's site is sensitive to non-browser UAs but we send a
|
||||
# per-install operator handle prefixed by Mozilla/5.0 so we're identifiable
|
||||
# without being aggregately blocked. Operators who want stricter privacy
|
||||
# can override the entire UA via SHADOWBROKER_USER_AGENT.
|
||||
def _nuforc_live_user_agent() -> str:
    """Return the UA sent to NUFORC: the outbound handle wrapped as ``Mozilla/5.0 (...)``."""
    handle = outbound_user_agent("nuforc-live")
    return "Mozilla/5.0 ({})".format(handle)
|
||||
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
||||
|
||||
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
||||
@@ -951,7 +966,7 @@ def _photon_lookup(query: str) -> list[float] | None:
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (NUFORC-UAP-layer)",
|
||||
"User-Agent": outbound_user_agent("nuforc-uap-geocode"),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=10,
|
||||
@@ -1034,12 +1049,20 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
|
||||
ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
|
||||
|
||||
if not external_curl_fallback_enabled():
|
||||
logger.warning(
|
||||
"NUFORC live: external curl disabled on Windows for %s; "
|
||||
"set SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=1 to opt in.",
|
||||
yyyymm,
|
||||
)
|
||||
return []
|
||||
|
||||
# Step 1: GET the month index to capture session cookies + fresh nonce.
|
||||
try:
|
||||
index_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-A", _nuforc_live_user_agent(),
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
index_url,
|
||||
@@ -1075,7 +1098,7 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
ajax_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-A", _nuforc_live_user_agent(),
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
"-X", "POST",
|
||||
@@ -1340,6 +1363,171 @@ def _build_recent_uap_sightings() -> list[dict]:
|
||||
return sightings
|
||||
|
||||
|
||||
def _split_uap_location(location: str) -> tuple[str, str, str]:
|
||||
parts = [p.strip() for p in str(location or "").split(",") if p.strip()]
|
||||
city = parts[0] if parts else ""
|
||||
state = ""
|
||||
country = ""
|
||||
if len(parts) >= 2:
|
||||
state = parts[1]
|
||||
if len(parts) >= 3:
|
||||
country = parts[-1]
|
||||
if country and country.upper() in _US_COUNTRY_ALIASES:
|
||||
country = "US"
|
||||
return city, state, country
|
||||
|
||||
|
||||
def _build_uap_sightings_from_hf_mirror() -> list[dict]:
    """Assemble mappable UAP sightings from the public Hugging Face NUFORC mirror.

    Resilience fallback for local/Windows runs where nuforc.org is
    Cloudflare-gated and the Mapbox token is not configured. Less fresh than
    the live NUFORC AJAX feed, but it keeps the layer visible and cached.

    Steps:
      1. Download the mirror CSV; a transport error or non-200 yields ``[]``.
      2. Keep rows whose parsed occurrence date falls inside the rolling
         ``_NUFORC_RECENT_DAYS`` window and that have a usable location/city.
      3. Order newest first and cap at ``_NUFORC_HF_FALLBACK_LIMIT`` rows.
      4. Resolve coordinates through the location cache, geocoding at most
         ``_NUFORC_HF_GEOCODE_LIMIT`` uncached locations per call.
      5. Skip rows with no coordinates, or whose coordinates
         ``coord_in_country`` reports as outside the row's country.

    Why the date cutoff: the mirror is a static snapshot that is refreshed
    sporadically. Without a cutoff, "newest N rows" could be years old. When
    nothing falls inside the window the function logs an error and returns
    ``[]`` instead of silently serving stale rows.
    """
    from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
    from services.geocode_validate import coord_in_country

    window_start = (
        datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
    ).strftime("%Y-%m-%d")

    # --- download -----------------------------------------------------------
    try:
        response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
        if not response or response.status_code != 200:
            logger.warning(
                "UAP sightings: HF fallback failed HTTP %s",
                getattr(response, "status_code", "None"),
            )
            return []
    except Exception as e:
        logger.warning("UAP sightings: HF fallback download failed: %s", e)
        return []

    # --- parse + window filter ----------------------------------------------
    recent: list[dict] = []
    too_old = 0
    try:
        for record in csv.DictReader(io.StringIO(response.text)):
            when = _parse_date(
                record.get("Occurred", "")
                or record.get("Date / Time", "")
                or record.get("Date", "")
            )
            if not when:
                continue
            if when < window_start:
                too_old += 1
                continue

            place = _normalize_uap_location(
                record.get("Location", "")
                or record.get("City", "")
                or record.get("location", "")
            )
            if not place:
                continue
            city, state, country = _split_uap_location(place)
            if not city:
                continue

            ident = str(record.get("Sighting", "") or "").strip()
            if not ident:
                # No upstream id: derive a short stable one from the row content.
                fingerprint = f"{when}|{place}|{record.get('Summary', '')}"
                ident = hashlib.sha1(fingerprint.encode("utf-8", "ignore")).hexdigest()[:12]

            blurb = (
                record.get("Summary", "")
                or record.get("Text", "")
                or "Sighting reported"
            ).strip()
            if len(blurb) > 280:
                blurb = blurb[:277] + "..."

            posted = _parse_date(record.get("Posted", "") or record.get("Reported", "")) or when
            recent.append(
                {
                    "id": f"NUFORC-{ident}",
                    "occurred": when,
                    "posted": posted,
                    "location": place,
                    "city": city,
                    "state": state,
                    "country": country or _uap_country_from_location(place, state),
                    "shape_raw": (record.get("Shape", "") or "Unknown").strip(),
                    "duration": (record.get("Duration", "") or "").strip(),
                    "summary": blurb,
                }
            )
    except Exception as e:
        logger.warning("UAP sightings: HF fallback parse failed: %s", e)
        return []

    if not recent:
        # No rows inside the rolling window. This is the smoking gun for
        # "the public HF dataset hasn't been refreshed in years" — log
        # loudly so the operator sees it instead of guessing.
        logger.error(
            "UAP sightings: HF fallback yielded 0 rows within last %d days "
            "(dropped %d stale rows). HF mirror is likely stale; the layer "
            "will be empty until the live NUFORC path recovers.",
            _NUFORC_RECENT_DAYS,
            too_old,
        )
        return []

    # --- rank + cap -----------------------------------------------------------
    ranked = sorted(
        recent,
        key=lambda item: (item["occurred"], item["posted"], item["id"]),
        reverse=True,
    )
    ranked = ranked[:_NUFORC_HF_FALLBACK_LIMIT]

    # --- geocode + validate ---------------------------------------------------
    geo_cache = _load_nuforc_location_cache()
    mapped: list[dict] = []
    lookups = 0
    for item in ranked:
        place = item["location"]
        if place in geo_cache or lookups >= _NUFORC_HF_GEOCODE_LIMIT:
            # Cached (possibly as a known miss) or lookup budget exhausted.
            point = geo_cache.get(place)
        else:
            try:
                point = _geocode_uap_location(
                    place, item["city"], item["state"], item["country"]
                )
            except Exception:
                point = None
            geo_cache[place] = point
            lookups += 1
            # Space out lookups, but don't sleep after the final allowed one.
            if lookups < _NUFORC_HF_GEOCODE_LIMIT:
                time.sleep(_NUFORC_GEOCODE_SPACING_S)

        if not point:
            continue

        if item.get("country"):
            try:
                within = coord_in_country(point[0], point[1], item["country"])
            except Exception:
                within = None
            # Only an explicit False rejects; None (unknown/error) is kept.
            if within is False:
                continue

        raw_shape = item["shape_raw"] or "Unknown"
        if raw_shape != "Unknown":
            shape = _normalize_uap_shape(raw_shape)
        else:
            shape = "unknown"
        mapped.append(
            {
                "id": item["id"],
                "date_time": item["occurred"],
                "city": item["city"],
                "state": item["state"],
                "country": item["country"],
                "shape": shape,
                "shape_raw": raw_shape,
                "duration": item["duration"],
                "summary": item["summary"],
                "posted": item["posted"],
                "lat": float(point[0]),
                "lng": float(point[1]),
                "count": 1,
                "source": "NUFORC-HF",
            }
        )

    _save_nuforc_location_cache(geo_cache)
    logger.info(
        "UAP sightings: %d mapped reports from HF fallback (%d candidates, %d geocoded)",
        len(mapped),
        len(ranked),
        lookups,
    )
    return mapped
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
"""Fetch last-year UAP sightings from NUFORC.
|
||||
@@ -1355,12 +1543,34 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
|
||||
sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
|
||||
if sightings is None:
|
||||
sightings = _build_recent_uap_sightings()
|
||||
_save_nuforc_sightings_cache(sightings)
|
||||
live_error: Exception | None = None
|
||||
try:
|
||||
sightings = _build_recent_uap_sightings()
|
||||
except Exception as e:
|
||||
live_error = e
|
||||
logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
|
||||
sightings = _build_uap_sightings_from_hf_mirror()
|
||||
if sightings:
|
||||
_save_nuforc_sightings_cache(sightings)
|
||||
elif live_error is not None:
|
||||
# Both paths failed: live raised AND HF fallback returned empty
|
||||
# (either the HF mirror is stale beyond the cutoff or the network
|
||||
# is gone entirely). The previous code silently set the layer to
|
||||
# ``[]`` and kept marking it fresh; that masked the failure for
|
||||
# days. Surface it via assert_canary so the health registry shows
|
||||
# the layer as broken instead of "fresh and empty".
|
||||
from services.slo import assert_canary
|
||||
assert_canary("uap_sightings", 0)
|
||||
logger.error(
|
||||
"UAP sightings: both live NUFORC and HF fallback produced 0 "
|
||||
"rows; layer is unavailable. Live error: %s",
|
||||
live_error,
|
||||
)
|
||||
|
||||
with _data_lock:
|
||||
latest_data["uap_sightings"] = sightings
|
||||
_mark_fresh("uap_sightings")
|
||||
latest_data["uap_sightings"] = sightings or []
|
||||
if sightings:
|
||||
_mark_fresh("uap_sightings")
|
||||
return
|
||||
|
||||
cutoff = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
|
||||
@@ -5,6 +5,7 @@ debunked claims, threat actor mentions, and target country references.
|
||||
Refreshes every 12 hours (FIMI data updates weekly).
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
@@ -18,6 +19,16 @@ logger = logging.getLogger("services.data_fetcher")
|
||||
|
||||
_FIMI_FEED_URL = "https://euvsdisinfo.eu/feed/"
|
||||
|
||||
|
||||
def fimi_fetch_enabled() -> bool:
    """Report whether the operator explicitly opted into FIMI pulls.

    True only when ``FIMI_ENABLED`` (trimmed, case-insensitive) is one of
    ``1``, ``true``, ``yes`` or ``on``; unset or anything else is False.
    """
    flag = str(os.environ.get("FIMI_ENABLED", "")).strip().lower()
    return flag in ("1", "true", "yes", "on")
|
||||
|
||||
# ── Threat actor keywords ──────────────────────────────────────────────────
|
||||
# Map of keyword → canonical actor name. Checked case-insensitively.
|
||||
_THREAT_ACTORS: dict[str, str] = {
|
||||
@@ -173,6 +184,12 @@ def _is_major_wave(narratives: list[dict], targets: dict[str, int]) -> bool:
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_fimi():
|
||||
"""Fetch and parse the EUvsDisinfo RSS feed."""
|
||||
if not fimi_fetch_enabled():
|
||||
logger.debug("FIMI fetch skipped; set FIMI_ENABLED=true to opt in")
|
||||
with _data_lock:
|
||||
latest_data["fimi"] = []
|
||||
_mark_fresh("fimi")
|
||||
return
|
||||
try:
|
||||
resp = fetch_with_curl(_FIMI_FEED_URL, timeout=15)
|
||||
feed = feedparser.parse(resp.text)
|
||||
|
||||
@@ -82,10 +82,37 @@ def _fetch_yfinance_single(symbol: str, period: str = "2d"):
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=1)
|
||||
def financial_fetch_enabled() -> bool:
    """Report whether the operator has opted in to financial market pulls.

    Two things count as an explicit opt-in:

    * a non-blank ``FINNHUB_API_KEY`` environment variable, or
    * ``FINANCIAL_ENABLED`` set to ``1``, ``true``, ``yes`` or ``on``
      (whitespace and letter case are ignored).

    With neither present this returns False, which callers use to keep the
    default yfinance path from making silent outbound calls to
    finance.yahoo.com.
    """
    has_finnhub_key = bool(os.getenv("FINNHUB_API_KEY", "").strip())
    if has_finnhub_key:
        return True
    flag = str(os.environ.get("FINANCIAL_ENABLED", "")).strip().lower()
    return flag in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def fetch_financial_markets():
|
||||
"""Fetches full market list with smart throttling (3s for Finnhub, 60s for yfinance)."""
|
||||
global _last_fetch_time, _last_fetch_results, _rotating_index
|
||||
|
||||
|
||||
if not financial_fetch_enabled():
|
||||
logger.debug(
|
||||
"Financial fetch skipped; set FINANCIAL_ENABLED=true or supply "
|
||||
"FINNHUB_API_KEY to opt in"
|
||||
)
|
||||
with _data_lock:
|
||||
latest_data["financial"] = {}
|
||||
_mark_fresh("financial")
|
||||
return
|
||||
|
||||
finnhub_key = os.getenv("FINNHUB_API_KEY", "").strip()
|
||||
use_finnhub = bool(finnhub_key)
|
||||
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
"""Per-aircraft observation tracking for cumulative fuel/CO2 estimates.
|
||||
|
||||
Background
|
||||
----------
|
||||
The pre-existing emissions enrichment attached a *rate* to each flight
|
||||
(GPH and kg/hr) based on aircraft model. Users — reasonably — wanted the
|
||||
running total: how much fuel HAS this plane burned since we started
|
||||
seeing it? Multiplying the rate by elapsed observation time gets us
|
||||
there, but it requires somewhere to remember "when did this icao24
|
||||
first appear on our radar?"
|
||||
|
||||
Why this lives outside ``flight_trails``
|
||||
----------------------------------------
|
||||
``flight_trails`` is sized and pruned aggressively for map rendering
|
||||
(5-minute TTL for untracked aircraft, 200 trail points max). That's
|
||||
wrong for cumulative burn: if a plane has been airborne 2 hours but
|
||||
its trail was pruned 30 min in, the "first trail point" timestamp is
|
||||
30 min ago, not 2h ago. Worse, when the trail expires and re-creates,
|
||||
the cumulative counter would reset mid-flight.
|
||||
|
||||
This module tracks observation lifecycle separately:
|
||||
|
||||
* When a hex is first observed: start a new flight session.
|
||||
* While observed regularly (gap < ``REOPEN_GAP_S``): keep accumulating.
|
||||
* When unseen for longer than ``REOPEN_GAP_S``: treat next sighting as
|
||||
a new session (the plane landed and took off again, or it's a
|
||||
different leg). Reset ``first_seen_at``.
|
||||
* Sessions unseen for longer than ``PRUNE_AFTER_S`` are dropped by ``prune()`` so memory stays
|
||||
bounded.
|
||||
|
||||
The user explicitly asked for this counting semantic: "as soon as a
|
||||
plane appears there should be a counter that keeps a running count of
|
||||
the fuel being burned... If there is no estimate take off time then it
|
||||
can just be from the time the server starts to keep a log of whats in
|
||||
the air."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
# Gap between sightings that resets the session. ADS-B refreshes the
|
||||
# whole aircraft list every minute or two, so anything over a few
|
||||
# minutes means the plane left our coverage window (landed, transit
|
||||
# through dead zone, etc). 15 minutes is conservative.
|
||||
REOPEN_GAP_S = 15 * 60
|
||||
|
||||
# Don't accumulate runaway memory: drop entries unseen for an hour.
|
||||
PRUNE_AFTER_S = 60 * 60
|
||||
|
||||
# Cap on accumulated airtime per session so a single bug elsewhere
|
||||
# (e.g. ts clock skew) can't produce comically large numbers.
|
||||
MAX_SESSION_SECONDS = 24 * 3600 # 24h — longest realistic civilian leg
|
||||
|
||||
|
||||
_observations: dict[str, dict[str, float]] = {}
|
||||
_lock = threading.Lock()
|
||||
_last_prune_at = 0.0
|
||||
|
||||
|
||||
def record_observation(icao_hex: str, *, now: float | None = None) -> int:
    """Register a sighting of ``icao_hex`` and report the session's airtime.

    The hex is normalised (stripped, lower-cased) before lookup. ``now`` is
    the sighting time in epoch seconds; when omitted, ``time.time()`` is used.

    Returns the whole seconds elapsed since the current session's first
    sighting, clamped to ``[0, MAX_SESSION_SECONDS]``. Returns 0 when the hex
    is falsy/blank, when this is the first sighting, or when the gap since
    the previous sighting exceeds ``REOPEN_GAP_S`` (a new session starts).
    Multiply the result by ``rate_per_hour / 3600`` for cumulative burn.
    """
    key = str(icao_hex).strip().lower() if icao_hex else ""
    if not key:
        return 0
    current = float(time.time() if now is None else now)

    with _lock:
        session = _observations.get(key)
        same_session = False
        if session is not None:
            # Compare against None explicitly: 0.0 is a valid timestamp and
            # must not be replaced by the fallback just because it is falsy.
            last_raw = session.get("last_seen_at")
            last_seen = current if last_raw is None else float(last_raw)
            same_session = not (current - last_seen > REOPEN_GAP_S)

        if not same_session:
            # First sighting, or the aircraft was gone long enough that the
            # earlier running total no longer describes the same flight.
            _observations[key] = {"first_seen_at": current, "last_seen_at": current}
            return 0

        first_raw = session.get("first_seen_at")
        first_seen = current if first_raw is None else float(first_raw)
        # Clamp so clock skew or bad input cannot yield negative or huge totals.
        airtime = int(current - first_seen)
        airtime = max(0, min(airtime, MAX_SESSION_SECONDS))
        session["last_seen_at"] = current
        return airtime
|
||||
|
||||
|
||||
def prune(*, now: float | None = None) -> int:
    """Remove observations not seen for more than ``PRUNE_AFTER_S`` seconds.

    ``now`` is the reference time in epoch seconds (defaults to
    ``time.time()``). Entries lacking a ``last_seen_at`` value are treated as
    last seen at time 0. The whole scan and the deletions happen under the
    module lock.

    Returns the number of entries removed.
    """
    current = float(time.time() if now is None else now)

    def _last_seen(record: dict) -> float:
        raw = record.get("last_seen_at")
        return 0.0 if raw is None else float(raw)

    with _lock:
        # Collect first, delete second: a dict must not change size while it
        # is being iterated.
        expired = [
            hex_id
            for hex_id, record in _observations.items()
            if current - _last_seen(record) > PRUNE_AFTER_S
        ]
        for hex_id in expired:
            del _observations[hex_id]
    return len(expired)
|
||||
|
||||
|
||||
def get_session_seconds(icao_hex: str, *, now: float | None = None) -> int:
    """Return the current session airtime for ``icao_hex`` without touching it.

    Unlike ``record_observation`` this never writes to the store, so
    ``last_seen_at`` is not bumped as a side effect. ``now`` is the reference
    time in epoch seconds (defaults to ``time.time()``).

    Returns 0 for a falsy or unknown hex; otherwise the whole seconds since
    the session's ``first_seen_at``, clamped to ``[0, MAX_SESSION_SECONDS]``.
    """
    if not icao_hex:
        return 0
    lookup_key = str(icao_hex).strip().lower()
    with _lock:
        session = _observations.get(lookup_key)
        if session is None:
            return 0
        current = float(time.time() if now is None else now)
        started_raw = session.get("first_seen_at")
        started = current if started_raw is None else float(started_raw)
        airtime = int(current - started)
        return max(0, min(airtime, MAX_SESSION_SECONDS))
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
    """Empty the observation store while holding the module lock.

    Intended for test setup/teardown only.
    """
    with _lock:
        _observations.clear()
|
||||
@@ -17,6 +17,7 @@ from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.retry import with_retry
|
||||
from services.fetchers.route_database import lookup_route
|
||||
from services.fetchers.aircraft_database import lookup_aircraft_type
|
||||
@@ -29,6 +30,88 @@ _RE_AIRLINE_CODE_1 = re.compile(r"^([A-Z]{3})\d")
|
||||
_RE_AIRLINE_CODE_2 = re.compile(r"^([A-Z]{3})[A-Z\d]")
|
||||
|
||||
|
||||
def detect_gps_jamming_zones(
    raw_flights: list[dict],
    *,
    min_aircraft: int | None = None,
    min_ratio: float | None = None,
    nacp_threshold: int | None = None,
) -> list[dict]:
    """Detect GPS interference zones from a snapshot of raw ADS-B aircraft.

    Methodology mirrors GPSJam.org / Flightradar24: bin aircraft into 1°x1°
    grid cells, flag cells where the fraction of aircraft reporting degraded
    NACp clears a threshold.

    Inputs
    ------
    raw_flights:
        Iterable of dicts. Each item is expected to carry ``lat``, ``lng``
        (or ``lon``), and ``nac_p``. Records missing position OR missing
        ``nac_p`` entirely (typical for OpenSky-sourced flights) are
        skipped — absence-of-data isn't evidence of anything. Records whose
        position or ``nac_p`` is not numeric are skipped as well, so one
        malformed record cannot abort detection for the whole snapshot.

    nac_p == 0 IS counted as degraded. Earlier code skipped it on the theory
    that "0 = old transponder, never computed accuracy." That's only half
    right: modern Mode-S Enhanced Surveillance transponders also fall back
    to nac_p=0 when they lose GPS lock entirely — which is exactly the
    jamming signature we're trying to detect.

    Binning
    -------
    Cells are keyed by ``floor(lat)`` / ``floor(lng)``. Truncating with
    ``int()`` instead rounds toward zero, which (a) merges the two cells on
    either side of the equator / prime meridian into one 2°-wide cell and
    (b) reports a centre one degree away from the aircraft for every
    negative coordinate. With floor, the cell ``[n, n+1)`` always has the
    centre ``n + 0.5``.

    Denoising
    ---------
    1. Require ``min_aircraft`` per grid cell for statistical validity.
    2. Subtract 1 from degraded count per cell (GPSJam's technique) so
       a single quirky transponder can't flag an entire zone.
    3. Require ratio ``adjusted_degraded / total > min_ratio``.

    All thresholds default to the module-level constants but can be
    overridden for testing.

    Returns
    -------
    A list of zone dicts with keys ``lat``/``lng`` (cell centre),
    ``severity`` (``"low"`` below 0.5, ``"medium"`` below 0.75, else
    ``"high"``), ``ratio`` (adjusted, rounded to 2 places), ``degraded``
    (raw, unadjusted count) and ``total``.
    """
    import math  # local: only the floor-based binning below needs it

    min_aircraft = GPS_JAMMING_MIN_AIRCRAFT if min_aircraft is None else int(min_aircraft)
    min_ratio = GPS_JAMMING_MIN_RATIO if min_ratio is None else float(min_ratio)
    nacp_threshold = (
        GPS_JAMMING_NACP_THRESHOLD if nacp_threshold is None else int(nacp_threshold)
    )

    jamming_grid: dict[tuple[int, int], dict[str, int]] = {}
    for rf in raw_flights or []:
        rlat = rf.get("lat")
        # ``is not None`` rather than ``or``: a longitude of exactly 0 is valid.
        rlng = rf.get("lng") if rf.get("lng") is not None else rf.get("lon")
        nacp = rf.get("nac_p")
        if rlat is None or rlng is None or nacp is None:
            continue
        try:
            cell_key = (math.floor(float(rlat)), math.floor(float(rlng)))
            is_degraded = nacp < nacp_threshold
        except (TypeError, ValueError, OverflowError):
            # Non-numeric / NaN / infinite values: ignore this record only.
            continue
        cell = jamming_grid.setdefault(cell_key, {"degraded": 0, "total": 0})
        cell["total"] += 1
        if is_degraded:
            cell["degraded"] += 1

    jamming_zones: list[dict] = []
    for (lat_cell, lng_cell), counts in jamming_grid.items():
        if counts["total"] < min_aircraft:
            continue
        # Discount one degraded aircraft so a lone bad transponder is ignored.
        adjusted_degraded = max(counts["degraded"] - 1, 0)
        if adjusted_degraded == 0:
            continue
        ratio = adjusted_degraded / counts["total"]
        if ratio <= min_ratio:
            continue
        if ratio < 0.5:
            severity = "low"
        elif ratio < 0.75:
            severity = "medium"
        else:
            severity = "high"
        jamming_zones.append(
            {
                "lat": lat_cell + 0.5,
                "lng": lng_cell + 0.5,
                "severity": severity,
                "ratio": round(ratio, 2),
                "degraded": counts["degraded"],
                "total": counts["total"],
            }
        )
    return jamming_zones
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenSky Network API Client (OAuth2)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -256,7 +339,17 @@ PRIVATE_JET_TYPES = {
|
||||
# Flight trails state
|
||||
flight_trails = {} # {icao_hex: {points: [[lat, lng, alt, ts], ...], last_seen: ts}}
|
||||
_trails_lock = threading.Lock()
|
||||
_MAX_TRACKED_TRAILS = 2000
|
||||
_MAX_TRACKED_TRAILS = 20000
|
||||
|
||||
|
||||
def get_flight_trail(icao24: str) -> list:
    """Return a copy of the stored trail points for one aircraft.

    ``icao24`` is normalised (stripped, lower-cased) before lookup. A falsy
    or blank id, or an aircraft with no stored trail, yields an empty list.
    Each point is copied into a fresh list while ``_trails_lock`` is held,
    so callers can mutate the result without touching ``flight_trails``.
    """
    lookup_key = str(icao24 or "").strip().lower()
    if lookup_key:
        with _trails_lock:
            stored = flight_trails.get(lookup_key, {}).get("points", [])
            return [list(pt) for pt in stored]
    return []
|
||||
|
||||
# Route enrichment is now served from services.fetchers.route_database, which
|
||||
# bulk-loads vrs-standing-data.adsb.lol/routes.csv.gz once per day and looks up
|
||||
@@ -449,6 +542,18 @@ def _classify_and_publish(all_adsb_flights):
|
||||
|
||||
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
||||
|
||||
# Source attribution: prefer the explicit ``source`` tag stamped
|
||||
# at fetch time (adsb.lol, OpenSky). If absent, fall back to the
|
||||
# legacy ``supplemental_source`` (airplanes.live, adsb.fi) so
|
||||
# supplementals are still attributed without changing their
|
||||
# tagger. Final fallback "adsb.lol" preserves prior behavior for
|
||||
# any caller that synthesizes records without going through one
|
||||
# of our fetchers (e.g. tests).
|
||||
source = (
|
||||
f.get("source")
|
||||
or f.get("supplemental_source")
|
||||
or "adsb.lol"
|
||||
)
|
||||
flights.append(
|
||||
{
|
||||
"callsign": flight_str,
|
||||
@@ -470,6 +575,7 @@ def _classify_and_publish(all_adsb_flights):
|
||||
"airline_code": airline_code,
|
||||
"aircraft_category": ac_category,
|
||||
"nac_p": f.get("nac_p"),
|
||||
"source": source,
|
||||
}
|
||||
)
|
||||
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
||||
@@ -496,6 +602,22 @@ def _classify_and_publish(all_adsb_flights):
|
||||
if model:
|
||||
emi = get_emissions_info(model)
|
||||
if emi:
|
||||
# Cumulative fuel/CO2: multiply the per-hour rate by how
|
||||
# long we've been observing this airframe. Users want to
|
||||
# see the *amount* burned, not just the rate. If we've
|
||||
# never seen this hex before, observed_seconds is 0 and
|
||||
# the cumulative values are 0 until the next refresh —
|
||||
# the rate is still useful info on its own.
|
||||
observed_seconds = _record_flight_observation(
|
||||
f.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emi = {
|
||||
**emi,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emi["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emi["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
f["emissions"] = emi
|
||||
|
||||
callsign = f.get("callsign", "").strip().upper()
|
||||
@@ -612,24 +734,30 @@ def _classify_and_publish(all_adsb_flights):
|
||||
)
|
||||
|
||||
# --- Trail Accumulation ---
|
||||
_TRAIL_INTERVAL_S = 600 # only record a new trail point every 10 minutes
|
||||
_TRAIL_INTERVAL_S = 60 # selected trails need enough resolution to show where unknown-route traffic came from
|
||||
|
||||
def _accumulate_trail(f, now_ts, check_route=True):
|
||||
def _accumulate_trail(f, now_ts, attach_known_route_trail=False):
|
||||
hex_id = f.get("icao24", "").lower()
|
||||
if not hex_id:
|
||||
return 0, None
|
||||
if check_route and f.get("origin_name", "UNKNOWN") != "UNKNOWN":
|
||||
f["trail"] = []
|
||||
return 0, hex_id
|
||||
|
||||
def _known_route_name(value):
|
||||
normalized = str(value or "").strip().upper()
|
||||
return bool(normalized and normalized != "UNKNOWN")
|
||||
|
||||
has_known_route = bool(
|
||||
(f.get("origin_loc") and f.get("dest_loc"))
|
||||
or (_known_route_name(f.get("origin_name")) and _known_route_name(f.get("dest_name")))
|
||||
)
|
||||
lat, lng, alt = f.get("lat"), f.get("lng"), f.get("alt", 0)
|
||||
if lat is None or lng is None:
|
||||
f["trail"] = flight_trails.get(hex_id, {}).get("points", [])
|
||||
f["trail"] = [] if has_known_route and not attach_known_route_trail else flight_trails.get(hex_id, {}).get("points", [])
|
||||
return 0, hex_id
|
||||
point = [round(lat, 5), round(lng, 5), round(alt, 1), round(now_ts)]
|
||||
if hex_id not in flight_trails:
|
||||
flight_trails[hex_id] = {"points": [], "last_seen": now_ts}
|
||||
trail_data = flight_trails[hex_id]
|
||||
# Only append a new point if 10 minutes have passed since the last one
|
||||
# Only append a new point if enough time has passed since the last one
|
||||
last_point_ts = trail_data["points"][-1][3] if trail_data["points"] else 0
|
||||
if now_ts - last_point_ts < _TRAIL_INTERVAL_S:
|
||||
trail_data["last_seen"] = now_ts
|
||||
@@ -644,32 +772,39 @@ def _classify_and_publish(all_adsb_flights):
|
||||
trail_data["last_seen"] = now_ts
|
||||
if len(trail_data["points"]) > 200:
|
||||
trail_data["points"] = trail_data["points"][-200:]
|
||||
f["trail"] = trail_data["points"]
|
||||
# Keep known-route flights visually clean in the main payload; selected
|
||||
# detail panels can still fetch this server-side trail to compute
|
||||
# observed fuel/CO2 burn.
|
||||
f["trail"] = [] if has_known_route and not attach_known_route_trail else trail_data["points"]
|
||||
return 1, hex_id
|
||||
|
||||
now_ts = datetime.utcnow().timestamp()
|
||||
with _data_lock:
|
||||
commercial_snapshot = copy.deepcopy(latest_data.get("commercial_flights", []))
|
||||
private_jets_snapshot = copy.deepcopy(latest_data.get("private_jets", []))
|
||||
private_ga_snapshot = copy.deepcopy(latest_data.get("private_flights", []))
|
||||
military_snapshot = copy.deepcopy(latest_data.get("military_flights", []))
|
||||
tracked_snapshot = copy.deepcopy(latest_data.get("tracked_flights", []))
|
||||
raw_flights_snapshot = list(latest_data.get("flights", []))
|
||||
|
||||
# Commercial/private: skip trail if route is known (route line replaces trail)
|
||||
route_check_lists = [commercial, private_jets, private_ga]
|
||||
# Tracked + military: ALWAYS accumulate trails (high-interest flights)
|
||||
always_trail_lists = [existing_tracked, military_snapshot]
|
||||
# Accumulate trails for every aircraft so selected details can estimate
|
||||
# observed fuel/CO2 burn. Known-route flights keep an empty payload trail so
|
||||
# the route line, not historical breadcrumbs, remains the visible map path.
|
||||
route_check_lists = [commercial_snapshot, private_jets_snapshot, private_ga_snapshot]
|
||||
always_trail_lists = [tracked_snapshot, military_snapshot]
|
||||
seen_hexes = set()
|
||||
trail_count = 0
|
||||
with _trails_lock:
|
||||
for flist in route_check_lists:
|
||||
for f in flist:
|
||||
count, hex_id = _accumulate_trail(f, now_ts, check_route=True)
|
||||
count, hex_id = _accumulate_trail(f, now_ts, attach_known_route_trail=False)
|
||||
trail_count += count
|
||||
if hex_id:
|
||||
seen_hexes.add(hex_id)
|
||||
|
||||
for flist in always_trail_lists:
|
||||
for f in flist:
|
||||
count, hex_id = _accumulate_trail(f, now_ts, check_route=False)
|
||||
count, hex_id = _accumulate_trail(f, now_ts, attach_known_route_trail=False)
|
||||
trail_count += count
|
||||
if hex_id:
|
||||
seen_hexes.add(hex_id)
|
||||
@@ -693,57 +828,16 @@ def _classify_and_publish(all_adsb_flights):
|
||||
f"Trail accumulation: {trail_count} active trails, {len(stale_keys)} pruned, {len(flight_trails)} total"
|
||||
)
|
||||
|
||||
# --- GPS Jamming Detection ---
|
||||
# Uses NACp (Navigation Accuracy Category – Position) from ADS-B to infer
|
||||
# GPS interference zones, similar to GPSJam.org / Flightradar24.
|
||||
# NACp < 8 = position accuracy worse than the FAA-mandated 0.05 NM.
|
||||
#
|
||||
# Denoising (to suppress false positives from old GA transponders):
|
||||
# 1. Skip nac_p == 0 ("unknown accuracy") — old transponders that never
|
||||
# computed accuracy, NOT evidence of jamming. Real jamming shows 1-7.
|
||||
# 2. Require minimum aircraft per grid cell for statistical validity.
|
||||
# 3. Subtract 1 from degraded count per cell (GPSJam's technique) so a
|
||||
# single quirky transponder can't flag an entire zone.
|
||||
# 4. Require the adjusted ratio to exceed the threshold.
|
||||
try:
|
||||
jamming_grid = {}
|
||||
raw_flights = raw_flights_snapshot
|
||||
for rf in raw_flights:
|
||||
rlat = rf.get("lat")
|
||||
rlng = rf.get("lng") or rf.get("lon")
|
||||
if rlat is None or rlng is None:
|
||||
continue
|
||||
nacp = rf.get("nac_p")
|
||||
if nacp is None or nacp == 0:
|
||||
continue
|
||||
grid_key = f"{int(rlat)},{int(rlng)}"
|
||||
if grid_key not in jamming_grid:
|
||||
jamming_grid[grid_key] = {"degraded": 0, "total": 0}
|
||||
jamming_grid[grid_key]["total"] += 1
|
||||
if nacp < GPS_JAMMING_NACP_THRESHOLD:
|
||||
jamming_grid[grid_key]["degraded"] += 1
|
||||
with _data_lock:
|
||||
latest_data["commercial_flights"] = commercial_snapshot
|
||||
latest_data["private_jets"] = private_jets_snapshot
|
||||
latest_data["private_flights"] = private_ga_snapshot
|
||||
latest_data["tracked_flights"] = tracked_snapshot
|
||||
latest_data["military_flights"] = military_snapshot
|
||||
|
||||
jamming_zones = []
|
||||
for gk, counts in jamming_grid.items():
|
||||
if counts["total"] < GPS_JAMMING_MIN_AIRCRAFT:
|
||||
continue
|
||||
adjusted_degraded = max(counts["degraded"] - 1, 0)
|
||||
if adjusted_degraded == 0:
|
||||
continue
|
||||
ratio = adjusted_degraded / counts["total"]
|
||||
if ratio > GPS_JAMMING_MIN_RATIO:
|
||||
lat_i, lng_i = gk.split(",")
|
||||
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
|
||||
jamming_zones.append(
|
||||
{
|
||||
"lat": int(lat_i) + 0.5,
|
||||
"lng": int(lng_i) + 0.5,
|
||||
"severity": severity,
|
||||
"ratio": round(ratio, 2),
|
||||
"degraded": counts["degraded"],
|
||||
"total": counts["total"],
|
||||
}
|
||||
)
|
||||
# --- GPS Jamming Detection ---
|
||||
try:
|
||||
jamming_zones = detect_gps_jamming_zones(raw_flights_snapshot)
|
||||
with _data_lock:
|
||||
latest_data["gps_jamming"] = jamming_zones
|
||||
if jamming_zones:
|
||||
@@ -819,7 +913,15 @@ def _fetch_adsb_lol_regions():
|
||||
res = fetch_with_curl(url, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("ac", [])
|
||||
aircraft = data.get("ac", [])
|
||||
# Stamp the source at the fetch site so attribution survives
|
||||
# the OpenSky/supplemental dedupe-by-hex merge downstream.
|
||||
# Previously adsb.lol records carried no marker while OpenSky
|
||||
# records got ``is_opensky: True`` — which made flight tooltips
|
||||
# look like everything came from OpenSky.
|
||||
for a in aircraft:
|
||||
a["source"] = "adsb.lol"
|
||||
return aircraft
|
||||
except (
|
||||
requests.RequestException,
|
||||
ConnectionError,
|
||||
@@ -902,6 +1004,7 @@ def _enrich_with_opensky_and_supplemental(adsb_flights):
|
||||
"gs": (s[9] * 1.94384) if s[9] else 0,
|
||||
"t": "Unknown",
|
||||
"is_opensky": True,
|
||||
"source": "OpenSky",
|
||||
}
|
||||
)
|
||||
elif os_res.status_code == 429:
|
||||
|
||||
@@ -15,6 +15,24 @@ from services.fetchers.retry import with_retry
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
    """Return environment variable ``name`` stripped and lower-cased.

    An unset variable yields the empty string.
    """
    raw_value = os.getenv(name, "")
    return str(raw_value).strip().lower()
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
    """Decide whether the Playwright-based LiveUAMap scraper should run.

    The scraper is useful enrichment, but it starts a browser/Node driver
    and must not be allowed to destabilize Windows local startup.

    ``SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER`` overrides the default:
    ``1``/``true``/``yes``/``on`` force it on and ``0``/``false``/``no``/
    ``off`` force it off. With any other value (including unset) the
    scraper runs everywhere except when ``os.name`` is ``"nt"``.
    """
    explicit_choice = {
        "1": True,
        "true": True,
        "yes": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "off": False,
    }
    setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    if setting in explicit_choice:
        return explicit_choice[setting]
    return os.name != "nt"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ships (AIS + Carriers)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -191,6 +209,12 @@ def update_liveuamap():
|
||||
|
||||
if not is_any_active("global_incidents"):
|
||||
return
|
||||
if not liveuamap_scraper_enabled():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled for this runtime; set "
|
||||
"SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
return
|
||||
logger.info("Running scheduled Liveuamap scraper...")
|
||||
try:
|
||||
from services.liveuamap_scraper import fetch_liveuamap
|
||||
|
||||
@@ -6,7 +6,7 @@ import heapq
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
|
||||
@@ -29,7 +29,7 @@ def _geocode_region(region_name: str, country_name: str) -> tuple:
|
||||
|
||||
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
||||
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": outbound_user_agent("infrastructure-data")})
|
||||
if response.status_code == 200:
|
||||
results = response.json()
|
||||
if results:
|
||||
|
||||
@@ -174,16 +174,34 @@ def fetch_meshtastic_nodes():
|
||||
except Exception as e:
|
||||
logger.debug(f"Meshtastic cache freshness check failed: {e}")
|
||||
|
||||
# Build a polite User-Agent. Include the operator callsign when set so
|
||||
# the upstream service can correlate per-install traffic if needed.
|
||||
# Build a polite User-Agent. Historically this included the operator
|
||||
# callsign so meshtastic.org could rate-limit per-install; that's still
|
||||
# the default behavior for backward compatibility. Operators who want
|
||||
# stricter outbound privacy can suppress the callsign by setting
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=false. Issue #203.
|
||||
import os as _os
|
||||
try:
|
||||
from services.config import get_settings
|
||||
|
||||
callsign = str(getattr(get_settings(), "MESHTASTIC_OPERATOR_CALLSIGN", "") or "").strip()
|
||||
except Exception:
|
||||
callsign = ""
|
||||
ua_base = "ShadowBroker-OSINT/0.9.7 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com; 24h polling)"
|
||||
user_agent = f"{ua_base}; node={callsign}" if callsign else ua_base
|
||||
|
||||
send_callsign_header = str(
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||
|
||||
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||
# The optional Meshtastic callsign is appended as additional context so
|
||||
# meshtastic.liamcottle.net's operator can identify both the install AND
|
||||
# the registered radio operator (when MESHTASTIC_OPERATOR_CALLSIGN is set
|
||||
# and MESHTASTIC_SEND_CALLSIGN_HEADER is true; see issue #203).
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua_base = f"{outbound_user_agent('meshtastic-map')}; 24h polling"
|
||||
if callsign and send_callsign_header:
|
||||
user_agent = f"{ua_base}; node={callsign}"
|
||||
else:
|
||||
user_agent = ua_base
|
||||
|
||||
try:
|
||||
logger.info("Fetching Meshtastic map nodes from API...")
|
||||
|
||||
@@ -6,6 +6,8 @@ import time
|
||||
import requests
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.emissions import get_emissions_info
|
||||
from services.fetchers.flight_observations import record_observation as _record_flight_observation
|
||||
from services.fetchers.plane_alert import enrich_with_plane_alert
|
||||
|
||||
logger = logging.getLogger("services.data_fetcher")
|
||||
@@ -170,6 +172,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "adsb.lol"
|
||||
all_mil_ac.append(a)
|
||||
except Exception as e:
|
||||
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
||||
@@ -181,6 +184,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "airplanes.live"
|
||||
all_mil_ac.append(a)
|
||||
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
||||
except Exception as e:
|
||||
@@ -233,6 +237,7 @@ def fetch_military_flights():
|
||||
"registration": f.get("r", "N/A"),
|
||||
"icao24": icao_hex,
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -257,7 +262,8 @@ def fetch_military_flights():
|
||||
"model": f.get("t", "Unknown"),
|
||||
"icao24": icao_hex,
|
||||
"speed_knots": speed_knots,
|
||||
"squawk": f.get("squawk", "")
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
except Exception as loop_e:
|
||||
logger.error(f"Mil flight interpolation error: {loop_e}")
|
||||
@@ -289,6 +295,25 @@ def fetch_military_flights():
|
||||
remaining_mil = []
|
||||
for mf in military_flights:
|
||||
enrich_with_plane_alert(mf)
|
||||
model = mf.get("model")
|
||||
if not model or str(model).strip().lower() in {"", "unknown"}:
|
||||
model = mf.get("alert_type") or ""
|
||||
if model:
|
||||
emissions = get_emissions_info(model)
|
||||
if emissions:
|
||||
# Cumulative fuel/CO2 since first observation — mirrors
|
||||
# the civilian path in flights._classify_and_publish.
|
||||
observed_seconds = _record_flight_observation(
|
||||
mf.get("icao24") or ""
|
||||
)
|
||||
elapsed_h = observed_seconds / 3600.0
|
||||
emissions = {
|
||||
**emissions,
|
||||
"observed_seconds": observed_seconds,
|
||||
"fuel_gallons_burned": round(emissions["fuel_gph"] * elapsed_h, 1),
|
||||
"co2_kg_emitted": round(emissions["co2_kg_per_hour"] * elapsed_h, 1),
|
||||
}
|
||||
mf["emissions"] = emissions
|
||||
if mf.get("alert_category"):
|
||||
mf["type"] = "tracked_flight"
|
||||
tracked_mil.append(mf)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""News fetching, geocoding, clustering, and risk assessment."""
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
@@ -11,6 +12,22 @@ from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
from services.oracle_service import enrich_news_items, compute_global_threat_level, detect_breaking_events
|
||||
|
||||
|
||||
def news_fetch_enabled() -> bool:
    """Report whether outbound news RSS pulls are allowed.

    Unlike the opt-in fetchers, news is **on by default** for backward
    compatibility: an unset ``NEWS_ENABLED`` counts as enabled. Setting it
    to ``0``, ``false``, ``no``, ``off`` or an empty/blank string
    (whitespace and letter case are ignored) disables all outbound RSS feed
    traffic. Any other value leaves the fetch enabled.
    """
    flag = str(os.environ.get("NEWS_ENABLED", "true")).strip().lower()
    return flag not in ("0", "false", "no", "off", "")
|
||||
|
||||
logger = logging.getLogger("services.data_fetcher")
|
||||
|
||||
# Maximum article age in seconds. Anything older than this is dropped
|
||||
@@ -160,6 +177,12 @@ def _resolve_coords(text: str) -> tuple[float, float] | None:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=2)
|
||||
def fetch_news():
|
||||
if not news_fetch_enabled():
|
||||
logger.debug("News fetch skipped; unset NEWS_ENABLED=false to re-enable")
|
||||
with _data_lock:
|
||||
latest_data["news"] = []
|
||||
_mark_fresh("news")
|
||||
return
|
||||
from services.news_feed_config import get_feeds
|
||||
feed_config = get_feeds()
|
||||
feeds = {f["name"]: f["url"] for f in feed_config}
|
||||
|
||||
@@ -49,6 +49,16 @@ _HF_CSV_URL = (
|
||||
"https://huggingface.co/datasets/kcimc/NUFORC/resolve/main/nuforc_str.csv"
|
||||
)
|
||||
|
||||
|
||||
def nuforc_fetch_enabled() -> bool:
    """Report whether the operator has opted into NUFORC pulls.

    The feature is off unless ``NUFORC_ENABLED`` is set to ``1``,
    ``true``, ``yes`` or ``on`` (compared case-insensitively after
    trimming whitespace). An unset, blank or unrecognized value keeps it
    disabled.
    """
    opt_in_values = ("1", "true", "yes", "on")
    setting = str(os.environ.get("NUFORC_ENABLED", "")).strip().lower()
    return setting in opt_in_values
|
||||
|
||||
# Only keep sightings from the last N years for the enrichment index
|
||||
_KEEP_YEARS = 5
|
||||
|
||||
@@ -160,6 +170,12 @@ def _download_and_build() -> dict | None:
|
||||
|
||||
Returns the index dict or None on failure.
|
||||
"""
|
||||
if not nuforc_fetch_enabled():
|
||||
logger.debug(
|
||||
"NUFORC enrichment skipped; set NUFORC_ENABLED=true to opt in"
|
||||
)
|
||||
return None
|
||||
|
||||
cutoff = datetime.utcnow() - timedelta(days=_KEEP_YEARS * 365)
|
||||
cutoff_str = cutoff.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
@@ -25,6 +25,16 @@ _provider_pace_lock = threading.Lock()
|
||||
_provider_last_request_at: dict[str, float] = {}
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
    """Report whether the operator has opted into Polymarket/Kalshi pulls.

    The feature is off unless ``PREDICTION_MARKETS_ENABLED`` is set to
    ``1``, ``true``, ``yes`` or ``on`` (compared case-insensitively after
    trimming whitespace). An unset, blank or unrecognized value keeps it
    disabled.
    """
    opt_in_values = ("1", "true", "yes", "on")
    raw_setting = os.environ.get("PREDICTION_MARKETS_ENABLED", "")
    setting = str(raw_setting).strip().lower()
    return setting in opt_in_values
|
||||
|
||||
|
||||
def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
if min_interval_s <= 0:
|
||||
return
|
||||
@@ -755,6 +765,16 @@ def fetch_prediction_markets():
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
global _prev_probabilities
|
||||
|
||||
if not prediction_markets_fetch_enabled():
|
||||
logger.debug(
|
||||
"Prediction markets fetch skipped; set "
|
||||
"PREDICTION_MARKETS_ENABLED=true to opt in"
|
||||
)
|
||||
with _data_lock:
|
||||
latest_data["prediction_markets"] = []
|
||||
_mark_fresh("prediction_markets")
|
||||
return
|
||||
|
||||
markets = fetch_prediction_markets_raw()
|
||||
|
||||
# Compute probability deltas vs previous fetch
|
||||
|
||||
@@ -17,6 +17,12 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _route_db_user_agent() -> str:
    """Return the outbound User-Agent used for route-database downloads.

    ``outbound_user_agent`` is imported at call time rather than at module
    import time, and is asked for the ``"route-database"`` identity.
    """
    from services.network_utils import outbound_user_agent

    user_agent = outbound_user_agent("route-database")
    return user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
||||
@@ -24,11 +30,7 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_HTTP_TIMEOUT_S = 60
|
||||
|
||||
_USER_AGENT = (
|
||||
"ShadowBroker-OSINT/0.9.7 "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"contact: bigbodycobain@gmail.com)"
|
||||
)
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
||||
@@ -41,7 +43,7 @@ def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
||||
response = requests.get(
|
||||
url,
|
||||
timeout=_HTTP_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
||||
headers={"User-Agent": _route_db_user_agent(), "Accept-Encoding": "gzip"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
text = gzip.decompress(response.content).decode("utf-8-sig")
|
||||
|
||||
@@ -15,6 +15,7 @@ Analysis features (derived from cached TLEs — no extra network requests):
|
||||
import math
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import requests
|
||||
@@ -41,6 +42,38 @@ def _gmst(jd_ut1):
|
||||
# CelesTrak fair use: fetch at most once per 24 hours (86400s).
|
||||
# SGP4 propagation runs every 60s using cached TLEs — positions stay live.
|
||||
_CELESTRAK_FETCH_INTERVAL = 86400 # 24 hours
|
||||
_MIN_VISIBLE_SATELLITE_CATALOG = int(os.environ.get("SHADOWBROKER_MIN_VISIBLE_SATELLITES", "350"))
|
||||
_MAX_VISIBLE_SATELLITE_CATALOG = int(os.environ.get("SHADOWBROKER_MAX_VISIBLE_SATELLITES", "450"))
|
||||
_CELESTRAK_VISIBLE_GROUPS = {
|
||||
"military": {"mission": "military", "sat_type": "Military / Defense"},
|
||||
"radar": {"mission": "sar", "sat_type": "Radar / SAR"},
|
||||
"resource": {"mission": "earth_observation", "sat_type": "Earth Observation"},
|
||||
"weather": {"mission": "weather", "sat_type": "Weather / Meteorology"},
|
||||
"gnss": {"mission": "navigation", "sat_type": "GNSS / Navigation"},
|
||||
"science": {"mission": "science", "sat_type": "Science"},
|
||||
}
|
||||
_TLE_VISIBLE_FALLBACK_TERMS = {
|
||||
"COSMOS": {"mission": "military", "sat_type": "Russian / Soviet Military"},
|
||||
"USA": {"mission": "military", "sat_type": "US Military / NRO"},
|
||||
"NROL": {"mission": "military", "sat_type": "Classified NRO"},
|
||||
"GPS": {"mission": "navigation", "sat_type": "GPS Navigation"},
|
||||
"GALILEO": {"mission": "navigation", "sat_type": "Galileo Navigation"},
|
||||
"BEIDOU": {"mission": "navigation", "sat_type": "BeiDou Navigation"},
|
||||
"GLONASS": {"mission": "navigation", "sat_type": "GLONASS Navigation"},
|
||||
"NOAA": {"mission": "weather", "sat_type": "NOAA Weather"},
|
||||
"METEOR": {"mission": "weather", "sat_type": "Meteor Weather"},
|
||||
"SENTINEL": {"mission": "earth_observation", "sat_type": "Sentinel Earth Observation"},
|
||||
"LANDSAT": {"mission": "earth_observation", "sat_type": "Landsat Earth Observation"},
|
||||
"WORLDVIEW": {"mission": "commercial_imaging", "sat_type": "Maxar High-Res"},
|
||||
"PLEIADES": {"mission": "commercial_imaging", "sat_type": "Airbus Imaging"},
|
||||
"SKYSAT": {"mission": "commercial_imaging", "sat_type": "Planet Video"},
|
||||
"JILIN": {"mission": "commercial_imaging", "sat_type": "Jilin Imaging"},
|
||||
"FLOCK": {"mission": "commercial_imaging", "sat_type": "PlanetScope"},
|
||||
"LEMUR": {"mission": "commercial_rf", "sat_type": "Spire RF / AIS"},
|
||||
"ICEYE": {"mission": "sar", "sat_type": "ICEYE SAR"},
|
||||
"UMBRA": {"mission": "sar", "sat_type": "Umbra SAR"},
|
||||
"CAPELLA": {"mission": "sar", "sat_type": "Capella SAR"},
|
||||
}
|
||||
_sat_gp_cache = {"data": None, "last_fetch": 0, "source": "none", "last_modified": None}
|
||||
_sat_classified_cache = {"data": None, "gp_fetch_ts": 0}
|
||||
_SAT_CACHE_PATH = Path(__file__).parent.parent.parent / "data" / "sat_gp_cache.json"
|
||||
@@ -564,9 +597,61 @@ def _parse_tle_to_gp(name, norad_id, line1, line2):
|
||||
return None
|
||||
|
||||
|
||||
def _annotate_celestrak_group(records: list[dict], group: str) -> list[dict]:
    """Tag shallow copies of GP records with the group they were fetched from.

    Entries of ``records`` that are not dicts are dropped. Every surviving
    record is copied (the input dicts are never modified) and gains an
    ``_SB_GROUP`` key holding ``group``. When ``group`` has a non-empty
    entry in ``_CELESTRAK_VISIBLE_GROUPS``, that entry is attached under
    ``_SB_GROUP_META``; the same metadata dict object is shared by all
    returned records.
    """
    group_meta = _CELESTRAK_VISIBLE_GROUPS.get(group, {})
    tags = {"_SB_GROUP": group}
    if group_meta:
        tags["_SB_GROUP_META"] = group_meta
    return [{**record, **tags} for record in records if isinstance(record, dict)]
|
||||
|
||||
|
||||
def _fetch_visible_celestrak_catalog(headers: dict | None = None) -> list[dict]:
    """Fetch bounded CelesTrak groups used by the visible satellite layer.

    The full ``active`` catalog is too large and frequently times out on local
    startup. These groups cover the visible operational set users expect
    without pulling Starlink-scale constellations into the map.

    Args:
        headers: Optional HTTP headers sent with every group request.

    Returns:
        One annotated GP record per NORAD catalog id. When the same id shows
        up in several groups, the record from the group fetched last wins.
        Groups that fail to download, answer with a non-200 status, or do not
        decode to a JSON list are skipped; records whose ``NORAD_CAT_ID`` is
        missing or not convertible to ``int`` are skipped individually.
    """
    headers = headers or {}
    merged: dict[int, dict] = {}
    for group in _CELESTRAK_VISIBLE_GROUPS:
        url = f"https://celestrak.org/NORAD/elements/gp.php?GROUP={group}&FORMAT=json"
        # Only the network call and JSON decoding can raise the transport
        # errors below, so the try body is limited to those two steps.
        try:
            response = fetch_with_curl(url, timeout=15, headers=headers)
            if response.status_code != 200:
                logger.debug("Satellites: CelesTrak group %s returned HTTP %s", group, response.status_code)
                continue
            gp_data = response.json()
        except (
            requests.RequestException,
            ConnectionError,
            TimeoutError,
            ValueError,
            KeyError,
            json.JSONDecodeError,
            OSError,
        ) as e:
            logger.warning("Satellites: Failed to fetch CelesTrak group %s: %s", group, e)
            continue
        if not isinstance(gp_data, list):
            continue
        for sat in _annotate_celestrak_group(gp_data, group):
            # Guard the conversion per record: one malformed id must not
            # discard the rest of the group (ValueError) or escape this
            # function entirely (TypeError).
            try:
                norad_key = int(sat.get("NORAD_CAT_ID"))
            except (TypeError, ValueError, OverflowError):
                continue
            merged[norad_key] = sat
        # Pause between successfully processed groups.
        time.sleep(0.35)
    return list(merged.values())
|
||||
|
||||
|
||||
def _fetch_satellites_from_tle_api():
|
||||
"""Fallback: fetch satellite TLEs from tle.ivanstanojevic.me when CelesTrak is blocked."""
|
||||
search_terms = set()
|
||||
search_terms = set(_TLE_VISIBLE_FALLBACK_TERMS)
|
||||
for key, _ in _SAT_INTEL_DB:
|
||||
term = key.split()[0] if len(key.split()) > 1 and key.split()[0] in ("USA", "NROL") else key
|
||||
search_terms.add(term)
|
||||
@@ -591,8 +676,13 @@ def _fetch_satellites_from_tle_api():
|
||||
sat_id = gp.get("NORAD_CAT_ID")
|
||||
if sat_id not in seen_ids:
|
||||
seen_ids.add(sat_id)
|
||||
if term in _TLE_VISIBLE_FALLBACK_TERMS:
|
||||
gp["_SB_GROUP"] = f"tle:{term}"
|
||||
gp["_SB_GROUP_META"] = _TLE_VISIBLE_FALLBACK_TERMS[term]
|
||||
all_results.append(gp)
|
||||
time.sleep(1) # Polite delay between requests
|
||||
if len(all_results) >= _MAX_VISIBLE_SATELLITE_CATALOG:
|
||||
return all_results
|
||||
time.sleep(0.15) # Polite delay between requests
|
||||
except (
|
||||
requests.RequestException,
|
||||
ConnectionError,
|
||||
@@ -644,18 +734,34 @@ def fetch_satellites():
|
||||
|
||||
if (
|
||||
_sat_gp_cache["data"] is None
|
||||
or len(_sat_gp_cache.get("data") or []) < _MIN_VISIBLE_SATELLITE_CATALOG
|
||||
or (now_ts - _sat_gp_cache["last_fetch"]) > _CELESTRAK_FETCH_INTERVAL
|
||||
):
|
||||
gp_urls = [
|
||||
"https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
|
||||
"https://celestrak.com/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
|
||||
]
|
||||
# Build conditional request headers (CelesTrak fair use)
|
||||
headers = {}
|
||||
if _sat_gp_cache.get("last_modified"):
|
||||
headers["If-Modified-Since"] = _sat_gp_cache["last_modified"]
|
||||
|
||||
visible_data = _fetch_visible_celestrak_catalog(headers=headers)
|
||||
if len(visible_data) >= _MIN_VISIBLE_SATELLITE_CATALOG:
|
||||
_sat_gp_cache["data"] = visible_data
|
||||
_sat_gp_cache["last_fetch"] = now_ts
|
||||
_sat_gp_cache["source"] = "celestrak_visible_groups"
|
||||
_save_sat_cache(visible_data)
|
||||
_snapshot_current_tles(visible_data)
|
||||
logger.info(
|
||||
"Satellites: Downloaded %d GP records from visible CelesTrak groups",
|
||||
len(visible_data),
|
||||
)
|
||||
|
||||
gp_urls = [
|
||||
"https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
|
||||
"https://celestrak.com/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
|
||||
]
|
||||
|
||||
for url in gp_urls:
|
||||
if len(_sat_gp_cache.get("data") or []) >= _MIN_VISIBLE_SATELLITE_CATALOG:
|
||||
break
|
||||
try:
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
if response.status_code == 304:
|
||||
@@ -696,7 +802,10 @@ def fetch_satellites():
|
||||
logger.warning(f"Satellites: Failed to fetch from {url}: {e}")
|
||||
continue
|
||||
|
||||
if _sat_gp_cache["data"] is None:
|
||||
if (
|
||||
_sat_gp_cache["data"] is None
|
||||
or len(_sat_gp_cache.get("data") or []) < _MIN_VISIBLE_SATELLITE_CATALOG
|
||||
):
|
||||
logger.info("Satellites: CelesTrak unreachable, trying TLE fallback API...")
|
||||
try:
|
||||
fallback_data = _fetch_satellites_from_tle_api()
|
||||
@@ -757,6 +866,9 @@ def fetch_satellites():
|
||||
owner = sat.get("OWNER", sat.get("OBJECT_OWNER", ""))
|
||||
if owner in _OWNER_CODE_MAP:
|
||||
intel = {"country": _OWNER_CODE_MAP[owner], "mission": "general", "sat_type": "Unclassified"}
|
||||
if not intel and sat.get("_SB_GROUP_META"):
|
||||
intel = dict(sat["_SB_GROUP_META"])
|
||||
intel.setdefault("country", "Unknown")
|
||||
if not intel:
|
||||
continue
|
||||
|
||||
@@ -818,7 +930,11 @@ def fetch_satellites():
|
||||
now.year, now.month, now.day, now.hour, now.minute, now.second + now.microsecond / 1e6
|
||||
)
|
||||
|
||||
for s in all_sats:
|
||||
for source_sat in all_sats:
|
||||
# Keep the classified cache immutable. The render payload below
|
||||
# strips orbital fields after propagation, and mutating the cached
|
||||
# entry would make the next refresh unable to position satellites.
|
||||
s = dict(source_sat)
|
||||
try:
|
||||
mean_motion = s.get("MEAN_MOTION")
|
||||
ecc = s.get("ECCENTRICITY")
|
||||
|
||||
@@ -10,6 +10,12 @@ from datetime import datetime, timezone
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _trains_user_agent() -> str:
    """Return the outbound User-Agent used for train-feed requests.

    ``outbound_user_agent`` is imported at call time rather than at module
    import time, and is asked for the ``"trains"`` identity.
    """
    from services.network_utils import outbound_user_agent

    user_agent = outbound_user_agent("trains")
    return user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EARTH_RADIUS_KM = 6371.0
|
||||
@@ -379,7 +385,7 @@ def _fetch_digitraffic() -> list[dict]:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept-Encoding": "gzip",
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
"User-Agent": _trains_user_agent(),
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
|
||||
@@ -0,0 +1,457 @@
|
||||
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||
position publication.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||
- low-precision (audit issue #245 — false precision from text mentions),
|
||||
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||
certain network paths, including Docker Desktop on some Windows hosts).
|
||||
|
||||
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||
carrier is operating. The article body uses extremely consistent phrasing:
|
||||
|
||||
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||
Yokosuka, Japan."
|
||||
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||
|
||||
Those are deterministic to parse. This module:
|
||||
|
||||
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||
site-wide feed often has fresher posts before the category feed
|
||||
catches up, so we union them.
|
||||
2. Picks the most recent post by parsed ``pubDate``.
|
||||
3. For each carrier in the registry, scans the article body for a
|
||||
"is operating in / is in port in / departed from" pattern near
|
||||
the carrier's name.
|
||||
4. Maps the extracted region phrase to coordinates via the carrier
|
||||
tracker's existing REGION_COORDS.
|
||||
|
||||
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||
with ``position_source_at`` set to the article's actual publication
|
||||
timestamp (not ``now()``).
|
||||
|
||||
Politeness
|
||||
----------
|
||||
We send the per-install operator handle via ``outbound_user_agent``
|
||||
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||
but WordPress RSS feeds are open and serve the full article in
|
||||
``<content:encoded>`` — that's the supported path for aggregators and
|
||||
the one we use. We do not spoof browser headers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Iterable
|
||||
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_RSS_URLS: tuple[str, ...] = (
|
||||
# Site-wide feed often has the freshest posts before the category
|
||||
# feed catches up. We try this first.
|
||||
"https://news.usni.org/feed",
|
||||
# Category feed has older fleet trackers for backfill.
|
||||
"https://news.usni.org/category/fleet-tracker/feed",
|
||||
)
|
||||
|
||||
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}
|
||||
|
||||
_FLEET_TRACKER_TITLE_RE = re.compile(
|
||||
r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
|
||||
)
|
||||
|
||||
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def _strip_html(html: str) -> str:
    """Reduce an HTML fragment to single-spaced plain text.

    Every ``<...>`` tag is replaced by a space so words on either side of
    a tag do not fuse together, then each run of whitespace is collapsed
    to one space and the ends are trimmed. A falsy ``html`` (``None`` or
    ``""``) yields an empty string.
    """
    markup = html if html else ""
    without_tags = _TAG_STRIP_RE.sub(" ", markup)
    collapsed = _WHITESPACE_RE.sub(" ", without_tags)
    return collapsed.strip()
|
||||
|
||||
|
||||
def _request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with each USNI RSS request.

    The ``User-Agent`` comes from ``outbound_user_agent`` for the
    ``"usni-fleet-tracker"`` identity; no browser UA is spoofed.
    ``Accept`` prefers RSS/XML and falls back to anything else at low
    priority, and ``Referer`` points at the fleet-tracker category index.
    """
    headers: dict[str, str] = {}
    headers["User-Agent"] = outbound_user_agent("usni-fleet-tracker")
    headers["Accept"] = "application/rss+xml, application/xml;q=0.9, */*;q=0.1"
    headers["Accept-Language"] = "en-US,en;q=0.5"
    headers["Referer"] = "https://news.usni.org/category/fleet-tracker"
    return headers
|
||||
|
||||
|
||||
def _parse_pubdate(raw: str) -> datetime | None:
    """Parse an RSS ``pubDate`` string into a timezone-aware datetime.

    Returns ``None`` for an empty value or one that
    ``email.utils.parsedate_to_datetime`` rejects. A result that carries
    no timezone is stamped as UTC so callers never mix naive and aware
    datetimes.
    """
    if not raw:
        return None
    try:
        parsed = parsedate_to_datetime(raw)
    except (TypeError, ValueError):
        return None
    if parsed.tzinfo is not None:
        return parsed
    return parsed.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
    """Collect fleet-tracker posts from every feed in ``rss_urls``.

    A feed that raises while being fetched, answers with a non-200 status,
    has an empty body, or is not well-formed XML is logged and skipped, so
    one bad feed never blocks the others. Only ``<item>`` elements whose
    title matches ``_FLEET_TRACKER_TITLE_RE`` are kept, keyed by link so
    the first feed to deliver an article wins.

    Returns a list of dicts with the keys ``title``, ``link``,
    ``pub_date`` (a ``datetime``, or ``None`` when the date cannot be
    parsed) and ``body`` (tag-stripped text taken from
    ``content:encoded``, falling back to ``description``).
    """
    collected: dict[str, dict] = {}
    for feed_url in rss_urls:
        try:
            resp = fetch_with_curl(feed_url, timeout=15, headers=_request_headers())
        except Exception as exc:
            logger.debug("USNI RSS %s exception: %s", feed_url, exc)
            continue

        if not (resp and resp.status_code == 200 and resp.text):
            logger.debug(
                "USNI RSS %s returned status=%s body=%d",
                feed_url,
                getattr(resp, "status_code", "?"),
                len(getattr(resp, "text", "") or ""),
            )
            continue

        try:
            channel = ET.fromstring(resp.text)
        except ET.ParseError as exc:
            logger.warning("USNI RSS parse error from %s: %s", feed_url, exc)
            continue

        for entry in channel.findall(".//item"):
            headline = (entry.findtext("title") or "").strip()
            if not _FLEET_TRACKER_TITLE_RE.search(headline):
                continue
            article_url = (entry.findtext("link") or "").strip()
            if not article_url or article_url in collected:
                continue
            published = _parse_pubdate(entry.findtext("pubDate") or "")
            # Prefer the full article in content:encoded; fall back to the
            # shorter description only when that element is absent or empty.
            encoded = entry.findtext("content:encoded", default="", namespaces=_RSS_NS)
            raw_body = encoded or entry.findtext("description", default="") or ""
            collected[article_url] = {
                "title": headline,
                "link": article_url,
                "pub_date": published,
                "body": _strip_html(raw_body),
            }
    return list(collected.values())
|
||||
|
||||
|
||||
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||
# The carrier_tracker table already covers most named bodies of water and
|
||||
# major ports — we just need to teach this module to RECOGNIZE the
|
||||
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||
# the same body of water differently.
|
||||
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||
("eastern mediterranean", "eastern mediterranean"),
|
||||
("western mediterranean", "western mediterranean"),
|
||||
("mediterranean sea", "mediterranean"),
|
||||
("the mediterranean", "mediterranean"),
|
||||
("red sea", "red sea"),
|
||||
("arabian sea area of responsibility", "arabian sea"),
|
||||
("north arabian sea", "north arabian sea"),
|
||||
("arabian sea", "arabian sea"),
|
||||
("persian gulf", "persian gulf"),
|
||||
("gulf of oman", "gulf of oman"),
|
||||
("strait of hormuz", "strait of hormuz"),
|
||||
("south china sea", "south china sea"),
|
||||
("east china sea", "east china sea"),
|
||||
("philippine sea", "philippine sea"),
|
||||
("sea of japan", "sea of japan"),
|
||||
("taiwan strait", "taiwan strait"),
|
||||
("western pacific", "western pacific"),
|
||||
("pacific ocean", "pacific"),
|
||||
("indian ocean", "indian ocean"),
|
||||
("north atlantic", "north atlantic"),
|
||||
("western atlantic", "atlantic"),
|
||||
("eastern atlantic", "atlantic"),
|
||||
("atlantic ocean", "atlantic"),
|
||||
("gulf of aden", "gulf of aden"),
|
||||
("horn of africa", "horn of africa"),
|
||||
("bab el-mandeb", "bab el-mandeb"),
|
||||
("suez canal", "suez canal"),
|
||||
("baltic sea", "baltic sea"),
|
||||
("north sea", "north sea"),
|
||||
("black sea", "black sea"),
|
||||
("south atlantic", "south atlantic"),
|
||||
("coral sea", "coral sea"),
|
||||
("gulf of mexico", "gulf of mexico"),
|
||||
("caribbean sea", "caribbean"),
|
||||
("caribbean", "caribbean"),
|
||||
# Specific ports
|
||||
("naval station norfolk", "norfolk"),
|
||||
("norfolk naval shipyard", "newport news"),
|
||||
("newport news shipbuilding", "newport news"),
|
||||
("newport news", "newport news"),
|
||||
# USNI tags Norfolk mentions with state suffix; match both.
|
||||
("norfolk, va", "norfolk"),
|
||||
("norfolk", "norfolk"),
|
||||
("naval station everett", "puget sound"),
|
||||
("naval base kitsap", "bremerton"),
|
||||
("bremerton", "bremerton"),
|
||||
("puget sound", "puget sound"),
|
||||
("naval base san diego", "san diego"),
|
||||
("san diego, calif", "san diego"),
|
||||
("san diego", "san diego"),
|
||||
("yokosuka, japan", "yokosuka"),
|
||||
("yokosuka", "yokosuka"),
|
||||
("pearl harbor", "pearl harbor"),
|
||||
("apra harbor, guam", "guam"),
|
||||
("guam", "guam"),
|
||||
("bahrain", "bahrain"),
|
||||
("naval station rota", "rota"),
|
||||
("rota, spain", "rota"),
|
||||
("naples, italy", "naples"),
|
||||
# Fleets / AORs
|
||||
("5th fleet", "5th fleet"),
|
||||
("6th fleet", "6th fleet"),
|
||||
("7th fleet", "7th fleet"),
|
||||
("3rd fleet", "3rd fleet"),
|
||||
("2nd fleet", "2nd fleet"),
|
||||
("centcom", "centcom"),
|
||||
("indo-pacific command", "indopacom"),
|
||||
("eucom", "eucom"),
|
||||
("southcom", "southcom"),
|
||||
)
|
||||
|
||||
|
||||
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
    """Translate a USNI location phrase into ``(canonical_key, display)``.

    The phrase is lower-cased and trimmed, then checked against
    ``_USNI_REGION_ALIASES`` in table order; the first alias that occurs
    anywhere inside the phrase wins, which is why more specific aliases
    are listed before their shorter forms.

    ``canonical_key`` is the value paired with the alias (intended as a
    ``carrier_tracker.REGION_COORDS`` key) and ``display`` is the matched
    alias text itself. Returns ``None`` for an empty phrase or when no
    alias matches.
    """
    needle = (phrase or "").lower().strip()
    if not needle:
        return None
    matches = (
        (canonical, alias)
        for alias, canonical in _USNI_REGION_ALIASES
        if alias in needle
    )
    return next(matches, None)
|
||||
|
||||
|
||||
# Operating-verb phrases USNI uses, with a capture group for the region
|
||||
# phrase that immediately follows. Each pattern is designed to swallow
|
||||
# the optional editorial filler that often appears between verb and
|
||||
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
|
||||
# filler; "Norfolk" is the location).
|
||||
#
|
||||
# Order matters: most-specific patterns first, so e.g. "is in port in"
|
||||
# wins over the generic "is".
|
||||
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?"  # optional "Friday" / "Monday" / etc.
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"

_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
    # "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
    re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is conducting <stuff> in [the] {REGION}"
    re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is in port in {LOCATION}"
    re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
    # "is in port" (no location — degenerate, use carrier's homeport via separate path)
    #   → not captured here; falls through to homeport
    # "is underway in [the] {REGION}"
    re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is deployed to [the] {REGION}" / "deployed in"
    re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
    re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
    re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "arrived [Day] in/at {LOCATION}"
    re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
    # "departed [Day] from {LOCATION}"
    re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "transiting [the] {REGION}" / "sailing through [the] {REGION}"
    re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
    # "is homeported at {LOCATION}"
    re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
)

# Sentence boundary inside a mention window: terminal punctuation, whitespace,
# then an uppercase letter. "Norfolk, Va., according ..." does not match
# because the period after "Va" is followed by a comma, not whitespace.
_SENTENCE_BREAK_RE = re.compile(r"[\.!?]\s+[A-Z]")

# Editorial tail USNI appends after a location, e.g.
# "Norfolk, Va., according to ship spotters".
_TRAILING_FILLER_RE = re.compile(
    r",\s+(?:according|as of|for|while|where|in support|in the)"
)


def _extract_region_for_carrier(
    body: str,
    carrier_names: list[str],
    hull_code: str,
) -> str | None:
    """Return the best-guess region phrase for one carrier from the
    article body, or None if no confident match.

    Algorithm:
      1. Find every mention of the carrier (any name variant of at least
         four characters, or the hull code) in the body, case-insensitively.
      2. For each mention, take the 320 characters AFTER it, cut at the
         first sentence break, and try the operating-verb patterns in
         priority order.
      3. Return the first hit in document order. Later mentions are not
         examined once a mention has produced a phrase, so a position
         sentence that precedes a homeport sentence wins.

    Args:
        body: Plain-text article body. ``None`` or ``""`` yields ``None``.
        carrier_names: Name variants to look for; entries shorter than four
            characters are ignored.
        hull_code: Hull designation (e.g. ``"CVN-78"``); may be empty.

    Returns:
        The captured location phrase with trailing punctuation and
        editorial filler removed, or ``None`` when nothing matched.
    """
    if not body:
        return None

    # Build a master mention regex covering every name variant + the hull.
    candidates = [re.escape(name) for name in carrier_names if name and len(name) >= 4]
    if hull_code:
        candidates.append(re.escape(hull_code))
    if not candidates:
        return None
    mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)

    window_chars = 320
    for mention in mention_re.finditer(body):
        start = mention.end()
        window = body[start : start + window_chars]
        # Keep only the remainder of the sentence containing the mention, so
        # a verb phrase from the following sentence is not attributed to this
        # carrier. The "+ 1" keeps the terminating punctuation in the window.
        sent_break = _SENTENCE_BREAK_RE.search(window)
        if sent_break:
            window = window[: sent_break.start() + 1]
        # Try patterns in priority order.
        for pat in _OPERATING_PATTERNS:
            m = pat.search(window)
            if not m:
                continue
            phrase = m.group(1).strip().rstrip(",.;: ")
            if not phrase:
                continue
            return _TRAILING_FILLER_RE.split(phrase, maxsplit=1)[0].strip()
    return None
|
||||
|
||||
|
||||
def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` for the latest USNI fleet tracker.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Carriers whose section can't be parsed (e.g. an off-week with no
    mention), whose phrase matches no known region alias, or whose
    canonical region has no coordinates are simply absent from the
    result — the caller keeps whatever position they had before.

    ``carrier_registry`` and ``region_coords`` default to the carrier_tracker
    module's own tables; passed in here for testability. Only ``None``
    triggers the default, so an explicitly supplied empty table is honored.

    Returns an empty dict when no fleet-tracker item could be fetched or the
    newest item has an empty body.
    """
    if carrier_registry is None or region_coords is None:
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        # ``is None`` rather than ``or``: a caller-supplied empty table must
        # not be silently swapped for the live one.
        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Pick the most recent by parsed pubDate. Items without a parseable
    # date sort as the epoch, i.e. to the back of the list.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    items.sort(key=lambda it: it["pub_date"] or epoch, reverse=True)
    latest = items[0]

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        # Build name variants we'll try in the body.
        full_name = info.get("name") or ""               # "USS Gerald R. Ford (CVN-78)"
        without_hull = full_name.split("(")[0].strip()   # "USS Gerald R. Ford"
        name_words = without_hull.split()
        last_word = name_words[-1] if name_words else ""  # "Ford"
        # Drop the "USS " prefix only when it is actually there; slicing
        # unconditionally would chop four characters off any other name.
        if without_hull.upper().startswith("USS "):
            ship_only = without_hull[4:].strip()          # "Gerald R. Ford"
        else:
            ship_only = without_hull

        # Variants ordered most-specific first. A registry entry without a
        # usable name yields no variants and is matched by hull code alone.
        variants: list[str] = []
        prefixed = f"USS {ship_only}" if ship_only else ""
        for v in (without_hull, prefixed, ship_only, last_word):
            if v and v not in variants and len(v) >= 4:
                variants.append(v)

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{display_phrase.title()} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||
@@ -21,9 +21,17 @@ _cache_lock = threading.Lock()
|
||||
_local_search_cache: List[Dict[str, Any]] | None = None
|
||||
_local_search_lock = threading.Lock()
|
||||
|
||||
_USER_AGENT = os.environ.get(
|
||||
"NOMINATIM_USER_AGENT", "ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)"
|
||||
)
|
||||
# Round 7a: per-install operator handle threads through every Nominatim
|
||||
# call. NOMINATIM_USER_AGENT env override is still honored for operators
|
||||
# who run a custom relay / known good identity, but the default uses the
|
||||
# per-install handle so OpenStreetMap can rate-limit per install instead
|
||||
# of treating "Shadowbroker" as one big offender.
|
||||
def _nominatim_user_agent() -> str:
|
||||
override = os.environ.get("NOMINATIM_USER_AGENT", "").strip()
|
||||
if override:
|
||||
return override
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("nominatim")
|
||||
|
||||
|
||||
def _get_cache(key: str):
|
||||
@@ -178,7 +186,7 @@ def search_geocode(query: str, limit: int = 5, local_only: bool = False) -> List
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
@@ -241,7 +249,7 @@ def reverse_geocode(lat: float, lng: float, local_only: bool = False) -> Dict[st
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
|
||||
@@ -8,6 +8,13 @@ from datetime import datetime
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _geopolitics_user_agent() -> str:
    """Return the User-Agent used by the GDELT geopolitics fetcher (Round 7a).

    Delegates to ``services.network_utils.outbound_user_agent`` with the
    component tag ``"geopolitics-gdelt"``.
    """
    # Function-scope import, resolved on each call.
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("geopolitics-gdelt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
@@ -316,7 +323,7 @@ def _fetch_article_title(url):
|
||||
resp = requests.get(
|
||||
current_url,
|
||||
timeout=4,
|
||||
headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT Dashboard/1.0)"},
|
||||
headers={"User-Agent": _geopolitics_user_agent()},
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
)
|
||||
@@ -521,10 +528,29 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
|
||||
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
||||
|
||||
|
||||
# GDELT's data.gdeltproject.org is a CNAME to a Google Cloud Storage
|
||||
# bucket of the same name. GCS returns the wildcard ``*.storage.googleapis.com``
|
||||
# certificate, which legitimately does NOT cover the GDELT custom domain
|
||||
# — Python's TLS verification correctly refuses it. Some networks/POPs
|
||||
# happen to route through a path where this works; many do not (notably
|
||||
# Docker Desktop's outbound NAT on local installs).
|
||||
#
|
||||
# Fix: rewrite the URL to hit GCS directly with a path-style bucket
|
||||
# reference, where the standard GCS cert is genuinely valid. Same data,
|
||||
# verified TLS, no operator-side workaround needed.
|
||||
def _gcs_direct_gdelt_url(url: str) -> str:
|
||||
"""If ``url`` points at data.gdeltproject.org, return the equivalent
|
||||
GCS-direct URL. Otherwise return the URL unchanged."""
|
||||
prefix = "://data.gdeltproject.org/"
|
||||
if prefix in url:
|
||||
return url.replace(prefix, "://storage.googleapis.com/data.gdeltproject.org/", 1)
|
||||
return url
|
||||
|
||||
|
||||
def _download_gdelt_export(url):
|
||||
"""Download a single GDELT export file, return bytes or None."""
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=15)
|
||||
res = fetch_with_curl(_gcs_direct_gdelt_url(url), timeout=15)
|
||||
if res.status_code == 200:
|
||||
return res.content
|
||||
except (ConnectionError, TimeoutError, OSError): # non-critical
|
||||
@@ -616,9 +642,16 @@ def fetch_global_military_incidents():
|
||||
try:
|
||||
logger.info("Fetching GDELT events via export CDN (multi-file)...")
|
||||
|
||||
# Get the latest export URL to determine current timestamp
|
||||
# Get the latest export URL to determine current timestamp.
|
||||
# HTTPS is used to prevent passive network observers from injecting
|
||||
# poisoned export records into the global incident map via MITM.
|
||||
# GDELT serves the same content over HTTPS as HTTP.
|
||||
# Use the GCS-direct URL because data.gdeltproject.org's CNAME
|
||||
# serves a wildcard *.storage.googleapis.com cert that legitimately
|
||||
# doesn't cover the GDELT hostname. See _gcs_direct_gdelt_url above.
|
||||
index_res = fetch_with_curl(
|
||||
"http://data.gdeltproject.org/gdeltv2/lastupdate.txt", timeout=10
|
||||
_gcs_direct_gdelt_url("https://data.gdeltproject.org/gdeltv2/lastupdate.txt"),
|
||||
timeout=10,
|
||||
)
|
||||
if index_res.status_code != 200:
|
||||
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
||||
@@ -636,7 +669,9 @@ def fetch_global_military_incidents():
|
||||
logger.error("Could not find GDELT export URL")
|
||||
return []
|
||||
|
||||
# Extract timestamp from URL like: http://data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip
|
||||
# Extract timestamp from URL like: https://data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip
|
||||
# (GDELT's lastupdate.txt may still list URLs with http:// — we ignore
|
||||
# the scheme there and reconstruct each download URL as https:// below.)
|
||||
import re
|
||||
|
||||
ts_match = re.search(r"(\d{14})\.export\.CSV\.zip", latest_url)
|
||||
@@ -652,7 +687,7 @@ def fetch_global_military_incidents():
|
||||
for i in range(NUM_FILES):
|
||||
ts = latest_ts - timedelta(minutes=15 * i)
|
||||
fname = ts.strftime("%Y%m%d%H%M%S") + ".export.CSV.zip"
|
||||
url = f"http://data.gdeltproject.org/gdeltv2/{fname}"
|
||||
url = f"https://data.gdeltproject.org/gdeltv2/{fname}"
|
||||
urls.append(url)
|
||||
|
||||
logger.info(f"Downloading {len(urls)} GDELT export files...")
|
||||
|
||||
@@ -34,6 +34,20 @@ kiwisdr_cache: TTLCache = TTLCache(maxsize=1, ttl=_REFRESH_SECONDS)
|
||||
|
||||
_SOURCE_URL = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_CACHE_FILE = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_cache.json"
|
||||
# Bundled fallback — shipped with the codebase so the KiwiSDR layer always
|
||||
# has something to render even when the upstream is unreachable, returns
|
||||
# garbage, or appears to have been tampered with. Issue #206: the upstream
|
||||
# only speaks HTTP, so we can't rely on TLS for integrity — instead we
|
||||
# validate the response's shape and fall back to this bundle if it doesn't
|
||||
# look right.
|
||||
_BUNDLED_FALLBACK = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_directory.json"
|
||||
|
||||
# Minimum number of receivers we expect from a healthy upstream response.
|
||||
# The KiwiSDR public network has consistently sat well above this threshold
|
||||
# for years. If we see fewer than this many parsed receivers, treat the
|
||||
# response as suspect and fall back. The threshold is a hard-coded constant;
|
||||
# lower it here if the upstream shrinks legitimately.
|
||||
_MIN_HEALTHY_RECEIVER_COUNT = 50
|
||||
_LINE_COMMENT_RE = re.compile(r"^\s*//.*$", re.MULTILINE)
|
||||
_VAR_PREFIX_RE = re.compile(r"^\s*var\s+kiwisdr_com\s*=\s*", re.MULTILINE)
|
||||
_TRAILING_COMMA_RE = re.compile(r",(\s*[\]}])")
|
||||
@@ -135,12 +149,72 @@ def _parse_mirror_payload(body: str) -> list[dict]:
|
||||
return nodes
|
||||
|
||||
|
||||
def _validate_fetched_nodes(nodes: list[dict]) -> bool:
|
||||
"""Sanity-check freshly-fetched receiver data before trusting it.
|
||||
|
||||
The upstream (rx.linkfanel.net) speaks only HTTP — there is no TLS to
|
||||
authenticate the response. A passive MITM could inject doctored
|
||||
receiver positions (false pins on the map) or strip the response down
|
||||
to a tiny subset. We can't prevent the modification at the transport
|
||||
layer, but we can refuse to commit to obviously-bad responses.
|
||||
|
||||
Returns True if the parsed list looks reasonable. False means we
|
||||
should fall back to a previously-cached or bundled directory.
|
||||
"""
|
||||
if not isinstance(nodes, list):
|
||||
return False
|
||||
if len(nodes) < _MIN_HEALTHY_RECEIVER_COUNT:
|
||||
# Either upstream is degraded or someone is feeding us a stripped
|
||||
# response. Either way, the bundled fallback is more useful.
|
||||
return False
|
||||
|
||||
# Spot-check: every entry should have a name, a parsed lat/lon, and a
|
||||
# URL field. If more than 5% of entries are missing core fields, the
|
||||
# parse went sideways.
|
||||
missing_core = 0
|
||||
for entry in nodes:
|
||||
if not isinstance(entry, dict):
|
||||
missing_core += 1
|
||||
continue
|
||||
if not entry.get("name") or not isinstance(entry.get("lat"), (int, float)):
|
||||
missing_core += 1
|
||||
if missing_core > max(5, len(nodes) // 20):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _load_bundled_fallback() -> list[dict]:
    """Load the receiver directory at ``_BUNDLED_FALLBACK``.

    Never raises. Returns the parsed JSON when it is a list. Returns ``[]``
    when the file is missing, cannot be read or parsed (a warning is logged),
    or does not contain a top-level list.
    """
    if _BUNDLED_FALLBACK.exists():
        try:
            parsed = json.loads(_BUNDLED_FALLBACK.read_text(encoding="utf-8"))
        except Exception as e:
            # Best-effort last resort: an unreadable bundle must not crash the layer.
            logger.warning(f"KiwiSDR bundled fallback unreadable: {e}")
        else:
            if isinstance(parsed, list):
                return parsed
    return []
|
||||
|
||||
|
||||
@cached(kiwisdr_cache)
|
||||
def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
"""Return the KiwiSDR receiver list, refreshed at most once per day.
|
||||
|
||||
Order of preference: in-memory cache (handled by @cached) → on-disk cache
|
||||
if <24h old → network fetch from rx.linkfanel.net.
|
||||
Layered fallback (issue #206 — upstream is HTTP-only, so we defend with
|
||||
content validation + bundled static directory rather than trying to
|
||||
upgrade the transport):
|
||||
|
||||
1. In-memory cache (handled by @cached on this function)
|
||||
2. On-disk cache if <24h old
|
||||
3. Fresh network fetch from rx.linkfanel.net → validated → committed
|
||||
4. Stale on-disk cache (>24h) if validation fails
|
||||
5. Bundled static directory at backend/data/kiwisdr_directory.json
|
||||
|
||||
The KiwiSDR map layer renders something useful in every case. A
|
||||
tampered upstream returning garbage is caught by _validate_fetched_nodes()
|
||||
and falls through to whatever previously-trusted snapshot we have.
|
||||
"""
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
@@ -153,34 +227,57 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
return cached_nodes
|
||||
|
||||
# 2. Cache cold or stale — fetch from network.
|
||||
fresh_nodes: list[dict] = []
|
||||
fetch_succeeded = False
|
||||
try:
|
||||
res = fetch_with_curl(_SOURCE_URL, timeout=20)
|
||||
if not res or res.status_code != 200:
|
||||
logger.error(
|
||||
f"KiwiSDR fetch failed: HTTP {res.status_code if res else 'no response'}"
|
||||
if res and res.status_code == 200:
|
||||
fresh_nodes = _parse_mirror_payload(res.text)
|
||||
fetch_succeeded = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"KiwiSDR fetch returned HTTP {res.status_code if res else 'no response'}"
|
||||
)
|
||||
return []
|
||||
|
||||
nodes = _parse_mirror_payload(res.text)
|
||||
if nodes:
|
||||
_save_disk_cache(nodes)
|
||||
logger.info(
|
||||
f"KiwiSDR: refreshed {len(nodes)} receivers from rx.linkfanel.net "
|
||||
"(next refresh in 24h)"
|
||||
)
|
||||
return nodes
|
||||
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"KiwiSDR fetch exception: {e}")
|
||||
# Fall back to a stale disk cache if one exists, even if >24h old.
|
||||
if _CACHE_FILE.exists():
|
||||
try:
|
||||
stale = json.loads(_CACHE_FILE.read_text(encoding="utf-8"))
|
||||
if isinstance(stale, list):
|
||||
logger.info(
|
||||
f"KiwiSDR: serving {len(stale)} stale receivers from disk after fetch failure"
|
||||
)
|
||||
return stale
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
logger.warning(f"KiwiSDR fetch exception: {e}")
|
||||
|
||||
# 3. Validate before committing. If the response looks healthy, save
|
||||
# it as the new cache and return.
|
||||
if fetch_succeeded and _validate_fetched_nodes(fresh_nodes):
|
||||
_save_disk_cache(fresh_nodes)
|
||||
logger.info(
|
||||
f"KiwiSDR: refreshed {len(fresh_nodes)} receivers from rx.linkfanel.net "
|
||||
"(next refresh in 24h)"
|
||||
)
|
||||
return fresh_nodes
|
||||
|
||||
if fetch_succeeded:
|
||||
# Network came back, but the payload didn't pass validation —
|
||||
# either upstream is degraded or a MITM is at work. Fall through
|
||||
# to a trusted snapshot rather than committing garbage to disk.
|
||||
logger.warning(
|
||||
"KiwiSDR: upstream response failed validation (%d entries) — "
|
||||
"falling back to trusted snapshot",
|
||||
len(fresh_nodes),
|
||||
)
|
||||
|
||||
# 4. Stale on-disk cache, if any.
|
||||
if _CACHE_FILE.exists():
|
||||
try:
|
||||
stale = json.loads(_CACHE_FILE.read_text(encoding="utf-8"))
|
||||
if isinstance(stale, list) and stale:
|
||||
logger.info(
|
||||
f"KiwiSDR: serving {len(stale)} stale receivers from disk"
|
||||
)
|
||||
return stale
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 5. Bundled static directory — last resort, always works.
|
||||
bundled = _load_bundled_fallback()
|
||||
if bundled:
|
||||
logger.info(
|
||||
f"KiwiSDR: serving {len(bundled)} receivers from bundled fallback "
|
||||
"(no fresh fetch + no disk cache available)"
|
||||
)
|
||||
return bundled
|
||||
|
||||
@@ -69,6 +69,115 @@ def _derive_peer_key(shared_secret: str, peer_url: str) -> bytes:
|
||||
).digest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Issue #256 (tg12): per-peer HMAC secrets
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Before this change, ALL peer-push HMACs were derived from a single
|
||||
# fleet-shared ``MESH_PEER_PUSH_SECRET``. The receiver could prove a
|
||||
# request was signed by *someone who knows the fleet secret*, but it
|
||||
# could NOT prove which peer signed it — any peer could compute the
|
||||
# expected HMAC for any other peer's URL and impersonate that peer.
|
||||
#
|
||||
# Fix: an optional ``MESH_PEER_SECRETS`` env var maps specific peer URLs
|
||||
# to per-peer secrets. When a peer URL is listed there, only that
|
||||
# per-peer secret is accepted for that URL — the global secret is
|
||||
# ignored for that peer. Peer A no longer learns peer B's secret, so
|
||||
# peer A cannot forge a request claiming to be peer B.
|
||||
#
|
||||
# Backwards-compatible by design:
|
||||
#
|
||||
# - Single-peer installs (``MESH_PEER_SECRETS`` empty) keep using the
|
||||
# global secret. Zero behavior change. Zero operator action required.
|
||||
# - Multi-peer installs that haven't migrated yet keep using the global
|
||||
# secret for every peer. Same behavior as before — same exposure.
|
||||
# - Multi-peer installs that have migrated configure
|
||||
# ``MESH_PEER_SECRETS=urlA=secretA,urlB=secretB`` and immediately get
|
||||
# per-peer identity. Migration is incremental: peers not yet listed
|
||||
# continue using the global secret until both sides of that peering
|
||||
# add their entry.
|
||||
|
||||
_PEER_SECRETS_CACHE: dict[str, str] = {}
|
||||
_PEER_SECRETS_CACHE_RAW: str = ""
|
||||
|
||||
|
||||
def _lookup_per_peer_secret(normalized_url: str) -> str:
|
||||
"""Return the per-peer secret for ``normalized_url`` from MESH_PEER_SECRETS.
|
||||
|
||||
Returns "" if no per-peer entry is configured for that URL. The parser
|
||||
is forgiving:
|
||||
|
||||
- Whitespace around items, URLs, and secrets is stripped.
|
||||
- Items without ``=`` or with empty URL/secret halves are skipped.
|
||||
- The URL half is normalized via ``normalize_peer_url`` so config
|
||||
authors don't have to match scheme/port/path quirks exactly.
|
||||
|
||||
The cache is invalidated whenever the env var's raw value changes,
|
||||
which keeps tests' ``monkeypatch.setenv`` calls effective without
|
||||
forcing a process restart.
|
||||
"""
|
||||
import os
|
||||
|
||||
raw = str(os.environ.get("MESH_PEER_SECRETS", "") or "").strip()
|
||||
|
||||
global _PEER_SECRETS_CACHE, _PEER_SECRETS_CACHE_RAW
|
||||
if raw != _PEER_SECRETS_CACHE_RAW:
|
||||
new_cache: dict[str, str] = {}
|
||||
for chunk in raw.split(","):
|
||||
chunk = chunk.strip()
|
||||
if not chunk or "=" not in chunk:
|
||||
continue
|
||||
url_part, _, secret_part = chunk.partition("=")
|
||||
normalized = normalize_peer_url(url_part.strip())
|
||||
secret = secret_part.strip()
|
||||
if normalized and secret:
|
||||
new_cache[normalized] = secret
|
||||
_PEER_SECRETS_CACHE = new_cache
|
||||
_PEER_SECRETS_CACHE_RAW = raw
|
||||
|
||||
return _PEER_SECRETS_CACHE.get(normalized_url, "")
|
||||
|
||||
|
||||
def resolve_peer_key_for_url(peer_url: str) -> bytes:
    """Derive the HMAC key to use for ``peer_url`` (issue #256).

    Resolution order:

    1. Normalize the URL with ``normalize_peer_url``; an empty result yields
       ``b""``.
    2. If ``MESH_PEER_SECRETS`` has an entry for the normalized URL, derive
       the key from that per-peer secret — the global secret is not
       consulted.
    3. Otherwise derive it from the fleet-wide ``MESH_PEER_PUSH_SECRET``
       setting, stripped of whitespace.

    Sender and receiver must resolve the same key for a given URL, so both
    ends of a peering need matching configuration for it.

    Returns:
        The derived key, or ``b""`` when no usable secret exists or settings
        cannot be loaded. Callers must treat ``b""`` as fail-closed: skip the
        push, or reject the verification.
    """
    canonical_url = normalize_peer_url(peer_url)
    if not canonical_url:
        return b""

    secret = _lookup_per_peer_secret(canonical_url)
    if not secret:
        # No per-peer entry: fall back to the legacy fleet-shared secret so
        # un-migrated installs keep working.
        try:
            from services.config import get_settings

            configured = getattr(get_settings(), "MESH_PEER_PUSH_SECRET", "") or ""
            secret = str(configured).strip()
        except Exception:
            return b""

    return _derive_peer_key(secret, canonical_url) if secret else b""
|
||||
|
||||
|
||||
def _node_digest(public_key_b64: str) -> str:
|
||||
raw = base64.b64decode(public_key_b64)
|
||||
return hashlib.sha256(raw).hexdigest()
|
||||
|
||||
@@ -317,6 +317,39 @@ class DMRelay:
|
||||
def _self_mailbox_limit(self) -> int:
|
||||
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
||||
|
||||
def _per_sender_pending_limit(self) -> int:
|
||||
"""Anti-spam cap on UNACKED messages a single sender can have parked
|
||||
in a single recipient mailbox at any one time. See ``config.py``
|
||||
``MESH_DM_PENDING_PER_SENDER_LIMIT`` for the threat model — this
|
||||
rule is enforced both at ``deposit`` (local) and at
|
||||
``accept_replica`` (peer push acceptance), making it a network
|
||||
rule rather than a client-side honor system."""
|
||||
try:
|
||||
limit = int(getattr(self._settings(), "MESH_DM_PENDING_PER_SENDER_LIMIT", 2) or 2)
|
||||
except (TypeError, ValueError):
|
||||
limit = 2
|
||||
return max(1, limit)
|
||||
|
||||
def _per_sender_pending_count(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
sender_block_ref: str,
|
||||
) -> int:
|
||||
"""Count UNACKED messages from ``sender_block_ref`` currently parked
|
||||
in ``mailbox_key``. Caller already holds ``self._lock``.
|
||||
|
||||
Messages that have been claimed/acked are removed from the mailbox
|
||||
list (see ``claim_message_ids``), so anything still here is by
|
||||
definition unacked. We count by exact ``sender_block_ref`` match
|
||||
— that's the per-pair sender identity used for blocking too, so
|
||||
the cap is naturally per-(sender, recipient).
|
||||
"""
|
||||
if not mailbox_key or not sender_block_ref:
|
||||
return 0
|
||||
messages = self._mailboxes.get(mailbox_key, [])
|
||||
return sum(1 for m in messages if m.sender_block_ref == sender_block_ref)
|
||||
|
||||
def _nonce_ttl_seconds(self) -> int:
|
||||
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
||||
|
||||
@@ -1264,6 +1297,21 @@ class DMRelay:
|
||||
)
|
||||
self._save()
|
||||
|
||||
def unregister_prekey_lookup_alias(self, alias: str) -> bool:
    """Remove an invite-scoped lookup alias from the local relay.

    The alias is stringified and stripped; a blank alias is a no-op. State is
    refreshed from the shared relay under ``self._lock`` before the lookup,
    and the relay is persisted with ``self._save()`` only when an entry was
    actually deleted.

    Returns:
        True if the alias existed and was removed, False otherwise.
    """
    handle = str(alias or "").strip()
    if not handle:
        return False

    with self._lock:
        self._refresh_from_shared_relay()
        found = handle in self._prekey_lookup_aliases
        if found:
            del self._prekey_lookup_aliases[handle]

    # NOTE(review): the source's indentation was lost, so it is unclear
    # whether the save originally ran inside the lock — confirm.
    if found:
        self._save()
    return found
|
||||
|
||||
def consume_one_time_prekey(self, agent_id: str) -> dict[str, Any] | None:
|
||||
"""Atomically claim the next published one-time prekey for a peer bundle."""
|
||||
claimed: dict[str, Any] | None = None
|
||||
@@ -1500,6 +1548,29 @@ class DMRelay:
|
||||
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
||||
metrics_inc("dm_drop_full")
|
||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||
# Anti-spam: per-(sender, recipient) cap on unacked messages.
|
||||
# A sender who already has the configured number of messages
|
||||
# parked in this mailbox can't deposit more until the recipient
|
||||
# pulls (acks) at least one. The same cap is re-enforced on
|
||||
# inbound replication in ``accept_replica`` so this rule isn't
|
||||
# bypassable by patching out the local check on a hostile
|
||||
# sender's relay — see config.py
|
||||
# MESH_DM_PENDING_PER_SENDER_LIMIT for the threat model.
|
||||
per_sender_limit = self._per_sender_pending_limit()
|
||||
pending = self._per_sender_pending_count(
|
||||
mailbox_key=mailbox_key,
|
||||
sender_block_ref=sender_block_ref,
|
||||
)
|
||||
if pending >= per_sender_limit:
|
||||
metrics_inc("dm_drop_per_sender_cap")
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": (
|
||||
f"Recipient already has {pending} unread message"
|
||||
f"{'s' if pending != 1 else ''} from you. Wait for "
|
||||
"them to read your messages before sending more."
|
||||
),
|
||||
}
|
||||
if not msg_id:
|
||||
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
||||
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
||||
@@ -1524,8 +1595,245 @@ class DMRelay:
|
||||
)
|
||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||
self._save()
|
||||
# Cross-node mailbox replication: push the freshly-stored
|
||||
# envelope to every authenticated relay peer so the recipient
|
||||
# can log into ANY node and find their messages. The push is
|
||||
# async (fire-and-forget thread) so deposit() returns
|
||||
# immediately — slow Tor peers can't block the sender's UX.
|
||||
# Each receiving peer re-enforces the per-sender cap on
|
||||
# acceptance, so hostile relays can't widen the cap.
|
||||
try:
|
||||
envelope_for_push = self.envelope_for_replication(
|
||||
mailbox_key=mailbox_key, msg_id=msg_id,
|
||||
)
|
||||
if envelope_for_push:
|
||||
self._replicate_envelope_to_peers_async(
|
||||
envelope=envelope_for_push,
|
||||
)
|
||||
except Exception:
|
||||
metrics_inc("dm_replication_push_error")
|
||||
return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def accept_replica(
    self,
    *,
    envelope: dict[str, Any],
    originating_peer_url: str = "",
) -> dict[str, Any]:
    """Ingest a DM envelope that a peer relay replicated to this node.

    This is the receiving side of cross-node mailbox replication: after a
    relay accepts a ``deposit`` it pushes the stored envelope to its peers,
    and each peer calls ``accept_replica`` so the recipient can poll any
    node.

    The per-(sender, recipient) pending cap is re-checked here, as is the
    per-class mailbox size limit. Enforcing them on inbound replicas is what
    makes the cap a network rule: a relay that skips its local ``deposit``
    check still cannot spread extra messages to honest peers.

    Because the envelope comes from another relay it is treated as untrusted.
    Required fields must be present, and ``timestamp`` must be a finite
    number; malformed values are rejected with a structured response instead
    of raising from ``float()``.

    Args:
        envelope: Wire-form message with ``msg_id``, ``mailbox_key``,
            ``sender_block_ref`` and ``ciphertext`` (all required), plus
            optional ``sender_id``, ``timestamp``, ``delivery_class``,
            ``sender_seal``, ``relay_salt``, ``payload_format`` and
            ``session_welcome``.
        originating_peer_url: URL of the pushing peer. Accepted for interface
            compatibility; this method does not currently use it.

    Returns:
        ``{"ok": True, "msg_id": ...}`` on acceptance, with
        ``"duplicate": True`` added when the message was already stored.
        Otherwise ``{"ok": False, "detail": ...}``; cap rejections also carry
        ``cap_violation``, ``pending`` and ``limit``.
    """
    import math

    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}
    msg_id = str(envelope.get("msg_id", "") or "").strip()
    mailbox_key = str(envelope.get("mailbox_key", "") or "").strip()
    sender_block_ref = str(envelope.get("sender_block_ref", "") or "").strip()
    ciphertext = str(envelope.get("ciphertext", "") or "")
    if not msg_id or not mailbox_key or not sender_block_ref or not ciphertext:
        return {"ok": False, "detail": "envelope missing required fields"}

    # Validate the peer-supplied timestamp up front. A missing or falsy value
    # keeps the original behaviour of defaulting to "now".
    raw_timestamp = envelope.get("timestamp")
    try:
        timestamp = float(raw_timestamp) if raw_timestamp else time.time()
    except (TypeError, ValueError, OverflowError):
        return {"ok": False, "detail": "envelope timestamp invalid"}
    if not math.isfinite(timestamp):
        return {"ok": False, "detail": "envelope timestamp invalid"}

    # NOTE(review): the source's indentation was lost; everything below is
    # assumed to run under the lock, mirroring the deposit path — confirm.
    with self._lock:
        self._refresh_from_shared_relay()
        self._cleanup_expired()

        # Idempotent: the same msg_id can arrive twice when replication
        # round-trips or several peers forward it. Accept silently.
        if any(m.msg_id == msg_id for m in self._mailboxes.get(mailbox_key, [])):
            metrics_inc("dm_replica_duplicate")
            return {"ok": True, "msg_id": msg_id, "duplicate": True}

        # Same per-class size cap as the deposit path, so a peer cannot use
        # the replica route to bypass the class limit. Unknown classes get
        # the shared-mailbox limit.
        delivery_class = str(envelope.get("delivery_class", "") or "")
        if delivery_class in ("request", "shared", "self"):
            class_limit = self._mailbox_limit_for_class(delivery_class)
        else:
            class_limit = self._shared_mailbox_limit()
        if len(self._mailboxes.get(mailbox_key, [])) >= class_limit:
            metrics_inc("dm_replica_drop_full")
            return {"ok": False, "detail": "Recipient mailbox full"}

        # The network rule: per-(sender, recipient) anti-spam cap.
        per_sender_limit = self._per_sender_pending_limit()
        pending = self._per_sender_pending_count(
            mailbox_key=mailbox_key,
            sender_block_ref=sender_block_ref,
        )
        if pending >= per_sender_limit:
            metrics_inc("dm_replica_drop_per_sender_cap")
            # Structured rejection so the pushing relay can tell a cap
            # refusal apart from other failures.
            return {
                "ok": False,
                "detail": (
                    "Per-sender cap reached on this relay; refusing replica"
                ),
                "cap_violation": True,
                "pending": pending,
                "limit": per_sender_limit,
            }

        # Accept the replica into the local mailbox.
        # NOTE(review): indexing assumes self._mailboxes creates missing
        # mailboxes on access (e.g. a defaultdict) — confirm.
        self._mailboxes[mailbox_key].append(
            DMMessage(
                sender_id=str(envelope.get("sender_id", "") or ""),
                ciphertext=ciphertext,
                timestamp=timestamp,
                msg_id=msg_id,
                delivery_class=str(envelope.get("delivery_class", "shared") or "shared"),
                sender_seal=str(envelope.get("sender_seal", "") or ""),
                relay_salt=str(envelope.get("relay_salt", "") or ""),
                sender_block_ref=sender_block_ref,
                payload_format=str(envelope.get("payload_format", "dm1") or "dm1"),
                session_welcome=str(envelope.get("session_welcome", "") or ""),
            )
        )
        self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
        self._save()
        metrics_inc("dm_replica_accepted")
        return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def _replicate_envelope_to_peers_async(
    self,
    *,
    envelope: dict[str, Any],
) -> None:
    """Push an outbound DM envelope to every authenticated relay peer.

    Fire-and-forget: the work runs in a daemon thread so ``deposit`` returns
    immediately and slow peers cannot block the sender. Failures are counted
    via ``metrics_inc`` and swallowed; nothing is raised to the caller, and
    this method keeps no retry state.

    Each request is signed with the per-peer HMAC scheme (#256). The key
    comes from ``resolve_peer_key_for_url``; the hex HMAC-SHA256 of the JSON
    payload goes in ``X-Peer-HMAC`` alongside ``X-Peer-Url``. When no key can
    be resolved for a peer, that peer is skipped (fail-closed) rather than
    sent an unsigned envelope, as the resolver's contract requires.

    Args:
        envelope: Wire-form envelope, as produced by
            ``envelope_for_replication``; POSTed as ``{"envelope": ...}``.
    """
    import threading

    def _do_push():
        try:
            import hashlib
            import hmac
            import requests as _requests

            from services.mesh.mesh_crypto import (
                normalize_peer_url,
                resolve_peer_key_for_url,
            )
            from services.mesh.mesh_router import (
                authenticated_push_peer_urls,
            )

            peers = authenticated_push_peer_urls()
            if not peers:
                return

            # Serialize once: every peer's HMAC must cover these exact bytes.
            payload = json.dumps(
                {"envelope": envelope},
                separators=(",", ":"),
                ensure_ascii=False,
            ).encode("utf-8")

            timeout = max(
                1,
                int(getattr(self._settings(), "MESH_RELAY_PUSH_TIMEOUT_S", 10) or 10),
            )

            for peer_url in peers:
                try:
                    normalized = normalize_peer_url(peer_url)
                    peer_key = resolve_peer_key_for_url(normalized)
                    if not peer_key:
                        # Fail closed: without a key the request cannot be
                        # authenticated, so do not send the envelope at all.
                        metrics_inc("dm_replication_push_error")
                        continue
                    headers = {
                        "Content-Type": "application/json",
                        "X-Peer-Url": normalized,
                        "X-Peer-HMAC": hmac.new(
                            peer_key, payload, hashlib.sha256
                        ).hexdigest(),
                    }
                    url = f"{peer_url}/api/mesh/dm/replicate-envelope"
                    resp = _requests.post(
                        url, data=payload, timeout=timeout, headers=headers,
                    )
                    if resp.status_code == 200:
                        metrics_inc("dm_replication_push_ok")
                    else:
                        # Any non-200 response is only counted here; the
                        # response body is not inspected.
                        metrics_inc("dm_replication_push_rejected")
                except Exception:
                    # A per-peer failure is non-fatal: count it and move on
                    # so the remaining peers still get the envelope.
                    metrics_inc("dm_replication_push_error")
                    continue
        except Exception:
            # Outer guard: setup failures (imports, settings, serialization)
            # must never escape the background thread.
            metrics_inc("dm_replication_push_error")

    thread = threading.Thread(
        target=_do_push,
        name="dm-replicate-push",
        daemon=True,
    )
    thread.start()
|
||||
|
||||
def envelope_for_replication(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
msg_id: str,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Return the wire-form envelope for a stored message, suitable
|
||||
for POSTing to a peer relay's replicate-envelope endpoint.
|
||||
|
||||
Returns ``None`` if the message isn't in the mailbox (already
|
||||
acked, expired, never existed). The caller holds the
|
||||
responsibility for transport security (Tor SOCKS for .onion
|
||||
peers, per-peer HMAC) and for not leaking the envelope to
|
||||
clearnet peers when private transport is required.
|
||||
"""
|
||||
with self._lock:
|
||||
for m in self._mailboxes.get(mailbox_key, []):
|
||||
if m.msg_id == msg_id:
|
||||
return {
|
||||
"msg_id": m.msg_id,
|
||||
"mailbox_key": mailbox_key,
|
||||
"sender_id": m.sender_id,
|
||||
"sender_block_ref": m.sender_block_ref,
|
||||
"sender_seal": m.sender_seal,
|
||||
"ciphertext": m.ciphertext,
|
||||
"timestamp": m.timestamp,
|
||||
"delivery_class": m.delivery_class,
|
||||
"relay_salt": m.relay_salt,
|
||||
"payload_format": m.payload_format,
|
||||
"session_welcome": m.session_welcome,
|
||||
}
|
||||
return None
|
||||
|
||||
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
|
||||
@@ -33,8 +33,9 @@ Each event contains:
|
||||
|
||||
Persistence: JSON file at backend/data/infonet.json
|
||||
|
||||
Encrypted gate chat events are intentionally kept off the public chain and
|
||||
persisted separately via GateMessageStore.
|
||||
Encrypted gate chat events are private-chain ciphertext records. They are
|
||||
excluded from public read surfaces and replicated only over private Infonet
|
||||
transports.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -64,6 +65,8 @@ from services.mesh.mesh_schema import (
|
||||
ACTIVE_PUBLIC_LEDGER_EVENT_TYPES,
|
||||
PUBLIC_LEDGER_EVENT_TYPES,
|
||||
validate_event_payload,
|
||||
validate_private_dm_ledger_payload,
|
||||
validate_private_gate_ledger_payload,
|
||||
validate_protocol_fields,
|
||||
validate_public_ledger_payload,
|
||||
)
|
||||
@@ -127,6 +130,12 @@ GATE_SEGMENT_MAX_COMPRESSED_BYTES = max(
|
||||
int(os.environ.get("MESH_GATE_SEGMENT_MAX_COMPRESSED_BYTES", str(2 * 1024 * 1024)) or str(2 * 1024 * 1024)),
|
||||
)
|
||||
GATE_SEGMENT_STORAGE_VERSION = 1
|
||||
DM_HASHCHAIN_SPOOL_LIMIT = max(1, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_LIMIT", "2") or "2"))
|
||||
DM_HASHCHAIN_SPOOL_SENDER_LIMIT = max(
|
||||
1,
|
||||
int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_SENDER_LIMIT", "1") or "1"),
|
||||
)
|
||||
DM_HASHCHAIN_SPOOL_TTL_S = max(60, int(os.environ.get("MESH_DM_HASHCHAIN_SPOOL_TTL_S", "3600") or "3600"))
|
||||
_PUBLIC_EVENT_APPEND_HOOKS: list[Any] = []
|
||||
_PUBLIC_EVENT_APPEND_HOOKS_LOCK = threading.Lock()
|
||||
|
||||
@@ -216,18 +225,19 @@ def _peer_pair_ref_key(peer_url: str) -> bytes:
|
||||
Returns an empty key on misconfiguration so callers fail closed.
|
||||
"""
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
|
||||
secret = str(get_settings().MESH_PEER_PUSH_SECRET or "").strip()
|
||||
from services.mesh.mesh_crypto import (
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
except Exception:
|
||||
return b""
|
||||
if not secret:
|
||||
return b""
|
||||
normalized = normalize_peer_url(peer_url or "")
|
||||
if not normalized:
|
||||
return b""
|
||||
peer_key = _derive_peer_key(secret, normalized)
|
||||
# Issue #256: resolve_peer_key_for_url() prefers per-peer secrets
|
||||
# from MESH_PEER_SECRETS and falls back to the global
|
||||
# MESH_PEER_PUSH_SECRET only when the URL has no per-peer entry.
|
||||
peer_key = resolve_peer_key_for_url(normalized)
|
||||
if not peer_key:
|
||||
return b""
|
||||
# Domain-separate from the transport HMAC key so the two
|
||||
@@ -339,6 +349,32 @@ def _private_gate_event_id(
|
||||
).hexdigest()
|
||||
|
||||
|
||||
def _private_gate_signature_payload_variants(gate_id: str, event: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the distinct signature payloads to try for a gate event.

    The first entry is the payload built by
    ``_private_gate_signature_payload``. Further candidates drop
    ``reply_to`` (when the event payload carries a non-empty one) and/or
    ``epoch`` (when the first payload contains it) -- presumably so that
    signatures made without those fields still verify; confirm against
    the signing clients. Candidates that serialize to the same canonical
    JSON are collapsed, keeping the first occurrence and its position.
    """
    canonical = _private_gate_signature_payload(gate_id, event)

    raw_payload = event.get("payload")
    if not isinstance(raw_payload, dict):
        raw_payload = {}
    has_reply = bool(str(raw_payload.get("reply_to", "") or "").strip())

    candidates: list[dict[str, Any]] = [canonical]
    if has_reply:
        candidates.append(
            _private_gate_signature_payload(gate_id, event, include_reply_to=False)
        )
    if "epoch" in canonical:
        candidates.append(
            {name: value for name, value in canonical.items() if name != "epoch"}
        )
        if has_reply:
            bare = _private_gate_signature_payload(gate_id, event, include_reply_to=False)
            bare.pop("epoch", None)
            candidates.append(bare)

    # Insertion-ordered dict keyed by canonical JSON: first variant wins.
    unique: dict[str, dict[str, Any]] = {}
    for candidate in candidates:
        fingerprint = json.dumps(
            candidate, sort_keys=True, separators=(",", ":"), ensure_ascii=False
        )
        unique.setdefault(fingerprint, candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def _sanitize_private_gate_event(gate_id: str, event: dict[str, Any]) -> dict[str, Any]:
|
||||
payload = event.get("payload") if isinstance(event.get("payload"), dict) else {}
|
||||
sanitized = {
|
||||
@@ -1438,14 +1474,57 @@ class Infonet:
|
||||
# Running counters — avoid O(N) scans in get_info()
|
||||
self._type_counts: dict[str, int] = {}
|
||||
self._active_count: int = 0
|
||||
self._registered_nodes: set[str] = set()
|
||||
self._chain_bytes: int = 2 # Start with "[]" empty JSON array
|
||||
self._dirty = False
|
||||
self._save_lock = threading.Lock()
|
||||
self._save_timer: threading.Timer | None = None
|
||||
self._SAVE_INTERVAL = 5.0 # seconds — coalesce writes
|
||||
# Issue #208: Merkle levels cache so get_merkle_proofs() doesn't
|
||||
# rebuild O(n) levels on every public call. Invalidated whenever
|
||||
# self.events mutates. Computed lazily on first read after an
|
||||
# invalidation.
|
||||
self._merkle_levels_cache: list[list[str]] | None = None
|
||||
self._merkle_levels_for_event_count: int = -1
|
||||
atexit.register(self._flush)
|
||||
self._load()
|
||||
|
||||
def _invalidate_merkle_cache(self) -> None:
    """Drop the cached Merkle levels and reset the cached event count.

    Intended to be called after anything that may have changed
    ``self.events``. With the cache set to ``None`` and the count set to
    ``-1``, the next ``_get_merkle_levels()`` call rebuilds the levels.
    """
    self._merkle_levels_for_event_count = -1
    self._merkle_levels_cache = None
|
||||
|
||||
def _get_merkle_levels(self) -> list[list[str]]:
    """Return Merkle levels for the current chain, using the cache.

    Issue #208: a public endpoint
    (``/api/mesh/infonet/sync?include_proofs=true``) used to rebuild the
    levels on every request, which is O(n) in chain length and easy to
    abuse for CPU exhaustion. The levels are now cached and reused while
    the cache is populated and was built for the current number of
    events; otherwise they are rebuilt from every ``event_id`` in
    ``self.events`` and stored together with that count.
    """
    from services.mesh.mesh_merkle import build_merkle_levels

    event_count = len(self.events)
    cached = self._merkle_levels_cache
    if cached is None or self._merkle_levels_for_event_count != event_count:
        cached = build_merkle_levels([evt["event_id"] for evt in self.events])
        self._merkle_levels_cache = cached
        self._merkle_levels_for_event_count = event_count
    return cached
|
||||
|
||||
# ─── Persistence ──────────────────────────────────────────────────
|
||||
|
||||
def _load(self):
|
||||
@@ -1518,16 +1597,24 @@ class Infonet:
|
||||
self._last_validated_index = 0
|
||||
self._type_counts = {}
|
||||
self._active_count = 0
|
||||
self._registered_nodes = set()
|
||||
self._chain_bytes = 2
|
||||
|
||||
def _rebuild_state(self) -> None:
|
||||
self.event_index = {}
|
||||
self.node_sequences = {}
|
||||
# Keep private signed-write replay domains across public-chain
|
||||
# rebuilds; these domains protect local side effects that are not
|
||||
# represented as public Infonet events.
|
||||
if not isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
self.sequence_domains = {}
|
||||
# Keep private signed-write replay domains that are not represented
|
||||
# on-chain, but rebuild the gate_message sequence domain from chain
|
||||
# events so reloads/fork application do not mix it with public
|
||||
# per-node message sequences.
|
||||
preserved_domains = {}
|
||||
if isinstance(getattr(self, "sequence_domains", None), dict):
|
||||
preserved_domains = {
|
||||
key: value
|
||||
for key, value in self.sequence_domains.items()
|
||||
if not str(key or "").endswith("|gate_message")
|
||||
}
|
||||
self.sequence_domains = dict(preserved_domains)
|
||||
self.public_key_bindings = {}
|
||||
self.revocations = {}
|
||||
self._replay_filter = ReplayFilter()
|
||||
@@ -1539,9 +1626,12 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(
|
||||
evt.get("event_type", ""), node_id
|
||||
)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
if sequence > last:
|
||||
self.node_sequences[node_id] = sequence
|
||||
sequence_table[sequence_key] = sequence
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
existing = self.public_key_bindings.get(public_key)
|
||||
@@ -1566,10 +1656,15 @@ class Infonet:
|
||||
now = time.time()
|
||||
self._type_counts = {}
|
||||
self._active_count = 0
|
||||
self._registered_nodes = set()
|
||||
self._chain_bytes = 2 # "[]"
|
||||
for evt in self.events:
|
||||
t = evt.get("event_type", "unknown")
|
||||
self._type_counts[t] = self._type_counts.get(t, 0) + 1
|
||||
if t == "node_register":
|
||||
node_id = str(evt.get("node_id", "") or "")
|
||||
if node_id:
|
||||
self._registered_nodes.add(node_id)
|
||||
is_eph = evt.get("payload", {}).get("ephemeral") or evt.get("payload", {}).get("_ephemeral")
|
||||
if not is_eph or (now - evt.get("timestamp", 0)) < EPHEMERAL_TTL:
|
||||
self._active_count += 1
|
||||
@@ -1579,6 +1674,10 @@ class Infonet:
|
||||
"""Incrementally update counters when a new event is appended."""
|
||||
t = evt.get("event_type", "unknown")
|
||||
self._type_counts[t] = self._type_counts.get(t, 0) + 1
|
||||
if t == "node_register":
|
||||
node_id = str(evt.get("node_id", "") or "")
|
||||
if node_id:
|
||||
self._registered_nodes.add(node_id)
|
||||
self._active_count += 1
|
||||
self._chain_bytes += len(json.dumps(evt)) + 2
|
||||
|
||||
@@ -1844,6 +1943,295 @@ class Infonet:
|
||||
self._save()
|
||||
return True, "ok"
|
||||
|
||||
def _sequence_table_for_event(self, event_type: str, node_id: str) -> tuple[dict[str, int], str]:
    """Pick the sequence table and key that track ``node_id`` for an event type.

    ``gate_message`` and ``dm_message`` (matched case-insensitively,
    surrounding whitespace ignored) use ``self.sequence_domains`` under
    the key ``"<node_id>|<event type>"``. Every other type uses
    ``self.node_sequences`` keyed by ``node_id`` alone.

    Returns:
        ``(table, key)`` -- the dict to read/write and the key within it.
    """
    kind = str(event_type or "").strip().lower()
    if kind in ("gate_message", "dm_message"):
        return self.sequence_domains, f"{node_id}|{kind}"
    return self.node_sequences, node_id
|
||||
|
||||
def _dm_spool_target_key(self, payload: dict[str, Any]) -> tuple[str, str]:
    """Return ``(delivery_class, target)`` identifying a DM spool slot.

    ``delivery_class`` is the payload's value, stripped and lowercased.
    The target is ``recipient_token`` for the ``"shared"`` class and
    ``recipient_id`` for every other class; missing or falsy values
    become ``""``.
    """
    delivery_class = str(payload.get("delivery_class", "") or "").strip().lower()
    target_field = "recipient_token" if delivery_class == "shared" else "recipient_id"
    return delivery_class, str(payload.get(target_field, "") or "").strip()
|
||||
|
||||
def _dm_spool_active_counts(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
sender_id: str = "",
|
||||
now: float | None = None,
|
||||
) -> tuple[int, int]:
|
||||
delivery_class, key = self._dm_spool_target_key(payload)
|
||||
if not key:
|
||||
return 0, 0
|
||||
sender_id = str(sender_id or "").strip()
|
||||
current = time.time() if now is None else float(now)
|
||||
total_count = 0
|
||||
sender_count = 0
|
||||
for evt in reversed(self.events):
|
||||
if evt.get("event_type") != "dm_message":
|
||||
continue
|
||||
evt_payload = evt.get("payload") if isinstance(evt.get("payload"), dict) else {}
|
||||
evt_delivery_class, evt_key = self._dm_spool_target_key(evt_payload)
|
||||
if evt_delivery_class != delivery_class:
|
||||
continue
|
||||
if evt_key != key:
|
||||
continue
|
||||
evt_ts = float(evt_payload.get("timestamp", evt.get("timestamp", 0)) or 0)
|
||||
if evt_ts > 0 and current - evt_ts > DM_HASHCHAIN_SPOOL_TTL_S:
|
||||
continue
|
||||
total_count += 1
|
||||
if sender_id and str(evt.get("node_id", "") or "").strip() == sender_id:
|
||||
sender_count += 1
|
||||
if total_count >= DM_HASHCHAIN_SPOOL_LIMIT and (
|
||||
not sender_id or sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT
|
||||
):
|
||||
break
|
||||
return total_count, sender_count
|
||||
|
||||
def _dm_spool_active_count(self, payload: dict[str, Any], *, now: float | None = None) -> int:
|
||||
total_count, _sender_count = self._dm_spool_active_counts(payload, now=now)
|
||||
return total_count
|
||||
|
||||
def append_private_dm_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Append an encrypted DM dead-drop message to the private Infonet ledger.

    The event is a small offline spool, capped per mailbox target, so the
    hashchain can carry a couple of sealed DMs without becoming an
    unbounded global mailbox.

    Args:
        node_id: Author node; must match ``public_key``.
        payload: Ciphertext-only DM payload. It must not contain
            ``message``, ``plaintext`` or ``_local_plaintext`` and must
            declare ``transport_lock == "private_strong"``.
        signature: Signature over the signature payload built from the
            event type, node, sequence and normalized payload.
        sequence: Strictly increasing per-node counter in the
            ``dm_message`` sequence domain.
        public_key: Author public key.
        public_key_algo: Algorithm identifier for ``public_key``.
        protocol_version: Defaults to ``PROTOCOL_VERSION`` when empty.
        timestamp: Event time; defaults to ``time.time()`` when falsy.

    Returns:
        The appended event as a dict.

    Raises:
        ValueError: On a non-positive or replayed sequence, plaintext in
            the payload, a wrong transport lock, failed payload or
            protocol validation, a full spool, an oversized payload,
            missing/unsupported/mismatched key material, a binding
            conflict, an invalid signature, or a revoked key.
    """
    event_type = "dm_message"
    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
    last = sequence_table.get(sequence_key, 0)
    if sequence <= last:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")

    # Reject anything that looks like cleartext before normalization can
    # drop or rename the offending keys.
    raw_payload = dict(payload or {})
    if "message" in raw_payload or "plaintext" in raw_payload or "_local_plaintext" in raw_payload:
        raise ValueError("private DM ledger payload must not contain plaintext")
    if str(raw_payload.get("transport_lock", "") or "").strip().lower() != "private_strong":
        raise ValueError("DM hashchain spool requires private_strong transport_lock")

    payload = normalize_payload(event_type, raw_payload)
    ok, reason = validate_private_dm_ledger_payload(payload)
    if not ok:
        raise ValueError(reason)

    # Enforce the per-sender cap first so its more specific error wins.
    total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=node_id)
    if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
        raise ValueError("DM hashchain sender spool full for recipient")
    if total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
        raise ValueError("DM hashchain spool full for recipient")

    payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
    if len(payload_json.encode("utf-8")) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    ok, reason = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not ok:
        raise ValueError(reason)

    if not (signature and public_key and public_key_algo):
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)
    sig_payload = build_signature_payload(
        event_type=event_type,
        node_id=node_id,
        sequence=sequence,
        payload=payload,
    )
    if not verify_signature(
        public_key_b64=public_key,
        public_key_algo=public_key_algo,
        signature_hex=signature,
        payload=sig_payload,
    ):
        raise ValueError("Invalid signature")

    revoked, _info = self._revocation_status(public_key)
    if revoked:
        raise ValueError("public key is revoked")

    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    event_dict = event.to_dict()
    # WAL entry is written before the in-memory chain state is advanced.
    self._write_wal(event_dict)
    self.events.append(event_dict)
    self.event_index[event.event_id] = len(self.events) - 1
    self.head_hash = event.event_id
    sequence_table[sequence_key] = sequence
    self._replay_filter.add(event.event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(event_dict)
    self._save()

    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(event_dict)
    except Exception:
        # Publishing is best-effort: the event is already committed
        # locally, so a bridge failure must not fail the append. Record
        # it instead of discarding it silently.
        logger.debug("RNS publish failed for dm_message event", exc_info=True)
    _notify_public_event_append_hooks(event_dict)
    logger.info(
        f"Infonet append [dm_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return event_dict
|
||||
|
||||
def append_private_gate_message(
    self,
    *,
    node_id: str,
    payload: dict,
    signature: str,
    sequence: int,
    public_key: str,
    public_key_algo: str,
    protocol_version: str = "",
    timestamp: float = 0,
) -> dict:
    """Append an encrypted gate message to the private Infonet ledger.

    Gate messages use their own sequence domain so a gate post cannot
    consume or replay-block the author's public broadcast sequence.

    Args:
        node_id: Author node; must match ``public_key``.
        payload: Ciphertext-only gate payload. It must not contain
            ``message``, ``_local_plaintext`` or ``_local_reply_to`` and
            must declare ``transport_lock == "private_strong"``.
        signature: Signature that must verify against at least one of
            the payload variants from
            ``_private_gate_signature_payload_variants``.
        sequence: Strictly increasing per-node counter in the
            ``gate_message`` sequence domain.
        public_key: Author public key.
        public_key_algo: Algorithm identifier for ``public_key``.
        protocol_version: Defaults to ``PROTOCOL_VERSION`` when empty.
        timestamp: Event time; defaults to ``time.time()`` when falsy.

    Returns:
        The appended event as a dict.

    Raises:
        ValueError: On a non-positive or replayed sequence, plaintext in
            the payload, a wrong transport lock, failed payload or
            protocol validation, an oversized payload,
            missing/unsupported/mismatched key material, a binding
            conflict, an invalid signature, or a revoked key.
    """
    event_type = "gate_message"
    if sequence <= 0:
        raise ValueError("sequence is required and must be > 0")
    sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
    last = sequence_table.get(sequence_key, 0)
    if sequence <= last:
        raise ValueError(f"Replay detected: sequence {sequence} <= last {last}")

    # Reject local-only/cleartext keys before normalization can drop or
    # rename them.
    raw_payload = dict(payload or {})
    if "message" in raw_payload or "_local_plaintext" in raw_payload or "_local_reply_to" in raw_payload:
        raise ValueError("private gate ledger payload must not contain plaintext")
    if str(raw_payload.get("transport_lock", "") or "").strip().lower() != "private_strong":
        raise ValueError("gate messages require private_strong transport_lock")

    payload = normalize_payload(event_type, raw_payload)
    ok, reason = validate_private_gate_ledger_payload(payload)
    if not ok:
        raise ValueError(reason)

    payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
    if len(payload_json.encode("utf-8")) > MAX_PAYLOAD_BYTES:
        raise ValueError("payload exceeds max size")

    protocol_version = str(protocol_version or PROTOCOL_VERSION)
    ok, reason = validate_protocol_fields(protocol_version, NETWORK_ID)
    if not ok:
        raise ValueError(reason)

    if not (signature and public_key and public_key_algo):
        raise ValueError("Missing signature fields")
    if not parse_public_key_algo(public_key_algo):
        raise ValueError("Unsupported public_key_algo")
    if not verify_node_binding(node_id, public_key):
        raise ValueError("node_id mismatch")
    bound, bind_reason = self._bind_public_key(public_key, node_id)
    if not bound:
        raise ValueError(bind_reason)

    # Accept the signature if it verifies against any payload variant.
    event_for_signature = {"payload": payload}
    signature_ok = False
    for signature_payload in _private_gate_signature_payload_variants(
        str(payload.get("gate", "") or ""),
        event_for_signature,
    ):
        sig_payload = build_signature_payload(
            event_type=event_type,
            node_id=node_id,
            sequence=sequence,
            payload=signature_payload,
        )
        if verify_signature(
            public_key_b64=public_key,
            public_key_algo=public_key_algo,
            signature_hex=signature,
            payload=sig_payload,
        ):
            signature_ok = True
            break
    if not signature_ok:
        raise ValueError("Invalid signature")

    revoked, _info = self._revocation_status(public_key)
    if revoked:
        raise ValueError("public key is revoked")

    event = ChainEvent(
        prev_hash=self.head_hash,
        event_type=event_type,
        node_id=node_id,
        payload=payload,
        timestamp=float(timestamp or time.time()),
        sequence=sequence,
        signature=signature,
        public_key=public_key,
        public_key_algo=public_key_algo,
        protocol_version=protocol_version,
    )
    event_dict = event.to_dict()
    # WAL entry is written before the in-memory chain state is advanced.
    self._write_wal(event_dict)
    self.events.append(event_dict)
    self.event_index[event.event_id] = len(self.events) - 1
    self.head_hash = event.event_id
    sequence_table[sequence_key] = sequence
    self._replay_filter.add(event.event_id)
    self._invalidate_merkle_cache()
    self._update_counters_for_event(event_dict)
    self._save()

    try:
        from services.mesh.mesh_rns import rns_bridge

        rns_bridge.publish_event(event_dict)
    except Exception:
        # Publishing is best-effort: the event is already committed
        # locally, so a bridge failure must not fail the append. Record
        # it instead of discarding it silently.
        logger.debug("RNS publish failed for gate_message event", exc_info=True)
    _notify_public_event_append_hooks(event_dict)

    logger.info(
        f"Infonet append [gate_message] by {_redact_node(node_id)} seq={sequence} "
        f"id={event.event_id[:16]}..."
    )
    return event_dict
|
||||
|
||||
def append(
|
||||
self,
|
||||
event_type: str,
|
||||
@@ -1972,6 +2360,8 @@ class Infonet:
|
||||
self.head_hash = event.event_id
|
||||
self.node_sequences[node_id] = sequence
|
||||
self._replay_filter.add(event.event_id)
|
||||
# Issue #208: chain advanced, cached Merkle levels are stale.
|
||||
self._invalidate_merkle_cache()
|
||||
self._update_counters_for_event(event_dict)
|
||||
|
||||
if event_type == "key_revoke":
|
||||
@@ -2022,6 +2412,18 @@ class Infonet:
|
||||
if not event_id or not prev_hash:
|
||||
rejected.append({"index": idx, "reason": "Missing event_id or prev_hash"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != expected_prev:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2040,25 +2442,14 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "network_id mismatch"})
|
||||
continue
|
||||
if event_id in self.event_index:
|
||||
duplicates += 1
|
||||
continue
|
||||
if self._replay_filter.seen(event_id):
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
metrics_inc("ingest_replay_seen")
|
||||
except Exception:
|
||||
pass
|
||||
duplicates += 1
|
||||
continue
|
||||
if prev_hash != self.head_hash:
|
||||
rejected.append({"index": idx, "reason": "prev_hash does not match head"})
|
||||
continue
|
||||
if sequence <= 0:
|
||||
rejected.append({"index": idx, "reason": "Invalid sequence"})
|
||||
continue
|
||||
last = self.node_sequences.get(node_id, 0)
|
||||
sequence_table, sequence_key = self._sequence_table_for_event(event_type, node_id)
|
||||
last = sequence_table.get(sequence_key, 0)
|
||||
if sequence <= last:
|
||||
rejected.append({"index": idx, "reason": "Replay detected"})
|
||||
continue
|
||||
@@ -2093,7 +2484,18 @@ class Infonet:
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if ok:
|
||||
total_count, sender_count = self._dm_spool_active_counts(payload, sender_id=str(evt.get("node_id", "") or ""))
|
||||
if sender_count >= DM_HASHCHAIN_SPOOL_SENDER_LIMIT:
|
||||
ok, reason = False, "DM hashchain sender spool full for recipient"
|
||||
elif total_count >= DM_HASHCHAIN_SPOOL_LIMIT:
|
||||
ok, reason = False, "DM hashchain spool full for recipient"
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
rejected.append({"index": idx, "reason": reason})
|
||||
continue
|
||||
@@ -2169,7 +2571,7 @@ class Infonet:
|
||||
pass
|
||||
rejected.append({"index": idx, "reason": "public key is revoked"})
|
||||
continue
|
||||
last_seq = self.node_sequences.get(node_id, 0)
|
||||
last_seq = sequence_table.get(sequence_key, 0)
|
||||
if sequence <= last_seq:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2205,18 +2607,30 @@ class Infonet:
|
||||
rejected.append({"index": idx, "reason": bind_reason})
|
||||
continue
|
||||
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
|
||||
@@ -2246,7 +2660,8 @@ class Infonet:
|
||||
self.events.append(evt)
|
||||
self.event_index[event_id] = len(self.events) - 1
|
||||
self.head_hash = event_id
|
||||
self.node_sequences[node_id] = sequence
|
||||
sequence_table[sequence_key] = sequence
|
||||
self._update_counters_for_event(evt)
|
||||
accepted += 1
|
||||
expected_prev = event_id
|
||||
self._replay_filter.add(event_id)
|
||||
@@ -2254,6 +2669,9 @@ class Infonet:
|
||||
self._apply_revocation(evt)
|
||||
|
||||
if accepted:
|
||||
# Issue #208: any accepted event invalidates the cached Merkle
|
||||
# levels. One invalidation per batch, not per event.
|
||||
self._invalidate_merkle_cache()
|
||||
self._save()
|
||||
return {"accepted": accepted, "duplicates": duplicates, "rejected": rejected}
|
||||
|
||||
@@ -2305,6 +2723,7 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
@@ -2315,21 +2734,41 @@ class Infonet:
|
||||
return False, f"public key binding conflict at index {i}"
|
||||
seen_public_keys[public_key] = node_id
|
||||
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, f"Invalid signature at index {i}"
|
||||
|
||||
prev = evt_dict["event_id"]
|
||||
@@ -2394,27 +2833,48 @@ class Infonet:
|
||||
verify_node_binding,
|
||||
)
|
||||
|
||||
event_type = evt_dict.get("event_type", "")
|
||||
node_id = evt_dict.get("node_id", "")
|
||||
if not parse_public_key_algo(public_key_algo):
|
||||
return False, f"Unsupported public_key_algo at index {i}"
|
||||
if not verify_node_binding(node_id, public_key):
|
||||
return False, f"node_id mismatch at index {i}"
|
||||
|
||||
normalized = normalize_payload(
|
||||
evt_dict.get("event_type", ""), evt_dict.get("payload", {})
|
||||
)
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=evt_dict.get("event_type", ""),
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=normalized,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
payload = evt_dict.get("payload", {})
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid gate_message payload at index {i}: {reason}"
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt_dict,
|
||||
)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
if not ok:
|
||||
return False, f"Invalid dm_message payload at index {i}: {reason}"
|
||||
signature_payloads = [normalize_payload(event_type, payload)]
|
||||
else:
|
||||
signature_payloads = [
|
||||
normalize_payload(event_type, payload)
|
||||
]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=_safe_int(evt_dict.get("sequence", 0) or 0, 0),
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, f"Invalid signature at index {i}"
|
||||
prev = evt_dict["event_id"]
|
||||
|
||||
@@ -2478,7 +2938,14 @@ class Infonet:
|
||||
node_id = evt.get("node_id", "")
|
||||
sequence = _safe_int(evt.get("sequence", 0) or 0, 0)
|
||||
if node_id and sequence:
|
||||
last_seq[node_id] = max(last_seq.get(node_id, 0), sequence)
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if str(evt.get("event_type", "") or "").strip().lower() == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last_seq[sequence_key] = max(last_seq.get(sequence_key, 0), sequence)
|
||||
public_key = str(evt.get("public_key", "") or "")
|
||||
if public_key and node_id:
|
||||
seen_public_keys.setdefault(public_key, node_id)
|
||||
@@ -2498,8 +2965,21 @@ class Infonet:
|
||||
existing_idx = self.event_index.get(event_id)
|
||||
if existing_idx is not None and existing_idx <= prev_index:
|
||||
return False, "duplicate event_id"
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
if event_type == "gate_message":
|
||||
payload = dict(payload or {})
|
||||
elif event_type == "dm_message":
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
else:
|
||||
payload = normalize_payload(event_type, dict(payload or {}))
|
||||
ok, reason = validate_event_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
if event_type == "gate_message":
|
||||
ok, reason = validate_private_gate_ledger_payload(payload)
|
||||
elif event_type == "dm_message":
|
||||
ok, reason = validate_private_dm_ledger_payload(payload)
|
||||
else:
|
||||
ok, reason = validate_public_ledger_payload(event_type, payload)
|
||||
if not ok:
|
||||
return False, reason
|
||||
proto = evt.get("protocol_version") or PROTOCOL_VERSION
|
||||
@@ -2513,7 +2993,14 @@ class Infonet:
|
||||
revoked, _info = self._revocation_status(public_key)
|
||||
if revoked and event_type != "key_revoke":
|
||||
return False, "public key revoked"
|
||||
last = last_seq.get(node_id, 0)
|
||||
sequence_key = (
|
||||
f"{node_id}|gate_message"
|
||||
if event_type == "gate_message"
|
||||
else f"{node_id}|dm_message"
|
||||
if event_type == "dm_message"
|
||||
else node_id
|
||||
)
|
||||
last = last_seq.get(sequence_key, 0)
|
||||
if sequence <= last:
|
||||
return False, "sequence replay"
|
||||
from services.mesh.mesh_crypto import (
|
||||
@@ -2531,27 +3018,43 @@ class Infonet:
|
||||
if existing and existing != node_id:
|
||||
return False, "public key binding conflict"
|
||||
seen_public_keys[public_key] = node_id
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=payload,
|
||||
)
|
||||
if not verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
if event_type == "gate_message":
|
||||
signature_payloads = _private_gate_signature_payload_variants(
|
||||
str(payload.get("gate", "") or ""),
|
||||
evt,
|
||||
)
|
||||
else:
|
||||
signature_payloads = [payload]
|
||||
signature_ok = False
|
||||
for signature_payload in signature_payloads:
|
||||
sig_payload = build_signature_payload(
|
||||
event_type=event_type,
|
||||
node_id=node_id,
|
||||
sequence=sequence,
|
||||
payload=signature_payload,
|
||||
)
|
||||
if verify_signature(
|
||||
public_key_b64=public_key,
|
||||
public_key_algo=public_key_algo,
|
||||
signature_hex=signature,
|
||||
payload=sig_payload,
|
||||
):
|
||||
signature_ok = True
|
||||
break
|
||||
if not signature_ok:
|
||||
return False, "invalid signature"
|
||||
computed = ChainEvent.from_dict(evt).event_id
|
||||
if computed != event_id:
|
||||
return False, "event_id mismatch"
|
||||
last_seq[node_id] = sequence
|
||||
last_seq[sequence_key] = sequence
|
||||
|
||||
# Apply fork
|
||||
self.events = prefix + ordered
|
||||
self._rebuild_state()
|
||||
self._rebuild_revocations()
|
||||
self._rebuild_counters()
|
||||
# Issue #208: chain replaced, cached Merkle levels are stale.
|
||||
self._invalidate_merkle_cache()
|
||||
self._save()
|
||||
try:
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
@@ -2681,6 +3184,8 @@ class Infonet:
|
||||
"head_hash_full": self.head_hash,
|
||||
"chain_lock": self.chain_lock(),
|
||||
"known_nodes": len(self.node_sequences),
|
||||
"author_nodes": len(self.node_sequences),
|
||||
"registered_nodes": len(self._registered_nodes),
|
||||
"event_types": dict(self._type_counts),
|
||||
"chain_size_kb": round(self._chain_bytes / 1024, 1),
|
||||
"unsigned_events": 0,
|
||||
@@ -2716,8 +3221,11 @@ class Infonet:
|
||||
|
||||
if len(new_events) != before:
|
||||
self.events = new_events
|
||||
# Rebuild index
|
||||
self.event_index = {e["event_id"]: i for i, e in enumerate(self.events)}
|
||||
self._rebuild_state()
|
||||
self._rebuild_revocations()
|
||||
self._rebuild_counters()
|
||||
# Issue #208: cleanup may have dropped expired events.
|
||||
self._invalidate_merkle_cache()
|
||||
self._save()
|
||||
logger.info(f"Infonet cleanup: removed {before - len(new_events)} expired events")
|
||||
|
||||
@@ -2726,30 +3234,37 @@ class Infonet:
|
||||
def get_merkle_root(self) -> str:
|
||||
"""Compute a Merkle root hash of the Infonet for sync comparison.
|
||||
|
||||
Two nodes with the same Merkle root have identical chains.
|
||||
Two nodes with the same Merkle root have identical chains. Reads
|
||||
from the cached Merkle levels (issue #208) — O(1) when the chain
|
||||
hasn't changed since the last computation.
|
||||
"""
|
||||
if not self.events:
|
||||
return GENESIS_HASH
|
||||
|
||||
from services.mesh.mesh_merkle import merkle_root
|
||||
|
||||
leaves = [e["event_id"] for e in self.events]
|
||||
root = merkle_root(leaves)
|
||||
return root or GENESIS_HASH
|
||||
levels = self._get_merkle_levels()
|
||||
if not levels or not levels[-1]:
|
||||
return GENESIS_HASH
|
||||
return levels[-1][0] or GENESIS_HASH
|
||||
|
||||
def get_merkle_proofs(self, start_index: int, count: int) -> dict:
|
||||
"""Return merkle proofs for a contiguous range of events."""
|
||||
leaves = [e["event_id"] for e in self.events]
|
||||
total = len(leaves)
|
||||
"""Return merkle proofs for a contiguous range of events.
|
||||
|
||||
Issue #208: uses the cached Merkle levels so this is O(count *
|
||||
log n) per request, not O(n + count * log n). Anonymous peers
|
||||
hitting ``/api/mesh/infonet/sync?include_proofs=true`` no longer
|
||||
force a rebuild on every call.
|
||||
"""
|
||||
total = len(self.events)
|
||||
if total == 0:
|
||||
return {"root": GENESIS_HASH, "total": 0, "start": 0, "proofs": []}
|
||||
|
||||
from services.mesh.mesh_merkle import build_merkle_levels, merkle_proof_from_levels
|
||||
from services.mesh.mesh_merkle import merkle_proof_from_levels
|
||||
|
||||
leaves = [e["event_id"] for e in self.events]
|
||||
start = max(0, start_index)
|
||||
end = min(total, start + max(0, count))
|
||||
levels = build_merkle_levels(leaves)
|
||||
root = levels[-1][0] if levels else GENESIS_HASH
|
||||
levels = self._get_merkle_levels()
|
||||
root = levels[-1][0] if levels and levels[-1] else GENESIS_HASH
|
||||
|
||||
proofs = []
|
||||
for idx in range(start, end):
|
||||
|
||||
@@ -2,10 +2,64 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from email.utils import parsedate_to_datetime
|
||||
from datetime import timezone
|
||||
|
||||
from services.mesh.mesh_peer_store import PeerRecord
|
||||
|
||||
|
||||
class PeerSyncRateLimited(Exception):
    """Signal that an upstream peer rejected a sync with HTTP 429.

    The exception transports the peer's ``Retry-After`` value (already
    converted to seconds) so the caller can hand it to
    ``finish_sync(retry_after_s=...)`` and wait for that long rather than
    retrying on the fixed short interval.

    Attributes:
        retry_after_s: Requested wait in seconds, never negative. ``0``
            means the peer supplied no usable header; the caller should
            still apply its exponential backoff in that case.
        status: HTTP status code reported by the peer (429 when falsy).
    """

    def __init__(self, message: str, retry_after_s: int = 0, status: int = 429):
        super().__init__(message)
        requested_wait = int(retry_after_s or 0)
        # Negative waits make no sense; floor them at zero.
        self.retry_after_s = requested_wait if requested_wait > 0 else 0
        self.status = int(status or 429)
|
||||
|
||||
|
||||
def parse_retry_after_header(header_value: str, *, now: float | None = None) -> int:
    """Parse the ``Retry-After`` HTTP header into a wait in seconds.

    Two valid forms per RFC 7231 §7.1.3:

    * Delay-seconds: a non-negative integer (e.g. ``Retry-After: 120``)
    * HTTP-date: an absolute time
      (e.g. ``Retry-After: Wed, 21 Oct 2026 07:28:00 GMT``)

    Args:
        header_value: Raw header text; ``None``/empty is treated as absent.
        now: Reference "current time" as a Unix timestamp. Defaults to
            ``time.time()``; only used for the HTTP-date form.

    Returns:
        The wait in **seconds from now**, clamped to ``[0, 3600]``.
        Unparseable or empty headers return 0 so the caller falls back to
        its exponential backoff. The one-hour ceiling stops a typo'd or
        hostile peer from silencing us for days.

    This function never raises on malformed header text: the value comes
    from a remote peer and must not be able to crash the sync worker.
    """
    value = str(header_value or "").strip()
    if not value:
        return 0
    upper_bound = 3600  # never trust a peer to silence us > 1h

    # Form 1: pure integer seconds. ``str.isdigit`` alone also accepts
    # non-ASCII digit characters (e.g. "²") that ``int`` rejects, so the
    # ASCII check is required to keep ``int`` from raising.
    if value.isascii() and value.isdigit():
        significant = value.lstrip("0")
        # More significant digits than the cap itself means "above the cap".
        # Short-circuiting also keeps absurdly long digit strings away from
        # ``int``, which refuses very long literals on newer Pythons.
        if len(significant) > len(str(upper_bound)):
            return upper_bound
        return min(int(significant or "0"), upper_bound)

    # Form 2: HTTP-date.
    try:
        target = parsedate_to_datetime(value)
        if target is None:
            return 0
        if target.tzinfo is None:
            # A date without zone information is interpreted as UTC.
            target = target.replace(tzinfo=timezone.utc)
        current = float(now if now is not None else time.time())
        delta = int(target.timestamp() - current)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers a non-finite ``now`` reaching ``int()``.
        return 0
    return min(max(0, delta), upper_bound)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SyncWorkerState:
|
||||
last_sync_started_at: int = 0
|
||||
@@ -30,10 +84,19 @@ def eligible_sync_peers(records: list[PeerRecord], *, now: float | None = None)
|
||||
for record in records
|
||||
if record.bucket == "sync" and record.enabled and int(record.cooldown_until or 0) <= current_time
|
||||
]
|
||||
|
||||
def _seed_priority(record: PeerRecord) -> int:
|
||||
role = str(record.role or "").strip().lower()
|
||||
source = str(record.source or "").strip().lower()
|
||||
if role == "seed" and source in {"bundle", "bootstrap_promoted"}:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
return sorted(
|
||||
candidates,
|
||||
key=lambda record: (
|
||||
-int(record.last_sync_ok_at or 0),
|
||||
_seed_priority(record),
|
||||
int(record.failure_count or 0),
|
||||
int(record.added_at or 0),
|
||||
record.peer_url,
|
||||
@@ -63,6 +126,59 @@ def begin_sync(
|
||||
)
|
||||
|
||||
|
||||
def _failure_backoff_seconds(
|
||||
*,
|
||||
base_backoff_s: int,
|
||||
consecutive_failures: int,
|
||||
retry_after_s: int,
|
||||
cap_s: int = 1800,
|
||||
) -> int:
|
||||
"""Compute the next-attempt delay after a failed sync.
|
||||
|
||||
Two inputs combine:
|
||||
|
||||
* ``retry_after_s`` — when an upstream peer answered HTTP 429
|
||||
with a ``Retry-After`` header, we honor it exactly. Continuing
|
||||
to hammer the upstream every 60s is the bug this fix exists to
|
||||
close: it keeps the upstream's rate-limit bucket full
|
||||
indefinitely and no sync ever lands.
|
||||
|
||||
* Exponential growth on ``consecutive_failures`` — even without an
|
||||
explicit Retry-After, repeated failures should slow us down. The
|
||||
first failure waits ``base`` (preserves pre-fix behavior for
|
||||
one-off blips). Each subsequent failure doubles the wait, capped
|
||||
to ``cap_s`` (default 30 minutes). With base=60 and cap=1800,
|
||||
the schedule is 60s → 120s → 240s → 480s → 960s → 1800s →
|
||||
1800s → … .
|
||||
|
||||
The actual delay is the MAX of the two — whichever asks for more
|
||||
patience wins. ``retry_after_s == 0`` (no header) falls back to
|
||||
pure exponential. An aggressive ``Retry-After`` (say 600s while
|
||||
we're only at 1 failure) wins over the exponential ladder.
|
||||
"""
|
||||
base = max(0, int(base_backoff_s or 0))
|
||||
failures = max(0, int(consecutive_failures or 0))
|
||||
cap = max(0, int(cap_s or 0))
|
||||
retry_after = max(0, int(retry_after_s or 0))
|
||||
# ``cap_s=0`` explicitly disables the exponential ladder entirely
|
||||
# — operators who want the pre-fix "honor Retry-After only" behavior
|
||||
# can set this. The default cap of 1800s is what saturates the
|
||||
# ladder at the 5th-6th failure for base=60.
|
||||
if cap == 0:
|
||||
return retry_after
|
||||
# 2^(failures-1) — so failure #1 = base (preserves the pre-fix
|
||||
# default for transient blips), failure #2 = 2*base, etc. Cap on
|
||||
# the exponent (16) is defense against integer overflow on a
|
||||
# hostile or very large failures counter.
|
||||
if base > 0 and failures > 0:
|
||||
exponent = min(max(0, failures - 1), 16)
|
||||
grown = base * (2 ** exponent)
|
||||
else:
|
||||
grown = 0
|
||||
exponential = min(max(0, grown), cap)
|
||||
return max(exponential, retry_after)
|
||||
|
||||
|
||||
def finish_sync(
|
||||
state: SyncWorkerState,
|
||||
*,
|
||||
@@ -74,7 +190,26 @@ def finish_sync(
|
||||
now: float | None = None,
|
||||
interval_s: int = 300,
|
||||
failure_backoff_s: int = 60,
|
||||
retry_after_s: int = 0,
|
||||
failure_backoff_cap_s: int = 1800,
|
||||
) -> SyncWorkerState:
|
||||
"""Finalise a sync attempt and compute when the next one should run.
|
||||
|
||||
New args (added for the 429 retry storm fix):
|
||||
|
||||
* ``retry_after_s`` — if the peer responded with HTTP 429 + a
|
||||
``Retry-After`` header, pass that value here. ``finish_sync``
|
||||
will use ``max(exponential, retry_after_s)`` for the delay so
|
||||
we never hammer a peer that asked us to back off.
|
||||
* ``failure_backoff_cap_s`` — upper bound on the exponential
|
||||
ladder. Default 1800 (30 min) — keeps a sync queue from going
|
||||
silent for hours while still cutting the request rate to
|
||||
something the upstream can absorb.
|
||||
|
||||
The pre-fix behavior (constant 60s on every failure) is recoverable
|
||||
by passing ``failure_backoff_cap_s=0`` and ``retry_after_s=0``, but
|
||||
there's no reason to.
|
||||
"""
|
||||
timestamp = int(now if now is not None else time.time())
|
||||
if ok:
|
||||
return SyncWorkerState(
|
||||
@@ -90,17 +225,25 @@ def finish_sync(
|
||||
consecutive_failures=0,
|
||||
)
|
||||
|
||||
next_failures = state.consecutive_failures + 1
|
||||
delay_s = _failure_backoff_seconds(
|
||||
base_backoff_s=failure_backoff_s,
|
||||
consecutive_failures=next_failures,
|
||||
retry_after_s=retry_after_s,
|
||||
cap_s=failure_backoff_cap_s,
|
||||
)
|
||||
|
||||
return SyncWorkerState(
|
||||
last_sync_started_at=state.last_sync_started_at,
|
||||
last_sync_finished_at=timestamp,
|
||||
last_sync_ok_at=state.last_sync_ok_at,
|
||||
next_sync_due_at=timestamp + max(0, int(failure_backoff_s or 0)),
|
||||
next_sync_due_at=timestamp + delay_s,
|
||||
last_peer_url=peer_url or state.last_peer_url,
|
||||
last_error=str(error or "").strip(),
|
||||
last_outcome="fork" if fork_detected else "error",
|
||||
current_head=current_head or state.current_head,
|
||||
fork_detected=bool(fork_detected),
|
||||
consecutive_failures=state.consecutive_failures + 1,
|
||||
consecutive_failures=next_failures,
|
||||
)
|
||||
|
||||
|
||||
@@ -133,5 +276,6 @@ def should_run_sync(
|
||||
) -> bool:
|
||||
current_time = int(now if now is not None else time.time())
|
||||
if state.last_outcome == "running":
|
||||
return False
|
||||
started_at = int(state.last_sync_started_at or 0)
|
||||
return started_at <= 0 or current_time - started_at >= 300
|
||||
return int(state.next_sync_due_at or 0) <= current_time
|
||||
|
||||
@@ -258,6 +258,12 @@ class PeerStore:
|
||||
self._records[record.record_key()] = record
|
||||
return record
|
||||
|
||||
explicit_seed_refresh = (
|
||||
record.bucket == "sync"
|
||||
and record.role == "seed"
|
||||
and record.source in {"bundle", "bootstrap_promoted"}
|
||||
)
|
||||
|
||||
merged = PeerRecord(
|
||||
bucket=record.bucket,
|
||||
source=record.source,
|
||||
@@ -272,9 +278,9 @@ class PeerStore:
|
||||
last_seen_at=max(existing.last_seen_at, record.last_seen_at),
|
||||
last_sync_ok_at=max(existing.last_sync_ok_at, record.last_sync_ok_at),
|
||||
last_push_ok_at=max(existing.last_push_ok_at, record.last_push_ok_at),
|
||||
last_error=record.last_error or existing.last_error,
|
||||
failure_count=max(existing.failure_count, record.failure_count),
|
||||
cooldown_until=max(existing.cooldown_until, record.cooldown_until),
|
||||
last_error="" if explicit_seed_refresh else record.last_error or existing.last_error,
|
||||
failure_count=0 if explicit_seed_refresh else max(existing.failure_count, record.failure_count),
|
||||
cooldown_until=0 if explicit_seed_refresh else max(existing.cooldown_until, record.cooldown_until),
|
||||
metadata={**existing.metadata, **record.metadata},
|
||||
)
|
||||
self._records[record.record_key()] = merged
|
||||
|
||||
@@ -26,7 +26,11 @@ from enum import Enum
|
||||
from typing import Any, Callable, Optional
|
||||
from collections import deque
|
||||
from urllib.parse import urlparse
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
from services.mesh.mesh_crypto import (
|
||||
_derive_peer_key,
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
from services.mesh.mesh_metrics import increment as metrics_inc
|
||||
from services.mesh.mesh_privacy_policy import (
|
||||
TRANSPORT_TIER_ORDER as _TIER_RANK,
|
||||
@@ -390,15 +394,9 @@ class MeshtasticTransport:
|
||||
def _mqtt_config() -> tuple[str, int, str, str]:
|
||||
"""Return (broker, port, user, password) from settings."""
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.meshtastic_mqtt_settings import mqtt_connection_config
|
||||
|
||||
s = get_settings()
|
||||
return (
|
||||
str(s.MESH_MQTT_BROKER or "mqtt.meshtastic.org"),
|
||||
int(s.MESH_MQTT_PORT or 1883),
|
||||
str(s.MESH_MQTT_USER or "meshdev"),
|
||||
str(s.MESH_MQTT_PASS or "large4cats"),
|
||||
)
|
||||
return mqtt_connection_config()
|
||||
except Exception:
|
||||
return ("mqtt.meshtastic.org", 1883, "meshdev", "large4cats")
|
||||
|
||||
@@ -433,8 +431,9 @@ class MeshtasticTransport:
|
||||
def _resolve_psk(cls) -> bytes:
|
||||
"""Return the PSK from config, or the default LongFast key if empty."""
|
||||
try:
|
||||
from services.config import get_settings
|
||||
raw = str(getattr(get_settings(), "MESH_MQTT_PSK", "") or "").strip()
|
||||
from services.meshtastic_mqtt_settings import mqtt_psk_hex
|
||||
|
||||
raw = mqtt_psk_hex()
|
||||
except Exception:
|
||||
raw = ""
|
||||
if not raw:
|
||||
@@ -449,7 +448,10 @@ class MeshtasticTransport:
|
||||
|
||||
@staticmethod
|
||||
def mesh_address_for_sender(sender_id: str) -> str:
|
||||
"""Return the synthetic public mesh address used for MQTT-originated sends."""
|
||||
"""Return the public mesh address used for MQTT-originated sends."""
|
||||
parsed = MeshtasticTransport._parse_node_id(sender_id)
|
||||
if parsed is not None:
|
||||
return f"!{parsed:08x}"
|
||||
return f"!{MeshtasticTransport._stable_node_id(sender_id):08x}"
|
||||
|
||||
@staticmethod
|
||||
@@ -489,7 +491,8 @@ class MeshtasticTransport:
|
||||
|
||||
# Generate IDs
|
||||
packet_id = random.randint(1, 0xFFFFFFFF)
|
||||
from_node = self._stable_node_id(envelope.sender_id)
|
||||
parsed_sender = self._parse_node_id(envelope.sender_id)
|
||||
from_node = parsed_sender if parsed_sender is not None else self._stable_node_id(envelope.sender_id)
|
||||
direct_node = self._parse_node_id(envelope.destination)
|
||||
to_node = direct_node if direct_node is not None else 0xFFFFFFFF
|
||||
|
||||
@@ -521,7 +524,7 @@ class MeshtasticTransport:
|
||||
|
||||
def _on_connect(client, userdata, flags, rc):
|
||||
if rc == 0:
|
||||
info = client.publish(topic, payload, qos=0)
|
||||
info = client.publish(topic, payload, qos=1)
|
||||
info.wait_for_publish(timeout=5)
|
||||
published[0] = True
|
||||
client.disconnect()
|
||||
@@ -529,9 +532,7 @@ class MeshtasticTransport:
|
||||
error_msg[0] = f"MQTT connect refused: rc={rc}"
|
||||
client.disconnect()
|
||||
|
||||
client = mqtt.Client(
|
||||
client_id=f"shadowbroker-tx-{envelope.message_id[:8]}", protocol=mqtt.MQTTv311
|
||||
)
|
||||
client = mqtt.Client(client_id=f"meshchat-tx-{envelope.message_id[:8]}", protocol=mqtt.MQTTv311)
|
||||
broker, port, user, pw = self._mqtt_config()
|
||||
client.username_pw_set(user, pw)
|
||||
client.on_connect = _on_connect
|
||||
@@ -553,9 +554,9 @@ class MeshtasticTransport:
|
||||
True,
|
||||
self.NAME,
|
||||
(
|
||||
f"Published direct to !{to_node:08x} via {region}/{channel}"
|
||||
f"Broker accepted direct publish to !{to_node:08x} via {region}/{channel}"
|
||||
if direct_node is not None
|
||||
else f"Published to {region}/{channel} ({len(payload)}B protobuf)"
|
||||
else f"Broker accepted channel publish to {region}/{channel} ({len(payload)}B protobuf)"
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
@@ -706,7 +707,6 @@ class InternetTransport(_PeerPushTransportMixin):
|
||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||
except ValueError as exc:
|
||||
return TransportResult(False, self.NAME, str(exc))
|
||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
||||
|
||||
delivered = 0
|
||||
last_error = ""
|
||||
@@ -716,10 +716,13 @@ class InternetTransport(_PeerPushTransportMixin):
|
||||
try:
|
||||
normalized_peer_url = normalize_peer_url(peer_url)
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if secret:
|
||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
||||
if not peer_key:
|
||||
raise ValueError("invalid peer URL for HMAC derivation")
|
||||
# Issue #256: per-peer secret takes precedence over the
|
||||
# global MESH_PEER_PUSH_SECRET. When neither is set the
|
||||
# key is empty and we skip the HMAC header entirely so a
|
||||
# bare (unsigned) push still works on test deployments
|
||||
# that have not yet configured any secret at all.
|
||||
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||
if peer_key:
|
||||
headers["X-Peer-Url"] = normalized_peer_url
|
||||
headers["X-Peer-HMAC"] = hmac.new(
|
||||
peer_key,
|
||||
@@ -801,7 +804,6 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
||||
endpoint_path, padded = self._build_peer_push_request(envelope, self.NAME)
|
||||
except ValueError as exc:
|
||||
return TransportResult(False, self.NAME, str(exc))
|
||||
secret = str(settings.MESH_PEER_PUSH_SECRET or "").strip()
|
||||
|
||||
delivered = 0
|
||||
last_error = ""
|
||||
@@ -811,10 +813,10 @@ class TorArtiTransport(_PeerPushTransportMixin):
|
||||
try:
|
||||
normalized_peer_url = normalize_peer_url(peer_url)
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if secret:
|
||||
peer_key = _derive_peer_key(secret, normalized_peer_url)
|
||||
if not peer_key:
|
||||
raise ValueError("invalid peer URL for HMAC derivation")
|
||||
# Issue #256: per-peer secret takes precedence; see the
|
||||
# other transport above for the rationale.
|
||||
peer_key = resolve_peer_key_for_url(normalized_peer_url)
|
||||
if peer_key:
|
||||
headers["X-Peer-Url"] = normalized_peer_url
|
||||
headers["X-Peer-HMAC"] = hmac.new(
|
||||
peer_key,
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
|
||||
@@ -33,6 +36,58 @@ def _require_fields(payload: dict[str, Any], fields: tuple[str, ...]) -> tuple[b
|
||||
return True, "ok"
|
||||
|
||||
|
||||
def _decode_base64ish(value: Any) -> bytes | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw or any(ch.isspace() for ch in raw):
|
||||
return None
|
||||
padded = raw + ("=" * (-len(raw) % 4))
|
||||
for altchars in (None, b"-_"):
|
||||
try:
|
||||
return base64.b64decode(padded.encode("ascii"), altchars=altchars, validate=True)
|
||||
except (binascii.Error, UnicodeEncodeError, ValueError):
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _byte_entropy(data: bytes) -> float:
|
||||
if not data:
|
||||
return 0.0
|
||||
counts = [0] * 256
|
||||
for byte in data:
|
||||
counts[byte] += 1
|
||||
total = float(len(data))
|
||||
return -sum((count / total) * math.log2(count / total) for count in counts if count)
|
||||
|
||||
|
||||
def _validate_sealed_bytes_field(
    payload: dict[str, Any],
    field: str,
    *,
    min_bytes: int = 8,
    entropy_floor: float = 2.5,
) -> tuple[bool, str]:
    """Check that ``payload[field]`` plausibly holds base64-encoded sealed bytes.

    Args:
        payload: Event payload to inspect.
        field: Key whose value is validated.
        min_bytes: Minimum decoded length.
        entropy_floor: Minimum byte entropy (bits per byte) required once
            the blob is at least 32 bytes long.

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, reason)``.
    """
    blob = _decode_base64ish(payload.get(field, ""))
    if blob is None:
        return False, f"{field} must be base64-encoded sealed bytes"

    size = len(blob)
    if size < min_bytes:
        return False, f"{field} is too short"

    # Small blobs (test vectors, compact envelopes) may legitimately be low
    # entropy, so the plaintext heuristics only run on 32 bytes or more.
    if size < 32:
        return True, "ok"

    text_whitespace = (9, 10, 13)  # tab, line feed, carriage return
    printable = len(
        [octet for octet in blob if octet in text_whitespace or 32 <= octet <= 126]
    )
    if printable / size > 0.9:
        try:
            blob.decode("utf-8")
        except UnicodeDecodeError:
            # Mostly printable but not valid UTF-8: fall through to the
            # entropy check instead of rejecting outright.
            pass
        else:
            return False, f"{field} looks like encoded plaintext"

    if _byte_entropy(blob) < entropy_floor:
        return False, f"{field} entropy is too low for sealed bytes"
    return True, "ok"
|
||||
|
||||
|
||||
def _validate_message(payload: dict[str, Any]) -> tuple[bool, str]:
|
||||
ok, reason = _require_fields(
|
||||
payload, ("message", "destination", "channel", "priority", "ephemeral")
|
||||
@@ -331,6 +386,7 @@ ACTIVE_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
||||
LEGACY_PUBLIC_LEDGER_EVENT_TYPES: frozenset[str] = frozenset(
|
||||
{
|
||||
"gate_message",
|
||||
"dm_message",
|
||||
}
|
||||
)
|
||||
"""Event types that exist historically on the public chain and must remain
|
||||
@@ -425,6 +481,8 @@ def validate_event_payload(event_type: str, payload: dict[str, Any]) -> tuple[bo
|
||||
|
||||
|
||||
def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) -> tuple[bool, str]:
|
||||
if event_type == "gate_message":
|
||||
return validate_private_gate_ledger_payload(payload)
|
||||
if event_type not in PUBLIC_LEDGER_EVENT_TYPES and event_type not in _EXTENSION_VALIDATORS:
|
||||
return False, f"{event_type} is not allowed on the public ledger"
|
||||
forbidden = sorted(
|
||||
@@ -441,6 +499,92 @@ def validate_public_ledger_payload(event_type: str, payload: dict[str, Any]) ->
|
||||
return True, "ok"
|
||||
|
||||
|
||||
# Payload keys accepted for ``gate_message`` events on the private ledger.
# ``validate_private_gate_ledger_payload`` rejects any payload key whose
# stripped, lower-cased form is not in this set.
_PRIVATE_GATE_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
    {
        "gate",
        "ciphertext",
        "nonce",
        "sender_ref",
        "format",
        "epoch",
        "gate_envelope",
        "envelope_hash",
        "reply_to",
        "transport_lock",
        "signed_context",
    }
)
|
||||
|
||||
|
||||
def validate_private_gate_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate a ciphertext-only ``gate_message`` payload for private replication.

    Checks, in order: the generic ``gate_message`` payload rules, that no
    key falls outside the private gate allow-list, that no plaintext field
    is present, that any ``transport_lock`` names a private transport, and
    that ``ciphertext`` and ``nonce`` both look like sealed bytes.

    Returns:
        ``(True, "ok")`` when every check passes, else ``(False, reason)``.
    """
    ok, reason = validate_event_payload("gate_message", payload)
    if not ok:
        return ok, reason

    rejected = [
        key
        for key in payload.keys()
        if str(key or "").strip().lower() not in _PRIVATE_GATE_LEDGER_ALLOWED_FIELDS
    ]
    if rejected:
        rejected.sort()
        return False, f"private gate ledger payload contains unsupported fields: {', '.join(rejected)}"

    plaintext_keys = ("message", "_local_plaintext", "_local_reply_to")
    if any(marker in payload for marker in plaintext_keys):
        return False, "private gate ledger payload must not contain plaintext"

    # An absent/empty lock is tolerated; a present one must be private.
    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock and lock not in {"private", "private_strong", "rns", "onion"}:
        return False, "gate messages require private transport_lock"

    for sealed_field in ("ciphertext", "nonce"):
        ok, reason = _validate_sealed_bytes_field(payload, sealed_field)
        if not ok:
            return ok, reason
    return True, "ok"
|
||||
|
||||
|
||||
# Payload keys accepted for ``dm_message`` events on the private ledger.
# ``validate_private_dm_ledger_payload`` rejects any payload key whose
# stripped, lower-cased form is not in this set.
_PRIVATE_DM_LEDGER_ALLOWED_FIELDS: frozenset[str] = frozenset(
    {
        "recipient_id",
        "delivery_class",
        "recipient_token",
        "ciphertext",
        "msg_id",
        "timestamp",
        "format",
        "session_welcome",
        "sender_seal",
        "relay_salt",
        "transport_lock",
        "signed_context",
    }
)
|
||||
|
||||
|
||||
def validate_private_dm_ledger_payload(payload: dict[str, Any]) -> tuple[bool, str]:
    """Validate a ciphertext-only ``dm_message`` payload for private replication.

    Checks, in order: the generic ``dm_message`` payload rules, that no key
    falls outside the private DM allow-list, that no plaintext field is
    present, that ``transport_lock`` is exactly ``private_strong``, and
    that ``ciphertext`` is non-empty and looks like sealed bytes.

    Returns:
        ``(True, "ok")`` when every check passes, else ``(False, reason)``.
    """
    ok, reason = validate_event_payload("dm_message", payload)
    if not ok:
        return ok, reason

    rejected = [
        key
        for key in payload.keys()
        if str(key or "").strip().lower() not in _PRIVATE_DM_LEDGER_ALLOWED_FIELDS
    ]
    if rejected:
        rejected.sort()
        return False, f"private DM ledger payload contains unsupported fields: {', '.join(rejected)}"

    plaintext_keys = ("message", "plaintext", "_local_plaintext")
    if any(marker in payload for marker in plaintext_keys):
        return False, "private DM ledger payload must not contain plaintext"

    # Unlike gate messages, the lock is mandatory and must be the strong tier.
    lock = str(payload.get("transport_lock", "") or "").strip().lower()
    if lock != "private_strong":
        return False, "DM hashchain spool requires private_strong transport_lock"

    ciphertext = str(payload.get("ciphertext", "") or "").strip()
    if not ciphertext:
        return False, "ciphertext cannot be empty"

    ok, reason = _validate_sealed_bytes_field(payload, "ciphertext")
    if ok:
        return True, "ok"
    return ok, reason
|
||||
|
||||
|
||||
def validate_protocol_fields(protocol_version: str, network_id: str) -> tuple[bool, str]:
|
||||
if protocol_version != PROTOCOL_VERSION:
|
||||
return False, "Unsupported protocol_version"
|
||||
|
||||
@@ -230,11 +230,16 @@ def _raw_fallback_allowed() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _generated_secret_file() -> Path:
    """Return the default path of the locally generated secure storage secret."""
    filename = "secure_storage_secret.key"
    return DATA_DIR / filename
|
||||
|
||||
|
||||
def _get_storage_secret() -> str | None:
|
||||
"""Return the operator-supplied secure storage secret, or None."""
|
||||
"""Return the operator-supplied or local generated secure storage secret."""
|
||||
secret = os.environ.get("MESH_SECURE_STORAGE_SECRET", "").strip()
|
||||
if secret:
|
||||
return secret
|
||||
secret_file_override = os.environ.get("MESH_SECURE_STORAGE_SECRET_FILE", "").strip()
|
||||
try:
|
||||
from services.config import get_settings
|
||||
|
||||
@@ -242,8 +247,36 @@ def _get_storage_secret() -> str | None:
|
||||
secret = str(getattr(settings, "MESH_SECURE_STORAGE_SECRET", "") or "").strip()
|
||||
if secret:
|
||||
return secret
|
||||
secret_file_override = (
|
||||
secret_file_override
|
||||
or str(getattr(settings, "MESH_SECURE_STORAGE_SECRET_FILE", "") or "").strip()
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if not _is_windows():
|
||||
if _raw_fallback_allowed():
|
||||
return None
|
||||
secret_file = Path(secret_file_override or _generated_secret_file())
|
||||
try:
|
||||
if secret_file.exists():
|
||||
secret = secret_file.read_text(encoding="utf-8").strip()
|
||||
if secret:
|
||||
return secret
|
||||
secret_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
secret = _b64(os.urandom(48))
|
||||
_atomic_write_text(secret_file, secret + "\n", encoding="utf-8")
|
||||
try:
|
||||
os.chmod(secret_file, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
logger.info("Generated local secure storage secret at %s", secret_file)
|
||||
return secret
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Failed to load or generate local secure storage secret at %s: %s",
|
||||
secret_file,
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -11,12 +11,13 @@ import base64
|
||||
import hmac
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import secrets
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||
from cryptography.hazmat.primitives.asymmetric import ed25519, x25519
|
||||
|
||||
from services.mesh.mesh_crypto import (
|
||||
build_signature_payload,
|
||||
@@ -51,6 +52,8 @@ PREKEY_LOOKUP_ROTATE_BEFORE_REMAINING_USES = 8
|
||||
PREKEY_LOOKUP_ROTATION_OVERLAP_S = 12 * 60 * 60
|
||||
PREKEY_LOOKUP_ROTATION_ACTIVE_CAP = 4
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _safe_int(val, default=0) -> int:
|
||||
try:
|
||||
@@ -107,6 +110,7 @@ def _default_identity() -> dict[str, Any]:
|
||||
def _prekey_lookup_handle_record(
|
||||
handle: str,
|
||||
*,
|
||||
label: str = "",
|
||||
issued_at: int = 0,
|
||||
expires_at: int = 0,
|
||||
max_uses: int = 0,
|
||||
@@ -125,6 +129,7 @@ def _prekey_lookup_handle_record(
|
||||
bounded_max_uses = max(1, _safe_int(max_uses or PREKEY_LOOKUP_HANDLE_MAX_USES, PREKEY_LOOKUP_HANDLE_MAX_USES))
|
||||
return {
|
||||
"handle": str(handle or "").strip(),
|
||||
"label": str(label or "").strip()[:96],
|
||||
"issued_at": issued,
|
||||
"expires_at": bounded_expires_at,
|
||||
"max_uses": bounded_max_uses,
|
||||
@@ -152,8 +157,10 @@ def _coerce_prekey_lookup_handle_record(
|
||||
max_uses = _safe_int(value.get("max_uses", PREKEY_LOOKUP_HANDLE_MAX_USES) or PREKEY_LOOKUP_HANDLE_MAX_USES)
|
||||
use_count = _safe_int(value.get("use_count", value.get("uses", 0)) or 0, 0)
|
||||
last_used_at = _safe_int(value.get("last_used_at", value.get("last_used", 0)) or 0, 0)
|
||||
label = str(value.get("label", "") or "").strip()
|
||||
return _prekey_lookup_handle_record(
|
||||
handle,
|
||||
label=label,
|
||||
issued_at=issued_at,
|
||||
expires_at=expires_at,
|
||||
max_uses=max_uses,
|
||||
@@ -228,6 +235,23 @@ def _fresh_prekey_lookup_handle_record(*, now: int | None = None) -> dict[str, A
|
||||
)
|
||||
|
||||
|
||||
def _prekey_registration_failure_blocks_dm_invite(detail: str) -> bool:
|
||||
"""Only trust-root failures block address export; transport warm-up can finish later."""
|
||||
lowered = str(detail or "").lower()
|
||||
critical_markers = (
|
||||
"root transparency",
|
||||
"external root witness",
|
||||
"stable root",
|
||||
"witness threshold",
|
||||
"witness finality",
|
||||
"root manifest",
|
||||
"root witness",
|
||||
"manifest_fingerprint",
|
||||
"policy fingerprint",
|
||||
)
|
||||
return any(marker in lowered for marker in critical_markers)
|
||||
|
||||
|
||||
def _bounded_lookup_handle_records(
|
||||
records: list[dict[str, Any]],
|
||||
*,
|
||||
@@ -440,6 +464,37 @@ def _bundle_fingerprint(data: dict[str, Any]) -> str:
|
||||
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _ensure_dm_dh_material(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
    """Ensure a DM identity record carries X25519 Diffie-Hellman key material.

    When both ``dh_pub_key`` and ``dh_private_key`` are non-blank the record is
    returned untouched. Otherwise a fresh X25519 key pair is generated, the
    bundle/prekey registration bookkeeping fields are reset, and the repaired
    record is persisted through ``_write_identity``.

    Args:
        data: Identity record to inspect.

    Returns:
        ``(identity, repaired)`` where ``repaired`` is True only when new DH
        material was generated and written.
    """
    has_public = bool(str(data.get("dh_pub_key", "") or "").strip())
    has_private = bool(str(data.get("dh_private_key", "") or "").strip())
    if has_public and has_private:
        return data, False

    private_key = x25519.X25519PrivateKey.generate()
    private_raw = private_key.private_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PrivateFormat.Raw,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_raw = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )

    # Start from a copy of the existing record, then overlay the new key
    # material and clear every field that described the old published bundle.
    repaired = dict(data or {})
    repaired.update(
        {
            "dh_pub_key": base64.b64encode(public_raw).decode("ascii"),
            "dh_algo": "X25519",
            "dh_private_key": base64.b64encode(private_raw).decode("ascii"),
            "last_dh_timestamp": int(time.time()),
            "bundle_fingerprint": "",
            "bundle_sequence": 0,
            "bundle_registered_at": 0,
            "prekey_bundle_registered_at": 0,
            "prekey_transparency_head": "",
            "prekey_transparency_size": 0,
        }
    )
    return _write_identity(repaired), True
|
||||
|
||||
|
||||
def trust_fingerprint_for_identity_material(
|
||||
*,
|
||||
agent_id: str,
|
||||
@@ -806,10 +861,11 @@ def _sign_dm_invite_payload(
|
||||
|
||||
def register_wormhole_dm_key(force: bool = False) -> dict[str, Any]:
|
||||
data = read_wormhole_identity()
|
||||
data, repaired_dh = _ensure_dm_dh_material(data)
|
||||
|
||||
timestamp = int(time.time())
|
||||
fingerprint = _bundle_fingerprint(data)
|
||||
if not force and fingerprint and fingerprint == data.get("bundle_fingerprint"):
|
||||
if not force and not repaired_dh and fingerprint and fingerprint == data.get("bundle_fingerprint"):
|
||||
return {
|
||||
"ok": True,
|
||||
**_public_view(data),
|
||||
@@ -884,6 +940,7 @@ def export_wormhole_dm_invite(*, label: str = "", expires_in_s: int = 0) -> dict
|
||||
existing_handles.append(
|
||||
_prekey_lookup_handle_record(
|
||||
lookup_handle,
|
||||
label=str(label or "").strip(),
|
||||
issued_at=issued_at,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
@@ -920,14 +977,25 @@ def export_wormhole_dm_invite(*, label: str = "", expires_in_s: int = 0) -> dict
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
prekey_registration: dict[str, Any] = {"ok": False, "detail": "prekey bundle publish not attempted"}
|
||||
try:
|
||||
from services.mesh.mesh_wormhole_prekey import register_wormhole_prekey_bundle
|
||||
|
||||
registered = register_wormhole_prekey_bundle()
|
||||
if not registered.get("ok"):
|
||||
return {"ok": False, "detail": str(registered.get("detail", "") or "prekey bundle registration failed")}
|
||||
prekey_registration = register_wormhole_prekey_bundle()
|
||||
if not prekey_registration.get("ok"):
|
||||
detail = str(prekey_registration.get("detail", "") or "prekey bundle registration failed")
|
||||
if _prekey_registration_failure_blocks_dm_invite(detail):
|
||||
return {"ok": False, "detail": detail}
|
||||
logger.warning(
|
||||
"DM invite prekey publish pending: %s",
|
||||
detail,
|
||||
)
|
||||
except Exception as exc:
|
||||
return {"ok": False, "detail": str(exc) or "prekey bundle registration failed"}
|
||||
prekey_registration = {"ok": False, "detail": str(exc) or "prekey bundle registration failed"}
|
||||
detail = str(prekey_registration.get("detail", "") or "")
|
||||
if _prekey_registration_failure_blocks_dm_invite(detail):
|
||||
return {"ok": False, "detail": detail}
|
||||
logger.warning("DM invite prekey publish pending: %s", prekey_registration["detail"])
|
||||
|
||||
invite_node_id, invite_public_key, invite_private_key = _generate_invite_signing_identity()
|
||||
payload = _attach_dm_invite_root_distribution(payload)
|
||||
@@ -958,6 +1026,8 @@ def export_wormhole_dm_invite(*, label: str = "", expires_in_s: int = 0) -> dict
|
||||
"peer_id": str(invite_node_id or ""),
|
||||
"trust_fingerprint": str(payload.get("identity_commitment", "") or ""),
|
||||
"invite": invite,
|
||||
"prekey_publish_pending": not bool(prekey_registration.get("ok")),
|
||||
"prekey_registration": prekey_registration,
|
||||
}
|
||||
|
||||
|
||||
@@ -980,6 +1050,140 @@ def get_prekey_lookup_handle_records() -> list[dict[str, Any]]:
|
||||
]
|
||||
|
||||
|
||||
def list_prekey_lookup_handle_records_for_ui(*, now: int | None = None) -> dict[str, Any]:
    """Build the UI listing of shareable DM lookup-handle ("address") records.

    Only handle metadata is returned: label, timestamps, usage counters and
    the derived ``expired`` / ``exhausted`` flags. Records with a blank handle
    are skipped, and the result is ordered by ``issued_at`` descending.

    Args:
        now: Reference Unix time for the ``expired`` flag; a falsy value means
            "use the current time".

    Returns:
        ``{"ok": True, "addresses": [...]}``.
    """
    reference_time = _safe_int(now or time.time(), int(time.time()))

    entries: list[dict[str, Any]] = []
    for record in get_prekey_lookup_handle_records():
        name = str(record.get("handle", "") or "").strip()
        if not name:
            continue

        expiry = _effective_prekey_lookup_handle_expires_at(record)
        raw_limit = record.get("max_uses", PREKEY_LOOKUP_HANDLE_MAX_USES) or PREKEY_LOOKUP_HANDLE_MAX_USES
        limit = max(1, _safe_int(raw_limit, PREKEY_LOOKUP_HANDLE_MAX_USES))
        used = max(0, _safe_int(record.get("use_count", 0) or 0, 0))

        entries.append(
            {
                "handle": name,
                "label": str(record.get("label", "") or "").strip(),
                "issued_at": _safe_int(record.get("issued_at", 0) or 0, 0),
                "expires_at": expiry,
                "max_uses": limit,
                "use_count": used,
                "remaining_uses": max(0, limit - used),
                "last_used_at": _safe_int(record.get("last_used_at", 0) or 0, 0),
                # An expiry of 0 (or less) is treated as "never expires".
                "expired": bool(expiry > 0 and reference_time >= expiry),
                "exhausted": bool(used >= limit),
            }
        )

    newest_first = sorted(
        entries,
        key=lambda item: _safe_int(item.get("issued_at", 0) or 0, 0),
        reverse=True,
    )
    return {"ok": True, "addresses": newest_first}
|
||||
|
||||
|
||||
def rename_prekey_lookup_handle(handle: str, label: str) -> dict[str, Any]:
    """Change the label of a stored DM lookup handle, leaving the handle itself alone.

    Args:
        handle: Lookup handle to relabel; surrounding whitespace is ignored.
        label: New label; stripped and truncated to 96 characters.

    Returns:
        ``{"ok": False, "detail": "missing_lookup_handle"}`` for a blank
        handle; a result with ``"detail": "lookup_handle_not_found"`` and
        ``"updated": False`` when no stored record matches; otherwise
        ``{"ok": True, "handle", "label", "updated": True}`` after the
        re-normalized records were written to the identity.
    """
    target = str(handle or "").strip()
    new_label = str(label or "").strip()[:96]
    if not target:
        return {"ok": False, "detail": "missing_lookup_handle"}

    now_ts = int(time.time())
    identity = read_wormhole_identity()
    stored_records, _ = _normalize_prekey_lookup_handles(
        identity.get("prekey_lookup_handles", []),
        fallback_issued_at=now_ts,
        now=now_ts,
    )

    matched = False
    relabeled: list[dict[str, Any]] = []
    for entry in stored_records:
        clone = dict(entry)
        if str(clone.get("handle", "") or "").strip() == target:
            clone["label"] = new_label
            matched = True
        relabeled.append(clone)

    if not matched:
        return {
            "ok": False,
            "handle": target,
            "label": new_label,
            "updated": False,
            "detail": "lookup_handle_not_found",
        }

    # Run the records through normalization once more before persisting.
    persisted, _ = _normalize_prekey_lookup_handles(
        relabeled,
        fallback_issued_at=now_ts,
        now=now_ts,
    )
    _write_identity({"prekey_lookup_handles": persisted})
    return {
        "ok": True,
        "handle": target,
        "label": new_label,
        "updated": True,
    }
|
||||
|
||||
|
||||
def revoke_prekey_lookup_handle(handle: str) -> dict[str, Any]:
    """Revoke a DM lookup handle locally and, best effort, on the DM relay.

    Steps:
      1. Drop the handle from the identity's ``prekey_lookup_handles`` and
         persist the remaining records if anything was removed.
      2. Ask ``dm_relay`` to unregister the lookup alias; any error here is
         swallowed and reported as ``relay_removed = False``.
      3. If the identity changed, re-register the prekey bundle; a failure is
         reported through ``detail`` without failing the call.

    Args:
        handle: Lookup handle to revoke; surrounding whitespace is ignored.

    Returns:
        ``{"ok": False, "detail": "missing_lookup_handle"}`` for a blank
        handle, otherwise an ``ok: True`` summary with ``revoked``,
        ``identity_removed``, ``relay_removed``, ``republished`` and ``detail``.
    """
    target = str(handle or "").strip()
    if not target:
        return {"ok": False, "detail": "missing_lookup_handle"}

    now_ts = int(time.time())
    identity = read_wormhole_identity()
    stored_records, _ = _normalize_prekey_lookup_handles(
        identity.get("prekey_lookup_handles", []),
        fallback_issued_at=now_ts,
        now=now_ts,
    )

    kept: list[dict[str, Any]] = []
    for entry in stored_records:
        if str(entry.get("handle", "") or "").strip() != target:
            kept.append(dict(entry))
    identity_removed = len(kept) != len(stored_records)
    if identity_removed:
        _write_identity({"prekey_lookup_handles": kept})

    # The relay alias is removed even when the identity had no such record.
    relay_removed = False
    try:
        from services.mesh.mesh_dm_relay import dm_relay

        relay_removed = bool(dm_relay.unregister_prekey_lookup_alias(target))
    except Exception:
        relay_removed = False

    republished = False
    detail = ""
    if identity_removed:
        try:
            from services.mesh.mesh_wormhole_prekey import register_wormhole_prekey_bundle

            outcome = register_wormhole_prekey_bundle()
            republished = bool(outcome.get("ok"))
            if not republished:
                detail = str(outcome.get("detail", "") or "prekey bundle republish failed")
        except Exception as exc:
            detail = str(exc) or "prekey bundle republish failed"

    return {
        "ok": True,
        "handle": target,
        "revoked": bool(identity_removed or relay_removed),
        "identity_removed": identity_removed,
        "relay_removed": relay_removed,
        "republished": republished,
        "detail": detail,
    }
|
||||
|
||||
|
||||
def record_prekey_lookup_handle_use(handle: str, *, now: int | None = None) -> dict[str, Any] | None:
|
||||
lookup_handle = str(handle or "").strip()
|
||||
if not lookup_handle:
|
||||
@@ -999,6 +1203,7 @@ def record_prekey_lookup_handle_use(handle: str, *, now: int | None = None) -> d
|
||||
if str(current.get("handle", "") or "").strip() == lookup_handle:
|
||||
current = _prekey_lookup_handle_record(
|
||||
lookup_handle,
|
||||
label=str(current.get("label", "") or "").strip(),
|
||||
issued_at=_safe_int(current.get("issued_at", 0) or 0, current_time),
|
||||
expires_at=_safe_int(current.get("expires_at", 0) or 0, 0),
|
||||
max_uses=_safe_int(current.get("max_uses", PREKEY_LOOKUP_HANDLE_MAX_USES) or PREKEY_LOOKUP_HANDLE_MAX_USES),
|
||||
@@ -1129,6 +1334,7 @@ def maybe_rotate_prekey_lookup_handles(*, now: int | None = None) -> dict[str, A
|
||||
candidate_records.append(
|
||||
_prekey_lookup_handle_record(
|
||||
old_handle,
|
||||
label=str(record.get("label", "") or "").strip(),
|
||||
issued_at=_safe_int(record.get("issued_at", 0) or 0, current_time),
|
||||
expires_at=overlap_expires_at,
|
||||
max_uses=_safe_int(record.get("max_uses", PREKEY_LOOKUP_HANDLE_MAX_USES) or PREKEY_LOOKUP_HANDLE_MAX_USES),
|
||||
@@ -1351,11 +1557,101 @@ def import_wormhole_dm_invite(invite: dict[str, Any], *, alias: str = "") -> dic
|
||||
"detail": "compat dm invite import disabled; ask the sender to re-export a current signed invite",
|
||||
}
|
||||
|
||||
def _prekey_missing_or_pending(detail: str) -> bool:
|
||||
lower = str(detail or "").strip().lower()
|
||||
return any(
|
||||
phrase in lower
|
||||
for phrase in (
|
||||
"prekey bundle not found",
|
||||
"invite prekey bundle not found",
|
||||
"peer prekey lookup unavailable",
|
||||
"peer prekey lookup still preparing",
|
||||
"transport tier insufficient",
|
||||
"preparing_private_lane",
|
||||
)
|
||||
)
|
||||
|
||||
def _pin_pending_invite_prekey(detail: str) -> dict[str, Any]:
    """Pin the inviter as a contact although the prekey bundle could not be fetched.

    Reads ``invite_version``, ``payload``, ``envelope``, ``verified``,
    ``lookup_handle`` and ``resolved_alias`` from the surrounding scope.

    The invite must be at least ``DM_INVITE_VERSION`` and must pass both the
    root-distribution check and the identity attestation check; the first
    failing check's result is returned unchanged. On success the contact is
    pinned with empty signing/DH public keys and the result is flagged
    ``pending_prekey``.

    Args:
        detail: Why the prekey fetch failed; echoed back as ``prekey_detail``.

    Returns:
        A failure dict, or an ``ok: True`` dict describing the saved contact.
    """
    prekey_detail = detail or "invite prekey bundle not found"
    if invite_version < DM_INVITE_VERSION:
        return {"ok": False, "detail": prekey_detail}

    def _text(source: dict[str, Any], key: str, fallback: str = "") -> str:
        # String view of source[key]; missing or falsy values become `fallback`.
        return str(source.get(key, fallback) or fallback)

    root_dist = _verify_dm_invite_root_distribution(payload)
    if not root_dist.get("ok"):
        return root_dist

    def _count(key: str) -> int:
        # Integer view of a root-distribution field, defaulting to 0.
        return _safe_int(root_dist.get(key, 0) or 0, 0)

    attested = _verify_dm_invite_identity_attestation(
        envelope=envelope,
        payload=payload,
        resolved_root_node_id=_text(root_dist, "root_node_id"),
        resolved_root_public_key=_text(root_dist, "root_public_key"),
        resolved_root_public_key_algo=_text(root_dist, "root_public_key_algo", "Ed25519"),
        resolved_root_manifest_fingerprint=_text(root_dist, "root_manifest_fingerprint").strip().lower(),
    )
    if not attested.get("ok"):
        return attested

    pending_peer_id = _text(verified, "peer_id").strip()
    trust_fingerprint = _text(verified, "trust_fingerprint").strip().lower()

    # Key material stays blank: it is unknown until the bundle can be fetched.
    invite_payload = {
        "trust_fingerprint": trust_fingerprint,
        "public_key": "",
        "public_key_algo": "Ed25519",
        "identity_dh_pub_key": "",
        "dh_algo": "X25519",
        "prekey_lookup_handle": lookup_handle,
        "issued_at": int(payload.get("issued_at", 0) or 0),
        "expires_at": int(payload.get("expires_at", 0) or 0),
        "label": _text(payload, "label"),
        "root_node_id": _text(attested, "root_node_id"),
        "root_public_key": _text(attested, "root_public_key"),
        "root_public_key_algo": _text(attested, "root_public_key_algo", "Ed25519"),
        "root_fingerprint": _text(attested, "root_fingerprint"),
        "root_manifest_fingerprint": _text(root_dist, "root_manifest_fingerprint"),
        "root_witness_policy_fingerprint": _text(root_dist, "root_witness_policy_fingerprint"),
        "root_witness_threshold": _count("root_witness_threshold"),
        "root_witness_count": _count("root_witness_count"),
        "root_witness_domain_count": _count("root_witness_domain_count"),
        "root_manifest_generation": _count("root_manifest_generation"),
        "root_rotation_proven": bool(root_dist.get("root_rotation_proven")),
    }
    contact = pin_wormhole_dm_invite(
        pending_peer_id,
        invite_payload=invite_payload,
        alias=resolved_alias,
        attested=True,
    )
    return {
        "ok": True,
        "peer_id": pending_peer_id,
        "invite_peer_id": pending_peer_id,
        "trust_fingerprint": trust_fingerprint,
        "trust_level": str(contact.get("trust_level", "") or ""),
        "detail": "Contact saved.",
        "invite_attested": True,
        "pending_prekey": True,
        "prekey_detail": prekey_detail,
        "contact": contact,
    }
|
||||
|
||||
from services.mesh.mesh_wormhole_prekey import fetch_dm_prekey_bundle
|
||||
|
||||
fetched = fetch_dm_prekey_bundle(lookup_token=lookup_handle)
|
||||
if not fetched.get("ok"):
|
||||
return {"ok": False, "detail": str(fetched.get("detail", "") or "invite prekey bundle not found")}
|
||||
fetch_detail = str(fetched.get("detail", "") or "invite prekey bundle not found")
|
||||
if _prekey_missing_or_pending(fetch_detail):
|
||||
return _pin_pending_invite_prekey(fetch_detail)
|
||||
return {"ok": False, "detail": fetch_detail}
|
||||
|
||||
resolved_peer_id = str(fetched.get("agent_id", "") or "").strip()
|
||||
if not resolved_peer_id:
|
||||
|
||||
@@ -11,6 +11,7 @@ import os
|
||||
import random
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from typing import Any
|
||||
|
||||
@@ -90,13 +91,15 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
||||
return {"ok": False, "detail": "lookup token required"}
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_crypto import _derive_peer_key, normalize_peer_url
|
||||
from services.mesh.mesh_crypto import (
|
||||
normalize_peer_url,
|
||||
resolve_peer_key_for_url,
|
||||
)
|
||||
from services.mesh.mesh_router import configured_relay_peer_urls
|
||||
|
||||
settings = get_settings()
|
||||
secret = str(getattr(settings, "MESH_PEER_PUSH_SECRET", "") or "").strip()
|
||||
if not secret:
|
||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||
# Issue #256: secret check moved per-peer below. We still bail out
|
||||
# cleanly when there are no peers configured at all.
|
||||
peers = configured_relay_peer_urls()
|
||||
if not peers:
|
||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||
@@ -120,7 +123,8 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
||||
or os.environ.get("SB_TEST_NODE_URL", "").strip()
|
||||
or normalized_peer_url
|
||||
)
|
||||
peer_key = _derive_peer_key(secret, sender_peer_url)
|
||||
# Issue #256: prefer per-peer secret keyed by the sender URL.
|
||||
peer_key = resolve_peer_key_for_url(sender_peer_url)
|
||||
if not peer_key:
|
||||
continue
|
||||
headers = {
|
||||
@@ -150,6 +154,122 @@ def _fetch_dm_prekey_bundle_from_peer_lookup(lookup_token: str) -> dict[str, Any
|
||||
return {"ok": False, "detail": last_detail or "Prekey bundle not found"}
|
||||
|
||||
|
||||
def _configured_public_lookup_peer_urls() -> list[str]:
|
||||
try:
|
||||
from services.config import get_settings
|
||||
from services.mesh.mesh_router import active_sync_peer_urls, parse_configured_relay_peers
|
||||
|
||||
settings = get_settings()
|
||||
candidates: list[str] = []
|
||||
for raw in (
|
||||
getattr(settings, "MESH_BOOTSTRAP_SEED_PEERS", ""),
|
||||
getattr(settings, "MESH_DEFAULT_SYNC_PEERS", ""),
|
||||
):
|
||||
candidates.extend(parse_configured_relay_peers(str(raw or "")))
|
||||
candidates.extend(active_sync_peer_urls())
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
seen: set[str] = set()
|
||||
peers: list[str] = []
|
||||
for candidate in candidates:
|
||||
peer = str(candidate or "").strip().rstrip("/")
|
||||
if not peer or peer in seen:
|
||||
continue
|
||||
seen.add(peer)
|
||||
peers.append(peer)
|
||||
return peers
|
||||
|
||||
|
||||
def _normalize_remote_lookup_bundle(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
data = dict(payload or {})
|
||||
bundle = dict(data.get("bundle") or {})
|
||||
public_key = str(data.get("public_key", "") or bundle.get("public_key", "") or "").strip()
|
||||
if not public_key:
|
||||
return {"ok": False, "detail": "Prekey bundle missing signing key"}
|
||||
agent_id = str(data.get("agent_id", "") or "").strip() or derive_node_id(public_key)
|
||||
if not agent_id:
|
||||
return {"ok": False, "detail": "Prekey bundle public key binding mismatch"}
|
||||
data["agent_id"] = agent_id
|
||||
data["public_key"] = public_key
|
||||
data["public_key_algo"] = str(data.get("public_key_algo", "") or bundle.get("public_key_algo", "Ed25519") or "Ed25519")
|
||||
data["protocol_version"] = str(data.get("protocol_version", "") or bundle.get("protocol_version", PROTOCOL_VERSION) or PROTOCOL_VERSION)
|
||||
data["bundle"] = bundle
|
||||
ok, reason = _validate_bundle_record(data)
|
||||
if not ok:
|
||||
return {"ok": False, "detail": reason}
|
||||
data["ok"] = True
|
||||
data["lookup_mode"] = "invite_lookup_handle"
|
||||
data["public_lookup"] = True
|
||||
return data
|
||||
|
||||
|
||||
def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, Any]:
|
||||
"""Fetch an invite-scoped prekey bundle from bootstrap/sync peers.
|
||||
|
||||
The token is high-entropy and invite-scoped. This path does not expose a
|
||||
stable agent_id to the peer; if the ordinary peer response omits agent_id,
|
||||
derive it from the signed identity public key and validate the bundle before
|
||||
accepting it.
|
||||
"""
|
||||
token = str(lookup_token or "").strip()
|
||||
if not token:
|
||||
return {"ok": False, "detail": "lookup token required"}
|
||||
peers = _configured_public_lookup_peer_urls()
|
||||
if not peers:
|
||||
return {"ok": False, "detail": "peer prekey lookup unavailable"}
|
||||
try:
|
||||
from services.config import get_settings
|
||||
|
||||
timeout = max(1, _safe_int(getattr(get_settings(), "MESH_SYNC_TIMEOUT_S", 5) or 5, 5))
|
||||
except Exception:
|
||||
timeout = 5
|
||||
|
||||
encoded = urllib.parse.urlencode({"lookup_token": token})
|
||||
last_detail = ""
|
||||
for peer_url in peers:
|
||||
normalized_peer_url = str(peer_url or "").strip().rstrip("/")
|
||||
if not normalized_peer_url:
|
||||
continue
|
||||
# Generic UA: any peer-facing crypto request should not carry a
|
||||
# fork-specific identifier — that turns prekey lookups into a
|
||||
# software-fingerprinting beacon.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
request = urllib.request.Request(
|
||||
f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
},
|
||||
method="GET",
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as response:
|
||||
raw = response.read(256 * 1024)
|
||||
payload = json.loads(raw.decode("utf-8"))
|
||||
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
|
||||
logger.debug("public prekey lookup failed for %s: %s", normalized_peer_url, type(exc).__name__)
|
||||
last_detail = "peer prekey lookup unavailable"
|
||||
continue
|
||||
if not isinstance(payload, dict):
|
||||
last_detail = "invalid peer response"
|
||||
continue
|
||||
if payload.get("pending") or str(payload.get("status", "") or "") == "preparing_private_lane":
|
||||
last_detail = "peer prekey lookup still preparing"
|
||||
continue
|
||||
if not payload.get("ok"):
|
||||
last_detail = str(payload.get("detail", "") or last_detail or "Prekey bundle not found")
|
||||
continue
|
||||
if not isinstance(payload.get("bundle"), dict):
|
||||
last_detail = "Prekey bundle not found"
|
||||
continue
|
||||
normalized = _normalize_remote_lookup_bundle(payload)
|
||||
if normalized.get("ok"):
|
||||
return normalized
|
||||
last_detail = str(normalized.get("detail", "") or last_detail)
|
||||
return {"ok": False, "detail": last_detail or "Prekey bundle not found"}
|
||||
|
||||
|
||||
def _b64(data: bytes) -> str:
|
||||
return base64.b64encode(data).decode("ascii")
|
||||
|
||||
@@ -926,6 +1046,11 @@ def fetch_dm_prekey_bundle(
|
||||
peer_found = _fetch_dm_prekey_bundle_from_peer_lookup(resolved_lookup)
|
||||
if peer_found.get("ok"):
|
||||
return peer_found
|
||||
public_found = _fetch_dm_prekey_bundle_from_public_lookup(resolved_lookup)
|
||||
if public_found.get("ok"):
|
||||
return public_found
|
||||
if str(public_found.get("detail", "") or "").strip():
|
||||
return {"ok": False, "detail": str(public_found.get("detail", "") or "Prekey bundle not found")}
|
||||
return {"ok": False, "detail": str(peer_found.get("detail", "") or "Prekey bundle not found")}
|
||||
else:
|
||||
return {"ok": False, "detail": "Prekey bundle not found"}
|
||||
|
||||
@@ -12,6 +12,7 @@ from __future__ import annotations
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@@ -23,7 +24,7 @@ from cryptography.hazmat.primitives.asymmetric import ed25519
|
||||
|
||||
from services.mesh.mesh_crypto import build_signature_payload, derive_node_id, verify_node_binding, verify_signature
|
||||
from services.mesh.mesh_protocol import PROTOCOL_VERSION
|
||||
from services.mesh.mesh_secure_storage import read_domain_json, write_domain_json
|
||||
from services.mesh.mesh_secure_storage import SecureStorageError, read_domain_json, write_domain_json
|
||||
from services.mesh.mesh_wormhole_identity import root_identity_fingerprint_for_material
|
||||
from services.mesh.mesh_wormhole_persona import (
|
||||
bootstrap_wormhole_persona_state,
|
||||
@@ -51,6 +52,7 @@ DEFAULT_ROOT_WITNESS_THRESHOLD = 2
|
||||
DEFAULT_ROOT_WITNESS_MANAGEMENT_SCOPE = "local"
|
||||
DEFAULT_ROOT_WITNESS_INDEPENDENCE_GROUP = "local_system"
|
||||
DEFAULT_ROOT_EXTERNAL_WITNESS_MAX_AGE_S = 3600
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _safe_int(val: Any, default: int = 0) -> int:
|
||||
@@ -461,12 +463,22 @@ def witness_policy_fingerprint(policy: dict[str, Any]) -> str:
|
||||
|
||||
|
||||
def read_root_distribution_state() -> dict[str, Any]:
|
||||
raw = read_domain_json(
|
||||
ROOT_DISTRIBUTION_DOMAIN,
|
||||
ROOT_DISTRIBUTION_FILE,
|
||||
_default_state,
|
||||
base_dir=DATA_DIR,
|
||||
)
|
||||
try:
|
||||
raw = read_domain_json(
|
||||
ROOT_DISTRIBUTION_DOMAIN,
|
||||
ROOT_DISTRIBUTION_FILE,
|
||||
_default_state,
|
||||
base_dir=DATA_DIR,
|
||||
)
|
||||
except SecureStorageError as exc:
|
||||
detail = str(exc)
|
||||
if "Failed to decrypt domain JSON" not in detail:
|
||||
raise
|
||||
logger.warning(
|
||||
"Root distribution state could not decrypt; regenerating local witness distribution: %s",
|
||||
detail,
|
||||
)
|
||||
raw = _default_state()
|
||||
state = {**_default_state(), **dict(raw or {})}
|
||||
state["witness_identity"] = {**_empty_witness_identity(), **dict(state.get("witness_identity") or {})}
|
||||
witness_identities, witness_changed = _normalize_witness_identities(
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import Iterable
|
||||
# Default subscription roots — US-only to avoid flooding the public broker.
|
||||
# Users can opt into additional regions via MESH_MQTT_EXTRA_ROOTS.
|
||||
DEFAULT_ROOTS: tuple[str, ...] = ("US",)
|
||||
DEFAULT_CHANNEL = "LongFast"
|
||||
|
||||
# Every known official region root (for UI dropdowns / manual opt-in).
|
||||
ALL_OFFICIAL_ROOTS: tuple[str, ...] = (
|
||||
@@ -107,6 +108,20 @@ def normalize_topic_filter(value: str) -> str | None:
|
||||
return "/".join(parts)
|
||||
|
||||
|
||||
def _default_topics_for_root(root: str) -> list[str]:
    """Return the default channel subscriptions for one region root.

    Two wildcard filters are produced per root, in this order: the ``/e/``
    path (protobuf/encrypted traffic on the public broker) and the ``/json/``
    path (companion decoded JSON traffic). Both are scoped to
    ``DEFAULT_CHANNEL``.
    """
    prefix = f"msh/{root}/2"
    return [f"{prefix}/{encoding}/{DEFAULT_CHANNEL}/#" for encoding in ("e", "json")]
|
||||
|
||||
|
||||
def build_subscription_topics(
|
||||
extra_roots: str = "",
|
||||
extra_topics: str = "",
|
||||
@@ -119,7 +134,11 @@ def build_subscription_topics(
|
||||
# via MESH_MQTT_EXTRA_ROOTS to avoid flooding the public broker.
|
||||
roots.extend(root for root in (normalize_root(item) for item in _split_config_values(extra_roots)) if root)
|
||||
|
||||
topics = [f"msh/{root}/#" for root in _dedupe(roots)]
|
||||
topics = [
|
||||
topic
|
||||
for root in _dedupe(roots)
|
||||
for topic in _default_topics_for_root(root)
|
||||
]
|
||||
topics.extend(
|
||||
topic
|
||||
for topic in (
|
||||
@@ -137,7 +156,7 @@ def known_roots(extra_roots: str = "", include_defaults: bool = True) -> list[st
|
||||
for topic in topics:
|
||||
if not topic.startswith("msh/") or not topic.endswith("/#"):
|
||||
continue
|
||||
root = normalize_root(topic[4:-2])
|
||||
root = normalize_root(parse_topic_metadata(topic)["root"])
|
||||
if root:
|
||||
roots.append(root)
|
||||
return _dedupe(roots)
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from services.config import get_settings
|
||||
|
||||
|
||||
# Fallback broker credentials used when no MQTT user/password is configured.
PUBLIC_DEFAULT_USER = "meshdev"
PUBLIC_DEFAULT_PASS = "large4cats"

# Data directory: SB_DATA_DIR when set, otherwise "<two levels above this
# file>/data". A relative value is anchored two levels above this file.
DATA_DIR = Path(os.environ.get("SB_DATA_DIR", str(Path(__file__).parent.parent / "data")))
DATA_DIR = DATA_DIR if DATA_DIR.is_absolute() else Path(__file__).parent.parent / DATA_DIR

# JSON file holding the persisted MQTT settings.
SETTINGS_FILE = DATA_DIR / "meshtastic_mqtt.json"

# Small in-process read cache: last result, its monotonic timestamp, and the
# number of seconds a cached result stays valid.
_cache: dict[str, Any] | None = None
_cache_ts: float = 0.0
_CACHE_TTL = 2.0
|
||||
|
||||
|
||||
def _settings_defaults() -> dict[str, Any]:
    """Return the baseline MQTT settings derived from application config.

    Each value comes from the matching ``MESH_MQTT_*`` attribute of
    ``get_settings()``; empty broker/port/user/password values fall back to
    the built-in public defaults. If reading the configuration raises for any
    reason, the built-in defaults are returned as a whole.
    """
    builtin = {
        "enabled": False,
        "broker": "mqtt.meshtastic.org",
        "port": 1883,
        "username": PUBLIC_DEFAULT_USER,
        "password": PUBLIC_DEFAULT_PASS,
        "psk": "",
        "include_default_roots": True,
        "extra_roots": "",
        "extra_topics": "",
    }
    try:
        cfg = get_settings()
        return {
            "enabled": bool(getattr(cfg, "MESH_MQTT_ENABLED", False)),
            "broker": str(getattr(cfg, "MESH_MQTT_BROKER", "") or "mqtt.meshtastic.org"),
            "port": int(getattr(cfg, "MESH_MQTT_PORT", 1883) or 1883),
            "username": str(getattr(cfg, "MESH_MQTT_USER", "") or PUBLIC_DEFAULT_USER),
            "password": str(getattr(cfg, "MESH_MQTT_PASS", "") or PUBLIC_DEFAULT_PASS),
            "psk": str(getattr(cfg, "MESH_MQTT_PSK", "") or ""),
            "include_default_roots": bool(getattr(cfg, "MESH_MQTT_INCLUDE_DEFAULT_ROOTS", True)),
            "extra_roots": str(getattr(cfg, "MESH_MQTT_EXTRA_ROOTS", "") or ""),
            "extra_topics": str(getattr(cfg, "MESH_MQTT_EXTRA_TOPICS", "") or ""),
        }
    except Exception:
        # Settings are unavailable or malformed: use the built-in defaults.
        return builtin
|
||||
|
||||
|
||||
def _safe_int(value: Any, default: int) -> int:
    """Coerce ``value`` to an integer in the TCP port range, else ``default``.

    Args:
        value: Anything ``int()`` might accept (int, numeric string, float).
        default: Returned when ``value`` cannot be converted or falls
            outside ``1..65535``.

    Returns:
        The parsed integer when it lies in ``1..65535``; otherwise ``default``.

    Because of the range check this helper is only suitable for
    port-like values, not for arbitrary integers such as timestamps.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")): json.loads accepts the
        # bare ``Infinity`` token, so a hand-edited settings file can
        # deliver it and must not crash the reader.
        return default
    return parsed if 1 <= parsed <= 65535 else default
|
||||
|
||||
|
||||
def _normalize(data: dict[str, Any]) -> dict[str, Any]:
    """Return ``data`` coerced into the canonical settings shape.

    Missing keys are filled from ``_settings_defaults()``. Text fields are
    stringified (and stripped, except the password, which is kept
    verbatim). The port is validated by ``_safe_int``.

    Args:
        data: Raw settings mapping, e.g. parsed from the JSON file.

    Returns:
        A new dict containing exactly the known setting keys plus
        ``updated_at``.
    """
    defaults = _settings_defaults()

    def _text(key: str, fallback: str = "") -> str:
        # Falsy values (None, "", 0) collapse to ``fallback``.
        return str(data.get(key, defaults[key]) or fallback)

    # ``updated_at`` is a Unix timestamp, so it must NOT go through
    # ``_safe_int``: that helper clamps to the 1..65535 port range and
    # would turn every real timestamp into 0.
    try:
        updated_at = max(int(data.get("updated_at", 0) or 0), 0)
    except (TypeError, ValueError, OverflowError):
        updated_at = 0

    return {
        "enabled": bool(data.get("enabled", defaults["enabled"])),
        "broker": _text("broker", defaults["broker"]).strip(),
        "port": _safe_int(data.get("port", defaults["port"]), defaults["port"]),
        "username": _text("username").strip(),
        "password": _text("password"),
        "psk": _text("psk").strip(),
        "include_default_roots": bool(data.get("include_default_roots", defaults["include_default_roots"])),
        "extra_roots": _text("extra_roots").strip(),
        "extra_topics": _text("extra_topics").strip(),
        "updated_at": updated_at,
    }
|
||||
|
||||
|
||||
def read_meshtastic_mqtt_settings() -> dict[str, Any]:
    """Load the Meshtastic MQTT settings, using a short-lived in-process cache.

    A cached result younger than ``_CACHE_TTL`` seconds is returned
    without touching disk. Otherwise the settings file is read; a missing
    file yields the defaults with ``updated_at`` 0, and an unreadable or
    non-object JSON payload is treated as an empty mapping before
    normalization.

    Returns:
        A copy of the settings dict, so callers cannot mutate the cache.
    """
    global _cache, _cache_ts

    stamp = time.monotonic()
    cache_is_fresh = _cache is not None and (stamp - _cache_ts) < _CACHE_TTL
    if cache_is_fresh:
        return dict(_cache)

    if SETTINGS_FILE.exists():
        try:
            parsed = json.loads(SETTINGS_FILE.read_text(encoding="utf-8"))
        except Exception:
            parsed = {}
        if not isinstance(parsed, dict):
            parsed = {}
        settings = _normalize(parsed)
    else:
        settings = {**_settings_defaults(), "updated_at": 0}

    _cache, _cache_ts = settings, stamp
    return dict(settings)
|
||||
|
||||
|
||||
def write_meshtastic_mqtt_settings(**updates: Any) -> dict[str, Any]:
    """Merge ``updates`` into the stored MQTT settings and persist them.

    Only the known setting keys are honoured, and a key whose value is
    ``None`` is treated as "not supplied". Supplying a blank ``username``
    without a ``password`` key resets the password to
    ``PUBLIC_DEFAULT_PASS``.

    The file holds broker credentials, so it is written to a temp file
    created with mode 0600 and then moved into place with ``os.replace``.
    This avoids both a window where the secrets are readable under the
    default umask and a truncated file if the process dies mid-write.

    Args:
        **updates: Setting values to change.

    Returns:
        A copy of the normalized settings that were written.

    Raises:
        OSError: If the data directory or settings file cannot be written.
    """
    global _cache, _cache_ts

    DATA_DIR.mkdir(parents=True, exist_ok=True)
    next_data = dict(read_meshtastic_mqtt_settings())
    for key in (
        "enabled",
        "broker",
        "port",
        "username",
        "password",
        "psk",
        "include_default_roots",
        "extra_roots",
        "extra_topics",
    ):
        value = updates.get(key)
        if value is not None:
            next_data[key] = value

    username_cleared = "username" in updates and not str(updates.get("username") or "").strip()
    if username_cleared and "password" not in updates:
        next_data["password"] = PUBLIC_DEFAULT_PASS

    next_data["updated_at"] = int(time.time())
    normalized = _normalize(next_data)
    payload = json.dumps(normalized, indent=2)

    tmp_path = SETTINGS_FILE.with_suffix(SETTINGS_FILE.suffix + ".tmp")
    try:
        fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            if os.name != "nt":
                # A stale temp file keeps its old mode across O_TRUNC,
                # so tighten it explicitly before any secret is written.
                os.fchmod(handle.fileno(), 0o600)
            handle.write(payload)
        os.replace(tmp_path, SETTINGS_FILE)
    except OSError:
        # Do not leave a half-written credentials file behind.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    _cache = normalized
    _cache_ts = time.monotonic()
    return dict(normalized)
|
||||
|
||||
|
||||
def redacted_meshtastic_mqtt_settings(data: dict[str, Any] | None = None) -> dict[str, Any]:
    """Return a view of the MQTT settings with secrets removed.

    The password and PSK are never included; only ``has_password`` /
    ``has_psk`` booleans are exposed. When both username and password are
    blank or equal to the public defaults, the username is blanked too and
    ``uses_default_credentials`` is set.

    Args:
        data: Settings to redact. When ``None``, the stored settings are read.

    Returns:
        A new dict that omits the raw password and PSK.
    """
    if data is None:
        source = read_meshtastic_mqtt_settings()
    else:
        source = dict(data)

    def _text(key: str) -> str:
        return str(source.get(key, "") or "")

    username = _text("username")
    password = _text("password")
    uses_default_credentials = (
        username in ("", PUBLIC_DEFAULT_USER)
        and password in ("", PUBLIC_DEFAULT_PASS)
    )

    redacted: dict[str, Any] = {}
    redacted["enabled"] = bool(source.get("enabled"))
    redacted["broker"] = str(source.get("broker", ""))
    redacted["port"] = int(source.get("port", 1883) or 1883)
    redacted["username"] = "" if uses_default_credentials else username
    redacted["uses_default_credentials"] = uses_default_credentials
    redacted["has_password"] = bool(password)
    redacted["has_psk"] = bool(_text("psk"))
    redacted["include_default_roots"] = bool(source.get("include_default_roots", True))
    redacted["extra_roots"] = _text("extra_roots")
    redacted["extra_topics"] = _text("extra_topics")
    redacted["updated_at"] = int(source.get("updated_at", 0) or 0)
    return redacted
|
||||
|
||||
|
||||
def mqtt_connection_config() -> tuple[str, int, str, str]:
    """Return ``(broker, port, username, password)`` for the MQTT client.

    Any blank or missing field falls back to the public defaults
    (``mqtt.meshtastic.org``, 1883, ``PUBLIC_DEFAULT_USER``,
    ``PUBLIC_DEFAULT_PASS``).
    """
    settings = read_meshtastic_mqtt_settings()
    broker = str(settings.get("broker") or "mqtt.meshtastic.org")
    port = int(settings.get("port") or 1883)
    username = str(settings.get("username") or PUBLIC_DEFAULT_USER)
    password = str(settings.get("password") or PUBLIC_DEFAULT_PASS)
    return broker, port, username, password
|
||||
|
||||
|
||||
def mqtt_bridge_enabled() -> bool:
    """Return True when the stored settings have a truthy ``enabled`` flag."""
    settings = read_meshtastic_mqtt_settings()
    return bool(settings.get("enabled"))
|
||||
|
||||
|
||||
def mqtt_psk_hex() -> str:
    """Return the stored ``psk`` setting as a stripped string ("" when unset)."""
    raw_psk = read_meshtastic_mqtt_settings().get("psk", "")
    return str(raw_psk or "").strip()
|
||||
|
||||
|
||||
def mqtt_subscription_settings() -> tuple[str, str, bool]:
    """Return ``(extra_roots, extra_topics, include_default_roots)``.

    The two text fields are returned as stored (blank when unset);
    ``include_default_roots`` defaults to True when the key is absent.
    """
    settings = read_meshtastic_mqtt_settings()
    extra_roots = str(settings.get("extra_roots", "") or "")
    extra_topics = str(settings.get("extra_topics", "") or "")
    include_defaults = bool(settings.get("include_default_roots", True))
    return extra_roots, extra_topics, include_defaults
|
||||
@@ -1,10 +1,13 @@
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
import time
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
@@ -18,9 +21,216 @@ _retry = Retry(total=1, backoff_factor=0.3, status_forcelist=[502, 503, 504])
|
||||
_session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
|
||||
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-operator outbound identification
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Issues #289 / #290 / #291 and the retrofit of PR #284 (#218 / #219 / #220):
|
||||
# every third-party API the backend calls used to identify itself with a
|
||||
# single "Shadowbroker" aggregate User-Agent. From the upstream's
|
||||
# perspective, that meant every Shadowbroker install in the world looked
|
||||
# like one giant entity hammering them. If one install misbehaved, the
|
||||
# upstream's only recourse was to block "Shadowbroker" as a whole — which
|
||||
# would take out every other install too.
|
||||
#
|
||||
# Fix: give each install a stable pseudonymous handle and include it in
|
||||
# the User-Agent. Now an upstream can rate-limit or block the offending
|
||||
# operator without affecting anyone else.
|
||||
#
|
||||
# The handle:
|
||||
#
|
||||
# - Is auto-generated on first call if no `OPERATOR_HANDLE` is configured
|
||||
# (looks like "operator-7f3a92" — 6 hex chars from uuid4()).
|
||||
# - Is persisted to ``backend/data/operator_handle.json`` so it survives
|
||||
# restarts. Under Docker compose that file lives in the volume mount
|
||||
# alongside `carrier_cache.json` and the other persistent state.
|
||||
# - Can be overridden by the operator via the `OPERATOR_HANDLE` setting
|
||||
# (env var or settings UI). Operators with their own GitHub handle,
|
||||
# organization name, etc. can use that for traceability.
|
||||
# - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is
|
||||
# strictly for public third-party API attribution.
|
||||
|
||||
_SHADOWBROKER_VERSION = "0.9"
|
||||
_OPERATOR_HANDLE_FILE = (
|
||||
Path(__file__).parent.parent / "data" / "operator_handle.json"
|
||||
)
|
||||
_OPERATOR_HANDLE_CACHE: str = ""
|
||||
_OPERATOR_HANDLE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _generate_operator_handle() -> str:
    """Create a fresh pseudonymous operator handle.

    The result is ``operator-`` followed by the first six hex digits of a
    new ``uuid4()``, e.g. ``operator-7f3a92``. It carries no real-world
    identity; operators who want one can configure ``OPERATOR_HANDLE``.

    The neutral ``operator-`` prefix is intentional: an earlier
    ``shadow-`` prefix looked like something a third-party abuse filter
    would block on sight.
    """
    suffix = uuid.uuid4().hex[:6]
    return "operator-" + suffix
|
||||
|
||||
|
||||
def _load_persisted_operator_handle() -> str:
    """Return the handle saved in ``operator_handle.json``, or "" if none.

    Any problem with the file (missing, unreadable, not valid JSON, or
    valid JSON that is not an object) yields "" so the caller generates a
    fresh handle instead of failing the request.

    Returns:
        The stripped ``handle`` value, or an empty string.
    """
    try:
        if not _OPERATOR_HANDLE_FILE.exists():
            return ""
        data = json.loads(_OPERATOR_HANDLE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
        return ""
    if not isinstance(data, dict):
        # A list, string or number at the top level has no ``.get``;
        # treat it the same as a corrupt file.
        return ""
    return str(data.get("handle", "") or "").strip()
|
||||
|
||||
|
||||
def _persist_operator_handle(handle: str) -> None:
    """Save ``handle`` to ``operator_handle.json`` for reuse after restarts.

    The JSON is written to a sibling ``.tmp`` file and then moved into
    place with ``os.replace``. An ``OSError`` anywhere in that sequence is
    logged at debug level and swallowed: the caller keeps using the
    in-memory handle, and a later process start may generate a new one.
    """
    document = {
        "handle": handle,
        "_meta": {
            "purpose": "Per-install operator handle for outbound third-party API attribution.",
            "see": "backend/services/network_utils.py:outbound_user_agent",
        },
    }
    target = _OPERATOR_HANDLE_FILE
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        staging = target.with_suffix(target.suffix + ".tmp")
        staging.write_text(json.dumps(document, indent=2), encoding="utf-8")
        os.replace(staging, target)
    except OSError as exc:
        logger.debug("Could not persist operator_handle (continuing in-memory): %s", exc)
|
||||
|
||||
|
||||
def get_operator_handle() -> str:
    """Return this install's operator handle for outbound attribution.

    Sources are tried in this order, all under ``_OPERATOR_HANDLE_LOCK``:

    1. A non-empty ``OPERATOR_HANDLE`` setting. It is normalized and
       returned on every call, and never stored in the process cache.
    2. The process-level cache filled by an earlier call.
    3. The handle stored in ``operator_handle.json``, normalized and cached.
    4. A newly generated handle, which is persisted and cached.

    Configured and persisted values pass through ``_normalize_handle``
    before use.
    """
    global _OPERATOR_HANDLE_CACHE

    with _OPERATOR_HANDLE_LOCK:
        try:
            from services.config import get_settings

            override = str(getattr(get_settings(), "OPERATOR_HANDLE", "") or "").strip()
        except Exception:
            # Settings unavailable: behave as if no override was configured.
            override = ""
        if override:
            return _normalize_handle(override)

        if not _OPERATOR_HANDLE_CACHE:
            from_disk = _load_persisted_operator_handle()
            if from_disk:
                _OPERATOR_HANDLE_CACHE = _normalize_handle(from_disk)
            else:
                generated = _generate_operator_handle()
                _persist_operator_handle(generated)
                _OPERATOR_HANDLE_CACHE = generated
        return _OPERATOR_HANDLE_CACHE
|
||||
|
||||
|
||||
def _normalize_handle(raw: str) -> str:
    """Reduce ``raw`` to a short, header-safe ASCII token.

    The value is stripped and lowercased; every character other than
    ASCII letters, ASCII digits, ``-`` and ``_`` becomes ``-``. Runs of
    dashes are collapsed, leading and trailing dashes are removed, and
    the result is capped at 48 characters.

    Args:
        raw: Operator-supplied or persisted handle (or purpose label).

    Returns:
        The sanitized token, or ``"anonymous"`` when nothing usable remains.
    """
    lowered = raw.strip().lower()
    # ``str.isalnum()`` alone is True for non-ASCII letters and digits
    # ("é", "日", "²"), which must not end up inside an HTTP header value.
    mapped = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "-"
        for ch in lowered
    )
    # Splitting on "-" and dropping empty pieces both collapses dash runs
    # and trims dashes from the ends.
    collapsed = "-".join(piece for piece in mapped.split("-") if piece)
    # The 48-char cut can land right after a dash, so trim once more.
    return collapsed[:48].rstrip("-") or "anonymous"
|
||||
|
||||
|
||||
_CONTACT_URL = "https://github.com/BigBodyCobain/Shadowbroker/issues"
|
||||
|
||||
|
||||
def outbound_user_agent(purpose: str = "") -> str:
    """Build the User-Agent string for an outbound third-party request.

    Example result::

        Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia;
        +https://github.com/BigBodyCobain/Shadowbroker/issues)

    Args:
        purpose: Optional label for the feature making the call
            (``wikipedia``, ``openmhz``, ``nominatim``, ...). It is passed
            through ``_normalize_handle`` and included only when
            non-empty. Supplying it makes upstream logs easier to act on.

    Returns:
        The product token followed by a parenthesised, ``; ``-separated
        list: operator handle, optional purpose, contact URL.

    Keeping this in one helper means the contact URL, the version and the
    per-install handle are each changed in a single place.
    """
    fields = [f"operator: {get_operator_handle()}"]
    if purpose:
        fields.append(f"purpose: {_normalize_handle(purpose)}")
    fields.append(f"+{_CONTACT_URL}")
    detail = "; ".join(fields)
    return f"Shadowbroker/{_SHADOWBROKER_VERSION} ({detail})"
|
||||
|
||||
|
||||
def _reset_operator_handle_cache_for_tests() -> None:
    """Test-only helper: clear the in-memory operator handle.

    After this call, the next ``get_operator_handle()`` without a
    configured override re-reads the persisted handle or generates a new
    one, instead of returning the cached value.
    """
    global _OPERATOR_HANDLE_CACHE

    with _OPERATOR_HANDLE_LOCK:
        empty = ""
        _OPERATOR_HANDLE_CACHE = empty
|
||||
|
||||
|
||||
# Default outbound User-Agent. Retained for backwards compatibility with
|
||||
# call sites that haven't been migrated to ``outbound_user_agent()`` yet.
|
||||
# Operators who want full per-install attribution should set the
|
||||
# ``OPERATOR_HANDLE`` setting and migrate call sites incrementally.
|
||||
#
|
||||
# Operators who run a public-facing relay can also override the whole UA
|
||||
# string via the ``SHADOWBROKER_USER_AGENT`` env var. That override
|
||||
# completely bypasses the per-operator helper; only use it if you know
|
||||
# what you're doing.
|
||||
DEFAULT_USER_AGENT = os.environ.get(
|
||||
"SHADOWBROKER_USER_AGENT",
|
||||
f"Shadowbroker/{_SHADOWBROKER_VERSION}",
|
||||
)
|
||||
|
||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
|
||||
_BASH_PATH = shutil.which("bash") or "bash"
|
||||
|
||||
# Cache domains where requests fails — skip straight to curl for 5 minutes
|
||||
_domain_fail_cache: dict[str, float] = {}
|
||||
@@ -39,6 +249,17 @@ class UpstreamCircuitBreakerError(OSError):
|
||||
"""Raised when a domain recently failed hard and is temporarily skipped."""
|
||||
|
||||
|
||||
def _env_truthy(name: str) -> bool:
    """Return True when env var ``name`` is set to 1/true/yes/on.

    The comparison ignores surrounding whitespace and letter case; an
    unset variable counts as false.
    """
    raw = os.getenv(name, "")
    token = str(raw).strip().lower()
    return token in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def external_curl_fallback_enabled() -> bool:
    """Report whether the backend may spawn an external curl process.

    Always True off Windows. On Windows (``os.name == "nt"``) it is
    opt-in via ``SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK``.
    """
    on_windows = os.name == "nt"
    if on_windows:
        return _env_truthy("SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK")
    return True
|
||||
|
||||
|
||||
class _DummyResponse:
|
||||
"""Minimal response object matching requests.Response interface."""
|
||||
def __init__(self, status_code, text):
|
||||
@@ -62,7 +283,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
both Python requests and the barebones Windows system curl.
|
||||
"""
|
||||
default_headers = {
|
||||
"User-Agent": "ShadowBroker-OSINT/0.9.7 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com)",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
}
|
||||
if headers:
|
||||
default_headers.update(headers)
|
||||
@@ -98,11 +319,22 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
_circuit_breaker.pop(domain, None)
|
||||
return res
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, OSError) as e:
|
||||
logger.warning(f"Python requests failed for {url} ({e}), falling back to bash curl...")
|
||||
fallback = "falling back to curl" if external_curl_fallback_enabled() else "skipping external curl"
|
||||
logger.warning(f"Python requests failed for {url} ({e}), {fallback}...")
|
||||
with _cb_lock:
|
||||
_domain_fail_cache[domain] = time.time()
|
||||
|
||||
# Curl fallback — reached from both _skip_requests and requests-exception paths
|
||||
if not external_curl_fallback_enabled():
|
||||
logger.warning(
|
||||
"External curl fallback disabled on Windows for %s; set "
|
||||
"SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=1 to opt in.",
|
||||
domain,
|
||||
)
|
||||
with _cb_lock:
|
||||
_circuit_breaker[domain] = time.time()
|
||||
return _DummyResponse(500, "")
|
||||
|
||||
_CURL_PATH = shutil.which("curl") or "curl"
|
||||
cmd = [_CURL_PATH, "-s", "-w", "\n%{http_code}"]
|
||||
if follow_redirects:
|
||||
@@ -116,9 +348,16 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
|
||||
try:
|
||||
stdin_data = json.dumps(json_data) if (method == "POST" and json_data) else None
|
||||
creationflags = 0
|
||||
if os.name == "nt":
|
||||
creationflags = (
|
||||
getattr(subprocess, "CREATE_NO_WINDOW", 0)
|
||||
| getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
|
||||
)
|
||||
res = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=timeout + 5,
|
||||
input=stdin_data, encoding="utf-8", errors="replace"
|
||||
input=stdin_data, encoding="utf-8", errors="replace",
|
||||
creationflags=creationflags,
|
||||
)
|
||||
if res.returncode == 0 and (res.stdout or "").strip():
|
||||
# Parse HTTP status code from -w output (last line)
|
||||
@@ -130,12 +369,12 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
_circuit_breaker.pop(domain, None) # Clear circuit breaker on success
|
||||
return _DummyResponse(http_code, body)
|
||||
else:
|
||||
logger.error(f"bash curl fallback failed: exit={res.returncode} stderr={res.stderr[:200]}")
|
||||
logger.error(f"curl fallback failed: exit={res.returncode} stderr={res.stderr[:200]}")
|
||||
with _cb_lock:
|
||||
_circuit_breaker[domain] = time.time()
|
||||
return _DummyResponse(500, "")
|
||||
except (subprocess.SubprocessError, ConnectionError, TimeoutError, OSError) as curl_e:
|
||||
logger.error(f"bash curl fallback exception: {curl_e}")
|
||||
logger.error(f"curl fallback exception: {curl_e}")
|
||||
with _cb_lock:
|
||||
_circuit_breaker[domain] = time.time()
|
||||
return _DummyResponse(500, "")
|
||||
|
||||
@@ -15,6 +15,8 @@ _FEED_URL_REPLACEMENTS = {
|
||||
"https://www.channelnewsasia.com/rssfeed/8395986": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml",
|
||||
}
|
||||
_DEAD_FEED_URLS = {
|
||||
"https://www.reutersagency.com/feed/?best-topics=world",
|
||||
"https://rsshub.app/apnews/topics/world-news",
|
||||
"https://www3.nhk.or.jp/nhkworld/rss/world.xml",
|
||||
"https://focustaiwan.tw/rss",
|
||||
"https://english.kyodonews.net/rss/news.xml",
|
||||
@@ -29,6 +31,11 @@ DEFAULT_FEEDS = [
|
||||
{"name": "AlJazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml", "weight": 2},
|
||||
{"name": "NYT", "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml", "weight": 1},
|
||||
{"name": "GDACS", "url": "https://www.gdacs.org/xml/rss.xml", "weight": 5},
|
||||
{"name": "The War Zone", "url": "https://www.twz.com/feed", "weight": 4},
|
||||
{"name": "Bellingcat", "url": "https://www.bellingcat.com/feed/", "weight": 4},
|
||||
{"name": "Guardian", "url": "https://www.theguardian.com/world/rss", "weight": 3},
|
||||
{"name": "TASS", "url": "https://tass.com/rss/v2.xml", "weight": 2},
|
||||
{"name": "Xinhua", "url": "http://www.news.cn/english/rss/worldrss.xml", "weight": 2},
|
||||
{"name": "CNA", "url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml", "weight": 3},
|
||||
{"name": "Mercopress", "url": "https://en.mercopress.com/rss/", "weight": 3},
|
||||
{"name": "SCMP", "url": "https://www.scmp.com/rss/91/feed", "weight": 4},
|
||||
@@ -73,7 +80,9 @@ def get_feeds() -> list[dict]:
|
||||
normalised = _normalise_feeds(feeds)
|
||||
if normalised != feeds:
|
||||
save_feeds(normalised)
|
||||
return normalised
|
||||
if normalised:
|
||||
return normalised
|
||||
logger.warning("News feed configuration contained no usable feeds; falling back to defaults")
|
||||
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
||||
logger.warning(f"Failed to read news feed config: {e}")
|
||||
return list(DEFAULT_FEEDS)
|
||||
|
||||
@@ -10,7 +10,8 @@ _cache: dict | None = None
|
||||
_cache_ts: float = 0.0
|
||||
_CACHE_TTL = 5.0
|
||||
_DEFAULTS = {
|
||||
"enabled": False,
|
||||
"enabled": True,
|
||||
"operator_disabled": False,
|
||||
"timemachine_enabled": False,
|
||||
}
|
||||
|
||||
@@ -35,8 +36,16 @@ def read_node_settings() -> dict:
|
||||
except Exception:
|
||||
result = {**_DEFAULTS, "updated_at": 0}
|
||||
else:
|
||||
operator_disabled = bool(data.get("operator_disabled", False))
|
||||
raw_enabled = data.get("enabled", _DEFAULTS["enabled"])
|
||||
# v0.9.7 initially wrote enabled:false as a default/offline state,
|
||||
# which accidentally blocked InfoNet participation. Treat legacy
|
||||
# false-without-marker as auto-enabled; only an explicit operator
|
||||
# disable should keep the participant sync loop off.
|
||||
enabled = False if operator_disabled else bool(raw_enabled or "operator_disabled" not in data)
|
||||
result = {
|
||||
"enabled": bool(data.get("enabled", _DEFAULTS["enabled"])),
|
||||
"enabled": enabled,
|
||||
"operator_disabled": operator_disabled,
|
||||
"timemachine_enabled": bool(data.get("timemachine_enabled", _DEFAULTS["timemachine_enabled"])),
|
||||
"updated_at": _safe_int(data.get("updated_at", 0) or 0),
|
||||
}
|
||||
@@ -48,8 +57,10 @@ def read_node_settings() -> dict:
|
||||
def write_node_settings(*, enabled: bool | None = None, timemachine_enabled: bool | None = None) -> dict:
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
existing = read_node_settings()
|
||||
next_enabled = bool(existing.get("enabled", _DEFAULTS["enabled"])) if enabled is None else bool(enabled)
|
||||
payload = {
|
||||
"enabled": bool(existing.get("enabled", _DEFAULTS["enabled"])) if enabled is None else bool(enabled),
|
||||
"enabled": next_enabled,
|
||||
"operator_disabled": bool(existing.get("operator_disabled", _DEFAULTS["operator_disabled"])) if enabled is None else not next_enabled,
|
||||
"timemachine_enabled": bool(existing.get("timemachine_enabled", _DEFAULTS["timemachine_enabled"])) if timemachine_enabled is None else bool(timemachine_enabled),
|
||||
"updated_at": int(time.time()),
|
||||
}
|
||||
|
||||
@@ -104,6 +104,8 @@ def _match_prediction_markets(title: str, markets: list[dict]) -> dict | None:
|
||||
"kalshi_pct": best_match.get("kalshi_pct"),
|
||||
"consensus_pct": best_match.get("consensus_pct"),
|
||||
"match_score": round(best_score, 2),
|
||||
"slug": best_match.get("slug", ""),
|
||||
"kalshi_ticker": best_match.get("kalshi_ticker", ""),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -6,8 +6,8 @@ Docs: https://pskreporter.info/pskdev.html
|
||||
"""
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
from cachetools import TTLCache, cached
|
||||
|
||||
|
||||
@@ -2,14 +2,34 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
from cachetools import cached, TTLCache
|
||||
import cloudscraper
|
||||
import reverse_geocoder as rg
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPENMHZ_AUDIO_HOSTS = {"media.openmhz.com", "media2.openmhz.com", "media3.openmhz.com"}
|
||||
|
||||
|
||||
# Round 7a / Issues #289, #290, #291 (tg12 audit):
|
||||
# We previously sent a spoofed Chrome User-Agent and (for OpenMHz) used
|
||||
# cloudscraper to bypass anti-bot challenges. Both are dishonest and ToS-
|
||||
# unfriendly. We now send the per-install Shadowbroker UA — the upstream
|
||||
# can identify us, rate-limit us per install, and contact us if needed.
|
||||
#
|
||||
# If the upstream actively blocks our honest UA, the feature degrades
|
||||
# gracefully (returns an empty list / cached results) rather than
|
||||
# escalating to deception.
|
||||
|
||||
|
||||
def _broadcastify_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``broadcastify``."""
    user_agent = outbound_user_agent("broadcastify")
    return user_agent
|
||||
|
||||
|
||||
def _openmhz_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``openmhz``."""
    user_agent = outbound_user_agent("openmhz")
    return user_agent
|
||||
|
||||
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
|
||||
radio_cache = TTLCache(maxsize=1, ttl=300)
|
||||
|
||||
@@ -22,8 +42,12 @@ def get_top_broadcastify_feeds():
|
||||
"""
|
||||
logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
# Issue #289 (tg12) + Round 7a: identify ourselves honestly as a
|
||||
# per-install Shadowbroker scraper. Broadcastify can rate-limit
|
||||
# us per install or block us; either way we stop pretending to be
|
||||
# a browser. If they block, the panel degrades gracefully.
|
||||
"User-Agent": _broadcastify_user_agent(),
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
}
|
||||
|
||||
@@ -89,21 +113,32 @@ openmhz_systems_cache = TTLCache(maxsize=1, ttl=3600)
|
||||
|
||||
@cached(openmhz_systems_cache)
|
||||
def get_openmhz_systems():
|
||||
"""Fetches the full directory of OpenMHZ systems."""
|
||||
logger.info("Scraping OpenMHZ Systems (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
"""Fetches the full directory of OpenMHZ systems.
|
||||
|
||||
Issue #290 (tg12) + Round 7a: replaced cloudscraper-based Chrome
|
||||
impersonation with an honest per-install Shadowbroker User-Agent.
|
||||
If OpenMHz's Cloudflare layer blocks honest traffic, we accept
|
||||
that degradation (return empty list) rather than spoof a browser.
|
||||
"""
|
||||
logger.info("Fetching OpenMHZ Systems (Cache Miss)")
|
||||
try:
|
||||
res = scraper.get("https://api.openmhz.com/systems", timeout=15)
|
||||
res = requests.get(
|
||||
"https://api.openmhz.com/systems",
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
# Return list of systems
|
||||
return data.get("systems", []) if isinstance(data, dict) else []
|
||||
if res.status_code in (403, 503):
|
||||
logger.warning(
|
||||
"OpenMHZ returned %s for systems directory — Cloudflare may "
|
||||
"be blocking our honest UA. Feature degrades to empty result.",
|
||||
res.status_code,
|
||||
)
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
|
||||
logger.error(f"OpenMHZ Systems Fetch Exception: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -113,45 +148,85 @@ openmhz_calls_cache = TTLCache(maxsize=100, ttl=20)
|
||||
|
||||
@cached(openmhz_calls_cache)
|
||||
def get_recent_openmhz_calls(sys_name: str):
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata')."""
|
||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata').
|
||||
|
||||
Issue #290 (tg12) + Round 7a: same honest-UA model as
|
||||
``get_openmhz_systems``.
|
||||
"""
|
||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||
try:
|
||||
url = f"https://api.openmhz.com/{sys_name}/calls"
|
||||
res = scraper.get(url, timeout=15)
|
||||
res = requests.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("calls", []) if isinstance(data, dict) else []
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
|
||||
logger.error(f"OpenMHZ Calls Fetch Exception ({sys_name}): {e}")
|
||||
return []
|
||||
|
||||
|
||||
_OPENMHZ_MAX_REDIRECTS = 5
|
||||
|
||||
|
||||
def openmhz_audio_response(target_url: str):
|
||||
"""Fetch an OpenMHz audio object through the backend with browser-safe headers."""
|
||||
"""Fetch an OpenMHz audio object through the backend with browser-safe headers.
|
||||
|
||||
Redirects are followed manually so each hop's host can be re-validated
|
||||
against ``_OPENMHZ_AUDIO_HOSTS``. Without this, the upstream could
|
||||
302-redirect to an internal address (e.g. ``http://127.0.0.1:8000/...``
|
||||
or an RFC1918 range), and the backend would dutifully fetch and stream
|
||||
that response back to the browser — a classic open-redirect-to-SSRF
|
||||
chain. Same-host redirects (CDN edge selection) still work normally.
|
||||
"""
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from urllib.parse import urljoin
|
||||
|
||||
parsed = urlparse(str(target_url or ""))
|
||||
host = (parsed.hostname or "").lower()
|
||||
if parsed.scheme != "https" or host not in _OPENMHZ_AUDIO_HOSTS:
|
||||
raise HTTPException(status_code=400, detail="Unsupported OpenMHz audio URL")
|
||||
|
||||
current_url = target_url
|
||||
hops = 0
|
||||
try:
|
||||
upstream = requests.get(
|
||||
target_url,
|
||||
stream=True,
|
||||
timeout=(5, 20),
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
||||
"Referer": "https://openmhz.com/",
|
||||
},
|
||||
)
|
||||
while True:
|
||||
upstream = requests.get(
|
||||
current_url,
|
||||
stream=True,
|
||||
timeout=(5, 20),
|
||||
allow_redirects=False,
|
||||
headers={
|
||||
# Issue #291 (tg12) + Round 7a: drop spoofed Mozilla
|
||||
# UA and the fake first-party Referer. Identify as
|
||||
# the per-install Shadowbroker proxy honestly.
|
||||
"User-Agent": _openmhz_user_agent(),
|
||||
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
if upstream.is_redirect or upstream.status_code in (301, 302, 303, 307, 308):
|
||||
location = upstream.headers.get("Location", "")
|
||||
upstream.close()
|
||||
if hops >= _OPENMHZ_MAX_REDIRECTS or not location:
|
||||
raise HTTPException(status_code=502, detail="OpenMHz redirect rejected")
|
||||
next_url = urljoin(current_url, location)
|
||||
next_parsed = urlparse(next_url)
|
||||
next_host = (next_parsed.hostname or "").lower()
|
||||
# Re-validate the next hop against the same allowlist used for
|
||||
# the original URL. Cross-host redirects to disallowed hosts
|
||||
# are rejected silently; the browser audio element handles
|
||||
# the resulting 502 gracefully and moves on.
|
||||
if next_parsed.scheme != "https" or next_host not in _OPENMHZ_AUDIO_HOSTS:
|
||||
raise HTTPException(status_code=502, detail="OpenMHz redirect rejected")
|
||||
current_url = next_url
|
||||
hops += 1
|
||||
continue
|
||||
break
|
||||
except requests.RequestException as exc:
|
||||
raise HTTPException(status_code=502, detail="OpenMHz audio fetch failed") from exc
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,6 +15,31 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issues #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path.
|
||||
#
|
||||
# Round 7a: the original fix in PR #284 used a single project-wide
|
||||
# identifier, which from Wikimedia's perspective made every Shadowbroker
|
||||
# install in the world look like one giant scraper. If one install
|
||||
# misbehaved, their only recourse was to block "Shadowbroker" as a
|
||||
# whole. We now build the headers from ``outbound_user_agent('wikimedia')``
|
||||
# which embeds the per-install operator handle (auto-generated or
|
||||
# operator-chosen), so Wikimedia can rate-limit / contact the specific
|
||||
# install instead of the project.
|
||||
|
||||
|
||||
def _wikimedia_request_headers() -> dict[str, str]:
    """Build the identifying headers attached to Wikimedia API requests.

    The string returned by ``outbound_user_agent("wikimedia")`` is sent
    under both ``User-Agent`` and ``Api-User-Agent``.
    """
    agent = outbound_user_agent("wikimedia")
    headers: dict[str, str] = {}
    headers["User-Agent"] = agent
    # Api-User-Agent is the browser-JS-style header Wikimedia's policy also
    # accepts; sending both means the per-operator handle is available
    # whichever one the upstream prefers.
    headers["Api-User-Agent"] = agent
    return headers
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||
@@ -45,9 +70,7 @@ def _reverse_geocode(lat: float, lng: float) -> dict:
|
||||
f"https://nominatim.openstreetmap.org/reverse?"
|
||||
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
|
||||
)
|
||||
headers = {
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard; contact@shadowbroker.app)"
|
||||
}
|
||||
headers = {"User-Agent": outbound_user_agent("nominatim")}
|
||||
|
||||
for attempt in range(2):
|
||||
# Enforce Nominatim's 1 req/sec policy
|
||||
@@ -121,7 +144,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
"""
|
||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=6)
|
||||
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||
# sends the project default, and we also add the Wikimedia-
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_wikimedia_request_headers())
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -147,7 +176,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
slug = quote(name.replace(" ", "_"))
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=5)
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_wikimedia_request_headers())
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
|
||||
@@ -34,6 +34,11 @@ from services.sar.sar_config import (
|
||||
copernicus_token,
|
||||
earthdata_token,
|
||||
)
|
||||
|
||||
|
||||
def _sar_user_agent() -> str:
    """Return the outbound User-Agent string for the ``sar-products`` purpose."""
    # Resolved at call time via a function-scope import.
    from services.network_utils import outbound_user_agent

    agent = outbound_user_agent("sar-products")
    return agent
|
||||
from services.sar.sar_normalize import (
|
||||
SarAnomaly,
|
||||
evidence_hash_for_payload,
|
||||
@@ -442,7 +447,7 @@ def _fetch_unosat_packages() -> list[dict[str, Any]]:
|
||||
# HDX CKAN returns 406 without explicit Accept + a browser-ish UA.
|
||||
hdx_headers = {
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker-SAR/1.0)",
|
||||
"User-Agent": _sar_user_agent(),
|
||||
}
|
||||
try:
|
||||
resp = fetch_with_curl(url, timeout=20, headers=hdx_headers)
|
||||
|
||||
@@ -14,6 +14,11 @@ class HealthResponse(BaseModel):
|
||||
# ({status, age_s, row_count, slo, stale, empty, description}).
|
||||
slo: Optional[Dict[str, Any]] = None
|
||||
slo_summary: Optional[Dict[str, int]] = None
|
||||
# Issue #258: AIS proxy status — currently exposes ``degraded_tls``
|
||||
# (bool), true when ais_proxy.js fell back to the SPKI-pinned
|
||||
# insecure-date path because the upstream Let's Encrypt cert is
|
||||
# expired. Empty dict / null means no status reported yet.
|
||||
ais_proxy: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class RefreshResponse(BaseModel):
|
||||
|
||||
@@ -11,12 +11,21 @@ import requests
|
||||
from datetime import datetime, timedelta
|
||||
from cachetools import TTLCache
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
|
||||
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
|
||||
|
||||
|
||||
def _planetary_user_agent() -> str:
    """Return the User-Agent sent to Microsoft Planetary Computer.

    Round 7a: the value carries the per-install handle so requests can be
    attributed to the specific operator rather than treating the whole
    Shadowbroker user base as one entity.
    """
    purpose = "sentinel2-planetary-computer"
    return outbound_user_agent(purpose)
|
||||
|
||||
|
||||
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
lat_span = 0.18
|
||||
lng_span = 0.24
|
||||
@@ -64,7 +73,7 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=8,
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard)"},
|
||||
headers={"User-Agent": _planetary_user_agent()},
|
||||
)
|
||||
search_res.raise_for_status()
|
||||
data = search_res.json()
|
||||
|
||||
@@ -20,7 +20,11 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SHODAN_BASE = "https://api.shodan.io"
|
||||
_USER_AGENT = "ShadowBroker/0.9.7 local Shodan connector"
|
||||
# Round 7a: per-install attribution. Shodan already has the operator API
|
||||
# key for billing, but the UA still identifies the install.
|
||||
def _shodan_user_agent() -> str:
    """Return the outbound User-Agent string for the ``shodan`` purpose."""
    # Function-scope import, matching the other per-service UA helpers.
    from services.network_utils import outbound_user_agent

    return outbound_user_agent("shodan")
|
||||
_REQUEST_TIMEOUT = 15
|
||||
_MIN_INTERVAL_SECONDS = 1.05 # Shodan docs say API plans are rate limited to ~1 req/sec.
|
||||
_DEFAULT_SEARCH_PAGES = 1
|
||||
@@ -179,7 +183,7 @@ def _request(path: str, *, params: dict[str, Any], cache: TTLCache[str, dict[str
|
||||
f"{_SHODAN_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
headers={"User-Agent": _shodan_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
@@ -22,6 +22,12 @@ from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.config import get_settings
|
||||
from services.meshtastic_mqtt_settings import (
|
||||
mqtt_bridge_enabled,
|
||||
mqtt_connection_config,
|
||||
mqtt_psk_hex,
|
||||
mqtt_subscription_settings,
|
||||
)
|
||||
from services.mesh.meshtastic_topics import all_available_roots, build_subscription_topics, known_roots, parse_topic_metadata
|
||||
|
||||
logger = logging.getLogger("services.sigint")
|
||||
@@ -477,22 +483,13 @@ class MeshtasticBridge:
|
||||
@staticmethod
|
||||
def _mqtt_config() -> tuple[str, int, str, str]:
|
||||
"""Return (broker, port, user, password) from settings."""
|
||||
try:
|
||||
s = get_settings()
|
||||
return (
|
||||
str(s.MESH_MQTT_BROKER or "mqtt.meshtastic.org"),
|
||||
int(s.MESH_MQTT_PORT or 1883),
|
||||
str(s.MESH_MQTT_USER or "meshdev"),
|
||||
str(s.MESH_MQTT_PASS or "large4cats"),
|
||||
)
|
||||
except Exception:
|
||||
return ("mqtt.meshtastic.org", 1883, "meshdev", "large4cats")
|
||||
return mqtt_connection_config()
|
||||
|
||||
@classmethod
|
||||
def _resolve_psk(cls) -> bytes:
|
||||
"""Return the PSK from config, or the default LongFast key if empty."""
|
||||
try:
|
||||
raw = str(getattr(get_settings(), "MESH_MQTT_PSK", "") or "").strip()
|
||||
raw = mqtt_psk_hex()
|
||||
except Exception:
|
||||
raw = ""
|
||||
if not raw:
|
||||
@@ -506,6 +503,11 @@ class MeshtasticBridge:
|
||||
self._thread: threading.Thread | None = None
|
||||
self._stop = threading.Event()
|
||||
self._client_id = self._build_client_id()
|
||||
self._connected = False
|
||||
self._last_error = ""
|
||||
self._last_connected_at = 0.0
|
||||
self._last_disconnected_at = 0.0
|
||||
self._last_broker = ""
|
||||
# Rate-limiter: sliding window of receive timestamps
|
||||
self._rx_timestamps: deque[float] = deque()
|
||||
self._rx_dropped = 0
|
||||
@@ -518,10 +520,11 @@ class MeshtasticBridge:
|
||||
second client connects with the same id. Using a fixed id made separate
|
||||
ShadowBroker instances kick each other off the broker.
|
||||
|
||||
Includes the app version so the Meshtastic team can track our footprint.
|
||||
This is deliberately not tied to the user's public mesh address or
|
||||
ShadowBroker node identity; it is only an MQTT session handle.
|
||||
"""
|
||||
suffix = uuid.uuid4().hex[:8]
|
||||
return f"sb096-{suffix}"
|
||||
return f"meshchat-{suffix}"
|
||||
|
||||
def _dedupe_message(
|
||||
self,
|
||||
@@ -542,9 +545,206 @@ class MeshtasticBridge:
|
||||
self._message_dedupe[key] = now
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _message_dedupe_key(message: dict) -> str:
|
||||
sender = str(message.get("from") or "???").strip().lower()
|
||||
recipient = str(message.get("to") or "broadcast").strip().lower()
|
||||
text = str(message.get("text") or "").strip()
|
||||
channel = str(message.get("channel") or "LongFast").strip().lower()
|
||||
root = str(message.get("root") or message.get("region") or "").strip().lower()
|
||||
if root == "us":
|
||||
root = "us"
|
||||
return f"{sender}:{recipient}:{root}:{channel}:{text}"
|
||||
|
||||
def append_text_message(self, message: dict, *, dedupe_window_s: float = 5.0) -> bool:
    """Append a Meshtastic text message unless it is a near-immediate echo.

    A copy of *message* is stored at the left end of ``self.messages`` with
    defaults filled in for ``to`` ("broadcast"), ``channel`` ("LongFast")
    and ``timestamp`` (current UTC time, ISO-8601 with a ``Z`` suffix).

    Only the first 40 entries of ``self.messages`` are checked. An entry is
    a duplicate when it has the same dedupe key and its timestamp is within
    ``max(1.0, dedupe_window_s)`` seconds of now. A missing or unparseable
    timestamp counts as "now", i.e. inside the window. On a duplicate, the
    existing entry's empty ``root`` / ``region`` are filled from the new
    message and nothing is appended.

    Returns:
        True if the message was appended; False if the text was empty or
        the message was judged a duplicate.
    """
    if not str(message.get("text") or "").strip():
        return False
    now = time.time()
    cutoff = now - max(1.0, dedupe_window_s)
    next_message = dict(message)
    next_message.setdefault("to", "broadcast")
    next_message.setdefault("channel", "LongFast")
    # datetime.utcnow() is deprecated; strip tzinfo so the stored string
    # keeps the exact "...Z" format it had before.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    next_message.setdefault("timestamp", utc_now.isoformat() + "Z")
    key = self._message_dedupe_key(next_message)
    # list() snapshots the log before slicing, so the loop below never
    # iterates the live container.
    for existing in list(self.messages)[:40]:
        if self._message_dedupe_key(existing) != key:
            continue
        try:
            existing_ts_raw = existing.get("timestamp")
            existing_ts = (
                datetime.fromisoformat(str(existing_ts_raw).replace("Z", "+00:00")).timestamp()
                if existing_ts_raw
                else now
            )
        except Exception:
            # Best effort: an unreadable timestamp is treated as fresh.
            existing_ts = now
        if existing_ts >= cutoff:
            if not existing.get("root") and next_message.get("root"):
                existing["root"] = next_message.get("root")
            if not existing.get("region") and next_message.get("region"):
                existing["region"] = next_message.get("region")
            return False
    self.messages.appendleft(next_message)
    return True
|
||||
|
||||
@staticmethod
|
||||
def _coerce_node_ref(value) -> str:
|
||||
"""Normalize Meshtastic node identifiers into the public !xxxxxxxx form."""
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, int):
|
||||
return f"!{value & 0xFFFFFFFF:08x}"
|
||||
raw = str(value).strip()
|
||||
if not raw:
|
||||
return ""
|
||||
if raw.startswith("!"):
|
||||
return raw
|
||||
lowered = raw.lower()
|
||||
if lowered.startswith("0x"):
|
||||
try:
|
||||
return f"!{int(lowered, 16) & 0xFFFFFFFF:08x}"
|
||||
except ValueError:
|
||||
return raw
|
||||
if raw.isdigit():
|
||||
try:
|
||||
return f"!{int(raw) & 0xFFFFFFFF:08x}"
|
||||
except ValueError:
|
||||
return raw
|
||||
if len(raw) == 8 and all(ch in "0123456789abcdefABCDEF" for ch in raw):
|
||||
return f"!{raw.lower()}"
|
||||
return raw
|
||||
|
||||
@staticmethod
|
||||
def _first_text_value(*values) -> str:
|
||||
for value in values:
|
||||
if isinstance(value, bytes):
|
||||
value = value.decode("utf-8", errors="replace")
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if text:
|
||||
return MeshtasticBridge._repair_text_mojibake(text)
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _repair_text_mojibake(text: str) -> str:
|
||||
"""Repair common UTF-8-as-Latin-1 mojibake from MQTT JSON bridges."""
|
||||
if not text or not any(marker in text for marker in ("Ã", "Ð", "Ñ")):
|
||||
return text
|
||||
try:
|
||||
repaired = text.encode("latin-1").decode("utf-8").strip()
|
||||
except UnicodeError:
|
||||
return text
|
||||
if repaired and repaired != text:
|
||||
return repaired
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def _first_present(*values):
|
||||
for value in values:
|
||||
if value is not None and value != "":
|
||||
return value
|
||||
return None
|
||||
|
||||
def _extract_json_text_message(self, data: dict, topic: str) -> dict | None:
|
||||
"""Extract a public Meshtastic text event from decoded MQTT JSON.
|
||||
|
||||
Meshtastic JSON brokers are not perfectly uniform. Some packets expose
|
||||
text at the top level, some under ``decoded`` or ``payload``. Keep this
|
||||
permissive for receive, but only return messages with non-empty text.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
topic_meta = parse_topic_metadata(topic)
|
||||
packet = data.get("packet") if isinstance(data.get("packet"), dict) else {}
|
||||
decoded = data.get("decoded") if isinstance(data.get("decoded"), dict) else {}
|
||||
payload_obj = data.get("payload")
|
||||
payload = payload_obj if isinstance(payload_obj, dict) else {}
|
||||
decoded_payload_obj = decoded.get("payload") if decoded else None
|
||||
decoded_payload = decoded_payload_obj if isinstance(decoded_payload_obj, dict) else {}
|
||||
|
||||
text = self._first_text_value(
|
||||
data.get("text"),
|
||||
data.get("message"),
|
||||
data.get("msg"),
|
||||
payload_obj if isinstance(payload_obj, str) else "",
|
||||
payload.get("text"),
|
||||
payload.get("message"),
|
||||
payload.get("msg"),
|
||||
payload.get("payload") if isinstance(payload.get("payload"), str) else "",
|
||||
decoded.get("text"),
|
||||
decoded.get("message"),
|
||||
decoded.get("payload") if isinstance(decoded.get("payload"), str) else "",
|
||||
decoded_payload.get("text"),
|
||||
decoded_payload.get("message"),
|
||||
decoded_payload.get("msg"),
|
||||
)
|
||||
if not text:
|
||||
return None
|
||||
|
||||
sender = self._coerce_node_ref(
|
||||
self._first_present(
|
||||
data.get("from"),
|
||||
data.get("fromId"),
|
||||
data.get("from_id"),
|
||||
data.get("sender"),
|
||||
data.get("senderId"),
|
||||
data.get("sender_id"),
|
||||
packet.get("from"),
|
||||
packet.get("fromId"),
|
||||
packet.get("from_id"),
|
||||
decoded.get("from"),
|
||||
)
|
||||
)
|
||||
recipient = self._coerce_node_ref(
|
||||
self._first_present(
|
||||
data.get("to"),
|
||||
data.get("toId"),
|
||||
data.get("to_id"),
|
||||
data.get("recipient"),
|
||||
data.get("recipientId"),
|
||||
data.get("recipient_id"),
|
||||
packet.get("to"),
|
||||
packet.get("toId"),
|
||||
packet.get("to_id"),
|
||||
decoded.get("to"),
|
||||
)
|
||||
)
|
||||
if not recipient or recipient in {"!ffffffff", "broadcast"}:
|
||||
recipient = "broadcast"
|
||||
|
||||
timestamp = datetime.utcnow().isoformat() + "Z"
|
||||
rx_time = self._first_present(
|
||||
data.get("rxTime"),
|
||||
data.get("rx_time"),
|
||||
data.get("timestamp"),
|
||||
packet.get("rxTime"),
|
||||
packet.get("timestamp"),
|
||||
)
|
||||
if isinstance(rx_time, (int, float)) and rx_time > 0:
|
||||
try:
|
||||
timestamp = datetime.fromtimestamp(float(rx_time), tz=timezone.utc).isoformat()
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
return {
|
||||
"from": sender or topic.split("/")[-1],
|
||||
"to": recipient,
|
||||
"text": text[:500],
|
||||
"region": topic_meta["region"],
|
||||
"root": topic_meta["root"],
|
||||
"channel": topic_meta["channel"],
|
||||
"timestamp": timestamp,
|
||||
}
|
||||
|
||||
def start(self):
|
||||
if self._thread and self._thread.is_alive():
|
||||
return
|
||||
if not self._stop.is_set():
|
||||
return
|
||||
self._thread.join(timeout=2.0)
|
||||
if self._thread.is_alive():
|
||||
logger.warning("Meshtastic MQTT bridge is still stopping; start deferred")
|
||||
return
|
||||
self._stop.clear()
|
||||
self._thread = threading.Thread(target=self._run, daemon=True, name="mesh-bridge")
|
||||
self._thread.start()
|
||||
@@ -552,13 +752,37 @@ class MeshtasticBridge:
|
||||
|
||||
def stop(self):
    """Set the stop event and mark the bridge as disconnected."""
    stop_event = self._stop
    stop_event.set()
    # Report "not connected" immediately.
    self._connected = False
|
||||
|
||||
def is_running(self) -> bool:
    """Return True while the worker thread is alive and no stop is pending."""
    worker = self._thread
    if not worker or not worker.is_alive():
        return False
    # A live thread with the stop event set is shutting down, not running.
    return not self._stop.is_set()
|
||||
|
||||
def status(self) -> dict:
    """Return a snapshot of the bridge's configuration and connection state.

    The MQTT password is never included. The username is reported as an
    empty string when it equals ``"meshdev"``.
    """
    broker, port, user, _password = self._mqtt_config()
    if user == "meshdev":
        display_user = ""
    else:
        display_user = user
    snapshot = {
        "enabled": mqtt_bridge_enabled(),
        "running": self.is_running(),
        "connected": bool(self._connected),
        "broker": broker,
        "port": port,
        "username": display_user,
        "client_id": self._client_id,
        "message_log_size": len(self.messages),
        "signal_log_size": len(self.signals),
        "last_error": self._last_error,
        "last_broker": self._last_broker,
        "last_connected_at": self._last_connected_at,
        "last_disconnected_at": self._last_disconnected_at,
        "rx_dropped": self._rx_dropped,
    }
    return snapshot
|
||||
|
||||
def _subscription_topics(self) -> list[str]:
|
||||
settings = get_settings()
|
||||
extra_roots, extra_topics, include_defaults = mqtt_subscription_settings()
|
||||
return build_subscription_topics(
|
||||
extra_roots=str(getattr(settings, "MESH_MQTT_EXTRA_ROOTS", "") or ""),
|
||||
extra_topics=str(getattr(settings, "MESH_MQTT_EXTRA_TOPICS", "") or ""),
|
||||
include_defaults=bool(getattr(settings, "MESH_MQTT_INCLUDE_DEFAULT_ROOTS", True)),
|
||||
extra_roots=extra_roots,
|
||||
extra_topics=extra_topics,
|
||||
include_defaults=include_defaults,
|
||||
)
|
||||
|
||||
def _run(self):
|
||||
@@ -582,6 +806,9 @@ class MeshtasticBridge:
|
||||
|
||||
def _on_connect(client, userdata, flags, rc):
|
||||
if rc == 0:
|
||||
self._connected = True
|
||||
self._last_error = ""
|
||||
self._last_connected_at = time.time()
|
||||
logger.info(
|
||||
"Meshtastic MQTT connected (%s), subscribing to %s",
|
||||
self._client_id,
|
||||
@@ -590,6 +817,8 @@ class MeshtasticBridge:
|
||||
for topic in topics:
|
||||
client.subscribe(topic, qos=0)
|
||||
else:
|
||||
self._connected = False
|
||||
self._last_error = f"connect_refused:{rc}"
|
||||
logger.error(
|
||||
"Meshtastic MQTT connection refused (%s): rc=%s",
|
||||
self._client_id,
|
||||
@@ -597,7 +826,10 @@ class MeshtasticBridge:
|
||||
)
|
||||
|
||||
def _on_disconnect(client, userdata, rc):
|
||||
self._connected = False
|
||||
self._last_disconnected_at = time.time()
|
||||
if rc != 0:
|
||||
self._last_error = f"disconnect:{rc}"
|
||||
logger.warning(
|
||||
"Meshtastic MQTT disconnected unexpectedly (%s, rc=%s), will auto-reconnect",
|
||||
self._client_id,
|
||||
@@ -607,6 +839,7 @@ class MeshtasticBridge:
|
||||
logger.info("Meshtastic MQTT disconnected cleanly (%s)", self._client_id)
|
||||
|
||||
broker, port, user, pw = self._mqtt_config()
|
||||
self._last_broker = f"{broker}:{port}"
|
||||
client = mqtt.Client(client_id=self._client_id, protocol=mqtt.MQTTv311)
|
||||
client.username_pw_set(user, pw)
|
||||
client.on_connect = _on_connect
|
||||
@@ -645,9 +878,6 @@ class MeshtasticBridge:
|
||||
def _on_message(self, client, userdata, msg):
|
||||
"""Parse Meshtastic MQTT messages — protobuf + AES decryption."""
|
||||
try:
|
||||
if self._rate_limited():
|
||||
return
|
||||
|
||||
payload = msg.payload
|
||||
topic = msg.topic
|
||||
|
||||
@@ -655,6 +885,11 @@ class MeshtasticBridge:
|
||||
if "/json/" in topic:
|
||||
try:
|
||||
data = json.loads(payload)
|
||||
text_message = self._extract_json_text_message(data, topic)
|
||||
if text_message:
|
||||
self.append_text_message(text_message, dedupe_window_s=30.0)
|
||||
if self._rate_limited():
|
||||
return
|
||||
self._ingest_data(data, topic)
|
||||
return
|
||||
except (json.JSONDecodeError, UnicodeDecodeError):
|
||||
@@ -675,7 +910,7 @@ class MeshtasticBridge:
|
||||
topic_meta["root"],
|
||||
):
|
||||
return
|
||||
self.messages.appendleft(
|
||||
self.append_text_message(
|
||||
{
|
||||
"from": data.get("from", "???"),
|
||||
"to": recipient,
|
||||
@@ -687,6 +922,8 @@ class MeshtasticBridge:
|
||||
}
|
||||
)
|
||||
else:
|
||||
if self._rate_limited():
|
||||
return
|
||||
self._ingest_data(data, topic)
|
||||
|
||||
except Exception as e:
|
||||
@@ -1011,7 +1248,7 @@ class SIGINTGrid:
|
||||
self._started = True
|
||||
self.aprs.start()
|
||||
try:
|
||||
mqtt_enabled = bool(getattr(get_settings(), "MESH_MQTT_ENABLED", False))
|
||||
mqtt_enabled = mqtt_bridge_enabled()
|
||||
except Exception:
|
||||
mqtt_enabled = False
|
||||
if mqtt_enabled:
|
||||
@@ -1123,13 +1360,12 @@ class SIGINTGrid:
|
||||
ch = msg.get("channel", "LongFast")
|
||||
channel_msgs[ch] = channel_msgs.get(ch, 0) + 1
|
||||
|
||||
extra_roots, _extra_topics, include_defaults = mqtt_subscription_settings()
|
||||
|
||||
return {
|
||||
"regions": regions,
|
||||
"roots": roots,
|
||||
"known_roots": known_roots(
|
||||
str(getattr(get_settings(), "MESH_MQTT_EXTRA_ROOTS", "") or ""),
|
||||
include_defaults=bool(getattr(get_settings(), "MESH_MQTT_INCLUDE_DEFAULT_ROOTS", True)),
|
||||
),
|
||||
"known_roots": known_roots(extra_roots, include_defaults=include_defaults),
|
||||
"all_roots": all_available_roots(),
|
||||
"channel_messages": channel_msgs,
|
||||
"total_nodes": len(seen_callsigns),
|
||||
|
||||
@@ -19,6 +19,13 @@ from pathlib import Path
|
||||
import requests
|
||||
from sgp4.api import Satrec, WGS72, jday
|
||||
|
||||
|
||||
|
||||
def _tinygs_user_agent(purpose: str) -> str:
    """Round 7a: per-install handle for CelesTrak / TinyGS attribution.

    *purpose* is appended to the ``tinygs-`` prefix to form the purpose
    label passed to ``outbound_user_agent``.
    """
    from services.network_utils import outbound_user_agent

    label = f"tinygs-{purpose}"
    return outbound_user_agent(label)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -113,7 +120,7 @@ def _fetch_celestrak_tles() -> list[dict]:
|
||||
params={"GROUP": group, "FORMAT": "json"},
|
||||
timeout=20,
|
||||
headers={
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (CelesTrak fair-use)",
|
||||
"User-Agent": _tinygs_user_agent("celestrak"),
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
@@ -259,7 +266,7 @@ def _fetch_tinygs_telemetry() -> None:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
"User-Agent": _tinygs_user_agent("tinygs"),
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
"""Tor Hidden Service auto-provisioner.
|
||||
"""Tor hidden-service auto-provisioner.
|
||||
|
||||
Manages a Tor hidden service that points to the local ShadowBroker backend.
|
||||
Tor is started as a subprocess with a generated torrc — no manual config needed.
|
||||
Auto-installs the Tor Expert Bundle on Windows if not present.
|
||||
|
||||
Usage:
|
||||
from services.tor_hidden_service import tor_service
|
||||
status = tor_service.start() # -> {"ok": True, "onion_address": "http://xxxx.onion:8000"}
|
||||
tor_service.stop()
|
||||
Tor is started as a subprocess with a generated torrc. Windows source installs
|
||||
can download the Tor Expert Bundle into backend/data without admin rights.
|
||||
Docker images should already include the `tor` package.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -31,31 +27,33 @@ HOSTNAME_PATH = TOR_DIR / "hidden_service" / "hostname"
|
||||
TOR_DATA_DIR = TOR_DIR / "data"
|
||||
PIDFILE_PATH = TOR_DIR / "tor.pid"
|
||||
|
||||
# Bundled Tor install location (inside our data dir so no admin rights needed)
|
||||
# Bundled Tor install location (inside data dir so no admin rights are needed).
|
||||
TOR_INSTALL_DIR = TOR_DIR / "tor_bin"
|
||||
|
||||
# How long to wait for Tor to generate the hostname file
|
||||
_STARTUP_TIMEOUT_S = 90
|
||||
_POLL_INTERVAL_S = 1.0
|
||||
|
||||
# Tor Expert Bundle download URL (Windows x86_64)
|
||||
_TOR_EXPERT_BUNDLE_URL = "https://dist.torproject.org/torbrowser/15.0.8/tor-expert-bundle-windows-x86_64-15.0.8.tar.gz"
|
||||
# Windows x86_64 Tor Expert Bundle URLs. Keep a fallback so first-run
|
||||
# onboarding does not break when Tor rotates point releases.
|
||||
_TOR_EXPERT_BUNDLE_URLS = [
|
||||
"https://dist.torproject.org/torbrowser/15.0.11/tor-expert-bundle-windows-x86_64-15.0.11.tar.gz",
|
||||
"https://dist.torproject.org/torbrowser/15.0.8/tor-expert-bundle-windows-x86_64-15.0.8.tar.gz",
|
||||
]
|
||||
|
||||
|
||||
def _find_tor_binary() -> str | None:
|
||||
"""Locate the tor binary on the system, including our bundled install."""
|
||||
# Check our bundled install first
|
||||
bundled = TOR_INSTALL_DIR / "tor" / "tor.exe"
|
||||
if bundled.exists():
|
||||
return str(bundled)
|
||||
# Also check for extracted layout variants
|
||||
|
||||
for sub in TOR_INSTALL_DIR.rglob("tor.exe"):
|
||||
return str(sub)
|
||||
|
||||
tor = shutil.which("tor")
|
||||
if tor:
|
||||
return tor
|
||||
# Common locations on Windows
|
||||
|
||||
for candidate in [
|
||||
r"C:\Program Files\Tor Browser\Browser\TorBrowser\Tor\tor.exe",
|
||||
r"C:\Program Files (x86)\Tor Browser\Browser\TorBrowser\Tor\tor.exe",
|
||||
@@ -66,79 +64,251 @@ def _find_tor_binary() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _auto_install_tor() -> str | None:
|
||||
"""Download and extract the Tor Expert Bundle. Returns path to tor binary or None."""
|
||||
if os.name != "nt":
|
||||
# On Linux/Mac, try package manager
|
||||
try:
|
||||
if shutil.which("apt-get"):
|
||||
subprocess.run(["sudo", "apt-get", "install", "-y", "tor"], check=True, capture_output=True, timeout=120)
|
||||
elif shutil.which("brew"):
|
||||
subprocess.run(["brew", "install", "tor"], check=True, capture_output=True, timeout=120)
|
||||
elif shutil.which("pacman"):
|
||||
subprocess.run(["sudo", "pacman", "-S", "--noconfirm", "tor"], check=True, capture_output=True, timeout=120)
|
||||
else:
|
||||
logger.warning("No supported package manager found for auto-install")
|
||||
return None
|
||||
return shutil.which("tor")
|
||||
except Exception as exc:
|
||||
logger.error("Failed to auto-install Tor via package manager: %s", exc)
|
||||
return None
|
||||
# Baked-in expected digest list. Loaded lazily; populated by maintainers
|
||||
# when a new Tor Expert Bundle URL is added to _TOR_EXPERT_BUNDLE_URLS.
|
||||
# See issue #201 for rationale.
|
||||
_TOR_DIGEST_FILE = Path(__file__).resolve().parent.parent / "data" / "tor_bundle_digests.json"
|
||||
_DIGEST_PLACEHOLDER = "PLACEHOLDER_REPLACE_BEFORE_RELEASE"
|
||||
|
||||
# Windows: download Tor Expert Bundle (no admin needed)
|
||||
TOR_INSTALL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
archive_path = TOR_INSTALL_DIR / "tor-expert-bundle.tar.gz"
|
||||
|
||||
def _load_baked_in_digests() -> dict[str, str]:
    """Return {url: expected_sha256_lower} for URLs we ship a known digest for.

    Entries whose value is the placeholder sentinel are filtered out — they
    represent versions the maintainer has not yet pinned, and we don't
    want to trust them via this layer. Keys starting with ``_`` and
    non-string values are skipped as well.

    Returns an empty dict when the digest file is missing, unreadable, not
    valid JSON, or not a JSON object.
    """
    if not _TOR_DIGEST_FILE.exists():
        return {}
    try:
        import json as _json

        raw = _json.loads(_TOR_DIGEST_FILE.read_text(encoding="utf-8"))
    except Exception as exc:
        logger.warning("Tor bundle digests file unreadable: %s", exc)
        return {}
    if not isinstance(raw, dict):
        # A list or scalar at the top level would crash on .items() below;
        # treat it like any other unusable digest file.
        logger.warning(
            "Tor bundle digests file ignored: expected a JSON object, got %s",
            type(raw).__name__,
        )
        return {}
    return {
        url: digest.strip().lower()
        for url, digest in raw.items()
        if isinstance(url, str)
        and not url.startswith("_")
        and isinstance(digest, str)
        and digest != _DIGEST_PLACEHOLDER
    }
|
||||
|
||||
|
||||
def _verify_tor_bundle(archive_path: Path, bundle_url: str) -> tuple[bool, str]:
    """Verify the downloaded Tor bundle against any source we trust.

    Returns (verified, reason). The bundle is considered verified if EITHER:

    * The upstream ``.sha256sum`` file is reachable AND its digest matches
      what we just downloaded, OR
    * Our baked-in digest list (``backend/data/tor_bundle_digests.json``)
      contains this URL AND that digest matches.

    If at least one digest source is available and none matches, the bundle
    is rejected.

    If both sources are unavailable (e.g. fresh checkout before the
    maintainer has populated the digest file AND the upstream
    ``.sha256sum`` is unreachable), we **fall back to HTTPS-only trust**
    with a warning so first-run onboarding does not break. As soon as the
    digest file is populated for a shipped Tor version, the secure path
    activates automatically — no operator action required.

    Note that the upstream digest is fetched from ``bundle_url`` plus
    ``.sha256sum``, i.e. the same location as the bundle itself.

    Issue #201.
    """
    import hashlib

    # Hash in chunks so the whole archive is never held in memory at once.
    hasher = hashlib.sha256()
    with archive_path.open("rb") as archive:
        for chunk in iter(lambda: archive.read(1024 * 1024), b""):
            hasher.update(chunk)
    actual_hash = hasher.hexdigest().lower()

    # Source 1: upstream .sha256sum
    upstream_hash: str | None = None
    sha256_url = bundle_url + ".sha256sum"
    sha256_file = TOR_INSTALL_DIR / "sha256sum.txt"
    try:
        urlretrieve(sha256_url, str(sha256_file))
        upstream_hash = sha256_file.read_text().strip().split()[0].lower()
    except Exception as hash_err:
        logger.info("Tor bundle upstream .sha256sum unreachable: %s", hash_err)
    finally:
        # The temporary digest file is removed on success and failure alike.
        sha256_file.unlink(missing_ok=True)

    if upstream_hash and upstream_hash == actual_hash:
        return True, f"verified via upstream .sha256sum ({actual_hash[:16]}...)"

    # Source 2: baked-in digest list
    baked = _load_baked_in_digests()
    baked_hash = baked.get(bundle_url)
    if baked_hash and baked_hash == actual_hash:
        return True, f"verified via baked-in digest list ({actual_hash[:16]}...)"

    # From here on, any digest we hold is known NOT to match the archive.
    if upstream_hash and baked_hash:
        return False, (
            f"SHA-256 mismatch: archive={actual_hash[:16]}..., "
            f"upstream={upstream_hash[:16]}..., baked={baked_hash[:16]}..."
        )
    if upstream_hash:
        return False, (
            f"SHA-256 mismatch vs upstream: archive={actual_hash[:16]}..., "
            f"upstream={upstream_hash[:16]}..."
        )
    if baked_hash:
        return False, (
            f"SHA-256 mismatch vs baked-in digest: archive={actual_hash[:16]}..., "
            f"expected={baked_hash[:16]}..."
        )

    # Neither verification source available. This is the fallback path for
    # the case where the upstream .sha256sum is temporarily unreachable
    # AND the maintainer hasn't yet pinned this Tor version. Trust HTTPS
    # only (current behavior pre-#201) with a clear warning. Onboarding
    # works; once we populate the digest file, the secure path activates.
    logger.warning(
        "Tor bundle integrity check fell back to HTTPS-only trust "
        "(upstream .sha256sum unreachable AND no baked-in digest for %s). "
        "Add this URL's SHA-256 to backend/data/tor_bundle_digests.json "
        "to enable the secure path.",
        bundle_url,
    )
    return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)"
|
||||
|
||||
|
||||
def _extract_tor_bundle_safely(archive_path: Path, install_dir: Path) -> bool:
|
||||
"""Extract a Tor Expert Bundle tar.gz safely.
|
||||
|
||||
Issue #251: the previous extractor checked tarinfo.name against path
|
||||
traversal but never inspected tarinfo.linkname for symlink/hardlink
|
||||
members. Python 3.11's tarfile honors symlinks during extractall(),
|
||||
so a malicious archive could ship a member like::
|
||||
|
||||
name = "innocent.txt" # passes the path check
|
||||
type = SYMTYPE
|
||||
linkname = "C:\\Windows\\System32\\config\\system"
|
||||
|
||||
and extractall() would then create that symlink. Subsequent reads
|
||||
of innocent.txt dereference to a sensitive system file; subsequent
|
||||
writes corrupt one. Tor bundles never legitimately contain symlinks
|
||||
or hardlinks, so we refuse all link members categorically rather
|
||||
than trying to validate linkname targets (which has its own pitfalls
|
||||
around relative path resolution).
|
||||
|
||||
Also refuses non-regular-non-directory members (devices, FIFOs,
|
||||
character/block special files) for completeness — none of those
|
||||
belong in a Tor Expert Bundle and accepting them is a category of
|
||||
bug we don't need to debug later.
|
||||
|
||||
Returns True on success, False on rejection (and logs the reason).
|
||||
The caller is responsible for cleaning up the archive file.
|
||||
"""
|
||||
import tarfile
|
||||
|
||||
install_resolved = install_dir.resolve()
|
||||
|
||||
try:
|
||||
logger.info("Downloading Tor Expert Bundle over HTTPS from dist.torproject.org...")
|
||||
urlretrieve(_TOR_EXPERT_BUNDLE_URL, str(archive_path))
|
||||
|
||||
# Verify SHA-256 of the downloaded archive
|
||||
sha256_url = _TOR_EXPERT_BUNDLE_URL + ".sha256sum"
|
||||
sha256_file = TOR_INSTALL_DIR / "sha256sum.txt"
|
||||
try:
|
||||
urlretrieve(sha256_url, str(sha256_file))
|
||||
expected_hash = sha256_file.read_text().strip().split()[0].lower()
|
||||
import hashlib
|
||||
actual_hash = hashlib.sha256(archive_path.read_bytes()).hexdigest().lower()
|
||||
sha256_file.unlink(missing_ok=True)
|
||||
if actual_hash != expected_hash:
|
||||
logger.error("SHA-256 MISMATCH — download may be compromised! Expected %s, got %s", expected_hash, actual_hash)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
logger.info("SHA-256 verified: %s", actual_hash[:16] + "...")
|
||||
except Exception as hash_err:
|
||||
# If we can't fetch the hash file, warn but proceed (HTTPS provides baseline integrity)
|
||||
logger.warning("Could not verify SHA-256 (hash file unavailable): %s — proceeding with HTTPS-only verification", hash_err)
|
||||
|
||||
logger.info("Download complete, extracting...")
|
||||
|
||||
# Extract .tar.gz with path traversal protection
|
||||
import tarfile
|
||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||
for member in tar.getmembers():
|
||||
member_path = (TOR_INSTALL_DIR / member.name).resolve()
|
||||
if not str(member_path).startswith(str(TOR_INSTALL_DIR.resolve())):
|
||||
logger.error("Tar path traversal blocked: %s", member.name)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
tar.extractall(path=str(TOR_INSTALL_DIR))
|
||||
# Reject anything that isn't a regular file or directory.
|
||||
# Symlinks (SYMTYPE) and hardlinks (LNKTYPE) are the
|
||||
# path-traversal vectors; the others (CHRTYPE, BLKTYPE,
|
||||
# FIFOTYPE, CONTTYPE) have no legitimate use in a Tor
|
||||
# Expert Bundle.
|
||||
if member.issym() or member.islnk():
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: link member %s -> %s "
|
||||
"(symlinks/hardlinks are not allowed in Tor bundles; "
|
||||
"this archive is malformed or hostile)",
|
||||
member.name,
|
||||
member.linkname,
|
||||
)
|
||||
return False
|
||||
if not (member.isfile() or member.isdir()):
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: unexpected member type "
|
||||
"for %s (only regular files and directories are allowed)",
|
||||
member.name,
|
||||
)
|
||||
return False
|
||||
|
||||
# Clean up archive
|
||||
archive_path.unlink(missing_ok=True)
|
||||
# Path traversal check (preserves the original guard).
|
||||
try:
|
||||
member_path = (install_dir / member.name).resolve()
|
||||
except OSError as exc:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: cannot resolve member "
|
||||
"path %s: %s",
|
||||
member.name,
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
try:
|
||||
member_path.relative_to(install_resolved)
|
||||
except ValueError:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: path traversal on %s "
|
||||
"(resolves to %s, outside install dir %s)",
|
||||
member.name,
|
||||
member_path,
|
||||
install_resolved,
|
||||
)
|
||||
return False
|
||||
|
||||
# Find the tor.exe in extracted files
|
||||
for p in TOR_INSTALL_DIR.rglob("tor.exe"):
|
||||
logger.info("Tor installed at: %s", p)
|
||||
return str(p)
|
||||
# All members validated — extract.
|
||||
tar.extractall(path=str(install_dir))
|
||||
except tarfile.TarError as exc:
|
||||
logger.error("Tor bundle extraction failed: malformed tar (%s)", exc)
|
||||
return False
|
||||
|
||||
logger.error("tor.exe not found after extraction")
|
||||
return None
|
||||
except Exception as exc:
|
||||
logger.error("Failed to download/extract Tor: %s", exc)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return True
|
||||
|
||||
|
||||
def _auto_install_tor() -> str | None:
|
||||
"""Install or download Tor when it is safe to do so."""
|
||||
if os.name != "nt":
|
||||
# In Docker this should already be baked into the image. For source
|
||||
# installs we avoid unattended sudo prompts from a web request path.
|
||||
logger.warning("Tor is not installed. Install the tor package or use the Docker image with Tor baked in.")
|
||||
return None
|
||||
|
||||
TOR_INSTALL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
for bundle_url in _TOR_EXPERT_BUNDLE_URLS:
|
||||
archive_path = TOR_INSTALL_DIR / "tor-expert-bundle.tar.gz"
|
||||
try:
|
||||
logger.info("Downloading Tor Expert Bundle over HTTPS from %s...", bundle_url)
|
||||
urlretrieve(bundle_url, str(archive_path))
|
||||
|
||||
# Issue #201: multi-source verification. If neither upstream
|
||||
# .sha256sum nor a baked-in digest matches, we refuse this URL
|
||||
# and try the next one in _TOR_EXPERT_BUNDLE_URLS. If neither
|
||||
# source is reachable at all, we fall back to HTTPS-only trust
|
||||
# (current behavior) rather than blocking onboarding.
|
||||
verified, reason = _verify_tor_bundle(archive_path, bundle_url)
|
||||
if not verified:
|
||||
logger.error("Tor bundle verification failed for %s: %s", bundle_url, reason)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
continue
|
||||
logger.info("Tor bundle %s", reason)
|
||||
|
||||
logger.info("Download complete, extracting...")
|
||||
import tarfile
|
||||
|
||||
if not _extract_tor_bundle_safely(archive_path, TOR_INSTALL_DIR):
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
|
||||
archive_path.unlink(missing_ok=True)
|
||||
|
||||
for p in TOR_INSTALL_DIR.rglob("tor.exe"):
|
||||
logger.info("Tor installed at: %s", p)
|
||||
return str(p)
|
||||
|
||||
logger.error("tor.exe not found after extracting %s", bundle_url)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to download/extract Tor from %s: %s", bundle_url, exc)
|
||||
finally:
|
||||
archive_path.unlink(missing_ok=True)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class TorHiddenService:
|
||||
"""Manages a Tor hidden service subprocess."""
|
||||
@@ -150,7 +320,6 @@ class TorHiddenService:
|
||||
self._running = False
|
||||
self._error: str = ""
|
||||
|
||||
# Check if we already have a hostname from a previous run
|
||||
if HOSTNAME_PATH.exists():
|
||||
try:
|
||||
hostname = HOSTNAME_PATH.read_text().strip()
|
||||
@@ -198,19 +367,20 @@ class TorHiddenService:
|
||||
self._error = ""
|
||||
tor_bin = _find_tor_binary()
|
||||
if not tor_bin:
|
||||
logger.info("Tor not found, attempting auto-install...")
|
||||
logger.info("Tor not found, attempting bootstrap...")
|
||||
tor_bin = _auto_install_tor()
|
||||
if not tor_bin:
|
||||
self._error = "Failed to auto-install Tor. Please install it manually."
|
||||
self._error = (
|
||||
"Could not prepare Tor automatically. Check network access to dist.torproject.org "
|
||||
"or install Tor, then try again."
|
||||
)
|
||||
return {"ok": False, "detail": self._error}
|
||||
|
||||
# Create directories
|
||||
TOR_DIR.mkdir(parents=True, exist_ok=True)
|
||||
TOR_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
hidden_service_dir = TOR_DIR / "hidden_service"
|
||||
hidden_service_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# On non-Windows, Tor requires strict permissions on HiddenServiceDir
|
||||
if os.name != "nt":
|
||||
try:
|
||||
os.chmod(str(hidden_service_dir), 0o700)
|
||||
@@ -218,19 +388,15 @@ class TorHiddenService:
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Write torrc — enables both hidden service (inbound) and SOCKS proxy
|
||||
# (outbound) so the mesh/wormhole system can route node-to-node
|
||||
# traffic through Tor as well.
|
||||
torrc_content = (
|
||||
f"DataDirectory {TOR_DATA_DIR.as_posix()}\n"
|
||||
f"HiddenServiceDir {hidden_service_dir.as_posix()}\n"
|
||||
f"HiddenServicePort {target_port} 127.0.0.1:{target_port}\n"
|
||||
f"SocksPort 9050\n"
|
||||
f"Log notice stderr\n"
|
||||
"SocksPort 9050\n"
|
||||
"Log notice stderr\n"
|
||||
)
|
||||
TORRC_PATH.write_text(torrc_content, encoding="utf-8")
|
||||
|
||||
# Start Tor
|
||||
try:
|
||||
self._process = subprocess.Popen(
|
||||
[tor_bin, "-f", str(TORRC_PATH)],
|
||||
@@ -245,15 +411,12 @@ class TorHiddenService:
|
||||
logger.error(self._error)
|
||||
return {"ok": False, "detail": self._error}
|
||||
|
||||
# Wait for hostname file to appear
|
||||
deadline = time.monotonic() + _STARTUP_TIMEOUT_S
|
||||
while time.monotonic() < deadline:
|
||||
if self._process.poll() is not None:
|
||||
# Tor exited prematurely
|
||||
stdout = self._process.stdout.read() if self._process.stdout else ""
|
||||
self._error = f"Tor exited with code {self._process.returncode}"
|
||||
if stdout:
|
||||
# Get last few lines for error context
|
||||
lines = stdout.strip().split("\n")
|
||||
self._error += ": " + " | ".join(lines[-3:])
|
||||
self._running = False
|
||||
@@ -273,7 +436,6 @@ class TorHiddenService:
|
||||
|
||||
time.sleep(_POLL_INTERVAL_S)
|
||||
|
||||
# Timeout
|
||||
self._error = f"Tor did not generate hostname within {_STARTUP_TIMEOUT_S}s"
|
||||
self.stop()
|
||||
return {"ok": False, "detail": self._error}
|
||||
@@ -292,10 +454,7 @@ class TorHiddenService:
|
||||
pass
|
||||
self._process = None
|
||||
self._running = False
|
||||
# Keep the onion_address — it persists across restarts
|
||||
# since the key is stored in hidden_service_dir
|
||||
return {"ok": True, "detail": "stopped"}
|
||||
|
||||
|
||||
# Singleton
|
||||
tor_service = TorHiddenService()
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def disable_public_mesh_lane(*, reason: str = "private_lane_enabled") -> dict[str, Any]:
    """Turn off the public Meshtastic MQTT lane and report what was done.

    Two independent steps are attempted: persisting ``enabled=False`` through
    ``write_meshtastic_mqtt_settings`` and stopping ``sigint_grid.mesh`` when
    it reports itself as running. A failure in either step is logged, recorded
    in the result, and does not prevent the other step from running.

    Scheduled Wormhole prewarm reasons (``"wormhole_scheduled_prewarm"`` or
    anything ending in ``":scheduled_prewarm"``, compared case-insensitively
    after trimming) are special-cased: if the MQTT bridge is currently enabled,
    nothing is changed and the result carries ``skipped=True``.

    Args:
        reason: Label describing why the lane is being disabled; used in log
            messages and echoed back in the result.

    Returns:
        A dict with ``ok``, ``reason``, ``settings_disabled`` and
        ``runtime_stopped``. ``skipped``/``skip_reason`` are added on the
        prewarm short-circuit; ``settings_error``/``runtime_error`` are added
        (and ``ok`` set to ``False``) when the corresponding step raises.
    """
    outcome: dict[str, Any] = {
        "ok": True,
        "reason": reason,
        "settings_disabled": False,
        "runtime_stopped": False,
    }

    # A scheduled prewarm must never tear down a public MeshChat session the
    # user turned on explicitly; only a deliberate private-lane activation
    # is allowed to sever the public MQTT lane.
    reason_key = str(reason or "").strip().lower()
    is_scheduled_prewarm = (
        reason_key == "wormhole_scheduled_prewarm"
        or reason_key.endswith(":scheduled_prewarm")
    )
    if is_scheduled_prewarm:
        try:
            from services.meshtastic_mqtt_settings import mqtt_bridge_enabled

            if mqtt_bridge_enabled():
                logger.info("Keeping public Mesh lane active during Wormhole prewarm: %s", reason)
                outcome["skipped"] = True
                outcome["skip_reason"] = "public_mesh_user_enabled"
                return outcome
        except Exception as exc:
            # Best effort: if the state cannot be inspected, fall through and
            # disable the lane as for any other reason.
            logger.debug("Could not inspect public Mesh state during %s: %s", reason, exc)

    logger.info("Disabling public Mesh lane: %s", reason)

    # Step 1: persist the disabled flag.
    try:
        from services.meshtastic_mqtt_settings import write_meshtastic_mqtt_settings

        written = write_meshtastic_mqtt_settings(enabled=False)
        outcome["settings_disabled"] = not bool(written.get("enabled"))
    except Exception as exc:
        logger.warning("Failed to disable public Mesh settings during %s: %s", reason, exc)
        outcome["ok"] = False
        outcome["settings_error"] = str(exc)

    # Step 2: stop the live runtime, then re-query it for the reported state.
    try:
        from services.sigint_bridge import sigint_grid

        if sigint_grid.mesh.is_running():
            sigint_grid.mesh.stop()
        outcome["runtime_stopped"] = not sigint_grid.mesh.is_running()
    except Exception as exc:
        logger.warning("Failed to stop public Mesh runtime during %s: %s", reason, exc)
        outcome["ok"] = False
        outcome["runtime_error"] = str(exc)

    return outcome
|
||||
@@ -24,7 +24,9 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_FINNHUB_BASE = "https://finnhub.io/api/v1"
|
||||
_USER_AGENT = "ShadowBroker/0.9.7 Finnhub connector"
|
||||
def _finnhub_user_agent():
    """Return the User-Agent header value for outbound Finnhub requests.

    Delegates to ``services.network_utils.outbound_user_agent`` with the
    ``"finnhub"`` label. The helper is imported at call time rather than at
    module import time.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    agent = _build_user_agent("finnhub")
    return agent
|
||||
_REQUEST_TIMEOUT = 12
|
||||
_MIN_INTERVAL_SECONDS = 0.35 # Stay well under 60 calls/min
|
||||
|
||||
@@ -89,7 +91,7 @@ def _request(path: str, params: dict[str, Any] | None = None) -> Any:
|
||||
f"{_FINNHUB_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
headers={"User-Agent": _finnhub_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
+232
-14
@@ -6,9 +6,11 @@ Public API:
|
||||
schedule_restart(project_root) (spawn detached start script, then exit)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
@@ -29,6 +31,19 @@ DOCKER_UPDATE_COMMANDS = (
|
||||
"docker compose pull && docker compose up -d"
|
||||
)
|
||||
|
||||
# Issue #231: baked-in release digests. Loaded lazily, used as a fallback
|
||||
# verification source when the release's SHA256SUMS.txt asset can't be
|
||||
# fetched (e.g. transient network failure during update).
|
||||
_RELEASE_DIGESTS_FILE = (
|
||||
Path(__file__).resolve().parent.parent / "data" / "release_digests.json"
|
||||
)
|
||||
# Pattern for the maintainer's signed source-archive release asset. This
|
||||
# is the file we prefer over the auto-generated ``zipball_url`` because
|
||||
# the maintainer's build process publishes it with a matching entry in
|
||||
# SHA256SUMS.txt — the zipball does not have a signed digest.
|
||||
_SOURCE_ASSET_PATTERN = re.compile(r"^ShadowBroker_v\d", re.IGNORECASE)
|
||||
_SHA256SUMS_ASSET_NAME = "SHA256SUMS.txt"
|
||||
|
||||
|
||||
def _is_docker() -> bool:
|
||||
"""Detect if we're running inside a Docker container."""
|
||||
@@ -40,7 +55,6 @@ def _is_docker() -> bool:
|
||||
except (FileNotFoundError, PermissionError):
|
||||
pass
|
||||
return os.environ.get("container") == "docker"
|
||||
_EXPECTED_SHA256 = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
|
||||
_ALLOWED_UPDATE_HOSTS = {
|
||||
"api.github.com",
|
||||
"codeload.github.com",
|
||||
@@ -119,7 +133,16 @@ def _validate_update_url(url: str, *, allow_release_page: bool = False) -> str:
|
||||
# ---------------------------------------------------------------------------
|
||||
def _download_release(temp_dir: str) -> tuple:
|
||||
"""Fetch latest release info and download the source zip archive.
|
||||
Returns (zip_path, version_tag, download_url, release_url).
|
||||
|
||||
Issue #231: prefer the maintainer's signed release asset (matching
|
||||
``ShadowBroker_v*.zip``) over the auto-generated ``zipball_url``,
|
||||
because the maintainer's release process publishes a matching entry
|
||||
in SHA256SUMS.txt for the named asset but NOT for the zipball.
|
||||
|
||||
Returns (zip_path, version_tag, download_url, release_url, asset_name,
|
||||
sha256sums_url) — the last two are empty strings when the release
|
||||
doesn't publish a signed asset, falling back to the legacy zipball
|
||||
path.
|
||||
"""
|
||||
logger.info("Fetching latest release info from GitHub...")
|
||||
_validate_update_url(GITHUB_RELEASES_URL)
|
||||
@@ -131,9 +154,42 @@ def _download_release(temp_dir: str) -> tuple:
|
||||
tag = release.get("tag_name", "unknown")
|
||||
release_url = str(release.get("html_url") or GITHUB_RELEASES_PAGE_URL).strip()
|
||||
_validate_update_url(release_url, allow_release_page=True)
|
||||
zip_url = str(release.get("zipball_url") or "").strip()
|
||||
if not zip_url:
|
||||
raise RuntimeError("Latest release is missing a source archive URL")
|
||||
|
||||
# Prefer the maintainer-signed release asset. Fall back to the
|
||||
# auto-generated zipball if the release doesn't publish one.
|
||||
assets = release.get("assets") or []
|
||||
asset_name = ""
|
||||
asset_url = ""
|
||||
sha256sums_url = ""
|
||||
for a in assets:
|
||||
name = str(a.get("name") or "").strip()
|
||||
download = str(a.get("browser_download_url") or "").strip()
|
||||
if not name or not download:
|
||||
continue
|
||||
if _SOURCE_ASSET_PATTERN.match(name) and name.lower().endswith(".zip"):
|
||||
asset_name = name
|
||||
asset_url = download
|
||||
elif name == _SHA256SUMS_ASSET_NAME:
|
||||
sha256sums_url = download
|
||||
|
||||
if asset_url:
|
||||
zip_url = asset_url
|
||||
logger.info(
|
||||
"Using signed release asset %s (sha256sums=%s)",
|
||||
asset_name,
|
||||
"yes" if sha256sums_url else "no",
|
||||
)
|
||||
else:
|
||||
zip_url = str(release.get("zipball_url") or "").strip()
|
||||
if not zip_url:
|
||||
raise RuntimeError("Latest release is missing a source archive URL")
|
||||
logger.warning(
|
||||
"Release does not publish a signed ShadowBroker_v*.zip asset — "
|
||||
"falling back to auto-generated zipball_url. Integrity will be "
|
||||
"verified against the baked-in release_digests.json (if present) "
|
||||
"or HTTPS-only otherwise."
|
||||
)
|
||||
|
||||
_validate_update_url(zip_url)
|
||||
|
||||
logger.info(f"Downloading {zip_url} ...")
|
||||
@@ -150,19 +206,174 @@ def _download_release(temp_dir: str) -> tuple:
|
||||
|
||||
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
|
||||
logger.info(f"Downloaded {size_mb:.1f} MB — ZIP validated OK")
|
||||
return zip_path, tag, zip_url, release_url
|
||||
return zip_path, tag, zip_url, release_url, asset_name, sha256sums_url
|
||||
|
||||
|
||||
def _validate_zip_hash(zip_path: str) -> None:
|
||||
if not _EXPECTED_SHA256:
|
||||
return
|
||||
def _compute_sha256(zip_path: str) -> str:
|
||||
"""Return the hex SHA-256 of the file at ``zip_path`` (lowercase)."""
|
||||
h = hashlib.sha256()
|
||||
with open(zip_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(1024 * 128), b""):
|
||||
h.update(chunk)
|
||||
digest = h.hexdigest().lower()
|
||||
if digest != _EXPECTED_SHA256:
|
||||
raise RuntimeError("Update SHA-256 mismatch")
|
||||
return h.hexdigest().lower()
|
||||
|
||||
|
||||
def _load_baked_in_release_digests() -> dict:
    """Load ``release_digests.json`` as ``{release_tag: {asset_name: sha256}}``.

    Expected file layout (issue #231)::

        {
            "<release_tag>": {
                "<asset_filename>": "<sha256_hex>",
                ...
            },
            ...
        }

    Top-level keys that are not strings or that start with ``_`` are skipped,
    as are values that are not dicts. Inside a release, only string-to-string
    pairs are kept, with the digest stripped and lowercased. Releases left
    with no usable pair are omitted.

    Returns:
        The cleaned mapping, or an empty dict when the file cannot be read,
        is not valid JSON, or does not hold a JSON object at the top level.
    """
    try:
        parsed = json.loads(_RELEASE_DIGESTS_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError also covers JSON and text-decoding errors.
        logger.debug("Release digest file unreadable: %s", exc)
        return {}

    if not isinstance(parsed, dict):
        return {}

    cleaned: dict[str, dict[str, str]] = {}
    for tag, assets in parsed.items():
        if not isinstance(tag, str) or tag.startswith("_"):
            continue
        if not isinstance(assets, dict):
            continue
        usable: dict[str, str] = {}
        for fname, digest in assets.items():
            if isinstance(fname, str) and isinstance(digest, str):
                usable[fname] = digest.strip().lower()
        if usable:
            cleaned[tag] = usable
    return cleaned
|
||||
|
||||
|
||||
def _fetch_sha256sums(sha256sums_url: str) -> dict[str, str]:
    """Fetch a SHA256SUMS.txt file and parse it into ``{filename: digest}``.

    Each non-empty, non-``#`` line is expected in ``sha256sum`` layout:
    ``<digest> <filename>``, optionally with a leading ``*`` binary-mode
    marker on the filename. Lines that do not split into two fields, or whose
    digest is not exactly 64 lowercase-hex characters after normalisation,
    are ignored.

    Args:
        sha256sums_url: URL of the checksum file; it must pass
            ``_validate_update_url`` before any request is made.

    Returns:
        Mapping of filename to lowercase hex digest. Empty when the URL is
        rejected, the request fails, or no line parses.
    """
    try:
        _validate_update_url(sha256sums_url)
    except RuntimeError as exc:
        logger.warning("SHA256SUMS URL rejected: %s", exc)
        return {}

    try:
        resp = requests.get(sha256sums_url, timeout=15)
        resp.raise_for_status()
    except requests.RequestException as exc:
        logger.info("SHA256SUMS fetch failed: %s", exc)
        return {}

    hex_alphabet = set("0123456789abcdef")
    digests: dict[str, str] = {}
    for raw_line in resp.text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        # Split once on whitespace so filenames may contain spaces; this
        # accepts both `<digest> <name>` and `<digest> *<name>`.
        fields = entry.split(None, 1)
        if len(fields) != 2:
            continue
        digest = fields[0].strip().lower()
        fname = fields[1].lstrip("*").strip()
        if not fname or len(digest) != 64:
            continue
        if not hex_alphabet.issuperset(digest):
            continue
        digests[fname] = digest
    return digests
|
||||
|
||||
|
||||
def _validate_zip_hash(
    zip_path: str,
    *,
    asset_name: str = "",
    sha256sums_url: str = "",
    release_tag: str = "",
) -> str:
    """Check the downloaded update archive against the available digest sources.

    Issue #231: the earlier version returned silently when
    ``MESH_UPDATE_SHA256`` was unset. Sources are now consulted in order, and
    the first one that supplies an expected digest decides the outcome:

    1. ``MESH_UPDATE_SHA256`` environment variable (operator override).
    2. The release's ``SHA256SUMS.txt`` asset, looked up by ``asset_name``
       (fetched only when both ``sha256sums_url`` and ``asset_name`` are set).
    3. The baked-in ``release_digests.json`` entry for
       ``release_tag`` / ``asset_name``.
    4. HTTPS-only fallback, logged as a warning, when none of the above
       supplied a digest.

    Args:
        zip_path: Path to the downloaded archive.
        asset_name: File name of the release asset, used as the lookup key in
            sources 2 and 3.
        sha256sums_url: Download URL of the release's SHA256SUMS.txt asset.
        release_tag: Release tag, used as the lookup key in source 3.

    Returns:
        A short human-readable note naming the source that verified the
        archive (or stating the HTTPS-only fallback).

    Raises:
        RuntimeError: A source supplied an expected digest and the archive's
            SHA-256 does not match it.
    """
    actual = _compute_sha256(zip_path)

    # Source 1: explicit operator override.
    override = os.environ.get("MESH_UPDATE_SHA256", "").strip().lower()
    if override:
        if actual != override:
            raise RuntimeError(
                f"Update SHA-256 mismatch vs MESH_UPDATE_SHA256: archive={actual[:16]}..., "
                f"expected={override[:16]}..."
            )
        return f"verified via MESH_UPDATE_SHA256 ({actual[:16]}...)"

    # Source 2: SHA256SUMS.txt asset from the release.
    published: dict[str, str] = (
        _fetch_sha256sums(sha256sums_url) if (sha256sums_url and asset_name) else {}
    )
    published_digest = published.get(asset_name) if asset_name else None
    if published_digest:
        if actual != published_digest:
            raise RuntimeError(
                f"Update SHA-256 mismatch vs release SHA256SUMS.txt: "
                f"archive={actual[:16]}..., expected={published_digest[:16]}..."
            )
        return f"verified via release SHA256SUMS.txt ({actual[:16]}...)"

    # Source 3: baked-in digest list.
    baked = _load_baked_in_release_digests()
    pinned_digest = ""
    if release_tag and asset_name:
        pinned_digest = baked.get(release_tag, {}).get(asset_name, "")
    if pinned_digest:
        if actual != pinned_digest:
            raise RuntimeError(
                f"Update SHA-256 mismatch vs baked-in digest list: "
                f"archive={actual[:16]}..., expected={pinned_digest[:16]}..."
            )
        return f"verified via baked-in digest list ({actual[:16]}...)"

    # Source 4: HTTPS-only fallback. Auto-update keeps working when no digest
    # could be obtained, but the degraded posture is logged loudly (with the
    # full archive digest) so it can be pinned on the next release bump.
    logger.warning(
        "Update integrity check fell back to HTTPS-only trust "
        "(no SHA256SUMS.txt response and no baked-in digest for "
        "release=%s asset=%s). The archive SHA-256 is %s. Once the "
        "release ships a SHA256SUMS.txt asset OR backend/data/"
        "release_digests.json is updated with this release, the secure "
        "path will activate automatically.",
        release_tag or "unknown",
        asset_name or "unknown",
        actual,
    )
    return f"https-only (no digest source reachable, archive={actual[:16]}...)"
|
||||
|
||||
|
||||
def _is_source_checkout(project_root: str) -> bool:
|
||||
@@ -334,7 +545,7 @@ def perform_update(project_root: str) -> dict:
|
||||
temp_dir = tempfile.mkdtemp(prefix="sb_update_")
|
||||
manual_url = GITHUB_RELEASES_PAGE_URL
|
||||
try:
|
||||
zip_path, version, url, release_url = _download_release(temp_dir)
|
||||
zip_path, version, url, release_url, asset_name, sha256sums_url = _download_release(temp_dir)
|
||||
manual_url = release_url or manual_url
|
||||
|
||||
if in_docker:
|
||||
@@ -366,7 +577,13 @@ def perform_update(project_root: str) -> dict:
|
||||
),
|
||||
}
|
||||
|
||||
_validate_zip_hash(zip_path)
|
||||
verification_note = _validate_zip_hash(
|
||||
zip_path,
|
||||
asset_name=asset_name,
|
||||
sha256sums_url=sha256sums_url,
|
||||
release_tag=version,
|
||||
)
|
||||
logger.info("Update archive %s", verification_note)
|
||||
backup_path = _backup_current(project_root, temp_dir)
|
||||
copied = _extract_and_copy(zip_path, project_root, temp_dir)
|
||||
|
||||
@@ -378,6 +595,7 @@ def perform_update(project_root: str) -> dict:
|
||||
"manual_url": manual_url,
|
||||
"release_url": release_url,
|
||||
"download_url": url,
|
||||
"integrity": verification_note,
|
||||
"message": f"Updated to {version} — {copied} files replaced. Restarting...",
|
||||
}
|
||||
except Exception as e:
|
||||
|
||||
@@ -225,6 +225,11 @@ def _installed() -> bool:
|
||||
def _pid_alive(pid: int) -> bool:
|
||||
if pid <= 0:
|
||||
return False
|
||||
if os.name == "nt":
|
||||
# Windows PIDs are reused and os.kill(pid, 0) is not a reliable
|
||||
# ownership check. A persisted wormhole_status.json PID from an older
|
||||
# run must never be treated as a process we own.
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except OSError:
|
||||
@@ -238,6 +243,48 @@ def _pid_alive(pid: int) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _find_wormhole_server_pid() -> int:
    """Scan ``/proc`` for a process whose command line mentions the wormhole script.

    Returns:
        The pid of the first matching process other than the current one, or
        ``0`` on Windows, when ``/proc`` does not exist, or when nothing
        matches.
    """
    if os.name == "nt":
        return 0

    proc_root = Path("/proc")
    if not proc_root.exists():
        return 0

    own_pid = os.getpid()
    # NOTE(review): matching is a plain substring test against the
    # space-joined command line, so any process whose arguments merely mention
    # the script path or file name qualifies. disconnect_wormhole() passes the
    # result to _terminate_pid(); confirm this looseness is acceptable.
    needles = (str(WORMHOLE_SCRIPT), WORMHOLE_SCRIPT.name)

    for proc_entry in proc_root.iterdir():
        if not proc_entry.name.isdigit():
            continue  # not a per-process directory
        candidate = int(proc_entry.name)
        if candidate == own_pid:
            continue
        try:
            raw = (proc_entry / "cmdline").read_bytes()
        except OSError:
            # Unreadable cmdline (e.g. the process exited); skip it.
            continue
        # cmdline arguments are NUL-separated; join them with spaces.
        cmdline = raw.replace(b"\x00", b" ").decode("utf-8", errors="replace")
        if any(needle in cmdline for needle in needles):
            return candidate
    return 0
|
||||
|
||||
|
||||
def _terminate_pid(pid: int, *, timeout_s: float = 5.0) -> None:
    """Ask a process to exit with SIGTERM, escalating to SIGKILL after a wait.

    Does nothing on Windows or for a non-positive ``pid``. If sending SIGTERM
    raises, the function returns without escalating. Failures while sending
    SIGKILL are ignored.

    Args:
        pid: Process id to terminate.
        timeout_s: Seconds to wait for the process to disappear (polled via
            ``_pid_alive`` every 0.1 s) before sending SIGKILL.
    """
    if os.name == "nt":
        return
    if pid <= 0:
        return

    try:
        os.kill(pid, signal.SIGTERM)
    except Exception:
        # Signal could not be delivered; nothing more to do.
        return

    give_up_at = time.monotonic() + timeout_s
    while True:
        if time.monotonic() >= give_up_at:
            break
        if not _pid_alive(pid):
            break
        time.sleep(0.1)

    if not _pid_alive(pid):
        return
    try:
        os.kill(pid, signal.SIGKILL)
    except Exception:
        pass
|
||||
|
||||
|
||||
def _probe_ready(timeout_s: float = 1.5) -> bool:
|
||||
try:
|
||||
with urlopen(f"http://{WORMHOLE_HOST}:{WORMHOLE_PORT}/api/health", timeout=timeout_s) as resp:
|
||||
@@ -261,14 +308,34 @@ def _probe_json(path: str, timeout_s: float = 1.5) -> dict[str, Any] | None:
|
||||
def _current_runtime_state() -> dict[str, Any]:
|
||||
settings = read_wormhole_settings()
|
||||
status = read_wormhole_status()
|
||||
configured = bool(settings.get("enabled"))
|
||||
running = False
|
||||
ready = False
|
||||
pid = int(status.get("pid", 0) or 0)
|
||||
if _PROCESS and _PROCESS.poll() is None:
|
||||
if not configured:
|
||||
# Disabled private transport must stay disabled even if a stale local
|
||||
# wormhole process is still answering on the health port. Public
|
||||
# MeshChat relies on this state to keep the MQTT and Wormhole lanes
|
||||
# mutually exclusive.
|
||||
pid = 0
|
||||
ready = False
|
||||
elif _PROCESS and _PROCESS.poll() is None:
|
||||
running = True
|
||||
pid = int(_PROCESS.pid or 0)
|
||||
elif _pid_alive(pid):
|
||||
running = True
|
||||
ready = running and _probe_ready()
|
||||
else:
|
||||
if _pid_alive(pid):
|
||||
running = True
|
||||
else:
|
||||
discovered_pid = _find_wormhole_server_pid()
|
||||
if discovered_pid > 0:
|
||||
running = True
|
||||
pid = discovered_pid
|
||||
if not running and _probe_ready(timeout_s=0.35):
|
||||
running = True
|
||||
pid = 0
|
||||
ready = running and _probe_ready()
|
||||
if not running:
|
||||
pid = 0
|
||||
transport_active = status.get("transport_active", "") if ready else ""
|
||||
proxy_active = status.get("proxy_active", "") if ready else ""
|
||||
effective_transport = str(transport_active or settings.get("transport", "direct") or "direct").lower()
|
||||
@@ -309,13 +376,13 @@ def _current_runtime_state() -> dict[str, Any]:
|
||||
anonymous_mode = bool(settings.get("anonymous_mode"))
|
||||
anonymous_mode_ready = bool(
|
||||
anonymous_mode
|
||||
and settings.get("enabled")
|
||||
and configured
|
||||
and ready
|
||||
and effective_transport in {"tor", "tor_arti", "i2p", "mixnet"}
|
||||
)
|
||||
snapshot = {
|
||||
"installed": _installed(),
|
||||
"configured": bool(settings.get("enabled")),
|
||||
"configured": configured,
|
||||
"running": running,
|
||||
"ready": ready,
|
||||
"transport_configured": str(settings.get("transport", "direct") or "direct"),
|
||||
@@ -385,6 +452,12 @@ def get_wormhole_state() -> dict[str, Any]:
|
||||
def connect_wormhole(*, reason: str = "connect") -> dict[str, Any]:
|
||||
with _LOCK:
|
||||
_invalidate_state_cache()
|
||||
try:
|
||||
from services.transport_lane_isolation import disable_public_mesh_lane
|
||||
|
||||
disable_public_mesh_lane(reason=f"wormhole_{reason}")
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to enforce public/private lane isolation during %s: %s", reason, exc)
|
||||
settings = read_wormhole_settings()
|
||||
if not settings.get("enabled"):
|
||||
settings = settings.copy()
|
||||
@@ -477,8 +550,8 @@ def connect_wormhole(*, reason: str = "connect") -> dict[str, Any]:
|
||||
def disconnect_wormhole(*, reason: str = "disconnect") -> dict[str, Any]:
|
||||
with _LOCK:
|
||||
_invalidate_state_cache()
|
||||
current = _current_runtime_state()
|
||||
pid = int(current.get("pid", 0) or 0)
|
||||
status = read_wormhole_status()
|
||||
pid = int(status.get("pid", 0) or 0)
|
||||
global _PROCESS
|
||||
if _PROCESS and _PROCESS.poll() is None:
|
||||
try:
|
||||
@@ -489,14 +562,15 @@ def disconnect_wormhole(*, reason: str = "disconnect") -> dict[str, Any]:
|
||||
_PROCESS.kill()
|
||||
except Exception:
|
||||
pass
|
||||
elif _pid_alive(pid):
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except Exception:
|
||||
pass
|
||||
if os.name != "nt":
|
||||
_terminate_pid(pid)
|
||||
discovered_pid = _find_wormhole_server_pid()
|
||||
if discovered_pid > 0 and discovered_pid != pid:
|
||||
_terminate_pid(discovered_pid)
|
||||
_PROCESS = None
|
||||
write_wormhole_status(
|
||||
reason=reason,
|
||||
configured=False,
|
||||
running=False,
|
||||
ready=False,
|
||||
pid=0,
|
||||
|
||||
@@ -0,0 +1,677 @@
|
||||
{
|
||||
"_meta": {
|
||||
"issue": "#239",
|
||||
"note": "Snapshot of currently-tolerated duplicate route registrations. The test in test_no_new_duplicate_routes.py fails if any NEW (method, path) duplicate appears outside this list. Removing entries (by actually deduping) is fine and the test stays green. New entries here require explicit, reviewed updates.",
|
||||
"generated_with": "python -c 'see tests/test_no_new_duplicate_routes.py'"
|
||||
},
|
||||
"duplicates": {
|
||||
"DELETE /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/contact/{peer_id}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/invite/handles/{handle}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/cctv/media": [
|
||||
"main",
|
||||
"routers.cctv"
|
||||
],
|
||||
"GET /api/debug-latest": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/geocode/reverse": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/geocode/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/health": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/live-data": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/fast": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/slow": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/mesh/channels": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/prekey-bundle": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/pubkey": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/gate/list": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/event/{event_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/events": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/locator": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/merkle": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages/wait": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/node/{node_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/log": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/metrics": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/oracle/consensus": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets/more": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/predictions": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/profile": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/search": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/stakes/{message_id}": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/all": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/batch": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/rns/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/signals": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/trust/vouches": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/oracle/region-intel": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/radio/nearest": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/nearest-list": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/audio": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/calls/{sys_name}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/systems": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/top": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/refresh": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/region-dossier": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/route/{callsign}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/sentinel2/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/settings/api-keys": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/api-keys/meta": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole-status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/sigint/nearest-sdr": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/thermal/verify": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/tools/shodan/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/tools/uw/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/wormhole/dm/contacts": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite/handles": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/personas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/health": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PATCH /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/ais/feed": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/layers": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/mesh/dm/block": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/register": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/send": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/gate/create": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-pull": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/{gate_id}/message": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/revoke": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/rotate": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/ingest": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/oracle/predict": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve-stakes": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/stake": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/report": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/send": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/trust/vouch": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/vote": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/sentinel/tile": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/sentinel/token": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/settings/news-feeds/reset": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/sigint/transmit": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"POST /api/system/update": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/tools/shodan/count": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/host": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/congress": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/darkpool": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/flow": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/viewport": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/wormhole/connect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/disconnect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/build-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-tokens": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/invite/import": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/open-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/prekey/register": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/register-key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/reset": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sender-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/enter": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/grant": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/sign-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/messages/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/activate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/clear": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/create": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/retire": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/proof": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/state/export": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/identity/bootstrap": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/join": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/restart": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign-raw": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/envelope_policy": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/legacy_envelope_fallback": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/wormhole/dm/contact": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -87,11 +87,28 @@ def _run_gate_release_once(monkeypatch, *, transport_tier="private_strong"):
|
||||
def _patch_for_successful_post(monkeypatch, module):
|
||||
"""Apply standard monkeypatches so a gate_message post succeeds."""
|
||||
import main
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_setup_gate_outbox(monkeypatch)
|
||||
monkeypatch.setattr(main, "_verify_gate_message_signed_write", lambda **kw: (True, "ok", kw.get("reply_to", "")))
|
||||
monkeypatch.setattr(main, "_resolve_envelope_policy", lambda _gate_id: "envelope_disabled")
|
||||
|
||||
def _fake_private_gate_append(**kwargs):
|
||||
return {
|
||||
"event_id": f"ledger-ev-{kwargs.get('sequence', 0)}",
|
||||
"event_type": "gate_message",
|
||||
"node_id": kwargs["node_id"],
|
||||
"payload": dict(kwargs["payload"]),
|
||||
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||
"sequence": kwargs["sequence"],
|
||||
"signature": kwargs["signature"],
|
||||
"public_key": kwargs["public_key"],
|
||||
"public_key_algo": kwargs["public_key_algo"],
|
||||
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||
}
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", _fake_private_gate_append)
|
||||
|
||||
from services.mesh.mesh_reputation import gate_manager, reputation_ledger
|
||||
|
||||
monkeypatch.setattr(gate_manager, "can_enter", lambda *a, **kw: (True, "ok"))
|
||||
@@ -255,19 +272,30 @@ def test_gate_post_preserves_gate_envelope_in_store(monkeypatch):
|
||||
|
||||
|
||||
def test_gate_post_advances_sequence(monkeypatch):
|
||||
"""validate_and_set_sequence must be called to advance the counter."""
|
||||
"""append_private_gate_message must receive the gate sequence."""
|
||||
import main
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_patch_for_successful_post(monkeypatch, main)
|
||||
|
||||
seq_calls = []
|
||||
append_calls = []
|
||||
|
||||
def track_seq(node_id, seq, *, domain=""):
|
||||
seq_calls.append((node_id, seq, domain))
|
||||
return (True, "ok")
|
||||
def track_private_append(**kwargs):
|
||||
append_calls.append(kwargs)
|
||||
return {
|
||||
"event_id": "ev-seq",
|
||||
"event_type": "gate_message",
|
||||
"node_id": kwargs["node_id"],
|
||||
"payload": dict(kwargs["payload"]),
|
||||
"timestamp": kwargs.get("timestamp", 0) or 123.0,
|
||||
"sequence": kwargs["sequence"],
|
||||
"signature": kwargs["signature"],
|
||||
"public_key": kwargs["public_key"],
|
||||
"public_key_algo": kwargs["public_key_algo"],
|
||||
"protocol_version": kwargs.get("protocol_version", "infonet/2"),
|
||||
}
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "validate_and_set_sequence", track_seq)
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", track_private_append)
|
||||
monkeypatch.setattr(
|
||||
mesh_hashchain.gate_store,
|
||||
"append",
|
||||
@@ -280,8 +308,9 @@ def test_gate_post_advances_sequence(monkeypatch):
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["queued"] is True
|
||||
assert len(seq_calls) == 1
|
||||
assert seq_calls[0] == ("!sb_test1234567890", 42, "gate_message")
|
||||
assert len(append_calls) == 1
|
||||
assert append_calls[0]["node_id"] == "!sb_test1234567890"
|
||||
assert append_calls[0]["sequence"] == 42
|
||||
|
||||
|
||||
def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
||||
@@ -290,11 +319,11 @@ def test_gate_post_rejects_replay_via_sequence(monkeypatch):
|
||||
from services.mesh import mesh_hashchain
|
||||
|
||||
_patch_for_successful_post(monkeypatch, main)
|
||||
monkeypatch.setattr(
|
||||
mesh_hashchain.infonet,
|
||||
"validate_and_set_sequence",
|
||||
lambda node_id, seq: (False, "Replay detected: sequence 1 <= last 1"),
|
||||
)
|
||||
|
||||
def reject_private_append(**_kwargs):
|
||||
raise ValueError("Replay detected: sequence 1 <= last 1")
|
||||
|
||||
monkeypatch.setattr(mesh_hashchain.infonet, "append_private_gate_message", reject_private_append)
|
||||
|
||||
gate_id = "infonet"
|
||||
body = _build_gate_message_body(gate_id, sequence=1)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user