9 Commits

Author SHA1 Message Date
anoracleofra-code 0edc84c997 bump: release v0.2.0
Former-commit-id: 15f1a1dc3c
2026-03-08 14:27:54 -06:00
anoracleofra-code 0519ed040b fix: make test_trace.py curl commands OS-agnostic
Former-commit-id: b57830c1a6
2026-03-08 14:24:36 -06:00
anoracleofra-code 80fedc103a fix: make dev scripts cross-platform compatible
Former-commit-id: 9802fe55a3
2026-03-08 14:20:28 -06:00
anoracleofra-code 5cefd8f8d5 feat: add Docker publishing via GitHub Actions
Former-commit-id: 36c92881c8
2026-03-08 14:04:52 -06:00
Shadowbroker 75537a8570 Update README.md
Former-commit-id: 313aa32a9b
2026-03-08 12:23:56 -06:00
Shadowbroker bc13706311 Update README.md
Former-commit-id: b5f3b08dee
2026-03-08 12:23:39 -06:00
Shadowbroker 3711c84ebe Update README.md
Former-commit-id: e1acd44e43
2026-03-04 23:39:43 -07:00
Shadowbroker 8e79c03d88 Update README.md
Former-commit-id: 65a8c836c4
2026-03-04 23:38:27 -07:00
Shadowbroker 9419ed9883 Update README.md
Former-commit-id: 955907c056
2026-03-04 23:38:05 -07:00
166 changed files with 18575 additions and 14873 deletions
-16
@@ -1,16 +0,0 @@
# ShadowBroker — Docker Compose Environment Variables
# Copy this file to .env and fill in your keys:
# cp .env.example .env
# ── Required for backend container ─────────────────────────────
OPENSKY_CLIENT_ID=
OPENSKY_CLIENT_SECRET=
AIS_API_KEY=
# ── Optional ───────────────────────────────────────────────────
# LTA (Singapore traffic cameras) — leave blank to skip
# LTA_ACCOUNT_KEY=
# Override the backend URL the frontend uses (leave blank for auto-detect)
# NEXT_PUBLIC_API_URL=http://192.168.1.50:8000
-39
@@ -1,39 +0,0 @@
name: CI — Lint & Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  frontend:
    name: Frontend Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: frontend
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: frontend/package-lock.json
      - run: npm ci
      - run: npx vitest run --reporter=verbose

  backend:
    name: Backend Lint
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: backend
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install -r requirements.txt
      - run: python -c "from services.fetchers.retry import with_retry; from services.env_check import validate_env; print('Module imports OK')"
      - run: python -m pytest tests/ -v --tb=short || echo "No pytest tests found (OK)"
+13 -163
@@ -13,29 +13,17 @@ env:
IMAGE_NAME: ${{ github.repository }}
jobs:
build-frontend:
runs-on: ${{ matrix.runner }}
build-and-push-frontend:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
id-token: write
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
- platform: linux/arm64
runner: ubuntu-24.04-arm
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Lowercase image name
run: echo "IMAGE_NAME=${IMAGE_NAME,,}" >> $GITHUB_ENV
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.0.0
@@ -53,103 +41,28 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-frontend
- name: Build and push Docker image by digest
id: build
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5.0.0
with:
context: ./frontend
platforms: ${{ matrix.platform }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=frontend-${{ matrix.platform }}
cache-to: type=gha,mode=max,scope=frontend-${{ matrix.platform }}
outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-frontend,push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Export digest
if: github.event_name != 'pull_request'
run: |
mkdir -p /tmp/digests/frontend
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/frontend/${digest#sha256:}"
- name: Upload digest
if: github.event_name != 'pull_request'
uses: actions/upload-artifact@v4
with:
name: digests-frontend-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }}
path: /tmp/digests/frontend/*
if-no-files-found: error
retention-days: 1
merge-frontend:
build-and-push-backend:
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
needs: build-frontend
permissions:
contents: read
packages: write
steps:
- name: Lowercase image name
run: echo "IMAGE_NAME=${IMAGE_NAME,,}" >> $GITHUB_ENV
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests/frontend
pattern: digests-frontend-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.0.0
- name: Log into registry ${{ env.REGISTRY }}
uses: docker/login-action@v3.0.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5.0.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-frontend
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable={{is_default_branch}}
- name: Create and push manifest
working-directory: /tmp/digests/frontend
run: |
docker buildx imagetools create \
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-frontend@sha256:%s ' *)
build-backend:
runs-on: ${{ matrix.runner }}
permissions:
contents: read
packages: write
id-token: write
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
- platform: linux/arm64
runner: ubuntu-24.04-arm
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Lowercase image name
run: echo "IMAGE_NAME=${IMAGE_NAME,,}" >> $GITHUB_ENV
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.0.0
@@ -167,76 +80,13 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-backend
- name: Build and push Docker image by digest
id: build
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5.0.0
with:
context: ./backend
platforms: ${{ matrix.platform }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=backend-${{ matrix.platform }}
cache-to: type=gha,mode=max,scope=backend-${{ matrix.platform }}
outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-backend,push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
- name: Export digest
if: github.event_name != 'pull_request'
run: |
mkdir -p /tmp/digests/backend
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/backend/${digest#sha256:}"
- name: Upload digest
if: github.event_name != 'pull_request'
uses: actions/upload-artifact@v4
with:
name: digests-backend-${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }}
path: /tmp/digests/backend/*
if-no-files-found: error
retention-days: 1
merge-backend:
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
needs: build-backend
permissions:
contents: read
packages: write
steps:
- name: Lowercase image name
run: echo "IMAGE_NAME=${IMAGE_NAME,,}" >> $GITHUB_ENV
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests/backend
pattern: digests-backend-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.0.0
- name: Log into registry ${{ env.REGISTRY }}
uses: docker/login-action@v3.0.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5.0.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-backend
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable={{is_default_branch}}
- name: Create and push manifest
working-directory: /tmp/digests/backend
run: |
docker buildx imagetools create \
$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-backend@sha256:%s ' *)
cache-from: type=gha
cache-to: type=gha,mode=max
-42
@@ -58,51 +58,9 @@ tmp/
*.log
*.tmp
*.bak
*.swp
*.swo
out.txt
out_sys.txt
rss_output.txt
merged.txt
tmp_fast.json
diff.txt
local_diff.txt
map_diff.txt
TheAirTraffic Database.xlsx
# Debug dumps & release artifacts
backend/dump.json
backend/debug_fast.json
backend/nyc_sample.json
backend/nyc_full.json
backend/liveua_test.html
backend/out_liveua.json
frontend/server_logs*.txt
frontend/cctv.db
*.zip
*.tar.gz
.git_backup/
coverage/
.coverage
dist/
# Test files (may contain hardcoded keys)
backend/test_*.py
backend/services/test_*.py
# Local analysis & dev tools
backend/analyze_xlsx.py
backend/xlsx_analysis.txt
backend/services/ais_cache.json
# Internal update tracking (not for repo)
updatestuff.md
# Misc dev artifacts
clean_zip.py
zip_repo.py
refactor_cesium.py
jobs.json
.claude
.mise.local.toml
+90 -329
@@ -7,224 +7,76 @@
</p>
---
https://github.com/user-attachments/assets/248208ec-62f7-49d1-831d-4bd0a1fa6852
**ShadowBroker** is a real-time, multi-domain OSINT dashboard that aggregates live data from dozens of open-source intelligence feeds and renders them on a unified dark-ops map interface. It tracks aircraft, ships, satellites, earthquakes, conflict zones, CCTV networks, GPS jamming, and breaking geopolitical events — all updating in real time.
![Shadowbroker1](https://github.com/user-attachments/assets/000b94eb-bf33-4e8b-8c60-15ca4a723c68)
**ShadowBroker** is a real-time, full-spectrum geospatial intelligence dashboard that aggregates live data from dozens of open-source intelligence (OSINT) feeds and renders them on a unified dark-ops map interface. It tracks aircraft, ships, satellites, earthquakes, conflict zones, CCTV networks, GPS jamming, and breaking geopolitical events — all updating in real time.
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**, it's designed for analysts, researchers, and enthusiasts who want a single-pane-of-glass view of global activity.
---
## Why This Exists
A surprising amount of global telemetry is already public:
- Aircraft ADS-B broadcasts
- Maritime AIS signals
- Satellite orbital data
- Earthquake sensors
- Environmental monitoring networks
This data is scattered across dozens of tools and APIs. ShadowBroker began as an experiment to see what the world looks like when these signals are combined into a single interface.
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets, including public aircraft registration records. It is fully open-source so anyone can audit exactly what data is accessed and how. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
---
## Interesting Use Cases
* Track everything from Air Force One to the private jets of billionaires, dictators, and corporations
* Monitor satellites passing overhead and see high-resolution satellite imagery
* Nose around local emergency scanners
* Watch naval traffic worldwide
* Detect GPS jamming zones
* Follow earthquakes and other natural disasters in real time
---
## ⚡ Quick Start (Docker or Podman)
**Linux/macOS**
```bash
git clone https://github.com/BigBodyCobain/Shadowbroker.git
cd Shadowbroker
./compose.sh up -d
```
**Windows**
```bash
git clone https://github.com/BigBodyCobain/Shadowbroker.git
cd Shadowbroker
docker-compose up -d
```
Open `http://localhost:3000` to view the dashboard! *(Requires Docker or Podman)*
`compose.sh` auto-detects `docker compose`, `docker-compose`, `podman compose`, and `podman-compose`.
If both runtimes are installed, you can force Podman with `./compose.sh --engine podman up -d`.
Do not append a trailing `.` to that command; Compose treats it as a service name.
---
## 🔄 **How to Update**
If you are coming from an older release, you must pull the new code and rebuild your containers to see the latest data layers and performance fixes.
### 🐧 **Linux & 🍎 macOS** (Terminal / Zsh / Bash)
Since these systems are Unix-based, you can use the helper script directly.
**Pull the latest code:**
```bash
git pull origin main
```
**Run the update script:**
```bash
./compose.sh down
./compose.sh up --build -d
```
### 🪟 **Windows** (Command Prompt or PowerShell)
Windows handles scripts differently. You have two ways to update:
**Method A: The Direct Way (Recommended)**
Use the docker compose commands directly. This works in any Windows terminal (CMD, PowerShell, or Windows Terminal).
**Pull the latest code:**
```DOS
git pull origin main
```
**Rebuild the containers:**
```DOS
docker compose down
docker compose up --build -d
```
**Method B: Using the Script (Git Bash)**
If you prefer using the `./compose.sh` script on Windows, you must use Git Bash (installed with Git for Windows).
Open your project folder, right-click, and select "Open Git Bash here".
**Run the Linux commands:**
```bash
./compose.sh down
./compose.sh up --build -d
```
---
### ⚠️ **Stuck on the old version?**
If the dashboard still shows old data after updating:
**Clear Docker Cache:** `docker compose build --no-cache`
**Prune Images:** `docker image prune -f`
**Check Logs:** `./compose.sh logs -f backend` (or `docker compose logs -f backend`)
---
## ✨ Features
### 🛩️ Aviation Tracking
* **Commercial Flights** — Real-time positions via OpenSky Network (~5,000+ aircraft)
* **Private Aircraft** — Light GA, turboprops, bizjets tracked separately
* **Private Jets** — High-net-worth individual aircraft with owner identification
* **Military Flights** — Tankers, ISR, fighters, transports via adsb.lol military endpoint
* **Flight Trail Accumulation** — Persistent breadcrumb trails for all tracked aircraft
* **Holding Pattern Detection** — Automatically flags aircraft circling (>300° total turn)
* **Aircraft Classification** — Shape-accurate SVG icons: airliners, turboprops, bizjets, helicopters
* **Grounded Detection** — Aircraft below 100ft AGL rendered with grey icons
- **Commercial Flights** — Real-time positions via OpenSky Network (~5,000+ aircraft)
- **Private Aircraft** — Light GA, turboprops, bizjets tracked separately
- **Private Jets** — High-net-worth individual aircraft with owner identification
- **Military Flights** — Tankers, ISR, fighters, transports via adsb.lol military endpoint
- **Flight Trail Accumulation** — Persistent breadcrumb trails for all tracked aircraft
- **Holding Pattern Detection** — Automatically flags aircraft circling (>300° total turn; see the sketch after this list)
- **Aircraft Classification** — Shape-accurate SVG icons: airliners, turboprops, bizjets, helicopters
- **Grounded Detection** — Aircraft below 100ft AGL rendered with grey icons
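
The holding-pattern rule above lends itself to a compact check. A minimal sketch, not the repo's actual implementation (function names and the trail format are assumptions): accumulate signed heading deltas along an aircraft's trail and flag when the total exceeds 300°.

```python
# Hypothetical holding-pattern check: sum signed heading deltas along an
# aircraft's recent trail and flag when the total turn exceeds 300 degrees.
from typing import List

HOLDING_TURN_THRESHOLD_DEG = 300.0  # the ">300° total turn" rule above

def signed_heading_delta(prev: float, curr: float) -> float:
    """Smallest signed angle in degrees from heading prev to heading curr."""
    return (curr - prev + 180.0) % 360.0 - 180.0

def is_holding(trail_headings: List[float]) -> bool:
    """True when the accumulated turn over the trail exceeds the threshold."""
    total_turn = sum(
        signed_heading_delta(a, b)
        for a, b in zip(trail_headings, trail_headings[1:])
    )
    return abs(total_turn) > HOLDING_TURN_THRESHOLD_DEG

# An aircraft flying a steady racetrack turn accumulates ~350° here:
print(is_holding([0, 45, 90, 135, 180, 225, 270, 315, 350]))  # True
```

Straight-line traffic with small heading wiggles sums to roughly zero, so it is never flagged.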
### 🚢 Maritime Tracking
* **AIS Vessel Stream** — 25,000+ vessels via aisstream.io WebSocket (real-time)
* **Ship Classification** — Cargo, tanker, passenger, yacht, military vessel types with color-coded icons
* **Carrier Strike Group Tracker** — All 11 active US Navy aircraft carriers with OSINT-estimated positions
* Automated GDELT news scraping for carrier movement intelligence
* 50+ geographic region-to-coordinate mappings
* Disk-cached positions, auto-updates at 00:00 & 12:00 UTC
* **Cruise & Passenger Ships** — Dedicated layer for cruise liners and ferries
* **Clustered Display** — Ships cluster at low zoom with count labels, decluster on zoom-in
- **AIS Vessel Stream** — 25,000+ vessels via aisstream.io WebSocket (real-time)
- **Ship Classification** — Cargo, tanker, passenger, yacht, military vessel types with color-coded icons
- **Carrier Strike Group Tracker** — All 11 active US Navy aircraft carriers with OSINT-estimated positions
- Automated GDELT news scraping for carrier movement intelligence
- 50+ geographic region-to-coordinate mappings
- Disk-cached positions, auto-updates at 00:00 & 12:00 UTC
- **Cruise & Passenger Ships** — Dedicated layer for cruise liners and ferries
- **Clustered Display** — Ships cluster at low zoom with count labels, decluster on zoom-in
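
The Carrier Strike Group Tracker's region-to-coordinate step can be illustrated with a toy lookup: scan headline text for known region names and return an approximate anchor point. The keywords and coordinates below are examples, not the repo's actual 50+ mappings:

```python
# Illustrative region-name -> coordinate lookup for carrier OSINT headlines.
REGION_COORDS = {
    "south china sea": (12.0, 113.0),
    "persian gulf": (26.5, 52.0),
    "eastern mediterranean": (33.5, 29.0),
    "norfolk": (36.9, -76.3),  # homeport fallback
}

def estimate_position(headline: str):
    """Return (region, (lat, lon)) for the first known region in a headline."""
    text = headline.lower()
    for region, coords in REGION_COORDS.items():
        if region in text:
            return region, coords
    return None  # no match -> keep the last cached position

print(estimate_position("USS Gerald R. Ford transits the Eastern Mediterranean"))
# ('eastern mediterranean', (33.5, 29.0))
```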
### 🛰️ Space & Satellites
* **Orbital Tracking** — Real-time satellite positions via CelesTrak TLE data + SGP4 propagation (2,000+ active satellites, no API key required)
* **Mission-Type Classification** — Color-coded by mission: military recon (red), SAR (cyan), SIGINT (white), navigation (blue), early warning (magenta), commercial imaging (green), space station (gold)
- **Orbital Tracking** — Real-time satellite positions from N2YO API
- **Mission-Type Classification** — Color-coded by mission: military recon (red), SAR (cyan), SIGINT (white), navigation (blue), early warning (magenta), commercial imaging (green), space station (gold)
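
For the CelesTrak/TLE variant described above, propagation is a few lines with the `sgp4` package (`pip install sgp4`). The ISS element set below is stale and purely illustrative; real use would pull current TLEs from CelesTrak:

```python
from sgp4.api import Satrec, jday

# Stale ISS TLE, for illustration only:
LINE1 = "1 25544U 98067A   24001.50000000  .00016717  00000+0  10270-3 0  9990"
LINE2 = "2 25544  51.6400 208.9163 0006317  69.9862  25.2906 15.49560532 10000"

sat = Satrec.twoline2rv(LINE1, LINE2)
jd, fr = jday(2024, 1, 1, 12, 0, 0)         # UTC time to propagate to
err, position, velocity = sat.sgp4(jd, fr)  # TEME frame, km and km/s

if err == 0:
    print("ECI position (km):", position)
else:
    print("SGP4 error code:", err)
# Mapping TEME x/y/z to lat/lon for the map needs a further GMST rotation
# (or a helper library such as skyfield).
```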
### 🌍 Geopolitics & Conflict
* **Global Incidents** — GDELT-powered conflict event aggregation (last 8 hours, ~1,000 events)
* **Ukraine Frontline** — Live warfront GeoJSON from DeepState Map
* **SIGINT/RISINT News Feed** — Real-time RSS aggregation from multiple intelligence-focused sources with user-customizable feeds (up to 20 sources, configurable priority weights 1-5)
* **Region Dossier** — Right-click anywhere on the map for:
* Country profile (population, capital, languages, currencies, area)
* Head of state & government type (Wikidata SPARQL)
* Local Wikipedia summary with thumbnail
### 🛰️ Satellite Imagery
* **NASA GIBS (MODIS Terra)** — Daily true-color satellite imagery overlay with 30-day time slider, play/pause animation, and opacity control (~250m/pixel)
* **High-Res Satellite (Esri)** — Sub-meter resolution imagery via Esri World Imagery — zoom into buildings and terrain detail (zoom 18+)
* **Sentinel-2 Intel Card** — Right-click anywhere on the map for a floating intel card showing the latest Sentinel-2 satellite photo with capture date, cloud cover %, and clickable full-resolution image (10m resolution, updated every ~5 days)
* **SATELLITE Style Preset** — Quick-toggle high-res imagery via the STYLE button (DEFAULT → SATELLITE → FLIR → NVG → CRT)
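
Under the hood, the Sentinel-2 intel card is essentially one STAC query. A hedged sketch of that kind of lookup against the public Microsoft Planetary Computer endpoint using `pystac-client` (the repo's `sentinel_search.py` may differ in details):

```python
# Find the latest low-cloud Sentinel-2 L2A scene over a point (lon, lat).
from pystac_client import Client

catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
search = catalog.search(
    collections=["sentinel-2-l2a"],
    intersects={"type": "Point", "coordinates": [34.8, 31.8]},  # lon, lat
    query={"eo:cloud_cover": {"lt": 30}},   # skip heavily clouded scenes
    sortby=["-properties.datetime"],        # newest first
    max_items=1,
)
for item in search.items():
    print(item.id, item.datetime, item.properties.get("eo:cloud_cover"))
# Downloading the actual imagery assets additionally requires signing the
# asset URLs (see the planetary-computer package).
```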
### 📻 Software-Defined Radio (SDR)
* **KiwiSDR Receivers** — 500+ public SDR receivers plotted worldwide with clustered amber markers
* **Live Radio Tuner** — Click any KiwiSDR node to open an embedded SDR tuner directly in the SIGINT panel
* **Metadata Display** — Node name, location, antenna type, frequency bands, active users
- **Global Incidents** — GDELT-powered conflict event aggregation (last 8 hours, ~1,000 events)
- **Ukraine Frontline** — Live warfront GeoJSON from DeepState Map
- **SIGINT/RISINT News Feed** — Real-time RSS aggregation from multiple intelligence-focused sources
- **Region Dossier** — Right-click anywhere on the map for:
- Country profile (population, capital, languages, currencies, area)
- Head of state & government type (Wikidata SPARQL)
- Local Wikipedia summary with thumbnail
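
The head-of-state lookup reduces to a one-property SPARQL query. A sketch against the public Wikidata endpoint (`P35` is Wikidata's "head of state" property; `Q142`, France, is just an example entity, since the real service first resolves the clicked country):

```python
import requests

SPARQL = """
SELECT ?headLabel WHERE {
  wd:Q142 wdt:P35 ?head .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""
resp = requests.get(
    "https://query.wikidata.org/sparql",
    params={"query": SPARQL, "format": "json"},
    headers={"User-Agent": "shadowbroker-demo/0.1"},  # Wikidata asks for a UA
    timeout=15,
)
for row in resp.json()["results"]["bindings"]:
    print(row["headLabel"]["value"])
```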
### 📷 Surveillance
* **CCTV Mesh** — 2,000+ live traffic cameras from:
* 🇬🇧 Transport for London JamCams
* 🇺🇸 Austin, TX TxDOT
* 🇺🇸 NYC DOT
* 🇸🇬 Singapore LTA
* Custom URL ingestion
* **Feed Rendering** — Automatic detection & rendering of video, MJPEG, HLS, embed, satellite tile, and image feeds
* **Clustered Map Display** — Green dots cluster with count labels, decluster on zoom
- **CCTV Mesh** — 2,000+ live traffic cameras from:
- 🇬🇧 Transport for London JamCams
- 🇺🇸 Austin, TX TxDOT
- 🇺🇸 NYC DOT
- 🇸🇬 Singapore LTA
- Custom URL ingestion
- **Feed Rendering** — Automatic detection & rendering of video, MJPEG, HLS, embed, satellite tile, and image feeds
- **Clustered Map Display** — Green dots cluster with count labels, decluster on zoom
### 📡 Signal Intelligence
* **GPS Jamming Detection** — Real-time analysis of aircraft NAC-P (Navigation Accuracy Category) values
* Grid-based aggregation identifies interference zones
* Red overlay squares with "GPS JAM XX%" severity labels
* **Radio Intercept Panel** — Scanner-style UI for monitoring communications
### 🔥 Environmental & Infrastructure Monitoring
* **NASA FIRMS Fire Hotspots (24h)** — 5,000+ global thermal anomalies from NOAA-20 VIIRS satellite, updated every cycle. Flame-shaped icons color-coded by fire radiative power (FRP): yellow (low), orange, red, dark red (intense). Clustered at low zoom with fire-shaped cluster markers.
* **Space Weather Badge** — Live NOAA geomagnetic storm indicator in the bottom status bar. Color-coded Kp index: green (quiet), yellow (active), red (storm G1-G5). Data from SWPC planetary K-index 1-minute feed; see the sketch after this list.
* **Internet Outage Monitoring** — Regional internet connectivity alerts from Georgia Tech IODA. Grey markers at affected regions with severity percentage. Uses only reliable data sources (BGP routing tables, active ping probing) — no telescope or interpolated data.
* **Data Center Mapping** — 2,000+ global data centers plotted from a curated dataset. Clustered purple markers with server-rack icons. Click for operator, location, and automatic internet outage cross-referencing by country.
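
The space-weather badge above amounts to polling one public JSON feed and bucketing the Kp value into the badge colors. A sketch, assuming the SWPC 1-minute planetary K-index feed; verify the field name against the live response:

```python
import requests

URL = "https://services.swpc.noaa.gov/json/planetary_k_index_1m.json"
rows = requests.get(URL, timeout=15).json()
kp = float(rows[-1]["kp_index"])  # most recent sample (field name assumed)

if kp < 4:
    status = "green (quiet)"
elif kp < 5:
    status = "yellow (active)"
else:
    status = "red (storm)"  # NOAA's G-scale starts at Kp 5 (G1)
print(f"Kp {kp:.1f} -> {status}")
```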
- **GPS Jamming Detection** — Real-time analysis of aircraft NAC-P (Navigation Accuracy Category) values
- Grid-based aggregation identifies interference zones
- Red overlay squares with "GPS JAM XX%" severity labels
- **Radio Intercept Panel** — Scanner-style UI for monitoring communications
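
The grid aggregation behind GPS-jamming detection fits in a short function: bucket aircraft into lat/lon cells and flag cells where a large share of reports show degraded NAC-P. The cell size and thresholds below are illustrative, not the dashboard's tuned values:

```python
from collections import defaultdict

GRID_DEG = 1.0     # cell size in degrees (example value)
NACP_DEGRADED = 5  # NAC-P below this counts as degraded (example value)
JAM_RATIO = 0.5    # flag a cell when >=50% of its aircraft are degraded

def jam_cells(aircraft):
    """aircraft: iterable of (lat, lon, nac_p). Returns {cell: degraded ratio}."""
    cells = defaultdict(lambda: [0, 0])  # cell -> [degraded, total]
    for lat, lon, nacp in aircraft:
        cell = (int(lat // GRID_DEG), int(lon // GRID_DEG))
        cells[cell][1] += 1
        if nacp < NACP_DEGRADED:
            cells[cell][0] += 1
    return {
        cell: degraded / total
        for cell, (degraded, total) in cells.items()
        if total >= 3 and degraded / total >= JAM_RATIO  # need a few samples
    }

sample = [(54.2, 20.1, 0), (54.4, 20.7, 1), (54.9, 20.3, 2), (48.0, 11.0, 10)]
print(jam_cells(sample))  # {(54, 20): 1.0} -> rendered as "GPS JAM 100%"
```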
### 🌐 Additional Layers
* **Earthquakes (24h)** — USGS real-time earthquake feed with magnitude-scaled markers
* **Day/Night Cycle** — Solar terminator overlay showing global daylight/darkness
* **Global Markets Ticker** — Live financial market indices (minimizable)
* **Measurement Tool** — Point-to-point distance & bearing measurement on the map
* **LOCATE Bar** — Search by coordinates (31.8, 34.8) or place name (Tehran, Strait of Hormuz) to fly directly to any location — geocoded via OpenStreetMap Nominatim
![Gaza](https://github.com/user-attachments/assets/f2c953b2-3528-4360-af5a-7ea34ff28489)
- **Earthquakes (24h)** — USGS real-time earthquake feed with magnitude-scaled markers
- **Day/Night Cycle** — Solar terminator overlay showing global daylight/darkness
- **Global Markets Ticker** — Live financial market indices (minimizable)
- **Measurement Tool** — Point-to-point distance & bearing measurement on the map
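
The measurement tool's distance and bearing are standard great-circle geometry (haversine plus initial bearing). A self-contained sketch:

```python
import math

def distance_bearing(lat1, lon1, lat2, lon2):
    """Great-circle distance (km) and initial bearing (degrees true)."""
    R = 6371.0  # mean Earth radius, km
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
    dist_km = 2 * R * math.asin(math.sqrt(a))
    y = math.sin(dlon) * math.cos(p2)
    x = math.cos(p1) * math.sin(p2) - math.sin(p1) * math.cos(p2) * math.cos(dlon)
    bearing = (math.degrees(math.atan2(y, x)) + 360.0) % 360.0
    return dist_km, bearing

# Tehran to the Strait of Hormuz (the LOCATE bar examples above), roughly:
print(distance_bearing(35.7, 51.4, 26.6, 56.3))  # ≈ (1113 km, 154°)
```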
---
@@ -240,7 +92,7 @@ If the dashboard still shows old data after updating:
│ │ Map Render │ │ Intel │ │ Markets/Radio │ │
│ └──────┬──────┘ └────┬─────┘ └───────┬───────┘ │
│ └────────────────┼──────────────────┘ │
│ │ REST API (60s / 120s) │
│ │ REST API (15s / 60s)
├──────────────────────────┼─────────────────────────────┤
│ BACKEND (FastAPI) │
│ │ │
@@ -248,7 +100,7 @@ If the dashboard still shows old data after updating:
│ │ Data Fetcher (Scheduler) │ │
│ │ │ │
│ │ ┌──────────┬──────────┬──────────┬───────────┐ │ │
│ │ │ OpenSky │ adsb.lol │CelesTrak │ USGS │ │ │
│ │ │ OpenSky │ adsb.lol │ N2YO │ USGS │ │ │
│ │ │ Flights │ Military │ Sats │ Quakes │ │ │
│ │ ├──────────┼──────────┼──────────┼───────────┤ │ │
│ │ │ AIS WS │ Carrier │ GDELT │ CCTV │ │ │
@@ -256,9 +108,6 @@ If the dashboard still shows old data after updating:
│ │ ├──────────┼──────────┼──────────┼───────────┤ │ │
│ │ │ DeepState│ RSS │ Region │ GPS │ │ │
│ │ │ Frontline│ Intel │ Dossier │ Jamming │ │ │
│ │ ├──────────┼──────────┼──────────┼───────────┤ │ │
│ │ │ NASA │ NOAA │ IODA │ KiwiSDR │ │ │
│ │ │ FIRMS │ Space Wx│ Outages │ Radios │ │ │
│ │ └──────────┴──────────┴──────────┴───────────┘ │ │
│ └──────────────────────────────────────────────────┘ │
└────────────────────────────────────────────────────────┘
@@ -273,7 +122,7 @@ If the dashboard still shows old data after updating:
| [OpenSky Network](https://opensky-network.org) | Commercial & private flights | ~60s | Optional (anonymous limited) |
| [adsb.lol](https://adsb.lol) | Military aircraft | ~60s | No |
| [aisstream.io](https://aisstream.io) | AIS vessel positions | Real-time WebSocket | **Yes** |
| [CelesTrak](https://celestrak.org) | Satellite orbital positions (TLE + SGP4) | ~60s | No |
| [N2YO](https://www.n2yo.com) | Satellite orbital positions | ~60s | **Yes** |
| [USGS Earthquake](https://earthquake.usgs.gov) | Global seismic events | ~60s | No |
| [GDELT Project](https://www.gdeltproject.org) | Global conflict events | ~6h | No |
| [DeepState Map](https://deepstatemap.live) | Ukraine frontline | ~30min | No |
@@ -284,90 +133,42 @@ If the dashboard still shows old data after updating:
| [RestCountries](https://restcountries.com) | Country profile data | On-demand (cached 24h) | No |
| [Wikidata SPARQL](https://query.wikidata.org) | Head of state data | On-demand (cached 24h) | No |
| [Wikipedia API](https://en.wikipedia.org/api) | Location summaries & aircraft images | On-demand (cached) | No |
| [NASA GIBS](https://gibs.earthdata.nasa.gov) | MODIS Terra daily satellite imagery | Daily (24-48h delay) | No |
| [Esri World Imagery](https://www.arcgis.com) | High-res satellite basemap | Static (periodically updated) | No |
| [MS Planetary Computer](https://planetarycomputer.microsoft.com) | Sentinel-2 L2A scenes (right-click) | On-demand | No |
| [KiwiSDR](https://kiwisdr.com) | Public SDR receiver locations | ~30min | No |
| [OSM Nominatim](https://nominatim.openstreetmap.org) | Place name geocoding (LOCATE bar) | On-demand | No |
| [NASA FIRMS](https://firms.modaps.eosdis.nasa.gov) | NOAA-20 VIIRS fire/thermal hotspots | ~120s | No |
| [NOAA SWPC](https://services.swpc.noaa.gov) | Space weather Kp index & solar events | ~120s | No |
| [IODA (Georgia Tech)](https://ioda.inetintel.cc.gatech.edu) | Regional internet outage alerts | ~120s | No |
| [DC Map (GitHub)](https://github.com/Ringmast4r/Data-Center-Map---Global) | Global data center locations | Static (cached 7d) | No |
| [CARTO Basemaps](https://carto.com) | Dark map tiles | Continuous | No |
---
## 🚀 Getting Started
### 🐳 Docker / Podman Setup (Recommended for Self-Hosting)
### 🐳 Docker Setup (Recommended for Self-Hosting)
The repo includes a `docker-compose.yml` that builds both images locally.
You can run the dashboard easily using the pre-built Docker images hosted on GitHub Container Registry (GHCR).
```bash
git clone https://github.com/BigBodyCobain/Shadowbroker.git
cd Shadowbroker
# Add your API keys in a repo-root .env file (optional — see Environment Variables below)
./compose.sh up -d
```
Open `http://localhost:3000` to view the dashboard.
> **Deploying publicly or on a LAN?** No configuration needed for most setups.
> The frontend proxies all API calls through the Next.js server to `BACKEND_URL`,
> which defaults to `http://backend:8000` (Docker internal networking).
> Port 8000 does not need to be exposed externally.
>
> If your backend runs on a **different host or port**, set `BACKEND_URL` at runtime — no rebuild required:
>
> ```bash
> # Linux / macOS
> BACKEND_URL=http://myserver.com:9096 docker-compose up -d
>
> # Podman (via compose.sh wrapper)
> BACKEND_URL=http://192.168.1.50:9096 ./compose.sh up -d
>
> # Windows (PowerShell)
> $env:BACKEND_URL="http://myserver.com:9096"; docker-compose up -d
>
> # Or add to a .env file next to docker-compose.yml:
> # BACKEND_URL=http://myserver.com:9096
> ```
If you prefer to call the container engine directly, Podman users can run `podman compose up -d`, or force the wrapper to use Podman with `./compose.sh --engine podman up -d`.
Depending on your local Podman configuration, `podman compose` may still delegate to an external compose provider while talking to the Podman socket.
---
### 🐋 Standalone Deploy (Portainer, Uncloud, NAS, etc.)
No need to clone the repo. Use the pre-built images published to the GitHub Container Registry.
Create a `docker-compose.yml` with the following content and deploy it directly — paste it into Portainer's stack editor, `uncloud deploy`, or any Docker host:
1. Create a `docker-compose.yml` file:
```yaml
version: '3.8'
services:
backend:
image: ghcr.io/bigbodycobain/shadowbroker-backend:latest
image: ghcr.io/<your-username>/live-risk-dashboard-backend:main
container_name: shadowbroker-backend
ports:
- "8000:8000"
environment:
- AIS_API_KEY=your_aisstream_key # Required — get one free at aisstream.io
- OPENSKY_CLIENT_ID= # Optional — higher flight data rate limits
- OPENSKY_CLIENT_SECRET= # Optional — paired with Client ID above
- LTA_ACCOUNT_KEY= # Optional — Singapore CCTV cameras
- CORS_ORIGINS= # Optional — comma-separated allowed origins
- AISSTREAM_API_KEY=${AISSTREAM_API_KEY}
- N2YO_API_KEY=${N2YO_API_KEY}
# Add other required environment variables here
volumes:
- backend_data:/app/data
restart: unless-stopped
frontend:
image: ghcr.io/bigbodycobain/shadowbroker-frontend:latest
image: ghcr.io/<your-username>/live-risk-dashboard-frontend:main
container_name: shadowbroker-frontend
ports:
- "3000:3000"
environment:
- BACKEND_URL=http://backend:8000 # Docker internal networking — no rebuild needed
- NEXT_PUBLIC_API_URL=http://localhost:8000
depends_on:
- backend
restart: unless-stopped
@@ -376,9 +177,9 @@ volumes:
backend_data:
```
> **How it works:** The frontend container proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. The browser only ever talks to port 3000 — port 8000 does not need to be exposed externally.
>
> `BACKEND_URL` is a plain runtime environment variable (not a build-time `NEXT_PUBLIC_*`), so you can change it in Portainer, Uncloud, or any compose editor without rebuilding the image. Set it to the address where your backend is reachable from inside the Docker network (e.g. `http://backend:8000`, `http://192.168.1.50:8000`).
2. Create a `.env` file in the same directory with your API keys.
3. Run `docker-compose up -d`.
4. Access the dashboard at `http://localhost:3000`.
---
@@ -387,7 +188,7 @@ volumes:
If you just want to run the dashboard without dealing with terminal commands:
1. Go to the **[Releases](../../releases)** tab on the right side of this GitHub page.
2. Download the latest `.zip` file from the release.
2. Download the `ShadowBroker_v0.2.zip` file.
3. Extract the folder to your computer.
4. **Windows:** Double-click `start.bat`.
**Mac/Linux:** Open terminal, type `chmod +x start.sh`, and run `./start.sh`.
@@ -401,10 +202,9 @@ If you want to modify the code or run from source:
#### Prerequisites
* **Node.js** 18+ and **npm** — [nodejs.org](https://nodejs.org/)
* **Python** 3.10, 3.11, or 3.12 with `pip` — [python.org](https://www.python.org/downloads/) (**check "Add to PATH"** during install)
* ⚠️ Python 3.13+ may have compatibility issues with some dependencies. **3.11 or 3.12 is recommended.**
* API keys for `aisstream.io` (required) and, optionally, `opensky-network.org` (OAuth2) and `lta.gov.sg`
- **Node.js** 18+ and **npm**
- **Python** 3.10+ with `pip`
- API keys for: `aisstream.io`, `n2yo.com` (and optionally `opensky-network.org`, `lta.gov.sg`)
### Installation
@@ -418,12 +218,13 @@ cd backend
python -m venv venv
venv\Scripts\activate # Windows
# source venv/bin/activate # macOS/Linux
pip install -r requirements.txt # includes pystac-client for Sentinel-2
pip install -r requirements.txt
# Create .env with your API keys
echo "AIS_API_KEY=your_aisstream_key" >> .env
echo "OPENSKY_CLIENT_ID=your_opensky_client_id" >> .env
echo "OPENSKY_CLIENT_SECRET=your_opensky_secret" >> .env
echo "AISSTREAM_API_KEY=your_key_here" >> .env
echo "N2YO_API_KEY=your_key_here" >> .env
echo "OPENSKY_USERNAME=your_user" >> .env
echo "OPENSKY_PASSWORD=your_pass" >> .env
# Frontend setup
cd ../frontend
@@ -439,29 +240,8 @@ npm run dev
This starts:
* **Next.js** frontend on `http://localhost:3000`
* **FastAPI** backend on `http://localhost:8000`
### Local AIS Receiver (Optional)
You can feed your own AIS ship data into ShadowBroker using an RTL-SDR dongle and [AIS-catcher](https://github.com/jvde-github/AIS-catcher), an open-source AIS decoder. This gives you real-time coverage of vessels in your local area — no API key needed.
1. Plug in an RTL-SDR dongle
2. Install AIS-catcher ([releases](https://github.com/jvde-github/AIS-catcher/releases)) or use the Docker image:
```bash
docker run -d --device /dev/bus/usb \
ghcr.io/jvde-github/ais-catcher -H http://host.docker.internal:4000/api/ais/feed interval 10
```
3. Or run natively:
```bash
AIS-catcher -H http://localhost:4000/api/ais/feed interval 10
```
AIS-catcher decodes VHF radio signals on 161.975 MHz and 162.025 MHz and POSTs decoded vessel data to ShadowBroker every 10 seconds. Ships detected by your SDR antenna appear alongside the global AIS stream.
**Docker (ARM/Raspberry Pi):** See [docker-shipfeeder](https://github.com/sdr-enthusiasts/docker-shipfeeder) for a production-ready Docker image optimized for ARM.
**Note:** AIS range depends on your antenna — typically 20-40 nautical miles with a basic setup, 60+ nm with a marine VHF antenna at elevation.
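
If you want to sanity-check the feed endpoint without an SDR attached, you can POST a single hand-built message. A hypothetical smoke test: the backend only requires the top-level `msgs` list, the per-message fields here are illustrative, and the port should match wherever the feed endpoint is exposed in your setup (the examples above use 4000; the FastAPI backend itself listens on 8000):

```python
import requests

payload = {
    "msgs": [
        {
            "mmsi": 244660000,   # example MMSI
            "lat": 52.37,
            "lon": 4.89,
            "speed": 11.2,
            "course": 87.0,
            "shipname": "TEST VESSEL",
        }
    ]
}
resp = requests.post("http://localhost:8000/api/ais/feed", json=payload, timeout=10)
print(resp.json())  # e.g. {"status": "ok", "ingested": 1}
```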
- **Next.js** frontend on `http://localhost:3000`
- **FastAPI** backend on `http://localhost:8000`
---
@@ -480,18 +260,11 @@ All layers are independently toggleable from the left panel:
| Carriers / Mil / Cargo | ✅ ON | Navy carriers, cargo ships, tankers |
| Civilian Vessels | ❌ OFF | Yachts, fishing, recreational |
| Cruise / Passenger | ✅ ON | Cruise ships and ferries |
| Tracked Yachts | ✅ ON | Billionaire & oligarch superyachts (Yacht-Alert DB) |
| Earthquakes (24h) | ✅ ON | USGS seismic events |
| CCTV Mesh | ❌ OFF | Surveillance camera network |
| Ukraine Frontline | ✅ ON | Live warfront positions |
| Global Incidents | ✅ ON | GDELT conflict events |
| GPS Jamming | ✅ ON | NAC-P degradation zones |
| MODIS Terra (Daily) | ❌ OFF | NASA GIBS daily satellite imagery |
| High-Res Satellite | ❌ OFF | Esri sub-meter satellite imagery |
| KiwiSDR Receivers | ❌ OFF | Public SDR radio receivers |
| Fire Hotspots (24h) | ❌ OFF | NASA FIRMS VIIRS thermal anomalies |
| Internet Outages | ❌ OFF | IODA regional connectivity alerts |
| Data Centers | ❌ OFF | Global data center locations (2,000+) |
| Day / Night Cycle | ✅ ON | Solar terminator overlay |
---
@@ -500,15 +273,14 @@ All layers are independently toggleable from the left panel:
The platform is optimized for handling massive real-time datasets:
* **Gzip Compression** — API payloads compressed ~92% (11.6 MB → 915 KB)
* **ETag Caching** — `304 Not Modified` responses skip redundant JSON parsing
* **Viewport Culling** — Only features within the visible map bounds (+20% buffer) are rendered
* **Imperative Map Updates** — High-volume layers (flights, satellites, fires) bypass React reconciliation via direct `setData()` calls
* **Clustered Rendering** — Ships, CCTV, earthquakes, and data centers use MapLibre clustering to reduce feature count
* **Debounced Viewport Updates** — 300ms debounce prevents GeoJSON rebuild thrash during pan/zoom; 2s debounce on dense layers (satellites, fires)
* **Position Interpolation** — Smooth 10s tick animation between data refreshes
* **React.memo** — Heavy components wrapped to prevent unnecessary re-renders
* **Coordinate Precision** — Lat/lng rounded to 5 decimals (~1m) to reduce JSON size
- **Gzip Compression** — API payloads compressed ~92% (11.6 MB → 915 KB)
- **ETag Caching** — `304 Not Modified` responses skip redundant JSON parsing
- **Viewport Culling** — Only features within the visible map bounds (+20% buffer) are rendered
- **Clustered Rendering** — Ships, CCTV, and earthquakes use MapLibre clustering to reduce feature count
- **Debounced Viewport Updates** — 300ms debounce prevents GeoJSON rebuild thrash during pan/zoom
- **Position Interpolation** — Smooth 10s tick animation between data refreshes
- **React.memo** — Heavy components wrapped to prevent unnecessary re-renders
- **Coordinate Precision** — Lat/lng rounded to 5 decimals (~1m) to reduce JSON size
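
The ETag flow above fits in a few lines of FastAPI. A minimal sketch; the route name and payload source are stand-ins, not the app's actual endpoint:

```python
import hashlib
import json

from fastapi import FastAPI, Request, Response

app = FastAPI()
PAYLOAD = {"flights": [], "ships": []}  # stand-in for the live data snapshot

@app.get("/api/data")
async def get_data(request: Request):
    body = json.dumps(PAYLOAD, separators=(",", ":"))
    etag = hashlib.md5(body.encode()).hexdigest()
    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304)  # client copy is current: skip the body
    return Response(
        content=body,
        media_type="application/json",
        headers={"ETag": etag},
    )
```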
---
@@ -520,8 +292,6 @@ live-risk-dashboard/
│ ├── main.py # FastAPI app, middleware, API routes
│ ├── carrier_cache.json # Persisted carrier OSINT positions
│ ├── cctv.db # SQLite CCTV camera database
│ ├── config/
│ │ └── news_feeds.json # User-customizable RSS feed list (persists across restarts)
│ └── services/
│ ├── data_fetcher.py # Core scheduler — fetches all data sources
│ ├── ais_stream.py # AIS WebSocket client (25K+ vessels)
@@ -530,11 +300,8 @@ live-risk-dashboard/
│ ├── geopolitics.py # GDELT + Ukraine frontline fetcher
│ ├── region_dossier.py # Right-click country/city intelligence
│ ├── radio_intercept.py # Scanner radio feed integration
│ ├── kiwisdr_fetcher.py # KiwiSDR receiver scraper
│ ├── sentinel_search.py # Sentinel-2 STAC imagery search
│ ├── network_utils.py # HTTP client with curl fallback
│ ├── api_settings.py # API key management
│ └── news_feed_config.py # RSS feed config manager (add/remove/weight feeds)
│ └── api_settings.py # API key management
├── frontend/
│ ├── src/
@@ -551,8 +318,7 @@ live-risk-dashboard/
│ │ ├── MarketsPanel.tsx # Global financial markets ticker
│ │ ├── RadioInterceptPanel.tsx # Scanner-style radio panel
│ │ ├── FindLocateBar.tsx # Search/locate bar
│ │ ├── ChangelogModal.tsx # Version changelog popup
│ │ ├── SettingsPanel.tsx # App settings (API Keys + News Feed manager)
│ │ ├── SettingsPanel.tsx # App settings
│ │ ├── ScaleBar.tsx # Map scale indicator
│ │ ├── WikiImage.tsx # Wikipedia image fetcher
│ │ └── ErrorBoundary.tsx # Crash recovery wrapper
@@ -563,31 +329,26 @@ live-risk-dashboard/
## 🔑 Environment Variables
### Backend (`backend/.env`)
Create a `.env` file in the `backend/` directory:
```env
# Required
AIS_API_KEY=your_aisstream_key # Maritime vessel tracking (aisstream.io)
AISSTREAM_API_KEY=your_aisstream_key # Maritime vessel tracking
N2YO_API_KEY=your_n2yo_key # Satellite position data
# Optional (enhances data quality)
OPENSKY_CLIENT_ID=your_opensky_client_id # OAuth2 — higher rate limits for flight data
OPENSKY_CLIENT_SECRET=your_opensky_secret # OAuth2 — paired with Client ID above
LTA_ACCOUNT_KEY=your_lta_key # Singapore CCTV cameras
OPENSKY_CLIENT_ID=your_opensky_client_id # Higher rate limits for flight data
OPENSKY_CLIENT_SECRET=your_opensky_secret
LTA_ACCOUNT_KEY=your_lta_key # Singapore CCTV cameras
```
### Frontend
| Variable | Where to set | Purpose |
|---|---|---|
| `BACKEND_URL` | `environment` in `docker-compose.yml`, or shell env | URL the Next.js server uses to proxy API calls to the backend. Defaults to `http://backend:8000`. **Runtime variable — no rebuild needed.** |
**How it works:** The frontend proxies all `/api/*` requests through the Next.js server to `BACKEND_URL` using Docker's internal networking. Browsers only talk to port 3000; port 8000 never needs to be exposed externally. For local dev without Docker, `BACKEND_URL` defaults to `http://localhost:8000`.
---
## ⚠️ Disclaimer
This tool is built entirely on publicly available, open-source intelligence (OSINT) data. No classified, restricted, or non-public data is used. Carrier positions are estimates based on public reporting. The military-themed UI is purely aesthetic.
This is an **educational and research tool** built entirely on publicly available, open-source intelligence (OSINT) data. No classified, restricted, or non-public data sources are used. Carrier positions are estimates based on public reporting. The military-themed UI is purely aesthetic.
**Do not use this tool for any operational, military, or intelligence purpose.**
---
+1
@@ -0,0 +1 @@
ba57965389036194d6dd60e6de33d2e1e1bbf20b
+1 -23
@@ -4,29 +4,7 @@ __pycache__/
.env
.pytest_cache/
.coverage
.git/
node_modules/
cctv.db
*.sqlite
*.db
# Debug/log files
*.json
*.txt
!requirements.txt
!requirements-dev.txt
*.html
*.xlsx
# Debug/cache JSON (keep package*.json and data files)
ais_cache.json
carrier_cache.json
carrier_positions.json
dump.json
debug_fast.json
nyc_full.json
nyc_sample.json
tmp_fast.json
# Test files (not needed in production image)
test_*.py
tests/
-23
@@ -1,23 +0,0 @@
# ShadowBroker Backend — Environment Variables
# Copy this file to .env and fill in your keys:
# cp .env.example .env
# ── Required Keys ──────────────────────────────────────────────
# Without these, the corresponding data layers will be empty.
OPENSKY_CLIENT_ID= # https://opensky-network.org/ — free account, OAuth2 client ID
OPENSKY_CLIENT_SECRET= # OAuth2 client secret from your OpenSky dashboard
AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
# ── Optional ───────────────────────────────────────────────────
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
# Admin key — protects sensitive endpoints (API key management, system update).
# If unset, these endpoints remain open (fine for local dev).
# Set this in production and enter the same key in Settings → Admin Key.
# ADMIN_KEY=your-secret-admin-key-here
# LTA Singapore traffic cameras — leave blank to skip this data source.
# LTA_ACCOUNT_KEY=
+3 -16
@@ -1,23 +1,10 @@
FROM python:3.10-slim-bookworm
FROM python:3.10-slim
WORKDIR /app
# Install Node.js (for AIS WebSocket proxy) and curl (for network fallback)
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt \
    && playwright install --with-deps chromium
# Install Node.js dependencies (ws module for AIS WebSocket proxy)
# Copy manifests first so this layer is cached unless deps change
COPY package*.json ./
RUN npm ci --omit=dev
RUN pip install --no-cache-dir -r requirements.txt
# Copy source code
COPY . .
+18 -40
@@ -1,23 +1,24 @@
const WebSocket = require('ws');
const readline = require('readline');
const args = process.argv.slice(2);
const API_KEY = args[0] || process.env.AIS_API_KEY;
const API_KEY = args[0] || '75cc39af03c9cc23c90e8a7b3c3bc2b2a507c5fb';
if (!API_KEY) {
console.error("FATAL: AIS_API_KEY is not set. WebSocket proxy cannot start.");
process.exit(1);
}
const FILTER = [
// US Aircraft Carriers and major naval groups
{ "MMSI": 338000000 }, { "MMSI": 338100000 }, // US Navy general prefixes
// Plus let's grab some global shipping for density
{ "BoundingBoxes": [[[-90, -180], [90, 180]]] }
];
// Start with global coverage, until frontend updates it
let currentBboxes = [[[-90, -180], [90, 180]]];
let activeWs = null;
function connect() {
const ws = new WebSocket('wss://stream.aisstream.io/v0/stream');
function sendSub(ws) {
if (ws && ws.readyState === WebSocket.OPEN) {
ws.on('open', () => {
const subMsg = {
APIKey: API_KEY,
BoundingBoxes: currentBboxes,
BoundingBoxes: [
[[-90, -180], [90, 180]]
],
FilterMessageTypes: [
"PositionReport",
"ShipStaticData",
@@ -25,39 +26,17 @@ function sendSub(ws) {
]
};
ws.send(JSON.stringify(subMsg));
}
}
// Listen for dynamic bounding box updates via stdin from Python orchestrator
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false
});
rl.on('line', (line) => {
try {
const cmd = JSON.parse(line);
if (cmd.type === "update_bbox" && cmd.bboxes) {
currentBboxes = cmd.bboxes;
if (activeWs) sendSub(activeWs); // Resend subscription (swap and replace)
}
} catch (e) {}
});
function connect() {
const ws = new WebSocket('wss://stream.aisstream.io/v0/stream');
activeWs = ws;
ws.on('open', () => {
sendSub(ws);
});
ws.on('message', (data) => {
// Output raw AIS message JSON to stdout so Python can consume it
// We ensure exactly one JSON object per line.
try {
const parsed = JSON.parse(data);
console.log(JSON.stringify(parsed));
} catch (e) {}
} catch (e) {
// ignore non-json
}
});
ws.on('error', (err) => {
@@ -65,7 +44,6 @@ function connect() {
});
ws.on('close', () => {
activeWs = null;
console.error("WebSocket Proxy Closed. Reconnecting in 5s...");
setTimeout(connect, 5000);
});
+112
@@ -0,0 +1,112 @@
import zipfile
import xml.etree.ElementTree as ET
import re
import csv
import os
xlsx_path = r"f:\Codebase\Oracle\live-risk-dashboard\TheAirTraffic Database.xlsx"
output_path = r"f:\Codebase\Oracle\live-risk-dashboard\backend\xlsx_analysis.txt"
def parse_xlsx_sheet(z, shared_strings, sheet_num):
    ns = {'s': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
    sheet_file = f'xl/worksheets/sheet{sheet_num}.xml'
    if sheet_file not in z.namelist():
        return []
    ws_xml = z.read(sheet_file)
    ws_root = ET.fromstring(ws_xml)
    rows = []
    for row in ws_root.findall('.//s:sheetData/s:row', ns):
        cells = {}
        for cell in row.findall('s:c', ns):
            cell_ref = cell.get('r', '')
            cell_type = cell.get('t', '')
            val_elem = cell.find('s:v', ns)
            val = val_elem.text if val_elem is not None else ''
            if cell_type == 's' and val:
                val = shared_strings[int(val)]
            m = re.match(r'([A-Z]+)', cell_ref)
            col = m.group(1) if m else ''
            cells[col] = val
        rows.append(cells)
    return rows

with open(output_path, 'w', encoding='utf-8') as out:
    with zipfile.ZipFile(xlsx_path, 'r') as z:
        shared_strings = []
        if 'xl/sharedStrings.xml' in z.namelist():
            ss_xml = z.read('xl/sharedStrings.xml')
            root = ET.fromstring(ss_xml)
            ns = {'s': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
            for si in root.findall('.//s:si', ns):
                texts = si.findall('.//s:t', ns)
                val = ''.join(t.text or '' for t in texts)
                shared_strings.append(val)
        all_entries = []
        for sheet_idx in range(1, 5):
            rows = parse_xlsx_sheet(z, shared_strings, sheet_idx)
            if not rows:
                continue
            out.write(f"\n=== SHEET {sheet_idx}: {len(rows)} rows ===\n")
            # Print first 5 rows
            for i in range(min(5, len(rows))):
                for col in sorted(rows[i].keys(), key=lambda x: (len(x), x)):
                    val = rows[i][col]
                    if val:
                        out.write(f" Row{i} {col}: '{val[:80]}'\n")
            out.write("\n")
            for r in rows[1:]:
                for col, val in r.items():
                    val = str(val).strip()
                    n_regs = re.findall(r'N\d{1,5}[A-Z]{0,2}', val)
                    owner = r.get('B', r.get('A', '')).strip()
                    aircraft_type = r.get('C', r.get('D', '')).strip()
                    for reg in n_regs:
                        all_entries.append({
                            'registration': reg.upper(),
                            'owner': owner,
                            'type': aircraft_type,
                            'sheet': sheet_idx
                        })
    unique_regs = set(e['registration'] for e in all_entries)
    out.write(f"\nTOTAL ENTRIES: {len(all_entries)}\n")
    out.write(f"UNIQUE REGISTRATIONS: {len(unique_regs)}\n")
    csv_path = r"f:\Codebase\Oracle\live-risk-dashboard\PLANEALERTLIST\plane-alert-db-main\plane-alert-db.csv"
    existing = {}
    with open(csv_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            icao = row.get('$ICAO', '').strip().upper()
            reg = row.get('$Registration', '').strip().upper()
            if reg:
                existing[reg] = {
                    'icao': icao,
                    'category': row.get('Category', ''),
                    'operator': row.get('$Operator', ''),
                }
    already_in = unique_regs & set(existing.keys())
    missing = unique_regs - set(existing.keys())
    out.write(f"\nplane-alert-db: {len(existing)} registrations\n")
    out.write(f"Already covered: {len(already_in)}\n")
    out.write(f"MISSING: {len(missing)}\n")
    out.write(f"\n--- ALREADY TRACKED ---\n")
    seen = set()
    for e in all_entries:
        if e['registration'] in already_in and e['registration'] not in seen:
            info = existing[e['registration']]
            out.write(f" {e['owner'][:40]:40s} {e['registration']:10s} DB_CAT: {info['category'][:25]:25s} DB_OP: {info['operator'][:40]}\n")
            seen.add(e['registration'])
    out.write(f"\n--- MISSING (NEED TO ADD) ---\n")
    seen = set()
    for e in all_entries:
        if e['registration'] in missing and e['registration'] not in seen:
            out.write(f" {e['owner'][:40]:40s} {e['registration']:10s} TYPE: {e['type'][:30]}\n")
            seen.add(e['registration'])

print(f"Analysis written to {output_path}")
+17
@@ -0,0 +1,17 @@
import requests
regions = [
    {"lat": 39.8, "lon": -98.5, "dist": 2000},  # USA
    {"lat": 50.0, "lon": 15.0, "dist": 2000},   # Europe
    {"lat": 35.0, "lon": 105.0, "dist": 2000}   # Asia / China
]

for r in regions:
    url = f"https://api.adsb.lol/v2/lat/{r['lat']}/lon/{r['lon']}/dist/{r['dist']}"
    res = requests.get(url, timeout=10)
    if res.status_code == 200:
        data = res.json()
        acs = data.get("ac", [])
        print(f"Region lat:{r['lat']} lon:{r['lon']} dist:{r['dist']} -> Flights: {len(acs)}")
    else:
        print(f"Error for Region lat:{r['lat']} lon:{r['lon']}: HTTP {res.status_code}")
+10
@@ -0,0 +1,10 @@
import sqlite3
import os
db_path = os.path.join(os.path.dirname(__file__), 'cctv.db')
conn = sqlite3.connect(db_path)
cur = conn.cursor()
cur.execute("DELETE FROM cameras WHERE id LIKE 'OSM-%'")
print(f"Deleted {cur.rowcount} OSM cameras from DB.")
conn.commit()
conn.close()
-44
@@ -1,44 +0,0 @@
{
"feeds": [
{
"name": "NPR",
"url": "https://feeds.npr.org/1004/rss.xml",
"weight": 4
},
{
"name": "BBC",
"url": "http://feeds.bbci.co.uk/news/world/rss.xml",
"weight": 3
},
{
"name": "AlJazeera",
"url": "https://www.aljazeera.com/xml/rss/all.xml",
"weight": 2
},
{
"name": "NYT",
"url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
"weight": 1
},
{
"name": "GDACS",
"url": "https://www.gdacs.org/xml/rss.xml",
"weight": 5
},
{
"name": "NHK",
"url": "https://www3.nhk.or.jp/nhkworld/rss/world.xml",
"weight": 3
},
{
"name": "CNA",
"url": "https://www.channelnewsasia.com/rssfeed/8395986",
"weight": 3
},
{
"name": "Mercopress",
"url": "https://en.mercopress.com/rss/",
"weight": 3
}
]
}
@@ -1 +0,0 @@
430ac93c4f7c4fb5a3e596ec38e3b7794c731cc1
@@ -1 +0,0 @@
50180452f0522f50b2624161407cb8ccc80a00db
File diff suppressed because one or more lines are too long
+1
@@ -0,0 +1 @@
{}
@@ -1 +0,0 @@
38a18cbbf1acbec5eb9266b809c28d31e2941c53
File diff suppressed because one or more lines are too long
-122
@@ -1,122 +0,0 @@
{
"319225400": {
"name": "KORU",
"owner": "Jeff Bezos",
"builder": "Oceanco",
"length_m": 127,
"year": 2023,
"category": "Tech Billionaire",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Koru_(yacht)"
},
"538072122": {
"name": "LAUNCHPAD",
"owner": "Mark Zuckerberg",
"builder": "Feadship",
"length_m": 118,
"year": 2024,
"category": "Tech Billionaire",
"flag": "Marshall Islands",
"link": "https://www.superyachtfan.com/yacht/launchpad/"
},
"319032600": {
"name": "MUSASHI",
"owner": "Larry Ellison",
"builder": "Feadship",
"length_m": 88,
"year": 2011,
"category": "Tech Billionaire",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Musashi_(yacht)"
},
"319011000": {
"name": "RISING SUN",
"owner": "David Geffen",
"builder": "Lurssen",
"length_m": 138,
"year": 2004,
"category": "Celebrity / Mogul",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Rising_Sun_(yacht)"
},
"310593000": {
"name": "ECLIPSE",
"owner": "Roman Abramovich",
"builder": "Blohm+Voss",
"length_m": 162,
"year": 2010,
"category": "Oligarch Watch",
"flag": "Bermuda",
"link": "https://en.wikipedia.org/wiki/Eclipse_(yacht)"
},
"310792000": {
"name": "SOLARIS",
"owner": "Roman Abramovich",
"builder": "Lloyd Werft",
"length_m": 140,
"year": 2021,
"category": "Oligarch Watch",
"flag": "Bermuda",
"link": "https://en.wikipedia.org/wiki/Solaris_(yacht)"
},
"319094900": {
"name": "DILBAR",
"owner": "Alisher Usmanov (seized)",
"builder": "Lurssen",
"length_m": 156,
"year": 2016,
"category": "Oligarch Watch",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Dilbar_(yacht)"
},
"273610820": {
"name": "NORD",
"owner": "Alexei Mordashov",
"builder": "Lurssen",
"length_m": 142,
"year": 2021,
"category": "Oligarch Watch",
"flag": "Russia",
"link": "https://en.wikipedia.org/wiki/Nord_(yacht)"
},
"319179200": {
"name": "SCHEHERAZADE",
"owner": "Eduard Khudainatov (alleged Putin)",
"builder": "Lurssen",
"length_m": 140,
"year": 2020,
"category": "Oligarch Watch",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Scheherazade_(yacht)"
},
"319112900": {
"name": "AMADEA",
"owner": "Suleiman Kerimov (seized by US DOJ)",
"builder": "Lurssen",
"length_m": 106,
"year": 2017,
"category": "Oligarch Watch",
"flag": "Cayman Islands",
"link": "https://en.wikipedia.org/wiki/Amadea_(yacht)"
},
"319156800": {
"name": "BRAVO EUGENIA",
"owner": "Jerry Jones",
"builder": "Oceanco",
"length_m": 109,
"year": 2018,
"category": "Celebrity / Mogul",
"flag": "Cayman Islands",
"link": "https://www.superyachtfan.com/yacht/bravo-eugenia/"
},
"319137200": {
"name": "LADY S",
"owner": "Dan Snyder",
"builder": "Feadship",
"length_m": 93,
"year": 2019,
"category": "Celebrity / Mogul",
"flag": "Cayman Islands",
"link": "https://www.superyachtfan.com/yacht/lady-s/"
}
}
+1
@@ -0,0 +1 @@
5c3b1c768973ca54e9a1befee8dc075f38e8cc56
+1
@@ -0,0 +1 @@
2b64633521ffb6f06da36e19f5c8eb86979e2187
+25
@@ -0,0 +1,25 @@
import re
import json
try:
    with open('liveua_test.html', 'r', encoding='utf-8') as f:
        html = f.read()
    m = re.search(r"var\s+ovens\s*=\s*(.*?);(?!function)", html, re.DOTALL)
    if m:
        json_str = m.group(1)
        # Handle if it is a string containing base64
        if json_str.startswith("'") or json_str.startswith('"'):
            json_str = json_str.strip('"\'')
            import base64
            import urllib.parse
            json_str = base64.b64decode(urllib.parse.unquote(json_str)).decode('utf-8')
        data = json.loads(json_str)
        with open('out_liveua.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f"Successfully extracted {len(data)} ovens items.")
    else:
        print("var ovens not found.")
except Exception as e:
    print("Error:", e)
File diff suppressed because one or more lines are too long
+77 -396
@@ -1,134 +1,22 @@
import os
import time
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
_start_time = time.time()
# ---------------------------------------------------------------------------
# Docker Swarm Secrets support
# For each VAR below, if VAR_FILE is set (e.g. AIS_API_KEY_FILE=/run/secrets/AIS_API_KEY),
# the file is read and its trimmed content is placed into VAR.
# This MUST run before service imports — modules read os.environ at import time.
# ---------------------------------------------------------------------------
_SECRET_VARS = [
"AIS_API_KEY",
"OPENSKY_CLIENT_ID",
"OPENSKY_CLIENT_SECRET",
"LTA_ACCOUNT_KEY",
"CORS_ORIGINS",
"ADMIN_KEY",
]
for _var in _SECRET_VARS:
    _file_var = f"{_var}_FILE"
    _file_path = os.environ.get(_file_var)
    if _file_path:
        try:
            with open(_file_path, "r") as _f:
                _value = _f.read().strip()
            if _value:
                os.environ[_var] = _value
                logger.info(f"Loaded secret {_var} from {_file_path}")
            else:
                logger.warning(f"Secret file {_file_path} for {_var} is empty")
        except FileNotFoundError:
            logger.error(f"Secret file {_file_path} for {_var} not found")
        except Exception as _e:
            logger.error(f"Failed to read secret file {_file_path} for {_var}: {_e}")
from fastapi import FastAPI, Request, Response, Query, Depends, HTTPException
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from services.data_fetcher import start_scheduler, stop_scheduler, get_latest_data, source_timestamps
from services.data_fetcher import start_scheduler, stop_scheduler, get_latest_data
from services.ais_stream import start_ais_stream, stop_ais_stream
from services.carrier_tracker import start_carrier_tracker, stop_carrier_tracker
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded
from services.schemas import HealthResponse, RefreshResponse
import uvicorn
import logging
import hashlib
import json as json_mod
import socket
import threading
limiter = Limiter(key_func=get_remote_address)
# ---------------------------------------------------------------------------
# Admin authentication — protects settings & system endpoints
# Set ADMIN_KEY in .env or Docker secrets. If unset, endpoints remain open
# for local-dev convenience but will log a startup warning.
# ---------------------------------------------------------------------------
_ADMIN_KEY = os.environ.get("ADMIN_KEY", "")
if not _ADMIN_KEY:
logger.warning("ADMIN_KEY is not set — sensitive endpoints are UNPROTECTED. "
"Set ADMIN_KEY in .env or Docker secrets for production.")
def require_admin(request: Request):
"""FastAPI dependency that rejects requests without a valid X-Admin-Key header."""
if not _ADMIN_KEY:
return # No key configured — allow all (local dev)
if request.headers.get("X-Admin-Key") != _ADMIN_KEY:
raise HTTPException(status_code=403, detail="Forbidden — invalid or missing admin key")
def _build_cors_origins():
"""Build a CORS origins whitelist: localhost + LAN IPs + env overrides.
Falls back to wildcard only if auto-detection fails entirely."""
origins = [
"http://localhost:3000",
"http://127.0.0.1:3000",
"http://localhost:8000",
"http://127.0.0.1:8000",
]
# Add this machine's LAN IPs (covers common home/office setups)
try:
hostname = socket.gethostname()
for info in socket.getaddrinfo(hostname, None, socket.AF_INET):
ip = info[4][0]
if ip not in ("127.0.0.1", "0.0.0.0"):
origins.append(f"http://{ip}:3000")
origins.append(f"http://{ip}:8000")
except Exception:
pass
# Allow user override via CORS_ORIGINS env var (comma-separated)
extra = os.environ.get("CORS_ORIGINS", "")
if extra:
origins.extend([o.strip() for o in extra.split(",") if o.strip()])
return list(set(origins)) # deduplicate
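# Example: CORS_ORIGINS="https://dash.example.com,https://ops.example.com"
# (hypothetical hosts) yields the localhost/LAN defaults plus both extra
# origins, deduplicated by the set() round-trip above.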
logging.basicConfig(level=logging.INFO)
@asynccontextmanager
async def lifespan(app: FastAPI):
# Validate environment variables before starting anything
from services.env_check import validate_env
validate_env(strict=True)
# Start AIS stream first — it loads the disk cache (instant ships) then
# begins accumulating live vessel data via WebSocket in the background.
start_ais_stream()
# Carrier tracker runs its own initial update_carrier_positions() internally
# in _scheduler_loop, so we do NOT call it again in the preload thread.
# Startup: Start background data fetching, AIS stream, and carrier tracker
start_carrier_tracker()
# Start the recurring scheduler (fast=60s, slow=30min).
start_ais_stream()
start_scheduler()
# Kick off the full data preload in a background thread so the server
# is listening on port 8000 instantly. The frontend's adaptive polling
# (retries every 3s) will pick up data piecemeal as each fetcher finishes.
def _background_preload():
logger.info("=== PRELOADING DATA (background — server already accepting requests) ===")
try:
update_all_data()
logger.info("=== PRELOAD COMPLETE ===")
except Exception as e:
logger.error(f"Data preload failed (non-fatal): {e}")
threading.Thread(target=_background_preload, daemon=True).start()
yield
# Shutdown: Stop all background services
stop_ais_stream()
@@ -136,14 +24,12 @@ async def lifespan(app: FastAPI):
stop_carrier_tracker()
app = FastAPI(title="Live Risk Dashboard API", lifespan=lifespan)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
from fastapi.middleware.gzip import GZipMiddleware
app.add_middleware(GZipMiddleware, minimum_size=1000)
app.add_middleware(
CORSMiddleware,
allow_origins=_build_cors_origins(),
allow_origins=["*"], # For prototyping, allow all
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
@@ -151,244 +37,113 @@ app.add_middleware(
from services.data_fetcher import update_all_data
_refresh_lock = threading.Lock()
@app.get("/api/refresh", response_model=RefreshResponse)
@limiter.limit("2/minute")
async def force_refresh(request: Request):
if not _refresh_lock.acquire(blocking=False):
return {"status": "refresh already in progress"}
def _do_refresh():
try:
update_all_data()
finally:
_refresh_lock.release()
t = threading.Thread(target=_do_refresh)
@app.get("/api/refresh")
async def force_refresh():
# Force an immediate synchronous update of the data payload
import threading
t = threading.Thread(target=update_all_data)
t.start()
return {"status": "refreshing in background"}
@app.post("/api/ais/feed")
@limiter.limit("60/minute")
async def ais_feed(request: Request):
"""Accept AIS-catcher HTTP JSON feed (POST decoded AIS messages)."""
from services.ais_stream import ingest_ais_catcher
try:
body = await request.json()
except Exception:
return Response(content='{"error":"invalid JSON"}', status_code=400, media_type="application/json")
msgs = body.get("msgs", [])
if not msgs:
return {"status": "ok", "ingested": 0}
count = ingest_ais_catcher(msgs)
return {"status": "ok", "ingested": count}
from pydantic import BaseModel
class ViewportUpdate(BaseModel):
s: float
w: float
n: float
e: float
@app.post("/api/viewport")
@limiter.limit("60/minute")
async def update_viewport(vp: ViewportUpdate, request: Request):
"""Receive frontend map bounds to dynamically choke the AIS stream."""
from services.ais_stream import update_ais_bbox
# Add a gentle 10% padding so ships don't pop-in right at the edge
pad_lat = (vp.n - vp.s) * 0.1
# TODO: pad longitude across the antimeridian; for now, boxes that cross it (e <= w) get no longitude padding
pad_lng = (vp.e - vp.w) * 0.1 if vp.e > vp.w else 0
update_ais_bbox(
south=max(-90, vp.s - pad_lat),
west=max(-180, vp.w - pad_lng) if pad_lng else vp.w,
north=min(90, vp.n + pad_lat),
east=min(180, vp.e + pad_lng) if pad_lng else vp.e
)
return {"status": "ok"}
@app.get("/api/live-data")
@limiter.limit("120/minute")
async def live_data(request: Request):
async def live_data():
return get_latest_data()
def _etag_response(request: Request, payload: dict, prefix: str = "", default=None):
"""Serialize once, hash the bytes for ETag, return 304 or full response."""
content = json_mod.dumps(payload, default=default)
etag = hashlib.md5(f"{prefix}{content}".encode()).hexdigest()[:16]
@app.get("/api/live-data/fast")
async def live_data_fast(request: Request):
d = get_latest_data()
payload = {
"commercial_flights": d.get("commercial_flights", []),
"military_flights": d.get("military_flights", []),
"private_flights": d.get("private_flights", []),
"private_jets": d.get("private_jets", []),
"tracked_flights": d.get("tracked_flights", []),
"ships": d.get("ships", []),
"cctv": d.get("cctv", []),
"uavs": d.get("uavs", []),
"liveuamap": d.get("liveuamap", []),
"gps_jamming": d.get("gps_jamming", []),
}
# ETag includes last_updated timestamp so it changes on every data refresh,
# not just when item counts change (old bug: positions went stale)
last_updated = d.get("last_updated", "")
counts = "|".join(f"{k}:{len(v) if isinstance(v, list) else 0}" for k, v in payload.items())
etag = hashlib.md5(f"{last_updated}|{counts}".encode()).hexdigest()[:16]
if request.headers.get("if-none-match") == etag:
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
return Response(content=content, media_type="application/json",
headers={"ETag": etag, "Cache-Control": "no-cache"})
def _bbox_filter(items: list, s: float, w: float, n: float, e: float,
lat_key: str = "lat", lng_key: str = "lng") -> list:
"""Filter a list of dicts to those within the bounding box (with 20% padding).
Handles antimeridian crossing (e.g. w=170, e=-170)."""
pad_lat = (n - s) * 0.2
pad_lng = (e - w) * 0.2 if e > w else ((e + 360 - w) * 0.2)
s2, n2 = s - pad_lat, n + pad_lat
w2, e2 = w - pad_lng, e + pad_lng
crosses_antimeridian = w2 > e2
out = []
for item in items:
lat = item.get(lat_key)
lng = item.get(lng_key)
if lat is None or lng is None:
out.append(item) # Keep items without coords (don't filter them out)
continue
if not (s2 <= lat <= n2):
continue
if crosses_antimeridian:
if lng >= w2 or lng <= e2:
out.append(item)
else:
if w2 <= lng <= e2:
out.append(item)
return out
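# Worked example (antimeridian): s=-10, w=170, n=10, e=-170. Because e < w,
# the padded width is (e + 360 - w) * 0.2 = 4 degrees, so w2=166, e2=-166 and
# crosses_antimeridian=True: a ship at lng=179 or lng=-178 passes the filter,
# one at lng=0 does not.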
@app.get("/api/live-data/fast")
@limiter.limit("120/minute")
async def live_data_fast(request: Request,
s: float = Query(None, description="South bound"),
w: float = Query(None, description="West bound"),
n: float = Query(None, description="North bound"),
e: float = Query(None, description="East bound")):
d = get_latest_data()
has_bbox = all(v is not None for v in (s, w, n, e))
def _f(items, lat_key="lat", lng_key="lng"):
return _bbox_filter(items, s, w, n, e, lat_key, lng_key) if has_bbox else items
payload = {
"commercial_flights": _f(d.get("commercial_flights", [])),
"military_flights": _f(d.get("military_flights", [])),
"private_flights": _f(d.get("private_flights", [])),
"private_jets": _f(d.get("private_jets", [])),
"tracked_flights": d.get("tracked_flights", []), # Always send tracked (small set)
"ships": _f(d.get("ships", [])),
"cctv": _f(d.get("cctv", []), lat_key="lat", lng_key="lon"),
"uavs": _f(d.get("uavs", [])),
"liveuamap": _f(d.get("liveuamap", [])),
"gps_jamming": _f(d.get("gps_jamming", [])),
"satellites": _f(d.get("satellites", [])),
"satellite_source": d.get("satellite_source", "none"),
"freshness": dict(source_timestamps),
}
bbox_tag = f"{s},{w},{n},{e}" if has_bbox else "full"
return _etag_response(request, payload, prefix=f"fast|{bbox_tag}|")
return Response(
content=json_mod.dumps(payload),
media_type="application/json",
headers={"ETag": etag, "Cache-Control": "no-cache"}
)
@app.get("/api/live-data/slow")
@limiter.limit("60/minute")
async def live_data_slow(request: Request,
s: float = Query(None, description="South bound"),
w: float = Query(None, description="West bound"),
n: float = Query(None, description="North bound"),
e: float = Query(None, description="East bound")):
async def live_data_slow(request: Request):
d = get_latest_data()
has_bbox = all(v is not None for v in (s, w, n, e))
def _f(items, lat_key="lat", lng_key="lng"):
return _bbox_filter(items, s, w, n, e, lat_key, lng_key) if has_bbox else items
payload = {
"last_updated": d.get("last_updated"),
"news": d.get("news", []), # News has coords but we always send it (small set, important)
"news": d.get("news", []),
"stocks": d.get("stocks", {}),
"oil": d.get("oil", {}),
"weather": d.get("weather"),
"traffic": d.get("traffic", []),
"earthquakes": _f(d.get("earthquakes", [])),
"frontlines": d.get("frontlines"), # Always send (GeoJSON polygon, not point-filterable)
"gdelt": d.get("gdelt", []), # GeoJSON features — filtered client-side
"airports": d.get("airports", []), # Always send (reference data)
"kiwisdr": _f(d.get("kiwisdr", []), lat_key="lat", lng_key="lon"),
"space_weather": d.get("space_weather"),
"internet_outages": _f(d.get("internet_outages", [])),
"firms_fires": _f(d.get("firms_fires", [])),
"datacenters": _f(d.get("datacenters", [])),
"freshness": dict(source_timestamps),
"earthquakes": d.get("earthquakes", []),
"frontlines": d.get("frontlines"),
"gdelt": d.get("gdelt", []),
"airports": d.get("airports", []),
"satellites": d.get("satellites", [])
}
bbox_tag = f"{s},{w},{n},{e}" if has_bbox else "full"
return _etag_response(request, payload, prefix=f"slow|{bbox_tag}|", default=str)
# ETag based on last_updated + item counts
last_updated = d.get("last_updated", "")
counts = "|".join(f"{k}:{len(v) if isinstance(v, list) else 0}" for k, v in payload.items())
etag = hashlib.md5(f"slow|{last_updated}|{counts}".encode()).hexdigest()[:16]
if request.headers.get("if-none-match") == etag:
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
return Response(
content=json_mod.dumps(payload, default=str),
media_type="application/json",
headers={"ETag": etag, "Cache-Control": "no-cache"}
)
@app.get("/api/debug-latest")
@limiter.limit("30/minute")
async def debug_latest_data(request: Request):
async def debug_latest_data():
return list(get_latest_data().keys())
@app.get("/api/health", response_model=HealthResponse)
@limiter.limit("30/minute")
async def health_check(request: Request):
import time
d = get_latest_data()
last = d.get("last_updated")
return {
"status": "ok",
"last_updated": last,
"sources": {
"flights": len(d.get("commercial_flights", [])),
"military": len(d.get("military_flights", [])),
"ships": len(d.get("ships", [])),
"satellites": len(d.get("satellites", [])),
"earthquakes": len(d.get("earthquakes", [])),
"cctv": len(d.get("cctv", [])),
"news": len(d.get("news", [])),
"uavs": len(d.get("uavs", [])),
"firms_fires": len(d.get("firms_fires", [])),
"liveuamap": len(d.get("liveuamap", [])),
"gdelt": len(d.get("gdelt", [])),
},
"freshness": dict(source_timestamps),
"uptime_seconds": round(time.time() - _start_time),
}
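# Illustrative response shape (values are examples only):
#   {"status": "ok", "last_updated": "2026-03-08T20:15:03Z",
#    "sources": {"flights": 5231, "military": 312, "ships": 1874, ...},
#    "freshness": {"earthquakes": "2026-03-08T20:14:11", ...},
#    "uptime_seconds": 8642}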
@app.get("/api/health")
async def health_check():
return {"status": "ok"}
from services.radio_intercept import get_top_broadcastify_feeds, get_openmhz_systems, get_recent_openmhz_calls, find_nearest_openmhz_system
@app.get("/api/radio/top")
@limiter.limit("30/minute")
async def get_top_radios(request: Request):
async def get_top_radios():
return get_top_broadcastify_feeds()
@app.get("/api/radio/openmhz/systems")
@limiter.limit("30/minute")
async def api_get_openmhz_systems(request: Request):
async def api_get_openmhz_systems():
return get_openmhz_systems()
@app.get("/api/radio/openmhz/calls/{sys_name}")
@limiter.limit("60/minute")
async def api_get_openmhz_calls(request: Request, sys_name: str):
async def api_get_openmhz_calls(sys_name: str):
return get_recent_openmhz_calls(sys_name)
@app.get("/api/radio/nearest")
@limiter.limit("60/minute")
async def api_get_nearest_radio(
request: Request,
lat: float = Query(..., ge=-90, le=90),
lng: float = Query(..., ge=-180, le=180),
):
async def api_get_nearest_radio(lat: float, lng: float):
return find_nearest_openmhz_system(lat, lng)
from services.radio_intercept import find_nearest_openmhz_systems_list
@app.get("/api/radio/nearest-list")
@limiter.limit("60/minute")
async def api_get_nearest_radios_list(
request: Request,
lat: float = Query(..., ge=-90, le=90),
lng: float = Query(..., ge=-180, le=180),
limit: int = Query(5, ge=1, le=20),
):
async def api_get_nearest_radios_list(lat: float, lng: float, limit: int = 5):
return find_nearest_openmhz_systems_list(lat, lng, limit=limit)
from services.network_utils import fetch_with_curl
@app.get("/api/route/{callsign}")
@limiter.limit("60/minute")
async def get_flight_route(request: Request, callsign: str, lat: float = 0.0, lng: float = 0.0):
r = fetch_with_curl("https://api.adsb.lol/api/0/routeset", method="POST", json_data={"planes": [{"callsign": callsign, "lat": lat, "lng": lng}]}, timeout=10)
if r and r.status_code == 200:
async def get_flight_route(callsign: str):
r = fetch_with_curl("https://api.adsb.lol/api/0/routeset", method="POST", json_data={"planes": [{"callsign": callsign}]}, timeout=10)
if r.status_code == 200:
data = r.json()
route_list = []
if isinstance(data, dict):
@@ -400,40 +155,19 @@ async def get_flight_route(request: Request, callsign: str, lat: float = 0.0, ln
route = route_list[0]
airports = route.get("_airports", [])
if len(airports) >= 2:
orig = airports[0]
dest = airports[-1]
return {
"orig_loc": [orig.get("lon", 0), orig.get("lat", 0)],
"dest_loc": [dest.get("lon", 0), dest.get("lat", 0)],
"origin_name": f"{orig.get('iata', '') or orig.get('icao', '')}: {orig.get('name', 'Unknown')}",
"dest_name": f"{dest.get('iata', '') or dest.get('icao', '')}: {dest.get('name', 'Unknown')}",
"orig_loc": [airports[0].get("lon", 0), airports[0].get("lat", 0)],
"dest_loc": [airports[-1].get("lon", 0), airports[-1].get("lat", 0)]
}
return {}
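# Example result for a resolved route (illustrative values; note the
# [lon, lat] ordering used above):
#   {"orig_loc": [-0.4543, 51.4706], "dest_loc": [-73.7781, 40.6413],
#    "origin_name": "LHR: London Heathrow", "dest_name": "JFK: John F. Kennedy Intl"}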
from services.region_dossier import get_region_dossier
@app.get("/api/region-dossier")
@limiter.limit("30/minute")
def api_region_dossier(
request: Request,
lat: float = Query(..., ge=-90, le=90),
lng: float = Query(..., ge=-180, le=180),
):
def api_region_dossier(lat: float, lng: float):
"""Sync def so FastAPI runs it in a threadpool — prevents blocking the event loop."""
return get_region_dossier(lat, lng)
from services.sentinel_search import search_sentinel2_scene
@app.get("/api/sentinel2/search")
@limiter.limit("30/minute")
def api_sentinel2_search(
request: Request,
lat: float = Query(..., ge=-90, le=90),
lng: float = Query(..., ge=-180, le=180),
):
"""Search for latest Sentinel-2 imagery at a point. Sync for threadpool execution."""
return search_sentinel2_scene(lat, lng)
# ---------------------------------------------------------------------------
# API Settings — key registry & management
# ---------------------------------------------------------------------------
@@ -444,71 +178,18 @@ class ApiKeyUpdate(BaseModel):
env_key: str
value: str
@app.get("/api/settings/api-keys", dependencies=[Depends(require_admin)])
@limiter.limit("30/minute")
async def api_get_keys(request: Request):
@app.get("/api/settings/api-keys")
async def api_get_keys():
return get_api_keys()
@app.put("/api/settings/api-keys", dependencies=[Depends(require_admin)])
@limiter.limit("10/minute")
async def api_update_key(request: Request, body: ApiKeyUpdate):
@app.put("/api/settings/api-keys")
async def api_update_key(body: ApiKeyUpdate):
ok = update_api_key(body.env_key, body.value)
if ok:
return {"status": "updated", "env_key": body.env_key}
return {"status": "error", "message": "Failed to update .env file"}
# ---------------------------------------------------------------------------
# News Feed Configuration
# ---------------------------------------------------------------------------
from services.news_feed_config import get_feeds, save_feeds, reset_feeds
@app.get("/api/settings/news-feeds")
@limiter.limit("30/minute")
async def api_get_news_feeds(request: Request):
return get_feeds()
@app.put("/api/settings/news-feeds", dependencies=[Depends(require_admin)])
@limiter.limit("10/minute")
async def api_save_news_feeds(request: Request):
body = await request.json()
ok = save_feeds(body)
if ok:
return {"status": "updated", "count": len(body)}
return Response(
content=json_mod.dumps({"status": "error", "message": "Validation failed (max 20 feeds, each needs name/url/weight 1-5)"}),
status_code=400,
media_type="application/json",
)
@app.post("/api/settings/news-feeds/reset", dependencies=[Depends(require_admin)])
@limiter.limit("10/minute")
async def api_reset_news_feeds(request: Request):
ok = reset_feeds()
if ok:
return {"status": "reset", "feeds": get_feeds()}
return {"status": "error", "message": "Failed to reset feeds"}
# ---------------------------------------------------------------------------
# System — self-update
# ---------------------------------------------------------------------------
from pathlib import Path
from services.updater import perform_update, schedule_restart
@app.post("/api/system/update", dependencies=[Depends(require_admin)])
@limiter.limit("1/minute")
async def system_update(request: Request):
"""Download latest release, backup current files, extract update, and restart."""
project_root = str(Path(__file__).resolve().parent.parent)
result = perform_update(project_root)
if result.get("status") == "error":
return Response(
content=json_mod.dumps(result),
status_code=500,
media_type="application/json",
)
# Schedule restart AFTER response flushes (2s delay)
threading.Timer(2.0, schedule_restart, args=[project_root]).start()
return result
if __name__ == "__main__":
uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
# Application successfully initialized with background scraping tasks
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
-4
View File
@@ -1,4 +0,0 @@
[pytest]
testpaths = tests
python_files = test_*.py
python_functions = test_*
-3
View File
@@ -1,3 +0,0 @@
-r requirements.txt
pytest==8.3.4
httpx==0.28.1
+7 -22
View File
@@ -1,25 +1,10 @@
fastapi==0.115.12
uvicorn==0.34.0
yfinance==0.2.54
fastapi==0.103.1
uvicorn==0.23.2
yfinance>=0.2.40
feedparser==6.0.10
legacy-cgi==2.6.2
requests==2.31.0
apscheduler==3.10.3
pydantic==2.11.1
pydantic-settings==2.8.1
playwright==1.50.0
playwright-stealth==1.0.6
beautifulsoup4==4.13.3
cachetools==5.5.2
slowapi==0.1.9
cloudscraper==1.2.71
python-dotenv==1.0.1
lxml==5.3.1
reverse_geocoder==1.5.1
sgp4==2.23
geopy==2.4.1
pytz==2024.2
pystac-client==0.8.6
pytest==8.3.4
pytest-asyncio==0.25.0
httpx==0.28.1
pydantic==2.3.0
pydantic-settings==2.0.3
playwright>=1.58.0
beautifulsoup4>=4.12.0
@@ -0,0 +1 @@
5d33551b09405e7e252c6a11f080a6c9eca50f6b
+31 -123
View File
@@ -14,7 +14,7 @@ import os
logger = logging.getLogger(__name__)
AIS_WS_URL = "wss://stream.aisstream.io/v0/stream"
API_KEY = os.environ.get("AIS_API_KEY", "")
API_KEY = os.environ.get("AIS_API_KEY", "75cc39af03c9cc23c90e8a7b3c3bc2b2a507c5fb")
# AIS vessel type code classification
# See: https://coast.noaa.gov/data/marinecadastre/ais/VesselTypeCodes2018.pdf
@@ -144,7 +144,7 @@ def _save_cache():
with open(CACHE_FILE, 'w') as f:
json.dump(data, f)
logger.info(f"AIS cache saved: {len(data)} vessels")
except (IOError, OSError) as e:
except Exception as e:
logger.error(f"Failed to save AIS cache: {e}")
@@ -165,7 +165,7 @@ def _load_cache():
_vessels[int(k)] = v
loaded += 1
logger.info(f"AIS cache loaded: {loaded} vessels from disk")
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
except Exception as e:
logger.error(f"Failed to load AIS cache: {e}")
@@ -207,84 +207,23 @@ def get_ais_vessels() -> list[dict]:
return result
def ingest_ais_catcher(msgs: list[dict]) -> int:
"""Ingest decoded AIS messages from AIS-catcher HTTP feed.
Returns number of vessels updated."""
count = 0
now = time.time()
with _vessels_lock:
for msg in msgs:
mmsi = msg.get("mmsi")
if not mmsi or not isinstance(mmsi, int):
continue
vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
msg_type = msg.get("type", 0)
# Position reports (types 1, 2, 3 = Class A; 18, 19 = Class B)
if msg_type in (1, 2, 3, 18, 19):
lat = msg.get("lat")
lon = msg.get("lon")
if lat is not None and lon is not None and lat != 91.0 and lon != 181.0:
vessel["lat"] = lat
vessel["lng"] = lon
vessel["sog"] = msg.get("speed", 0)
vessel["cog"] = msg.get("course", 0)
heading = msg.get("heading", 511)
vessel["heading"] = heading if heading != 511 else vessel.get("cog", 0)
vessel["_updated"] = now
if msg.get("shipname"):
vessel["name"] = msg["shipname"].strip()
count += 1
# Static data (type 5 = Class A static; 24 = Class B static)
elif msg_type in (5, 24):
if msg.get("shipname"):
vessel["name"] = msg["shipname"].strip()
if msg.get("callsign"):
vessel["callsign"] = msg["callsign"].strip()
if msg.get("imo"):
vessel["imo"] = msg["imo"]
if msg.get("destination"):
vessel["destination"] = msg["destination"].strip().replace("@", "")
ship_type = msg.get("shiptype", 0)
if ship_type:
vessel["ais_type_code"] = ship_type
vessel["type"] = classify_vessel(ship_type, mmsi)
vessel["_updated"] = now
# Ensure country is set from MMSI MID
if "country" not in vessel:
vessel["country"] = get_country_from_mmsi(mmsi)
# Ensure name exists
if "name" not in vessel:
vessel["name"] = msg.get("shipname", "UNKNOWN") or "UNKNOWN"
return count
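# Minimal ingest sketch (message shape assumed from the fields read above;
# values illustrative). A type-5 static report merges name/callsign/type into
# the vessel record but returns 0, since only position reports are counted:
#
#   ingest_ais_catcher([{"mmsi": 366999707, "type": 5, "shipname": "EVER EXAMPLE",
#                        "callsign": "WXY123", "shiptype": 70,
#                        "destination": "SGSIN@@"}])  # -> 0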
def _ais_stream_loop():
"""Main loop: spawn node proxy and process messages from stdout."""
global _proxy_process
import subprocess
import os
proxy_script = os.path.join(os.path.dirname(os.path.dirname(__file__)), "ais_proxy.js")
backoff = 1 # Exponential backoff starting at 1 second
while _ws_running:
try:
logger.info("Starting Node.js AIS Stream Proxy...")
process = subprocess.Popen(
['node', proxy_script, API_KEY],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1
)
_proxy_process = process
# Drain stderr in a background thread to prevent deadlock
import threading
@@ -298,51 +237,49 @@ def _ais_stream_loop():
logger.info("AIS Stream proxy started — receiving vessel data")
msg_count = 0
ok_streak = 0 # Track consecutive successful messages for backoff reset
last_log_time = time.time()
for raw_msg in iter(process.stdout.readline, ''):
if not _ws_running:
process.terminate()
break
raw_msg = raw_msg.strip()
if not raw_msg:
continue
try:
data = json.loads(raw_msg)
except json.JSONDecodeError:
continue
if "error" in data:
logger.error(f"AIS Stream error: {data['error']}")
continue
msg_type = data.get("MessageType", "")
metadata = data.get("MetaData", {})
message = data.get("Message", {})
mmsi = metadata.get("MMSI", 0)
if not mmsi:
continue
with _vessels_lock:
if mmsi not in _vessels:
_vessels[mmsi] = {"_updated": time.time()}
vessel = _vessels[mmsi]
# Update position from PositionReport or StandardClassBPositionReport
if msg_type in ("PositionReport", "StandardClassBPositionReport"):
report = message.get(msg_type, {})
lat = report.get("Latitude", metadata.get("latitude", 0))
lng = report.get("Longitude", metadata.get("longitude", 0))
# Skip invalid positions
if lat == 0 and lng == 0:
continue
if abs(lat) > 90 or abs(lng) > 180:
continue
with _vessels_lock:
vessel["lat"] = lat
vessel["lng"] = lng
@@ -354,12 +291,12 @@ def _ais_stream_loop():
# Use metadata name if we don't have one yet
if not vessel.get("name") or vessel["name"] == "UNKNOWN":
vessel["name"] = metadata.get("ShipName", "UNKNOWN").strip() or "UNKNOWN"
# Update static data from ShipStaticData
elif msg_type == "ShipStaticData":
static = message.get("ShipStaticData", {})
ais_type = static.get("Type", 0)
with _vessels_lock:
vessel["name"] = (static.get("Name", "") or metadata.get("ShipName", "UNKNOWN")).strip() or "UNKNOWN"
vessel["callsign"] = (static.get("CallSign", "") or "").strip()
@@ -368,31 +305,26 @@ def _ais_stream_loop():
vessel["ais_type_code"] = ais_type
vessel["type"] = classify_vessel(ais_type, mmsi)
vessel["_updated"] = time.time()
msg_count += 1
ok_streak += 1
# Reset backoff after 200 consecutive successful messages
if ok_streak >= 200 and backoff > 1:
backoff = 1
ok_streak = 0
# Periodic logging + cache save (time-based instead of count-based to avoid lock in hot loop)
now = time.time()
if now - last_log_time >= 60:
if msg_count % 5000 == 0:
with _vessels_lock:
# Inline pruning: remove vessels not updated in 15 minutes
prune_cutoff = time.time() - 900
stale = [k for k, v in _vessels.items() if v.get("_updated", 0) < prune_cutoff]
for k in stale:
del _vessels[k]
count = len(_vessels)
if stale:
logger.info(f"AIS pruned {len(stale)} stale vessels")
logger.info(f"AIS Stream: processed {msg_count} messages, tracking {count} vessels")
_save_cache()
last_log_time = now
except (ConnectionError, TimeoutError, OSError, ValueError, KeyError) as e:
_save_cache() # Auto-save every 5000 messages (~60 seconds)
except Exception as e:
logger.error(f"AIS proxy connection error: {e}")
if _ws_running:
logger.info(f"Restarting AIS proxy in {backoff}s (exponential backoff)...")
time.sleep(backoff)
backoff = min(backoff * 2, 60) # Double up to 60s max
continue
logger.info("Restarting AIS proxy in 5 seconds...")
time.sleep(5)
def _run_ais_loop():
@@ -421,31 +353,7 @@ def start_ais_stream():
def stop_ais_stream():
"""Stop the AIS WebSocket stream and save cache."""
global _ws_running, _proxy_process
global _ws_running
_ws_running = False
if _proxy_process and _proxy_process.stdin:
try:
_proxy_process.stdin.close()
except Exception:
pass
_save_cache() # Save on shutdown
logger.info("AIS Stream stopping...")
def update_ais_bbox(south: float, west: float, north: float, east: float):
"""Dynamically update the AIS stream bounding box via proxy stdin."""
global _proxy_process
if not _proxy_process or not _proxy_process.stdin:
return
try:
cmd = json.dumps({
"type": "update_bbox",
"bboxes": [[[south, west], [north, east]]]
})
_proxy_process.stdin.write(cmd + "\n")
_proxy_process.stdin.flush()
logger.info(f"Updated AIS bounding box to: S:{south:.2f} W:{west:.2f} N:{north:.2f} E:{east:.2f}")
except Exception as e:
logger.error(f"Failed to update AIS bbox: {e}")
+3 -12
View File
@@ -145,29 +145,20 @@ def get_api_keys():
"has_key": api["env_key"] is not None,
"env_key": api["env_key"],
"value_obfuscated": None,
"is_set": False,
"value_plain": None,
}
if api["env_key"]:
raw = os.environ.get(api["env_key"], "")
entry["value_obfuscated"] = _obfuscate(raw)
entry["is_set"] = bool(raw)
entry["value_plain"] = raw # Sent only when reveal is requested
result.append(entry)
return result
def update_api_key(env_key: str, new_value: str) -> bool:
"""Update a single key in the .env file and in the current process env."""
valid_keys = {api["env_key"] for api in API_REGISTRY if api.get("env_key")}
if env_key not in valid_keys:
return False
if not isinstance(new_value, str):
return False
if "\n" in new_value or "\r" in new_value:
return False
if not ENV_PATH.exists():
ENV_PATH.write_text("", encoding="utf-8")
return False
# Update os.environ immediately
os.environ[env_key] = new_value
+84 -163
View File
@@ -26,117 +26,105 @@ logger = logging.getLogger(__name__)
# Carrier registry: hull number → metadata + fallback position
# -----------------------------------------------------------------
CARRIER_REGISTRY: Dict[str, dict] = {
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
# --- Bremerton, WA (Naval Base Kitsap) ---
# Distinct pier positions along Sinclair Inlet so carriers don't stack
"CVN-68": {
"name": "USS Nimitz (CVN-68)",
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
"homeport": "Bremerton, WA",
"homeport_lat": 47.5535, "homeport_lng": -122.6400,
"fallback_lat": 47.5535, "fallback_lng": -122.6400,
"fallback_heading": 90,
"fallback_desc": "Bremerton, WA (Maintenance)"
"homeport_lat": 47.56, "homeport_lng": -122.63,
"fallback_lat": 21.35, "fallback_lng": -157.95,
"fallback_heading": 270,
"fallback_desc": "Pacific Fleet / Pearl Harbor"
},
"CVN-76": {
"name": "USS Ronald Reagan (CVN-76)",
"wiki": "https://en.wikipedia.org/wiki/USS_Ronald_Reagan",
"homeport": "Bremerton, WA",
"homeport_lat": 47.5580, "homeport_lng": -122.6360,
"fallback_lat": 47.5580, "fallback_lng": -122.6360,
"fallback_heading": 90,
"fallback_desc": "Bremerton, WA (Decommissioning)"
},
# --- Norfolk, VA (Naval Station Norfolk) ---
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
"CVN-69": {
"name": "USS Dwight D. Eisenhower (CVN-69)",
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
"homeport": "Norfolk, VA",
"homeport_lat": 36.9465, "homeport_lng": -76.3265,
"fallback_lat": 36.9465, "fallback_lng": -76.3265,
"fallback_heading": 0,
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)"
"homeport_lat": 36.95, "homeport_lng": -76.33,
"fallback_lat": 18.0, "fallback_lng": 39.5,
"fallback_heading": 120,
"fallback_desc": "Red Sea / CENTCOM AOR"
},
"CVN-78": {
"name": "USS Gerald R. Ford (CVN-78)",
"wiki": "https://en.wikipedia.org/wiki/USS_Gerald_R._Ford",
"homeport": "Norfolk, VA",
"homeport_lat": 36.9505, "homeport_lng": -76.3250,
"fallback_lat": 18.0, "fallback_lng": 39.5,
"fallback_heading": 0,
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)"
"homeport_lat": 36.95, "homeport_lng": -76.33,
"fallback_lat": 34.0, "fallback_lng": 25.0,
"fallback_heading": 90,
"fallback_desc": "Eastern Mediterranean deterrence"
},
"CVN-74": {
"name": "USS John C. Stennis (CVN-74)",
"wiki": "https://en.wikipedia.org/wiki/USS_John_C._Stennis",
"homeport": "Norfolk, VA",
"homeport_lat": 36.9540, "homeport_lng": -76.3235,
"fallback_lat": 36.98, "fallback_lng": -76.43,
"fallback_heading": 0,
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)"
},
"CVN-75": {
"name": "USS Harry S. Truman (CVN-75)",
"wiki": "https://en.wikipedia.org/wiki/USS_Harry_S._Truman",
"homeport": "Norfolk, VA",
"homeport_lat": 36.9580, "homeport_lng": -76.3220,
"fallback_lat": 36.0, "fallback_lng": 15.0,
"fallback_heading": 0,
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)"
},
"CVN-77": {
"name": "USS George H.W. Bush (CVN-77)",
"wiki": "https://en.wikipedia.org/wiki/USS_George_H.W._Bush",
"homeport": "Norfolk, VA",
"homeport_lat": 36.9620, "homeport_lng": -76.3210,
"fallback_lat": 36.5, "fallback_lng": -74.0,
"fallback_heading": 0,
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)"
},
# --- San Diego, CA (Naval Base San Diego) ---
# Carrier piers along the east shore of San Diego Bay, spread N-S
"CVN-70": {
"name": "USS Carl Vinson (CVN-70)",
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
"homeport": "San Diego, CA",
"homeport_lat": 32.6840, "homeport_lng": -117.1290,
"fallback_lat": 32.6840, "fallback_lng": -117.1290,
"fallback_heading": 180,
"fallback_desc": "San Diego, CA (Homeport)"
"homeport_lat": 32.68, "homeport_lng": -117.15,
"fallback_lat": 15.0, "fallback_lng": 115.0,
"fallback_heading": 45,
"fallback_desc": "South China Sea patrol"
},
"CVN-71": {
"name": "USS Theodore Roosevelt (CVN-71)",
"wiki": "https://en.wikipedia.org/wiki/USS_Theodore_Roosevelt_(CVN-71)",
"homeport": "San Diego, CA",
"homeport_lat": 32.6885, "homeport_lng": -117.1280,
"fallback_lat": 32.6885, "fallback_lng": -117.1280,
"fallback_heading": 180,
"fallback_desc": "San Diego, CA (Maintenance)"
"homeport_lat": 32.68, "homeport_lng": -117.15,
"fallback_lat": 22.0, "fallback_lng": 122.0,
"fallback_heading": 300,
"fallback_desc": "Philippine Sea / Taiwan Strait"
},
"CVN-72": {
"name": "USS Abraham Lincoln (CVN-72)",
"wiki": "https://en.wikipedia.org/wiki/USS_Abraham_Lincoln_(CVN-72)",
"homeport": "San Diego, CA",
"homeport_lat": 32.6925, "homeport_lng": -117.1275,
"fallback_lat": 20.0, "fallback_lng": 64.0,
"fallback_heading": 0,
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)"
"homeport_lat": 32.68, "homeport_lng": -117.15,
"fallback_lat": 21.0, "fallback_lng": -158.0,
"fallback_heading": 270,
"fallback_desc": "Pacific deployment"
},
# --- Yokosuka, Japan (CFAY) ---
"CVN-73": {
"name": "USS George Washington (CVN-73)",
"wiki": "https://en.wikipedia.org/wiki/USS_George_Washington_(CVN-73)",
"homeport": "Yokosuka, Japan",
"homeport_lat": 35.2830, "homeport_lng": 139.6700,
"fallback_lat": 35.2830, "fallback_lng": 139.6700,
"fallback_heading": 180,
"homeport_lat": 35.28, "homeport_lng": 139.67,
"fallback_lat": 35.0, "fallback_lng": 139.0,
"fallback_heading": 0,
"fallback_desc": "Yokosuka, Japan (Forward deployed)"
},
"CVN-74": {
"name": "USS John C. Stennis (CVN-74)",
"wiki": "https://en.wikipedia.org/wiki/USS_John_C._Stennis",
"homeport": "Norfolk, VA",
"homeport_lat": 36.95, "homeport_lng": -76.33,
"fallback_lat": 36.95, "fallback_lng": -76.33,
"fallback_heading": 0,
"fallback_desc": "RCOH / Norfolk (maintenance)"
},
"CVN-75": {
"name": "USS Harry S. Truman (CVN-75)",
"wiki": "https://en.wikipedia.org/wiki/USS_Harry_S._Truman",
"homeport": "Norfolk, VA",
"homeport_lat": 36.95, "homeport_lng": -76.33,
"fallback_lat": 36.0, "fallback_lng": 15.0,
"fallback_heading": 90,
"fallback_desc": "Mediterranean deployment"
},
"CVN-76": {
"name": "USS Ronald Reagan (CVN-76)",
"wiki": "https://en.wikipedia.org/wiki/USS_Ronald_Reagan",
"homeport": "Bremerton, WA",
"homeport_lat": 47.56, "homeport_lng": -122.63,
"fallback_lat": 47.56, "fallback_lng": -122.63,
"fallback_heading": 0,
"fallback_desc": "Bremerton, WA (Homeport)"
},
"CVN-77": {
"name": "USS George H.W. Bush (CVN-77)",
"wiki": "https://en.wikipedia.org/wiki/USS_George_H.W._Bush",
"homeport": "Norfolk, VA",
"homeport_lat": 36.95, "homeport_lng": -76.33,
"fallback_lat": 36.95, "fallback_lng": -76.33,
"fallback_heading": 0,
"fallback_desc": "Norfolk, VA (Homeport)"
},
}
# -----------------------------------------------------------------
@@ -218,7 +206,7 @@ def _load_cache() -> Dict[str, dict]:
data = json.loads(CACHE_FILE.read_text())
logger.info(f"Carrier cache loaded: {len(data)} carriers from {CACHE_FILE}")
return data
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
except Exception as e:
logger.warning(f"Failed to load carrier cache: {e}")
return {}
@@ -228,7 +216,7 @@ def _save_cache(positions: Dict[str, dict]):
try:
CACHE_FILE.write_text(json.dumps(positions, indent=2))
logger.info(f"Carrier cache saved: {len(positions)} carriers")
except (IOError, OSError) as e:
except Exception as e:
logger.warning(f"Failed to save carrier cache: {e}")
@@ -275,15 +263,15 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
try:
url = f"https://api.gdeltproject.org/api/v2/doc/doc?query={term}&mode=artlist&maxrecords=5&format=json&timespan=14d"
raw = fetch_with_curl(url, timeout=8)
if not raw or not hasattr(raw, 'text'):
if not raw:
continue
data = raw.json()
data = json.loads(raw)
articles = data.get("articles", [])
for art in articles:
title = art.get("title", "")
url = art.get("url", "")
results.append({"title": title, "url": url})
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
except Exception as e:
logger.debug(f"GDELT search failed for '{term}': {e}")
continue
@@ -314,8 +302,7 @@ def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
"lat": coords[0],
"lng": coords[1],
"desc": title[:100],
"source": "GDELT News API",
"source_url": article.get("url", "https://api.gdeltproject.org"),
"source": "GDELT OSINT",
"updated": datetime.now(timezone.utc).isoformat()
}
logger.info(f"Carrier update: {CARRIER_REGISTRY[hull]['name']}{coords} (from: {title[:80]})")
@@ -323,8 +310,13 @@ def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
return updates
def _load_carrier_fallbacks() -> Dict[str, dict]:
"""Build carrier positions from static fallbacks + disk cache (instant, no network)."""
def update_carrier_positions():
"""Main update function — called on startup and every 12h."""
global _last_update
logger.info("Carrier tracker: updating positions from OSINT sources...")
# Start with fallback positions
positions: Dict[str, dict] = {}
for hull, info in CARRIER_REGISTRY.items():
positions[hull] = {
@@ -334,15 +326,15 @@ def _load_carrier_fallbacks() -> Dict[str, dict]:
"heading": info["fallback_heading"],
"desc": info["fallback_desc"],
"wiki": info["wiki"],
"source": "USNI News Fleet & Marine Tracker",
"source_url": "https://news.usni.org/category/fleet-tracker",
"source": "Static OSINT estimate",
"updated": datetime.now(timezone.utc).isoformat()
}
# Overlay cached positions from previous runs (may have GDELT data)
# Load cached positions (may have better data from previous runs)
cached = _load_cache()
for hull, cached_pos in cached.items():
if hull in positions:
# Only use cache if it has a real OSINT source (not just static)
if cached_pos.get("source", "").startswith("GDELT") or cached_pos.get("source", "").startswith("News"):
positions[hull].update({
"lat": cached_pos["lat"],
@@ -351,29 +343,8 @@ def _load_carrier_fallbacks() -> Dict[str, dict]:
"source": cached_pos.get("source", "Cached OSINT"),
"updated": cached_pos.get("updated", "")
})
return positions
def update_carrier_positions():
"""Main update function — called on startup and every 12h.
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
"""
global _last_update
# --- Phase 1: instant fallback + cache ---
positions = _load_carrier_fallbacks()
with _positions_lock:
# Only overwrite if positions are currently empty (first startup).
# If we already have data from a previous cycle, keep it while GDELT runs.
if not _carrier_positions:
_carrier_positions.update(positions)
_last_update = datetime.now(timezone.utc)
logger.info(f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)")
# --- Phase 2: slow GDELT enrichment ---
# Try GDELT news for fresh positions
try:
articles = _fetch_gdelt_carrier_news()
news_positions = _parse_carrier_positions_from_news(articles)
@@ -381,10 +352,10 @@ def update_carrier_positions():
if hull in positions:
positions[hull].update(pos)
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
except Exception as e:
logger.warning(f"GDELT carrier fetch failed: {e}")
# Save and update the global state with enriched positions
# Save and update the global state
with _positions_lock:
_carrier_positions.clear()
_carrier_positions.update(positions)
@@ -399,55 +370,6 @@ def update_carrier_positions():
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
def _deconflict_positions(result: List[dict]) -> List[dict]:
"""Offset carriers that share identical coordinates so they don't stack.
At port: offset along the pier axis (~500m / 0.004° apart).
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
so they're visibly separate but clearly operating together.
"""
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
from collections import defaultdict
groups: dict[str, list[int]] = defaultdict(list)
for i, c in enumerate(result):
key = f"{round(c['lat'], 2)},{round(c['lng'], 2)}"
groups[key].append(i)
for indices in groups.values():
if len(indices) < 2:
continue
n = len(indices)
# Determine if this is a port (near a homeport) or at sea
sample = result[indices[0]]
at_port = any(
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
and abs(sample["lng"] - info.get("homeport_lng", 0)) < 0.05
for info in CARRIER_REGISTRY.values()
)
if at_port:
# Use each carrier's distinct homeport pier coordinates
for idx in indices:
carrier = result[idx]
hull = None
for h, info in CARRIER_REGISTRY.items():
if info["name"] == carrier["name"]:
hull = h
break
if hull:
info = CARRIER_REGISTRY[hull]
carrier["lat"] = info["homeport_lat"]
carrier["lng"] = info["homeport_lng"]
else:
# At sea: spread in a line perpendicular to travel (~0.08° apart)
spacing = 0.08 # ~9km — close enough to see they're together
start_offset = -(n - 1) * spacing / 2
for j, idx in enumerate(indices):
result[idx]["lng"] += start_offset + j * spacing
return result
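# Worked example (at sea): two carriers both reported at (15.00, 115.00) give
# n=2 and start_offset = -0.04, so they surface at lng 114.96 and 115.04,
# about 9 km apart: visibly together but not stacked.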
def get_carrier_positions() -> List[dict]:
"""Return current carrier positions for the data pipeline."""
with _positions_lock:
@@ -459,7 +381,7 @@ def get_carrier_positions() -> List[dict]:
"type": "carrier",
"lat": pos["lat"],
"lng": pos["lng"],
"heading": None, # Heading unknown for carriers — OSINT cannot determine true heading
"heading": pos.get("heading", 0),
"sog": 0,
"cog": 0,
"country": "United States",
@@ -467,10 +389,9 @@ def get_carrier_positions() -> List[dict]:
"wiki": pos.get("wiki", info.get("wiki", "")),
"estimated": True,
"source": pos.get("source", "OSINT estimated position"),
"source_url": pos.get("source_url", "https://news.usni.org/category/fleet-tracker"),
"last_osint_update": pos.get("updated", "")
})
return _deconflict_positions(result)
return result
# -----------------------------------------------------------------
+2 -6
View File
@@ -41,7 +41,7 @@ class BaseCCTVIngestor(ABC):
cursor = self.conn.cursor()
for cam in cameras:
cursor.execute("""
INSERT INTO cameras
INSERT INTO cameras
(id, source_agency, lat, lon, direction_facing, media_url, refresh_rate_seconds)
VALUES (?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
@@ -59,10 +59,6 @@ class BaseCCTVIngestor(ABC):
self.conn.commit()
logger.info(f"Successfully ingested {len(cameras)} cameras from {self.__class__.__name__}")
except Exception as e:
try:
self.conn.rollback()
except Exception:
pass
logger.error(f"Failed to ingest cameras in {self.__class__.__name__}: {e}")
class TFLJamCamIngestor(BaseCCTVIngestor):
@@ -224,7 +220,7 @@ class GlobalOSMCrawlingIngestor(BaseCCTVIngestor):
direction_str = item.get("tags", {}).get("camera:direction", "0")
try:
bearing = int(float(direction_str))
except (ValueError, TypeError):
except:
bearing = 0
mapbox_key = "YOUR_MAPBOX_TOKEN_HERE"
-33
View File
@@ -1,33 +0,0 @@
# ─── ShadowBroker Backend Constants ──────────────────────────────────────────
# Centralized magic numbers. Import from here instead of hardcoding.
# ─── Flight Trails ──────────────────────────────────────────────────────────
FLIGHT_TRAIL_MAX_TRACKED = 2000 # Max concurrent tracked trails before LRU eviction
FLIGHT_TRAIL_POINTS_PER_FLIGHT = 200 # Max trail points kept per aircraft
TRACKED_TRAIL_TTL_S = 1800 # 30 min - trail TTL for tracked flights
DEFAULT_TRAIL_TTL_S = 300 # 5 min - trail TTL for non-tracked flights
# ─── Detection Thresholds ──────────────────────────────────────────────────
HOLD_PATTERN_DEGREES = 300 # Total heading change to flag holding pattern
GPS_JAMMING_NACP_THRESHOLD = 8 # NACp below this = degraded GPS signal
GPS_JAMMING_GRID_SIZE = 1.0 # 1 degree grid for aggregation
GPS_JAMMING_MIN_RATIO = 0.25 # 25% degraded aircraft to flag zone
# ─── Network & Circuit Breaker ──────────────────────────────────────────────
CIRCUIT_BREAKER_TTL_S = 120 # Skip domain for 2 min after total failure
DOMAIN_FAIL_TTL_S = 300 # Skip requests.get for 5 min, go straight to curl
CONNECT_TIMEOUT_S = 3 # Short connect timeout for fast firewall-block detection
# ─── Data Fetcher Intervals ────────────────────────────────────────────────
FAST_FETCH_INTERVAL_S = 60 # Flights, ships, satellites, military
SLOW_FETCH_INTERVAL_MIN = 30 # News, markets, space weather
CCTV_FETCH_INTERVAL_MIN = 1 # CCTV camera pipeline
LIVEUAMAP_FETCH_INTERVAL_HR = 12 # LiveUAMap scraper
# ─── External API ──────────────────────────────────────────────────────────
OPENSKY_RATE_LIMIT_S = 300 # Only re-fetch OpenSky every 5 minutes
OPENSKY_REQUEST_TIMEOUT_S = 15 # Timeout for OpenSky API calls
ROUTE_FETCH_TIMEOUT_S = 15 # Timeout for adsb.lol route lookups
# ─── Internet Outage Detection ─────────────────────────────────────────────
INTERNET_OUTAGE_MIN_SEVERITY = 0.10 # 10% drop minimum to show
File diff suppressed because it is too large Load Diff
-77
View File
@@ -1,77 +0,0 @@
"""Startup environment validation — called once in the FastAPI lifespan hook.
Ensures required env vars are present before the scheduler starts.
Logs warnings for optional keys that degrade functionality when missing.
"""
import os
import sys
import logging
logger = logging.getLogger(__name__)
# Keys grouped by criticality
_REQUIRED = {
# Empty for now — add keys here only if the app literally cannot function without them
}
_CRITICAL_WARN = {
"ADMIN_KEY": "Authentication for /api/settings and /api/system/update — endpoints are UNPROTECTED without it!",
}
_OPTIONAL = {
"AIS_API_KEY": "AIS vessel streaming (ships layer will be empty without it)",
"OPENSKY_CLIENT_ID": "OpenSky OAuth2 — gap-fill flights in Africa/Asia/LatAm",
"OPENSKY_CLIENT_SECRET": "OpenSky OAuth2 — gap-fill flights in Africa/Asia/LatAm",
"LTA_ACCOUNT_KEY": "Singapore LTA traffic cameras (CCTV layer)",
}
def validate_env(*, strict: bool = True) -> bool:
"""Validate environment variables at startup.
Args:
strict: If True, exit the process on missing required keys.
If False, only log errors (useful for tests).
Returns:
True if all required keys are present, False otherwise.
"""
all_ok = True
# Required keys — must be set
for key, desc in _REQUIRED.items():
value = os.environ.get(key, "").strip()
if not value:
logger.error(
"❌ REQUIRED env var %s is not set. %s\n"
" Set it in .env or via Docker secrets (%s_FILE).",
key, desc, key,
)
all_ok = False
if not all_ok and strict:
logger.critical("Startup aborted — required environment variables are missing.")
sys.exit(1)
# Critical-warn keys — app works but security/functionality is degraded
for key, desc in _CRITICAL_WARN.items():
value = os.environ.get(key, "").strip()
if not value:
logger.critical(
"🔓 CRITICAL: env var %s is not set — %s\n"
" This is safe for local dev but MUST be set in production.",
key, desc,
)
# Optional keys — warn if missing
for key, desc in _OPTIONAL.items():
value = os.environ.get(key, "").strip()
if not value:
logger.warning(
"⚠️ Optional env var %s is not set — %s", key, desc
)
if all_ok:
logger.info("✅ Environment validation passed.")
return all_ok
-46
View File
@@ -1,46 +0,0 @@
"""Shared in-memory data store for all fetcher modules.
Central location for latest_data, source_timestamps, and the data lock.
Every fetcher imports from here instead of maintaining its own copy.
"""
import threading
import logging
from datetime import datetime
logger = logging.getLogger("services.data_fetcher")
# In-memory store
latest_data = {
"last_updated": None,
"news": [],
"stocks": {},
"oil": {},
"flights": [],
"ships": [],
"military_flights": [],
"tracked_flights": [],
"cctv": [],
"weather": None,
"earthquakes": [],
"uavs": [],
"frontlines": None,
"gdelt": [],
"liveuamap": [],
"kiwisdr": [],
"space_weather": None,
"internet_outages": [],
"firms_fires": [],
"datacenters": []
}
# Per-source freshness timestamps
source_timestamps = {}
def _mark_fresh(*keys):
"""Record the current UTC time for one or more data source keys."""
now = datetime.utcnow().isoformat()
for k in keys:
source_timestamps[k] = now
# Thread lock for safe reads/writes to latest_data
_data_lock = threading.Lock()
@@ -1,144 +0,0 @@
"""Earth-observation fetchers — earthquakes, FIRMS fires, space weather, weather radar."""
import csv
import io
import logging
import heapq
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Earthquakes (USGS)
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=1)
def fetch_earthquakes():
quakes = []
try:
url = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"
response = fetch_with_curl(url, timeout=10)
if response.status_code == 200:
features = response.json().get("features", [])
for f in features[:50]:
mag = f["properties"]["mag"]
lng, lat, depth = f["geometry"]["coordinates"]
quakes.append({
"id": f["id"], "mag": mag,
"lat": lat, "lng": lng,
"place": f["properties"]["place"]
})
except Exception as e:
logger.error(f"Error fetching earthquakes: {e}")
with _data_lock:
latest_data["earthquakes"] = quakes
if quakes:
_mark_fresh("earthquakes")
# ---------------------------------------------------------------------------
# NASA FIRMS Fires
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=2)
def fetch_firms_fires():
"""Fetch global fire/thermal anomalies from NASA FIRMS (NOAA-20 VIIRS, 24h, no key needed)."""
fires = []
try:
url = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_24h.csv"
response = fetch_with_curl(url, timeout=30)
if response.status_code == 200:
reader = csv.DictReader(io.StringIO(response.text))
all_rows = []
for row in reader:
try:
lat = float(row.get("latitude", 0))
lng = float(row.get("longitude", 0))
frp = float(row.get("frp", 0))
conf = row.get("confidence", "nominal")
daynight = row.get("daynight", "")
bright = float(row.get("bright_ti4", 0))
all_rows.append({
"lat": lat, "lng": lng, "frp": frp,
"brightness": bright, "confidence": conf,
"daynight": daynight,
"acq_date": row.get("acq_date", ""),
"acq_time": row.get("acq_time", ""),
})
except (ValueError, TypeError):
continue
fires = heapq.nlargest(5000, all_rows, key=lambda x: x["frp"])
logger.info(f"FIRMS fires: {len(fires)} hotspots (from {response.status_code})")
except Exception as e:
logger.error(f"Error fetching FIRMS fires: {e}")
with _data_lock:
latest_data["firms_fires"] = fires
if fires:
_mark_fresh("firms_fires")
# ---------------------------------------------------------------------------
# Space Weather (NOAA SWPC)
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=1)
def fetch_space_weather():
"""Fetch NOAA SWPC Kp index and recent solar events."""
try:
kp_resp = fetch_with_curl("https://services.swpc.noaa.gov/json/planetary_k_index_1m.json", timeout=10)
kp_value = None
kp_text = "QUIET"
if kp_resp.status_code == 200:
kp_data = kp_resp.json()
if kp_data:
latest_kp = kp_data[-1]
kp_value = float(latest_kp.get("kp_index", 0))
if kp_value >= 5:
kp_text = f"STORM G{min(int(kp_value) - 4, 5)}"
elif kp_value >= 4:
kp_text = "ACTIVE"
elif kp_value >= 3:
kp_text = "UNSETTLED"
events = []
ev_resp = fetch_with_curl("https://services.swpc.noaa.gov/json/edited_events.json", timeout=10)
if ev_resp.status_code == 200:
all_events = ev_resp.json()
for ev in all_events[-10:]:
events.append({
"type": ev.get("type", ""),
"begin": ev.get("begin", ""),
"end": ev.get("end", ""),
"classtype": ev.get("classtype", ""),
})
with _data_lock:
latest_data["space_weather"] = {
"kp_index": kp_value,
"kp_text": kp_text,
"events": events,
}
_mark_fresh("space_weather")
logger.info(f"Space weather: Kp={kp_value} ({kp_text}), {len(events)} events")
except Exception as e:
logger.error(f"Error fetching space weather: {e}")
# ---------------------------------------------------------------------------
# Weather Radar (RainViewer)
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=1)
def fetch_weather():
try:
url = "https://api.rainviewer.com/public/weather-maps.json"
response = fetch_with_curl(url, timeout=10)
if response.status_code == 200:
data = response.json()
if "radar" in data and "past" in data["radar"]:
latest_time = data["radar"]["past"][-1]["time"]
with _data_lock:
latest_data["weather"] = {"time": latest_time, "host": data.get("host", "https://tilecache.rainviewer.com")}
_mark_fresh("weather")
except Exception as e:
logger.error(f"Error fetching weather: {e}")
-58
View File
@@ -1,58 +0,0 @@
"""Financial data fetchers — defense stocks and oil prices.
Uses yfinance for ticker data with concurrent execution for performance.
"""
import logging
import concurrent.futures
import yfinance as yf
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
logger = logging.getLogger(__name__)
def _fetch_single_ticker(symbol: str, period: str = "2d"):
"""Fetch a single yfinance ticker. Returns (symbol, data_dict) or (symbol, None)."""
try:
ticker = yf.Ticker(symbol)
hist = ticker.history(period=period)
if len(hist) >= 1:
current_price = hist['Close'].iloc[-1]
prev_close = hist['Close'].iloc[0] if len(hist) > 1 else current_price
change_percent = ((current_price - prev_close) / prev_close) * 100 if prev_close else 0
return symbol, {
"price": round(float(current_price), 2),
"change_percent": round(float(change_percent), 2),
"up": bool(change_percent >= 0)
}
except Exception as e:
logger.warning(f"Could not fetch data for {symbol}: {e}")
return symbol, None
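# Example: _fetch_single_ticker("RTX") -> ("RTX", {"price": 123.45,
# "change_percent": 1.02, "up": True}) on success, or ("RTX", None) when
# yfinance has no data (values illustrative).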
@with_retry(max_retries=1, base_delay=1)
def fetch_defense_stocks():
tickers = ["RTX", "LMT", "NOC", "GD", "BA", "PLTR"]
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
results = pool.map(lambda t: _fetch_single_ticker(t, "2d"), tickers)
stocks_data = {sym: data for sym, data in results if data}
with _data_lock:
latest_data['stocks'] = stocks_data
_mark_fresh("stocks")
except Exception as e:
logger.error(f"Error fetching stocks: {e}")
@with_retry(max_retries=1, base_delay=1)
def fetch_oil_prices():
tickers = {"WTI Crude": "CL=F", "Brent Crude": "BZ=F"}
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
results = pool.map(lambda item: (_fetch_single_ticker(item[1], "5d")[1], item[0]), tickers.items())
oil_data = {name: data for data, name in results if data}
with _data_lock:
latest_data['oil'] = oil_data
_mark_fresh("oil")
except Exception as e:
logger.error(f"Error fetching oil: {e}")
-724
View File
@@ -1,724 +0,0 @@
"""Commercial flight fetching — ADS-B, OpenSky, supplemental sources, routes,
trail accumulation, GPS jamming detection, and holding pattern detection."""
import re
import os
import time
import math
import json
import logging
import threading
import concurrent.futures
import requests
from datetime import datetime
from cachetools import TTLCache
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.plane_alert import enrich_with_plane_alert, enrich_with_tracked_names
from services.fetchers.retry import with_retry
logger = logging.getLogger("services.data_fetcher")
# Pre-compiled regex patterns for airline code extraction (used in hot loop)
_RE_AIRLINE_CODE_1 = re.compile(r'^([A-Z]{3})\d')
_RE_AIRLINE_CODE_2 = re.compile(r'^([A-Z]{3})[A-Z\d]')
# ---------------------------------------------------------------------------
# OpenSky Network API Client (OAuth2)
# ---------------------------------------------------------------------------
class OpenSkyClient:
def __init__(self, client_id, client_secret):
self.client_id = client_id
self.client_secret = client_secret
self.token = None
self.expires_at = 0
def get_token(self):
if self.token and time.time() < self.expires_at - 60:
return self.token
url = "https://auth.opensky-network.org/auth/realms/opensky-network/protocol/openid-connect/token"
data = {
"grant_type": "client_credentials",
"client_id": self.client_id,
"client_secret": self.client_secret
}
try:
r = requests.post(url, data=data, timeout=10)
if r.status_code == 200:
res = r.json()
self.token = res.get("access_token")
self.expires_at = time.time() + res.get("expires_in", 1800)
logger.info("OpenSky OAuth2 token refreshed.")
return self.token
else:
logger.error(f"OpenSky Auth Failed: {r.status_code} {r.text}")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
logger.error(f"OpenSky Auth Exception: {e}")
return None
opensky_client = OpenSkyClient(
client_id=os.environ.get("OPENSKY_CLIENT_ID", ""),
client_secret=os.environ.get("OPENSKY_CLIENT_SECRET", "")
)
# Throttling and caching for OpenSky (400 req/day limit)
last_opensky_fetch = 0
cached_opensky_flights = []
# ---------------------------------------------------------------------------
# Supplemental ADS-B sources for blind-spot gap-filling
# ---------------------------------------------------------------------------
_BLIND_SPOT_REGIONS = [
{"name": "Yekaterinburg", "lat": 56.8, "lon": 60.6, "radius_nm": 250},
{"name": "Novosibirsk", "lat": 55.0, "lon": 82.9, "radius_nm": 250},
{"name": "Krasnoyarsk", "lat": 56.0, "lon": 92.9, "radius_nm": 250},
{"name": "Vladivostok", "lat": 43.1, "lon": 131.9, "radius_nm": 250},
{"name": "Urumqi", "lat": 43.8, "lon": 87.6, "radius_nm": 250},
{"name": "Chengdu", "lat": 30.6, "lon": 104.1, "radius_nm": 250},
{"name": "Lagos-Accra", "lat": 6.5, "lon": 3.4, "radius_nm": 250},
{"name": "Addis Ababa", "lat": 9.0, "lon": 38.7, "radius_nm": 250},
]
_SUPPLEMENTAL_FETCH_INTERVAL = 120
last_supplemental_fetch = 0
cached_supplemental_flights = []
# Helicopter type codes (backend classification)
_HELI_TYPES_BACKEND = {
"R22", "R44", "R66", "B06", "B06T", "B204", "B205", "B206", "B212", "B222", "B230",
"B407", "B412", "B427", "B429", "B430", "B505", "B525",
"AS32", "AS35", "AS50", "AS55", "AS65",
"EC20", "EC25", "EC30", "EC35", "EC45", "EC55", "EC75",
"H125", "H130", "H135", "H145", "H155", "H160", "H175", "H215", "H225",
"S55", "S58", "S61", "S64", "S70", "S76", "S92",
"A109", "A119", "A139", "A169", "A189", "AW09",
"MD52", "MD60", "MDHI", "MD90", "NOTR",
"B47G", "HUEY", "GAMA", "CABR", "EXE",
}
# Private jet ICAO type designator codes
PRIVATE_JET_TYPES = {
"G150", "G200", "G280", "GLEX", "G500", "G550", "G600", "G650", "G700",
"GLF2", "GLF3", "GLF4", "GLF5", "GLF6", "GL5T", "GL7T", "GV", "GIV",
"CL30", "CL35", "CL60", "BD70", "BD10", "GL5T", "GL7T",
"CRJ1", "CRJ2",
"C25A", "C25B", "C25C", "C500", "C501", "C510", "C525", "C526",
"C550", "C560", "C56X", "C680", "C68A", "C700", "C750",
"FA10", "FA20", "FA50", "FA7X", "FA8X", "F900", "F2TH", "ASTR",
"E35L", "E545", "E550", "E55P", "LEGA", "PH10", "PH30",
"LJ23", "LJ24", "LJ25", "LJ28", "LJ31", "LJ35", "LJ36",
"LJ40", "LJ45", "LJ55", "LJ60", "LJ70", "LJ75",
"H25A", "H25B", "H25C", "HA4T", "BE40", "PRM1",
"HDJT", "PC24", "EA50", "SF50", "GALX",
}
# Flight trails state
flight_trails = {} # {icao_hex: {points: [[lat, lng, alt, ts], ...], last_seen: ts}}
_trails_lock = threading.Lock()
_MAX_TRACKED_TRAILS = 2000
# Routes cache
dynamic_routes_cache = TTLCache(maxsize=5000, ttl=7200)
routes_fetch_in_progress = False
_routes_lock = threading.Lock()
def _fetch_supplemental_sources(seen_hex: set) -> list:
"""Fetch from airplanes.live and adsb.fi to fill blind-spot gaps."""
global last_supplemental_fetch, cached_supplemental_flights
now = time.time()
if now - last_supplemental_fetch < _SUPPLEMENTAL_FETCH_INTERVAL:
return [f for f in cached_supplemental_flights
if f.get("hex", "").lower().strip() not in seen_hex]
new_supplemental = []
supplemental_hex = set()
def _fetch_airplaneslive(region):
try:
url = (f"https://api.airplanes.live/v2/point/"
f"{region['lat']}/{region['lon']}/{region['radius_nm']}")
res = fetch_with_curl(url, timeout=10)
if res.status_code == 200:
data = res.json()
return data.get("ac", [])
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.debug(f"airplanes.live {region['name']} failed: {e}")
return []
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
results = list(pool.map(_fetch_airplaneslive, _BLIND_SPOT_REGIONS))
for region_flights in results:
for f in region_flights:
h = f.get("hex", "").lower().strip()
if h and h not in seen_hex and h not in supplemental_hex:
f["supplemental_source"] = "airplanes.live"
new_supplemental.append(f)
supplemental_hex.add(h)
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
logger.warning(f"airplanes.live supplemental fetch failed: {e}")
ap_count = len(new_supplemental)
try:
for region in _BLIND_SPOT_REGIONS:
try:
url = (f"https://opendata.adsb.fi/api/v3/lat/"
f"{region['lat']}/lon/{region['lon']}/dist/{region['radius_nm']}")
res = fetch_with_curl(url, timeout=10)
if res.status_code == 200:
data = res.json()
for f in data.get("ac", []):
h = f.get("hex", "").lower().strip()
if h and h not in seen_hex and h not in supplemental_hex:
f["supplemental_source"] = "adsb.fi"
new_supplemental.append(f)
supplemental_hex.add(h)
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.debug(f"adsb.fi {region['name']} failed: {e}")
time.sleep(1.1)
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
logger.warning(f"adsb.fi supplemental fetch failed: {e}")
fi_count = len(new_supplemental) - ap_count
cached_supplemental_flights = new_supplemental
last_supplemental_fetch = now
if new_supplemental:
_mark_fresh("supplemental_flights")
logger.info(f"Supplemental: +{len(new_supplemental)} new aircraft from blind-spot "
f"hotspots (airplanes.live: {ap_count}, adsb.fi: {fi_count})")
return new_supplemental
def fetch_routes_background(sampled):
global routes_fetch_in_progress
with _routes_lock:
if routes_fetch_in_progress:
return
routes_fetch_in_progress = True
try:
callsigns_to_query = []
for f in sampled:
c_sign = str(f.get("flight", "")).strip()
if c_sign and c_sign != "UNKNOWN":
callsigns_to_query.append({
"callsign": c_sign,
"lat": f.get("lat", 0),
"lng": f.get("lon", 0)
})
batch_size = 100
batches = [callsigns_to_query[i:i+batch_size] for i in range(0, len(callsigns_to_query), batch_size)]
for batch in batches:
try:
r = fetch_with_curl("https://api.adsb.lol/api/0/routeset", method="POST", json_data={"planes": batch}, timeout=15)
if r.status_code == 200:
route_data = r.json()
route_list = []
if isinstance(route_data, dict):
route_list = route_data.get("value", [])
elif isinstance(route_data, list):
route_list = route_data
for route in route_list:
callsign = route.get("callsign", "")
airports = route.get("_airports", [])
if airports and len(airports) >= 2:
orig_apt = airports[0]
dest_apt = airports[-1]
with _routes_lock:
dynamic_routes_cache[callsign] = {
"orig_name": f"{orig_apt.get('iata', '')}: {orig_apt.get('name', 'Unknown')}",
"dest_name": f"{dest_apt.get('iata', '')}: {dest_apt.get('name', 'Unknown')}",
"orig_loc": [orig_apt.get("lon", 0), orig_apt.get("lat", 0)],
"dest_loc": [dest_apt.get("lon", 0), dest_apt.get("lat", 0)],
}
time.sleep(0.25)
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.debug(f"Route batch request failed: {e}")
finally:
with _routes_lock:
routes_fetch_in_progress = False
def _classify_and_publish(all_adsb_flights):
"""Shared pipeline: normalize raw ADS-B data → classify → merge → publish to latest_data.
Called once immediately after adsb.lol returns (fast path, ~3-5s),
then again after OpenSky + supplemental gap-fill enrichment.
"""
flights = []
if not all_adsb_flights:
return
with _routes_lock:
already_running = routes_fetch_in_progress
if not already_running:
threading.Thread(target=fetch_routes_background, args=(all_adsb_flights,), daemon=True).start()
for f in all_adsb_flights:
try:
lat = f.get("lat")
lng = f.get("lon")
heading = f.get("track") or 0
if lat is None or lng is None:
continue
flight_str = str(f.get("flight", "UNKNOWN")).strip()
if not flight_str or flight_str == "UNKNOWN":
flight_str = str(f.get("hex", "Unknown"))
origin_loc = None
dest_loc = None
origin_name = "UNKNOWN"
dest_name = "UNKNOWN"
with _routes_lock:
cached_route = dynamic_routes_cache.get(flight_str)
if cached_route:
origin_name = cached_route["orig_name"]
dest_name = cached_route["dest_name"]
origin_loc = cached_route["orig_loc"]
dest_loc = cached_route["dest_loc"]
airline_code = ""
match = _RE_AIRLINE_CODE_1.match(flight_str)
if not match:
match = _RE_AIRLINE_CODE_2.match(flight_str)
if match:
airline_code = match.group(1)
alt_raw = f.get("alt_baro")
alt_value = 0
if isinstance(alt_raw, (int, float)):
alt_value = alt_raw * 0.3048
gs_knots = f.get("gs")
speed_knots = round(gs_knots, 1) if isinstance(gs_knots, (int, float)) else None
model_upper = f.get("t", "").upper()
if model_upper == "TWR":
continue
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
flights.append({
"callsign": flight_str,
"country": f.get("r", "N/A"),
"lng": float(lng),
"lat": float(lat),
"alt": alt_value,
"heading": heading,
"type": "flight",
"origin_loc": origin_loc,
"dest_loc": dest_loc,
"origin_name": origin_name,
"dest_name": dest_name,
"registration": f.get("r", "N/A"),
"model": f.get("t", "Unknown"),
"icao24": f.get("hex", ""),
"speed_knots": speed_knots,
"squawk": f.get("squawk", ""),
"airline_code": airline_code,
"aircraft_category": ac_category,
"nac_p": f.get("nac_p")
})
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
logger.error(f"Flight interpolation error: {loop_e}")
continue
# --- Classification ---
commercial = []
private_jets = []
private_ga = []
tracked = []
for f in flights:
enrich_with_plane_alert(f)
enrich_with_tracked_names(f)
callsign = f.get('callsign', '').strip().upper()
is_commercial_format = bool(re.match(r'^[A-Z]{3}\d{1,4}[A-Z]{0,2}$', callsign))
if f.get('alert_category'):
f['type'] = 'tracked_flight'
tracked.append(f)
elif f.get('airline_code') or is_commercial_format:
f['type'] = 'commercial_flight'
commercial.append(f)
elif f.get('model', '').upper() in PRIVATE_JET_TYPES:
f['type'] = 'private_jet'
private_jets.append(f)
else:
f['type'] = 'private_ga'
private_ga.append(f)
# --- Smart merge: protect against partial API failures ---
prev_commercial_count = len(latest_data.get('commercial_flights', []))
prev_total = prev_commercial_count + len(latest_data.get('private_jets', [])) + len(latest_data.get('private_flights', []))
new_total = len(commercial) + len(private_jets) + len(private_ga)
if new_total == 0:
logger.warning("No civilian flights found! Skipping overwrite to prevent clearing the map.")
elif prev_total > 100 and new_total < prev_total * 0.5:
logger.warning(f"Flight count dropped from {prev_total} to {new_total} (>50% loss). Keeping previous data to prevent flicker.")
else:
_now = time.time()
def _merge_category(new_list, old_list, max_stale_s=120):
by_icao = {}
for f in old_list:
icao = f.get('icao24', '')
if icao:
f.setdefault('_seen_at', _now)
if (_now - f.get('_seen_at', _now)) < max_stale_s:
by_icao[icao] = f
for f in new_list:
icao = f.get('icao24', '')
if icao:  # flights without an ICAO hex can't be merged reliably and are dropped
f['_seen_at'] = _now
by_icao[icao] = f
return list(by_icao.values())
with _data_lock:
latest_data['commercial_flights'] = _merge_category(commercial, latest_data.get('commercial_flights', []))
latest_data['private_jets'] = _merge_category(private_jets, latest_data.get('private_jets', []))
latest_data['private_flights'] = _merge_category(private_ga, latest_data.get('private_flights', []))
_mark_fresh("commercial_flights", "private_jets", "private_flights")
with _data_lock:
if flights:
latest_data['flights'] = flights
# Merge tracked civilian flights with tracked military flights
with _data_lock:
existing_tracked = list(latest_data.get('tracked_flights', []))
fresh_tracked_map = {}
for t in tracked:
icao = t.get('icao24', '').upper()
if icao:
fresh_tracked_map[icao] = t
merged_tracked = []
seen_icaos = set()
for old_t in existing_tracked:
icao = old_t.get('icao24', '').upper()
if icao in fresh_tracked_map:
fresh = fresh_tracked_map[icao]
for key in ('alert_category', 'alert_operator', 'alert_special', 'alert_flag'):
if key in old_t and key not in fresh:
fresh[key] = old_t[key]
merged_tracked.append(fresh)
seen_icaos.add(icao)
else:
merged_tracked.append(old_t)
seen_icaos.add(icao)
for icao, t in fresh_tracked_map.items():
if icao not in seen_icaos:
merged_tracked.append(t)
with _data_lock:
latest_data['tracked_flights'] = merged_tracked
logger.info(f"Tracked flights: {len(merged_tracked)} total ({len(fresh_tracked_map)} fresh from civilian)")
# --- Trail Accumulation ---
def _accumulate_trail(f, now_ts, check_route=True):
hex_id = f.get('icao24', '').lower()
if not hex_id:
return 0, None
if check_route and f.get('origin_name', 'UNKNOWN') != 'UNKNOWN':
f['trail'] = []
return 0, hex_id
lat, lng, alt = f.get('lat'), f.get('lng'), f.get('alt', 0)
if lat is None or lng is None:
f['trail'] = flight_trails.get(hex_id, {}).get('points', [])
return 0, hex_id
point = [round(lat, 5), round(lng, 5), round(alt, 1), round(now_ts)]
if hex_id not in flight_trails:
flight_trails[hex_id] = {'points': [], 'last_seen': now_ts}
trail_data = flight_trails[hex_id]
if trail_data['points'] and trail_data['points'][-1][0] == point[0] and trail_data['points'][-1][1] == point[1]:
trail_data['last_seen'] = now_ts
else:
trail_data['points'].append(point)
trail_data['last_seen'] = now_ts
if len(trail_data['points']) > 200:
trail_data['points'] = trail_data['points'][-200:]
f['trail'] = trail_data['points']
return 1, hex_id
now_ts = time.time()  # epoch seconds; datetime.utcnow().timestamp() would mis-apply the local UTC offset
all_lists = [commercial, private_jets, private_ga, merged_tracked]  # merged list so freshly tracked aircraft accumulate trails too
seen_hexes = set()
trail_count = 0
with _trails_lock:
for flist in all_lists:
for f in flist:
count, hex_id = _accumulate_trail(f, now_ts, check_route=True)
trail_count += count
if hex_id:
seen_hexes.add(hex_id)
for mf in latest_data.get('military_flights', []):
count, hex_id = _accumulate_trail(mf, now_ts, check_route=False)
trail_count += count
if hex_id:
seen_hexes.add(hex_id)
tracked_hexes = {t.get('icao24', '').lower() for t in latest_data.get('tracked_flights', [])}
stale_keys = []
for k, v in flight_trails.items():
cutoff = now_ts - 1800 if k in tracked_hexes else now_ts - 300
if v['last_seen'] < cutoff:
stale_keys.append(k)
for k in stale_keys:
del flight_trails[k]
if len(flight_trails) > _MAX_TRACKED_TRAILS:
sorted_keys = sorted(flight_trails.keys(), key=lambda k: flight_trails[k]['last_seen'])
evict_count = len(flight_trails) - _MAX_TRACKED_TRAILS
for k in sorted_keys[:evict_count]:
del flight_trails[k]
logger.info(f"Trail accumulation: {trail_count} active trails, {len(stale_keys)} pruned, {len(flight_trails)} total")
# --- GPS Jamming Detection ---
try:
jamming_grid = {}
raw_flights = latest_data.get('flights', [])
for rf in raw_flights:
rlat = rf.get('lat')
rlng = rf.get('lng') if rf.get('lng') is not None else rf.get('lon')  # plain `or` would drop a legitimate 0.0 longitude
if rlat is None or rlng is None:
continue
nacp = rf.get('nac_p')
if nacp is None:
continue
grid_key = f"{int(rlat)},{int(rlng)}"
if grid_key not in jamming_grid:
jamming_grid[grid_key] = {"degraded": 0, "total": 0}
jamming_grid[grid_key]["total"] += 1
if nacp < 8:
jamming_grid[grid_key]["degraded"] += 1
jamming_zones = []
for gk, counts in jamming_grid.items():
if counts["total"] < 3:
continue
ratio = counts["degraded"] / counts["total"]
if ratio > 0.25:
lat_i, lng_i = gk.split(",")
severity = "low" if ratio < 0.5 else "medium" if ratio < 0.75 else "high"
jamming_zones.append({
"lat": int(lat_i) + 0.5,
"lng": int(lng_i) + 0.5,
"severity": severity,
"ratio": round(ratio, 2),
"degraded": counts["degraded"],
"total": counts["total"]
})
with _data_lock:
latest_data['gps_jamming'] = jamming_zones
if jamming_zones:
logger.info(f"GPS Jamming: {len(jamming_zones)} interference zones detected")
except (ValueError, TypeError, KeyError, ZeroDivisionError) as e:
logger.error(f"GPS Jamming detection error: {e}")
with _data_lock:
latest_data['gps_jamming'] = []
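# Worked example of the grid heuristic above (synthetic numbers): a 1°x1°
# cell containing 4 aircraft of which 3 report nac_p < 8 yields ratio
# 3/4 = 0.75, which clears the 0.25 floor and maps to severity "high"
# (>= 0.75), so cell key "54,37" surfaces as lat 54.5 / lng 37.5.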
# --- Holding Pattern Detection ---
try:
holding_count = 0
all_flight_lists = [commercial, private_jets, private_ga,
latest_data.get('tracked_flights', []),
latest_data.get('military_flights', [])]
with _trails_lock:
trails_snapshot = {k: v.get('points', [])[:] for k, v in flight_trails.items()}
for flist in all_flight_lists:
for f in flist:
hex_id = f.get('icao24', '').lower()
trail = trails_snapshot.get(hex_id, [])
if len(trail) < 6:
f['holding'] = False
continue
pts = trail[-8:]
total_turn = 0.0
prev_bearing = 0.0
for i in range(1, len(pts)):
lat1, lng1 = math.radians(pts[i-1][0]), math.radians(pts[i-1][1])
lat2, lng2 = math.radians(pts[i][0]), math.radians(pts[i][1])
dlng = lng2 - lng1
x = math.sin(dlng) * math.cos(lat2)
y = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(dlng)
bearing = math.degrees(math.atan2(x, y)) % 360
if i > 1:
delta = abs(bearing - prev_bearing)
if delta > 180:
delta = 360 - delta
total_turn += delta
prev_bearing = bearing
f['holding'] = total_turn > 300
if f['holding']:
holding_count += 1
if holding_count:
logger.info(f"Holding patterns: {holding_count} aircraft circling")
except (ValueError, TypeError, KeyError, ZeroDivisionError) as e:
logger.error(f"Holding pattern detection error: {e}")
with _data_lock:
latest_data['last_updated'] = datetime.utcnow().isoformat()
def _fetch_adsb_lol_regions():
"""Fetch all adsb.lol regions in parallel (~3-5s). Returns raw aircraft list."""
regions = [
{"lat": 39.8, "lon": -98.5, "dist": 2000},
{"lat": 50.0, "lon": 15.0, "dist": 2000},
{"lat": 35.0, "lon": 105.0, "dist": 2000},
{"lat": -25.0, "lon": 133.0, "dist": 2000},
{"lat": 0.0, "lon": 20.0, "dist": 2500},
{"lat": -15.0, "lon": -60.0, "dist": 2000}
]
def _fetch_region(r):
url = f"https://api.adsb.lol/v2/lat/{r['lat']}/lon/{r['lon']}/dist/{r['dist']}"
try:
res = fetch_with_curl(url, timeout=10)
if res.status_code == 200:
data = res.json()
return data.get("ac", [])
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.warning(f"Region fetch failed for lat={r['lat']}: {e}")
return []
all_flights = []
with concurrent.futures.ThreadPoolExecutor(max_workers=6) as pool:
results = pool.map(_fetch_region, regions)
for region_flights in results:
all_flights.extend(region_flights)
return all_flights
def _enrich_with_opensky_and_supplemental(adsb_flights):
"""Slow enrichment: merge OpenSky gap-fill + supplemental sources, then re-publish.
Runs in a background thread so the initial adsb.lol data is already visible.
"""
try:
seen_hex = set()
for f in adsb_flights:
h = f.get("hex")
if h:
seen_hex.add(h.lower().strip())
all_flights = list(adsb_flights) # copy to avoid mutating the original
# OpenSky Regional Fallback
now = time.time()
global last_opensky_fetch, cached_opensky_flights
if now - last_opensky_fetch > 300:
token = opensky_client.get_token()
if token:
opensky_regions = [
{"name": "Africa", "bbox": {"lamin": -35.0, "lomin": -20.0, "lamax": 38.0, "lomax": 55.0}},
{"name": "Asia", "bbox": {"lamin": 0.0, "lomin": 30.0, "lamax": 75.0, "lomax": 150.0}},
{"name": "South America", "bbox": {"lamin": -60.0, "lomin": -95.0, "lamax": 15.0, "lomax": -30.0}}
]
new_opensky_flights = []
for os_reg in opensky_regions:
try:
bb = os_reg["bbox"]
os_url = f"https://opensky-network.org/api/states/all?lamin={bb['lamin']}&lomin={bb['lomin']}&lamax={bb['lamax']}&lomax={bb['lomax']}"
headers = {"Authorization": f"Bearer {token}"}
os_res = requests.get(os_url, headers=headers, timeout=15)
if os_res.status_code == 200:
os_data = os_res.json()
states = os_data.get("states") or []
logger.info(f"OpenSky: Fetched {len(states)} states for {os_reg['name']}")
for s in states:
new_opensky_flights.append({
"hex": s[0],
"flight": s[1].strip() if s[1] else "UNKNOWN",
"r": s[2],
"lon": s[5],
"lat": s[6],
"alt_baro": (s[7] * 3.28084) if s[7] else 0,
"track": s[10] or 0,
"gs": (s[9] * 1.94384) if s[9] else 0,
"t": "Unknown",
"is_opensky": True
})
else:
logger.warning(f"OpenSky API {os_reg['name']} failed: {os_res.status_code}")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as ex:
logger.error(f"OpenSky fetching error for {os_reg['name']}: {ex}")
cached_opensky_flights = new_opensky_flights
last_opensky_fetch = now
# Merge OpenSky (dedup by hex)
for osf in cached_opensky_flights:
h = osf.get("hex")
if h and h.lower().strip() not in seen_hex:
all_flights.append(osf)
seen_hex.add(h.lower().strip())
# Supplemental gap-fill
try:
gap_fill = _fetch_supplemental_sources(seen_hex)
for f in gap_fill:
all_flights.append(f)
h = f.get("hex", "").lower().strip()
if h:
seen_hex.add(h)
if gap_fill:
logger.info(f"Gap-fill: added {len(gap_fill)} aircraft to pipeline")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
logger.warning(f"Supplemental source fetch failed (non-fatal): {e}")
# Re-publish with enriched data
if len(all_flights) > len(adsb_flights):
logger.info(f"Enrichment: {len(all_flights) - len(adsb_flights)} additional aircraft from OpenSky + supplemental")
_classify_and_publish(all_flights)
except Exception as e:
logger.error(f"OpenSky/supplemental enrichment error: {e}")
@with_retry(max_retries=1, base_delay=1)
def fetch_flights():
"""Two-phase flight fetching:
Phase 1 (fast): Fetch adsb.lol classify publish immediately (~3-5s)
Phase 2 (background): Merge OpenSky + supplemental re-publish (~15-30s)
"""
try:
# Phase 1: adsb.lol — fast, parallel, publish immediately
adsb_flights = _fetch_adsb_lol_regions()
if adsb_flights:
logger.info(f"adsb.lol: {len(adsb_flights)} aircraft — publishing immediately")
_classify_and_publish(adsb_flights)
# Phase 2: kick off slow enrichment in background
threading.Thread(
target=_enrich_with_opensky_and_supplemental,
args=(adsb_flights,),
daemon=True,
).start()
else:
logger.warning("adsb.lol returned 0 aircraft")
except Exception as e:
logger.error(f"Error fetching flights: {e}")
-161
View File
@@ -1,161 +0,0 @@
"""Ship and geopolitics fetchers — AIS vessels, carriers, frontlines, GDELT, LiveUAmap."""
import csv
import io
import math
import logging
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Ships (AIS + Carriers)
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=1)
def fetch_ships():
"""Fetch real-time AIS vessel data and combine with OSINT carrier positions."""
from services.ais_stream import get_ais_vessels
from services.carrier_tracker import get_carrier_positions
ships = []
try:
carriers = get_carrier_positions()
ships.extend(carriers)
except Exception as e:
logger.error(f"Carrier tracker error (non-fatal): {e}")
carriers = []
try:
ais_vessels = get_ais_vessels()
ships.extend(ais_vessels)
except Exception as e:
logger.error(f"AIS stream error (non-fatal): {e}")
ais_vessels = []
# Enrich ships with yacht alert data (tracked superyachts)
from services.fetchers.yacht_alert import enrich_with_yacht_alert
for ship in ships:
enrich_with_yacht_alert(ship)
logger.info(f"Ships: {len(carriers)} carriers + {len(ais_vessels)} AIS vessels")
with _data_lock:
latest_data['ships'] = ships
_mark_fresh("ships")
# ---------------------------------------------------------------------------
# Airports (ourairports.com)
# ---------------------------------------------------------------------------
cached_airports = []
def find_nearest_airport(lat, lng, max_distance_nm=200):
"""Find the nearest large airport to a given lat/lng using haversine distance."""
if not cached_airports:
return None
best = None
best_dist = float('inf')
lat_r = math.radians(lat)
lng_r = math.radians(lng)
for apt in cached_airports:
apt_lat_r = math.radians(apt['lat'])
apt_lng_r = math.radians(apt['lng'])
dlat = apt_lat_r - lat_r
dlng = apt_lng_r - lng_r
a = math.sin(dlat / 2) ** 2 + math.cos(lat_r) * math.cos(apt_lat_r) * math.sin(dlng / 2) ** 2
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
dist_nm = 3440.065 * c
if dist_nm < best_dist:
best_dist = dist_nm
best = apt
if best and best_dist <= max_distance_nm:
return {
"iata": best['iata'], "name": best['name'],
"lat": best['lat'], "lng": best['lng'],
"distance_nm": round(best_dist, 1)
}
return None
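# Hedged worked example: Heathrow (51.470, -0.454) to Paris CDG
# (49.010, 2.550) comes out to roughly 188 nm under the haversine formula
# above, so a query near CDG with max_distance_nm=200 would still accept
# Heathrow if it were the only airport in the cache.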
def fetch_airports():
global cached_airports
if not cached_airports:
logger.info("Downloading global airports database from ourairports.com...")
try:
url = "https://ourairports.com/data/airports.csv"
response = fetch_with_curl(url, timeout=15)
if response.status_code == 200:
f = io.StringIO(response.text)
reader = csv.DictReader(f)
for row in reader:
if row['type'] == 'large_airport' and row['iata_code']:
cached_airports.append({
"id": row['ident'],
"name": row['name'],
"iata": row['iata_code'],
"lat": float(row['latitude_deg']),
"lng": float(row['longitude_deg']),
"type": "airport"
})
logger.info(f"Loaded {len(cached_airports)} large airports into cache.")
except Exception as e:
logger.error(f"Error fetching airports: {e}")
with _data_lock:
latest_data['airports'] = cached_airports
# ---------------------------------------------------------------------------
# Geopolitics & LiveUAMap
# ---------------------------------------------------------------------------
@with_retry(max_retries=1, base_delay=2)
def fetch_frontlines():
"""Fetch Ukraine frontline data (fast — single GitHub API call)."""
try:
from services.geopolitics import fetch_ukraine_frontlines
frontlines = fetch_ukraine_frontlines()
if frontlines:
with _data_lock:
latest_data['frontlines'] = frontlines
_mark_fresh("frontlines")
except Exception as e:
logger.error(f"Error fetching frontlines: {e}")
@with_retry(max_retries=1, base_delay=3)
def fetch_gdelt():
"""Fetch GDELT global military incidents (slow — downloads 32 ZIP files)."""
try:
from services.geopolitics import fetch_global_military_incidents
gdelt = fetch_global_military_incidents()
if gdelt is not None:
with _data_lock:
latest_data['gdelt'] = gdelt
_mark_fresh("gdelt")
except Exception as e:
logger.error(f"Error fetching GDELT: {e}")
def fetch_geopolitics():
"""Legacy wrapper — runs both sequentially. Used by recurring scheduler."""
fetch_frontlines()
fetch_gdelt()
def update_liveuamap():
logger.info("Running scheduled Liveuamap scraper...")
try:
from services.liveuamap_scraper import fetch_liveuamap
res = fetch_liveuamap()
if res:
with _data_lock:
latest_data['liveuamap'] = res
_mark_fresh("liveuamap")
except Exception as e:
logger.error(f"Liveuamap scraper error: {e}")
-176
View File
@@ -1,176 +0,0 @@
"""Infrastructure fetchers — internet outages (IODA), data centers, CCTV, KiwiSDR."""
import json
import time
import heapq
import logging
from pathlib import Path
from cachetools import TTLCache
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Internet Outages (IODA — Georgia Tech)
# ---------------------------------------------------------------------------
_region_geocode_cache: TTLCache = TTLCache(maxsize=2000, ttl=86400)
def _geocode_region(region_name: str, country_name: str) -> tuple:
"""Geocode a region using OpenStreetMap Nominatim (cached, respects rate limit)."""
cache_key = f"{region_name}|{country_name}"
if cache_key in _region_geocode_cache:
return _region_geocode_cache[cache_key]
try:
import urllib.parse
query = urllib.parse.quote(f"{region_name}, {country_name}")
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
if response.status_code == 200:
results = response.json()
if results:
lat = float(results[0]["lat"])
lon = float(results[0]["lon"])
_region_geocode_cache[cache_key] = (lat, lon)
return (lat, lon)
except Exception:
pass
_region_geocode_cache[cache_key] = None
return None
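# Hedged usage note: repeated calls for the same (region, country) pair hit
# the 24h TTLCache, including cached failures (None), so each region costs
# at most one Nominatim request per day regardless of how often IODA
# re-reports it.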
@with_retry(max_retries=1, base_delay=1)
def fetch_internet_outages():
"""Fetch regional internet outage alerts from IODA (Georgia Tech)."""
RELIABLE_DATASOURCES = {"bgp", "ping-slash24"}
outages = []
try:
now = int(time.time())
start = now - 86400
url = f"https://api.ioda.inetintel.cc.gatech.edu/v2/outages/alerts?from={start}&until={now}&limit=500"
response = fetch_with_curl(url, timeout=15)
if response.status_code == 200:
data = response.json()
alerts = data.get("data", [])
region_outages = {}
for alert in alerts:
entity = alert.get("entity", {})
etype = entity.get("type", "")
level = alert.get("level", "")
if level == "normal" or etype != "region":
continue
datasource = alert.get("datasource", "")
if datasource not in RELIABLE_DATASOURCES:
continue
code = entity.get("code", "")
name = entity.get("name", "")
attrs = entity.get("attrs", {})
country_code = attrs.get("country_code", "")
country_name = attrs.get("country_name", "")
value = alert.get("value", 0)
history_value = alert.get("historyValue", 0)
severity = 0
if history_value and history_value > 0:
severity = round((1 - value / history_value) * 100)
severity = max(0, min(severity, 100))
if severity < 10:
continue
if code not in region_outages or severity > region_outages[code]["severity"]:
region_outages[code] = {
"region_code": code,
"region_name": name,
"country_code": country_code,
"country_name": country_name,
"level": level,
"datasource": datasource,
"severity": severity,
}
geocoded = []
for rcode, r in region_outages.items():
coords = _geocode_region(r["region_name"], r["country_name"])
if coords:
r["lat"] = coords[0]
r["lng"] = coords[1]
geocoded.append(r)
outages = heapq.nlargest(100, geocoded, key=lambda x: x["severity"])
logger.info(f"Internet outages: {len(outages)} regions affected")
except Exception as e:
logger.error(f"Error fetching internet outages: {e}")
with _data_lock:
latest_data["internet_outages"] = outages
if outages:
_mark_fresh("internet_outages")
# ---------------------------------------------------------------------------
# Data Centers (local geocoded JSON)
# ---------------------------------------------------------------------------
_DC_GEOCODED_PATH = Path(__file__).parent.parent.parent / "data" / "datacenters_geocoded.json"
def fetch_datacenters():
"""Load geocoded data centers (5K+ street-level precise locations)."""
dcs = []
try:
if not _DC_GEOCODED_PATH.exists():
logger.warning(f"Geocoded DC file not found: {_DC_GEOCODED_PATH}")
return
raw = json.loads(_DC_GEOCODED_PATH.read_text(encoding="utf-8"))
for entry in raw:
lat = entry.get("lat")
lng = entry.get("lng")
if lat is None or lng is None:
continue
if not (-90 <= lat <= 90 and -180 <= lng <= 180):
continue
dcs.append({
"name": entry.get("name", "Unknown"),
"company": entry.get("company", ""),
"street": entry.get("street", ""),
"city": entry.get("city", ""),
"country": entry.get("country", ""),
"zip": entry.get("zip", ""),
"lat": lat, "lng": lng,
})
logger.info(f"Data centers: {len(dcs)} geocoded locations loaded")
except Exception as e:
logger.error(f"Error loading data centers: {e}")
with _data_lock:
latest_data["datacenters"] = dcs
if dcs:
_mark_fresh("datacenters")
# ---------------------------------------------------------------------------
# CCTV Cameras
# ---------------------------------------------------------------------------
def fetch_cctv():
try:
from services.cctv_pipeline import get_all_cameras
cameras = get_all_cameras()
with _data_lock:
latest_data["cctv"] = cameras
_mark_fresh("cctv")
except Exception as e:
logger.error(f"Error fetching cctv from DB: {e}")
with _data_lock:
latest_data["cctv"] = []
# ---------------------------------------------------------------------------
# KiwiSDR Receivers
# ---------------------------------------------------------------------------
@with_retry(max_retries=2, base_delay=2)
def fetch_kiwisdr():
try:
from services.kiwisdr_fetcher import fetch_kiwisdr_nodes
nodes = fetch_kiwisdr_nodes()
with _data_lock:
latest_data["kiwisdr"] = nodes
_mark_fresh("kiwisdr")
except Exception as e:
logger.error(f"Error fetching KiwiSDR nodes: {e}")
with _data_lock:
latest_data["kiwisdr"] = []
-220
View File
@@ -1,220 +0,0 @@
"""Military flight tracking and UAV detection from ADS-B data."""
import json
import logging
import requests
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.plane_alert import enrich_with_plane_alert
logger = logging.getLogger("services.data_fetcher")
# ---------------------------------------------------------------------------
# UAV classification — filters military drone transponders
# ---------------------------------------------------------------------------
_UAV_TYPE_CODES = {"Q9", "R4", "TB2", "MALE", "HALE", "HERM", "HRON"}
_UAV_CALLSIGN_PREFIXES = ("FORTE", "GHAWK", "REAP", "BAMS", "UAV", "UAS")
_UAV_MODEL_KEYWORDS = ("RQ-", "MQ-", "RQ4", "MQ9", "MQ4", "MQ1", "REAPER", "GLOBALHAWK", "TRITON", "PREDATOR", "HERMES", "HERON", "BAYRAKTAR")
_UAV_WIKI = {
"RQ4": "https://en.wikipedia.org/wiki/Northrop_Grumman_RQ-4_Global_Hawk",
"RQ-4": "https://en.wikipedia.org/wiki/Northrop_Grumman_RQ-4_Global_Hawk",
"MQ4": "https://en.wikipedia.org/wiki/Northrop_Grumman_MQ-4C_Triton",
"MQ-4": "https://en.wikipedia.org/wiki/Northrop_Grumman_MQ-4C_Triton",
"MQ9": "https://en.wikipedia.org/wiki/General_Atomics_MQ-9_Reaper",
"MQ-9": "https://en.wikipedia.org/wiki/General_Atomics_MQ-9_Reaper",
"MQ1": "https://en.wikipedia.org/wiki/General_Atomics_MQ-1C_Gray_Eagle",
"MQ-1": "https://en.wikipedia.org/wiki/General_Atomics_MQ-1C_Gray_Eagle",
"REAPER": "https://en.wikipedia.org/wiki/General_Atomics_MQ-9_Reaper",
"GLOBALHAWK": "https://en.wikipedia.org/wiki/Northrop_Grumman_RQ-4_Global_Hawk",
"TRITON": "https://en.wikipedia.org/wiki/Northrop_Grumman_MQ-4C_Triton",
"PREDATOR": "https://en.wikipedia.org/wiki/General_Atomics_MQ-1_Predator",
"HERMES": "https://en.wikipedia.org/wiki/Elbit_Hermes_900",
"HERON": "https://en.wikipedia.org/wiki/IAI_Heron",
"BAYRAKTAR": "https://en.wikipedia.org/wiki/Bayraktar_TB2",
}
def _classify_uav(model: str, callsign: str):
"""Check if an aircraft is a UAV based on type code, callsign prefix, or model keywords.
Returns (is_uav, uav_type, wiki_url) or (False, None, None)."""
model_up = model.upper().replace(" ", "")
callsign_up = callsign.upper().strip()
if model_up in _UAV_TYPE_CODES:
uav_type = "HALE Surveillance" if model_up in ("R4", "HALE") else "MALE ISR"
wiki = _UAV_WIKI.get(model_up, "")
return True, uav_type, wiki
for prefix in _UAV_CALLSIGN_PREFIXES:
if callsign_up.startswith(prefix):
uav_type = "HALE Surveillance" if prefix in ("FORTE", "GHAWK", "BAMS") else "MALE ISR"
wiki = _UAV_WIKI.get(prefix, "")
if prefix == "FORTE":
wiki = _UAV_WIKI["RQ4"]
elif prefix == "BAMS":
wiki = _UAV_WIKI["MQ4"]
return True, uav_type, wiki
for kw in _UAV_MODEL_KEYWORDS:
if kw in model_up:
if any(h in model_up for h in ("RQ4", "RQ-4", "GLOBALHAWK")):
return True, "HALE Surveillance", _UAV_WIKI.get(kw, "")
elif any(h in model_up for h in ("MQ4", "MQ-4", "TRITON")):
return True, "HALE Maritime Surveillance", _UAV_WIKI.get(kw, "")
elif any(h in model_up for h in ("MQ9", "MQ-9", "REAPER")):
return True, "MALE Strike/ISR", _UAV_WIKI.get(kw, "")
elif any(h in model_up for h in ("MQ1", "MQ-1", "PREDATOR")):
return True, "MALE ISR/Strike", _UAV_WIKI.get(kw, "")
elif "BAYRAKTAR" in model_up or "TB2" in model_up:
return True, "MALE Strike", _UAV_WIKI.get("BAYRAKTAR", "")
elif "HERMES" in model_up:
return True, "MALE ISR", _UAV_WIKI.get("HERMES", "")
elif "HERON" in model_up:
return True, "MALE ISR", _UAV_WIKI.get("HERON", "")
return True, "MALE ISR", _UAV_WIKI.get(kw, "")
return False, None, None
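# Hedged examples of the precedence above (type code beats callsign prefix
# beats model keyword); outputs checked against the tables at top of file:
#   _classify_uav("TB2", "ANY123")    -> (True, "MALE ISR", "")  # type-code hit, no wiki key "TB2"
#   _classify_uav("C130", "FORTE11")  -> (True, "HALE Surveillance", <RQ-4 wiki>)  # callsign prefix
#   _classify_uav("MQ-9", "XYZ99")    -> (True, "MALE Strike/ISR", <MQ-9 wiki>)  # model keyword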
def fetch_military_flights():
military_flights = []
detected_uavs = []
try:
url = "https://api.adsb.lol/v2/mil"
response = fetch_with_curl(url, timeout=10)
if response.status_code == 200:
ac = response.json().get('ac', [])
for f in ac:
try:
lat = f.get("lat")
lng = f.get("lon")
heading = f.get("track") or 0
if lat is None or lng is None:
continue
model = str(f.get("t", "UNKNOWN")).upper()
callsign = str(f.get("flight", "MIL-UNKN")).strip()
if model == "TWR":
continue
alt_raw = f.get("alt_baro")
alt_value = 0
if isinstance(alt_raw, (int, float)):
alt_value = alt_raw * 0.3048
gs_knots = f.get("gs")
speed_knots = round(gs_knots, 1) if isinstance(gs_knots, (int, float)) else None
is_uav, uav_type, wiki_url = _classify_uav(model, callsign)
if is_uav:
detected_uavs.append({
"id": f"uav-{f.get('hex', '')}",
"callsign": callsign,
"aircraft_model": f.get("t", "Unknown"),
"lat": float(lat),
"lng": float(lng),
"alt": alt_value,
"heading": heading,
"speed_knots": speed_knots,
"country": f.get("flag", "Unknown"),
"uav_type": uav_type,
"wiki": wiki_url or "",
"type": "uav",
"registration": f.get("r", "N/A"),
"icao24": f.get("hex", ""),
"squawk": f.get("squawk", ""),
})
continue
mil_cat = "default"
if "H" in model and any(c.isdigit() for c in model):
mil_cat = "heli"
elif any(k in model for k in ["K35", "K46", "A33"]):
mil_cat = "tanker"
elif any(k in model for k in ["F16", "F35", "F22", "F15", "F18", "T38", "T6", "A10"]):
mil_cat = "fighter"
elif any(k in model for k in ["C17", "C5", "C130", "C30", "A400", "V22"]):
mil_cat = "cargo"
elif any(k in model for k in ["P8", "E3", "E8", "U2"]):
mil_cat = "recon"
military_flights.append({
"callsign": callsign,
"country": f.get("flag", "Military Asset"),
"lng": float(lng),
"lat": float(lat),
"alt": alt_value,
"heading": heading,
"type": "military_flight",
"military_type": mil_cat,
"origin_loc": None,
"dest_loc": None,
"origin_name": "UNKNOWN",
"dest_name": "UNKNOWN",
"registration": f.get("r", "N/A"),
"model": f.get("t", "Unknown"),
"icao24": f.get("hex", ""),
"speed_knots": speed_knots,
"squawk": f.get("squawk", "")
})
except Exception as loop_e:
logger.error(f"Mil flight interpolation error: {loop_e}")
continue
except Exception as e:
logger.error(f"Error fetching military flights: {e}")
if not military_flights and not detected_uavs:
logger.warning("No military flights retrieved — keeping previous data if available")
with _data_lock:
if latest_data.get('military_flights'):
return
with _data_lock:
latest_data['military_flights'] = military_flights
latest_data['uavs'] = detected_uavs
_mark_fresh("military_flights", "uavs")
logger.info(f"UAVs: {len(detected_uavs)} real drones detected via ADS-B")
# Cross-reference military flights with Plane-Alert DB
tracked_mil = []
remaining_mil = []
for mf in military_flights:
enrich_with_plane_alert(mf)
if mf.get('alert_category'):
mf['type'] = 'tracked_flight'
tracked_mil.append(mf)
else:
remaining_mil.append(mf)
with _data_lock:
latest_data['military_flights'] = remaining_mil
# Store tracked military flights — update positions for existing entries
with _data_lock:
existing_tracked = list(latest_data.get('tracked_flights', []))
fresh_mil_map = {}
for t in tracked_mil:
icao = t.get('icao24', '').upper()
if icao:
fresh_mil_map[icao] = t
updated_tracked = []
seen_icaos = set()
for old_t in existing_tracked:
icao = old_t.get('icao24', '').upper()
if icao in fresh_mil_map:
fresh = fresh_mil_map[icao]
for key in ('alert_category', 'alert_operator', 'alert_special', 'alert_flag'):
if key in old_t and key not in fresh:
fresh[key] = old_t[key]
updated_tracked.append(fresh)
seen_icaos.add(icao)
else:
updated_tracked.append(old_t)
seen_icaos.add(icao)
for icao, t in fresh_mil_map.items():
if icao not in seen_icaos:
updated_tracked.append(t)
with _data_lock:
latest_data['tracked_flights'] = updated_tracked
logger.info(f"Tracked flights: {len(updated_tracked)} total ({len(tracked_mil)} from military)")
-223
View File
@@ -1,223 +0,0 @@
"""News fetching, geocoding, clustering, and risk assessment."""
import re
import logging
import concurrent.futures
import requests
import feedparser
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
logger = logging.getLogger("services.data_fetcher")
# Keyword -> coordinate mapping for geocoding news articles
_KEYWORD_COORDS = {
"venezuela": (7.119, -66.589),
"brazil": (-14.235, -51.925),
"argentina": (-38.416, -63.616),
"colombia": (4.570, -74.297),
"mexico": (23.634, -102.552),
"united states": (38.907, -77.036),
" usa ": (38.907, -77.036),
" us ": (38.907, -77.036),
"washington": (38.907, -77.036),
"canada": (56.130, -106.346),
"ukraine": (49.487, 31.272),
"kyiv": (50.450, 30.523),
"russia": (61.524, 105.318),
"moscow": (55.755, 37.617),
"israel": (31.046, 34.851),
"gaza": (31.416, 34.333),
"iran": (32.427, 53.688),
"lebanon": (33.854, 35.862),
"syria": (34.802, 38.996),
"yemen": (15.552, 48.516),
"china": (35.861, 104.195),
"beijing": (39.904, 116.407),
"taiwan": (23.697, 120.960),
"north korea": (40.339, 127.510),
"south korea": (35.907, 127.766),
"pyongyang": (39.039, 125.762),
"seoul": (37.566, 126.978),
"japan": (36.204, 138.252),
"tokyo": (35.676, 139.650),
"afghanistan": (33.939, 67.709),
"pakistan": (30.375, 69.345),
"india": (20.593, 78.962),
" uk ": (55.378, -3.435),
"london": (51.507, -0.127),
"france": (46.227, 2.213),
"paris": (48.856, 2.352),
"germany": (51.165, 10.451),
"berlin": (52.520, 13.405),
"sudan": (12.862, 30.217),
"congo": (-4.038, 21.758),
"south africa": (-30.559, 22.937),
"nigeria": (9.082, 8.675),
"egypt": (26.820, 30.802),
"zimbabwe": (-19.015, 29.154),
"kenya": (-1.292, 36.821),
"libya": (26.335, 17.228),
"mali": (17.570, -3.996),
"niger": (17.607, 8.081),
"somalia": (5.152, 46.199),
"ethiopia": (9.145, 40.489),
"australia": (-25.274, 133.775),
"middle east": (31.500, 34.800),
"europe": (48.800, 2.300),
"africa": (0.000, 25.000),
"america": (38.900, -77.000),
"south america": (-14.200, -51.900),
"asia": (34.000, 100.000),
"california": (36.778, -119.417),
"texas": (31.968, -99.901),
"florida": (27.994, -81.760),
"new york": (40.712, -74.006),
"virginia": (37.431, -78.656),
"british columbia": (53.726, -127.647),
"ontario": (51.253, -85.323),
"quebec": (52.939, -73.549),
"delhi": (28.704, 77.102),
"new delhi": (28.613, 77.209),
"mumbai": (19.076, 72.877),
"shanghai": (31.230, 121.473),
"hong kong": (22.319, 114.169),
"istanbul": (41.008, 28.978),
"dubai": (25.204, 55.270),
"singapore": (1.352, 103.819),
"bangkok": (13.756, 100.501),
"jakarta": (-6.208, 106.845),
}
@with_retry(max_retries=1, base_delay=2)
def fetch_news():
from services.news_feed_config import get_feeds
feed_config = get_feeds()
feeds = {f["name"]: f["url"] for f in feed_config}
source_weights = {f["name"]: f["weight"] for f in feed_config}
clusters = {}
_cluster_grid = {}
def _fetch_feed(item):
source_name, url = item
try:
xml_data = fetch_with_curl(url, timeout=10).text
return source_name, feedparser.parse(xml_data)
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
logger.warning(f"Feed {source_name} failed: {e}")
return source_name, None
with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(feeds))) as pool:  # guard: max_workers=0 raises ValueError
feed_results = list(pool.map(_fetch_feed, feeds.items()))
for source_name, feed in feed_results:
if not feed:
continue
for entry in feed.entries[:5]:
title = entry.get('title', '')
summary = entry.get('summary', '')
_seismic_kw = ["earthquake", "seismic", "quake", "tremor", "magnitude", "richter"]
_text_lower = (title + " " + summary).lower()
if any(kw in _text_lower for kw in _seismic_kw):
continue
if source_name == "GDACS":
alert_level = entry.get("gdacs_alertlevel", "Green")
if alert_level == "Red": risk_score = 10
elif alert_level == "Orange": risk_score = 7
else: risk_score = 4
else:
risk_keywords = ['war', 'missile', 'strike', 'attack', 'crisis', 'tension', 'military', 'conflict', 'defense', 'clash', 'nuclear']
text = (title + " " + summary).lower()
risk_score = 1
for kw in risk_keywords:
if kw in text:
risk_score += 2
risk_score = min(10, risk_score)
keyword_coords = _KEYWORD_COORDS
lat, lng = None, None
if 'georss_point' in entry:
geo_parts = entry['georss_point'].split()
if len(geo_parts) == 2:
lat, lng = float(geo_parts[0]), float(geo_parts[1])
elif 'where' in entry and hasattr(entry['where'], 'coordinates'):
coords = entry['where'].coordinates
lat, lng = coords[1], coords[0]
if lat is None:
# text may not be defined yet for GDACS path
text = (title + " " + summary).lower()
padded_text = f" {text} "
for kw, coords in keyword_coords.items():
if kw.startswith(" ") or kw.endswith(" "):
if kw in padded_text:
lat, lng = coords
break
else:
if re.search(r'\b' + re.escape(kw) + r'\b', text):
lat, lng = coords
break
if lat is not None:
key = None
cell_x, cell_y = int(lng // 4), int(lat // 4)
for dx in range(-1, 2):
for dy in range(-1, 2):
for ckey in _cluster_grid.get((cell_x + dx, cell_y + dy), []):
parts = ckey.split(",")
elat, elng = float(parts[0]), float(parts[1])
if ((lat - elat)**2 + (lng - elng)**2)**0.5 < 4.0:
key = ckey
break
if key:
break
if key:
break
if key is None:
key = f"{lat},{lng}"
_cluster_grid.setdefault((cell_x, cell_y), []).append(key)
else:
key = title
if key not in clusters:
clusters[key] = []
clusters[key].append({
"title": title,
"link": entry.get('link', ''),
"published": entry.get('published', ''),
"source": source_name,
"risk_score": risk_score,
"coords": [lat, lng] if lat is not None else None
})
news_items = []
for key, articles in clusters.items():
articles.sort(key=lambda x: (x['risk_score'], source_weights.get(x["source"], 0)), reverse=True)
max_risk = articles[0]['risk_score']
top_article = articles[0]
news_items.append({
"title": top_article["title"],
"link": top_article["link"],
"published": top_article["published"],
"source": top_article["source"],
"risk_score": max_risk,
"coords": top_article["coords"],
"cluster_count": len(articles),
"articles": articles,
"machine_assessment": None
})
news_items.sort(key=lambda x: x['risk_score'], reverse=True)
with _data_lock:
latest_data['news'] = news_items
_mark_fresh("news")
-205
View File
@@ -1,205 +0,0 @@
"""Plane-Alert DB — load and enrich aircraft with tracked metadata."""
import os
import json
import logging
logger = logging.getLogger("services.data_fetcher")
# Exact category -> color mapping for all 53 known categories.
# O(1) dict lookup — no keyword scanning, no false positives.
_CATEGORY_COLOR: dict[str, str] = {
# YELLOW — Military / Intelligence / Defense
"USAF": "yellow",
"Other Air Forces": "yellow",
"Toy Soldiers": "yellow",
"Oxcart": "yellow",
"United States Navy": "yellow",
"GAF": "yellow",
"Hired Gun": "yellow",
"United States Marine Corps": "yellow",
"Gunship": "yellow",
"RAF": "yellow",
"Other Navies": "yellow",
"Special Forces": "yellow",
"Zoomies": "yellow",
"Royal Navy Fleet Air Arm": "yellow",
"Army Air Corps": "yellow",
"Aerobatic Teams": "yellow",
"UAV": "yellow",
"Ukraine": "yellow",
"Nuclear": "yellow",
# LIME — Emergency / Medical / Rescue / Fire
"Flying Doctors": "#32cd32",
"Aerial Firefighter": "#32cd32",
"Coastguard": "#32cd32",
# BLUE — Government / Law Enforcement / Civil
"Police Forces": "blue",
"Governments": "blue",
"Quango": "blue",
"UK National Police Air Service": "blue",
"CAP": "blue",
# BLACK — Privacy / PIA
"PIA": "black",
# RED — Dictator / Oligarch
"Dictator Alert": "red",
"Da Comrade": "red",
"Oligarch": "red",
# HOT PINK — High Value Assets / VIP / Celebrity
"Head of State": "#ff1493",
"Royal Aircraft": "#ff1493",
"Don't you know who I am?": "#ff1493",
"As Seen on TV": "#ff1493",
"Bizjets": "#ff1493",
"Vanity Plate": "#ff1493",
"Football": "#ff1493",
# ORANGE — Joe Cool
"Joe Cool": "orange",
# WHITE — Climate Crisis
"Climate Crisis": "white",
# PURPLE — General Tracked / Other Notable
"Historic": "purple",
"Jump Johnny Jump": "purple",
"Ptolemy would be proud": "purple",
"Distinctive": "purple",
"Dogs with Jobs": "purple",
"You came here in that thing?": "purple",
"Big Hello": "purple",
"Watch Me Fly": "purple",
"Perfectly Serviceable Aircraft": "purple",
"Jesus he Knows me": "purple",
"Gas Bags": "purple",
"Radiohead": "purple",
}
def _category_to_color(cat: str) -> str:
"""O(1) exact lookup. Unknown categories default to purple."""
return _CATEGORY_COLOR.get(cat, "purple")
_PLANE_ALERT_DB: dict = {}
# ---------------------------------------------------------------------------
# POTUS Fleet — override colors and operator names for presidential aircraft.
# ---------------------------------------------------------------------------
_POTUS_FLEET: dict[str, dict] = {
"ADFDF8": {"color": "#ff1493", "operator": "Air Force One (82-8000)", "category": "Head of State", "wiki": "Air_Force_One", "fleet": "AF1"},
"ADFDF9": {"color": "#ff1493", "operator": "Air Force One (92-9000)", "category": "Head of State", "wiki": "Air_Force_One", "fleet": "AF1"},
"ADFEB7": {"color": "blue", "operator": "Air Force Two (98-0001)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"ADFEB8": {"color": "blue", "operator": "Air Force Two (98-0002)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"ADFEB9": {"color": "blue", "operator": "Air Force Two (99-0003)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"ADFEBA": {"color": "blue", "operator": "Air Force Two (99-0004)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"AE4AE6": {"color": "blue", "operator": "Air Force Two (09-0015)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"AE4AE8": {"color": "blue", "operator": "Air Force Two (09-0016)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"AE4AEA": {"color": "blue", "operator": "Air Force Two (09-0017)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"AE4AEC": {"color": "blue", "operator": "Air Force Two (19-0018)", "category": "Governments", "wiki": "Air_Force_Two", "fleet": "AF2"},
"AE0865": {"color": "#ff1493", "operator": "Marine One (VH-3D)", "category": "Head of State", "wiki": "Marine_One", "fleet": "M1"},
"AE5E76": {"color": "#ff1493", "operator": "Marine One (VH-92A)", "category": "Head of State", "wiki": "Marine_One", "fleet": "M1"},
"AE5E77": {"color": "#ff1493", "operator": "Marine One (VH-92A)", "category": "Head of State", "wiki": "Marine_One", "fleet": "M1"},
"AE5E79": {"color": "#ff1493", "operator": "Marine One (VH-92A)", "category": "Head of State", "wiki": "Marine_One", "fleet": "M1"},
}
def _load_plane_alert_db():
"""Load plane_alert_db.json (exported from SQLite) into memory."""
global _PLANE_ALERT_DB
json_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
"data", "plane_alert_db.json"
)
if not os.path.exists(json_path):
logger.warning(f"Plane-Alert DB not found at {json_path}")
return
try:
with open(json_path, "r", encoding="utf-8") as fh:
raw = json.load(fh)
for icao_hex, info in raw.items():
info["color"] = _category_to_color(info.get("category", ""))
override = _POTUS_FLEET.get(icao_hex)
if override:
info["color"] = override["color"]
info["operator"] = override["operator"]
info["category"] = override["category"]
info["wiki"] = override.get("wiki", "")
info["potus_fleet"] = override.get("fleet", "")
_PLANE_ALERT_DB[icao_hex] = info
logger.info(f"Plane-Alert DB loaded: {len(_PLANE_ALERT_DB)} aircraft")
except (IOError, OSError, json.JSONDecodeError, ValueError, KeyError) as e:
logger.error(f"Failed to load Plane-Alert DB: {e}")
_load_plane_alert_db()
def enrich_with_plane_alert(flight: dict) -> dict:
"""If flight's icao24 is in the Plane-Alert DB, add alert metadata."""
icao = flight.get("icao24", "").strip().upper()
if icao and icao in _PLANE_ALERT_DB:
info = _PLANE_ALERT_DB[icao]
flight["alert_category"] = info["category"]
flight["alert_color"] = info["color"]
flight["alert_operator"] = info["operator"]
flight["alert_type"] = info["ac_type"]
flight["alert_tags"] = info["tags"]
flight["alert_link"] = info["link"]
if info.get("wiki"):
flight["alert_wiki"] = info["wiki"]
if info.get("potus_fleet"):
flight["potus_fleet"] = info["potus_fleet"]
if info["registration"]:
flight["registration"] = info["registration"]
return flight
_TRACKED_NAMES_DB: dict = {}
def _load_tracked_names():
global _TRACKED_NAMES_DB
json_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
"data", "tracked_names.json"
)
if not os.path.exists(json_path):
return
try:
with open(json_path, "r", encoding="utf-8") as f:
data = json.load(f)
for name, info in data.get("details", {}).items():
cat = info.get("category", "Other")
for reg in info.get("registrations", []):
reg_clean = reg.strip().upper()
if reg_clean:
_TRACKED_NAMES_DB[reg_clean] = {"name": name, "category": cat}
logger.info(f"Tracked Names DB loaded: {len(_TRACKED_NAMES_DB)} registrations")
except (IOError, OSError, json.JSONDecodeError, ValueError, KeyError) as e:
logger.error(f"Failed to load Tracked Names DB: {e}")
_load_tracked_names()
def enrich_with_tracked_names(flight: dict) -> dict:
"""If flight's registration matches our Excel extraction, tag it as tracked."""
icao = flight.get("icao24", "").strip().upper()
if icao in _POTUS_FLEET:
return flight
reg = flight.get("registration", "").strip().upper()
callsign = flight.get("callsign", "").strip().upper()
match = None
if reg and reg in _TRACKED_NAMES_DB:
match = _TRACKED_NAMES_DB[reg]
elif callsign and callsign in _TRACKED_NAMES_DB:
match = _TRACKED_NAMES_DB[callsign]
if match:
name = match["name"]
flight["alert_operator"] = name
flight["alert_category"] = match["category"]
name_lower = name.lower()
is_gov = any(w in name_lower for w in ['state of ', 'government', 'republic', 'ministry', 'department', 'federal', 'cia'])
is_law = any(w in name_lower for w in ['police', 'marshal', 'sheriff', 'douane', 'customs', 'patrol', 'gendarmerie', 'guardia', 'law enforcement'])
is_med = any(w in name_lower for w in ['fire', 'bomberos', 'ambulance', 'paramedic', 'medevac', 'rescue', 'hospital', 'medical', 'lifeflight'])
if is_gov or is_law:
flight["alert_color"] = "blue"
elif is_med:
flight["alert_color"] = "#32cd32"
elif "alert_color" not in flight:
flight["alert_color"] = "pink"
return flight
-49
View File
@@ -1,49 +0,0 @@
"""Retry decorator with exponential backoff + jitter for network-bound fetcher functions.
Usage:
@with_retry(max_retries=3, base_delay=2)
def fetch_something():
...
"""
import time
import random
import logging
import functools
logger = logging.getLogger(__name__)
def with_retry(max_retries: int = 3, base_delay: float = 2.0, max_delay: float = 30.0):
"""Decorator: retries the wrapped function on any exception with exponential backoff + jitter.
Args:
max_retries: Number of retry attempts after the initial failure.
base_delay: Base delay (seconds) for exponential backoff (2 → 4 → 8 → …).
max_delay: Cap on the delay between retries.
"""
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
last_exc = None
for attempt in range(1 + max_retries):
try:
return func(*args, **kwargs)
except Exception as exc:
last_exc = exc
if attempt < max_retries:
delay = min(base_delay * (2 ** attempt), max_delay)
jitter = random.uniform(0, delay * 0.25)
total = delay + jitter
logger.warning(
"%s failed (attempt %d/%d): %s — retrying in %.1fs",
func.__name__, attempt + 1, max_retries + 1, exc, total,
)
time.sleep(total)
else:
logger.error(
"%s failed after %d attempts: %s",
func.__name__, max_retries + 1, exc,
)
raise last_exc # type: ignore[misc]
return wrapper
return decorator
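# Hedged usage demo (the flaky function below is illustrative only):
#   calls = {"n": 0}
#   @with_retry(max_retries=2, base_delay=0.1)
#   def flaky():
#       calls["n"] += 1
#       if calls["n"] < 3:
#           raise ConnectionError("transient")
#       return "ok"
#   flaky()  # fails twice, sleeps ~0.1s then ~0.2s (plus jitter), returns "ok"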
-394
View File
@@ -1,394 +0,0 @@
"""Satellite tracking — CelesTrak/TLE fetch, SGP4 propagation, intel classification.
CelesTrak Fair Use Policy (https://celestrak.org/NORAD/elements/):
- Do NOT request the same data more than once every 24 hours
- Use If-Modified-Since headers for conditional requests
- No parallel/concurrent connections; one request at a time
- Set a descriptive User-Agent
"""
import math
import time
import json
import re
import logging
import requests
from pathlib import Path
from datetime import datetime, timedelta
from sgp4.api import Satrec, WGS72, jday
from services.network_utils import fetch_with_curl
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
logger = logging.getLogger("services.data_fetcher")
def _gmst(jd_ut1):
"""Greenwich Mean Sidereal Time in radians from Julian Date."""
t = (jd_ut1 - 2451545.0) / 36525.0
gmst_sec = 67310.54841 + (876600.0 * 3600 + 8640184.812866) * t + 0.093104 * t * t - 6.2e-6 * t * t * t
gmst_rad = (gmst_sec % 86400) / 86400.0 * 2 * math.pi
return gmst_rad
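# Hedged sketch of how _gmst() is typically consumed downstream: rotating an
# SGP4 TEME/ECI position into an Earth-fixed lat/lng (spherical-Earth
# approximation; the function name and 6371 km radius are illustrative):
def _eci_to_latlng_sketch(x_km, y_km, z_km, jd_ut1):
r = math.sqrt(x_km ** 2 + y_km ** 2 + z_km ** 2)
lat = math.degrees(math.asin(z_km / r))  # geocentric latitude
lng = math.degrees(math.atan2(y_km, x_km) - _gmst(jd_ut1))
lng = (lng + 180.0) % 360.0 - 180.0  # normalize to [-180, 180)
return lat, lng, r - 6371.0  # altitude above a mean Earth radius, km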
# Satellite GP data cache
# CelesTrak fair use: fetch at most once per 24 hours (86400s).
# SGP4 propagation runs every 60s using cached TLEs — positions stay live.
_CELESTRAK_FETCH_INTERVAL = 86400 # 24 hours
_sat_gp_cache = {"data": None, "last_fetch": 0, "source": "none", "last_modified": None}
_sat_classified_cache = {"data": None, "gp_fetch_ts": 0}
_SAT_CACHE_PATH = Path(__file__).parent.parent.parent / "data" / "sat_gp_cache.json"
_SAT_CACHE_META_PATH = Path(__file__).parent.parent.parent / "data" / "sat_gp_cache_meta.json"
def _load_sat_cache():
"""Load satellite GP data from local disk cache."""
try:
if _SAT_CACHE_PATH.exists():
import os
age_hours = (time.time() - os.path.getmtime(str(_SAT_CACHE_PATH))) / 3600
if age_hours < 48:
with open(_SAT_CACHE_PATH, "r") as f:
data = json.load(f)
if isinstance(data, list) and len(data) > 10:
logger.info(f"Satellites: Loaded {len(data)} records from disk cache ({age_hours:.1f}h old)")
# Restore last_modified from metadata
_load_cache_meta()
return data
else:
logger.info(f"Satellites: Disk cache is {age_hours:.0f}h old, will try fresh fetch")
except (IOError, OSError, json.JSONDecodeError, ValueError, KeyError) as e:
logger.warning(f"Satellites: Failed to load disk cache: {e}")
return None
def _save_sat_cache(data):
"""Save satellite GP data to local disk cache."""
try:
_SAT_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
with open(_SAT_CACHE_PATH, "w") as f:
json.dump(data, f)
_save_cache_meta()
logger.info(f"Satellites: Saved {len(data)} records to disk cache")
except (IOError, OSError) as e:
logger.warning(f"Satellites: Failed to save disk cache: {e}")
def _load_cache_meta():
"""Load cache metadata (Last-Modified timestamp) from disk."""
try:
if _SAT_CACHE_META_PATH.exists():
with open(_SAT_CACHE_META_PATH, "r") as f:
meta = json.load(f)
_sat_gp_cache["last_modified"] = meta.get("last_modified")
except (IOError, OSError, json.JSONDecodeError, ValueError, KeyError):
pass
def _save_cache_meta():
"""Save cache metadata to disk."""
try:
with open(_SAT_CACHE_META_PATH, "w") as f:
json.dump({"last_modified": _sat_gp_cache.get("last_modified")}, f)
except (IOError, OSError):
pass
# Satellite intelligence classification database
_SAT_INTEL_DB = [
("USA 224", {"country": "USA", "mission": "military_recon", "sat_type": "KH-11 Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/KH-11_KENNEN"}),
("USA 245", {"country": "USA", "mission": "military_recon", "sat_type": "KH-11 Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/KH-11_KENNEN"}),
("USA 290", {"country": "USA", "mission": "military_recon", "sat_type": "KH-11 Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/KH-11_KENNEN"}),
("USA 314", {"country": "USA", "mission": "military_recon", "sat_type": "KH-11 Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/KH-11_KENNEN"}),
("USA 338", {"country": "USA", "mission": "military_recon", "sat_type": "Keyhole Successor", "wiki": "https://en.wikipedia.org/wiki/KH-11_KENNEN"}),
("TOPAZ", {"country": "Russia", "mission": "military_recon", "sat_type": "Optical Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/Persona_(satellite)"}),
("PERSONA", {"country": "Russia", "mission": "military_recon", "sat_type": "Optical Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/Persona_(satellite)"}),
("KONDOR", {"country": "Russia", "mission": "military_sar", "sat_type": "SAR Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/Kondor_(satellite)"}),
("BARS-M", {"country": "Russia", "mission": "military_recon", "sat_type": "Mapping Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/Bars-M"}),
("YAOGAN", {"country": "China", "mission": "military_recon", "sat_type": "Remote Sensing / ELINT", "wiki": "https://en.wikipedia.org/wiki/Yaogan"}),
("GAOFEN", {"country": "China", "mission": "military_recon", "sat_type": "High-Res Imaging", "wiki": "https://en.wikipedia.org/wiki/Gaofen"}),
("JILIN", {"country": "China", "mission": "commercial_imaging", "sat_type": "Video / Imaging", "wiki": "https://en.wikipedia.org/wiki/Jilin-1"}),
("OFEK", {"country": "Israel", "mission": "military_recon", "sat_type": "Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/Ofeq"}),
("CSO", {"country": "France", "mission": "military_recon", "sat_type": "Optical Reconnaissance", "wiki": "https://en.wikipedia.org/wiki/CSO_(satellite)"}),
("IGS", {"country": "Japan", "mission": "military_recon", "sat_type": "Intelligence Gathering", "wiki": "https://en.wikipedia.org/wiki/Information_Gathering_Satellite"}),
("CAPELLA", {"country": "USA", "mission": "sar", "sat_type": "SAR Imaging", "wiki": "https://en.wikipedia.org/wiki/Capella_Space"}),
("ICEYE", {"country": "Finland", "mission": "sar", "sat_type": "SAR Microsatellite", "wiki": "https://en.wikipedia.org/wiki/ICEYE"}),
("COSMO-SKYMED", {"country": "Italy", "mission": "sar", "sat_type": "SAR Constellation", "wiki": "https://en.wikipedia.org/wiki/COSMO-SkyMed"}),
("TANDEM", {"country": "Germany", "mission": "sar", "sat_type": "SAR Interferometry", "wiki": "https://en.wikipedia.org/wiki/TanDEM-X"}),
("PAZ", {"country": "Spain", "mission": "sar", "sat_type": "SAR Imaging", "wiki": "https://en.wikipedia.org/wiki/PAZ_(satellite)"}),
("WORLDVIEW", {"country": "USA", "mission": "commercial_imaging", "sat_type": "Maxar High-Res", "wiki": "https://en.wikipedia.org/wiki/WorldView-3"}),
("GEOEYE", {"country": "USA", "mission": "commercial_imaging", "sat_type": "Maxar Imaging", "wiki": "https://en.wikipedia.org/wiki/GeoEye-1"}),
("PLEIADES", {"country": "France", "mission": "commercial_imaging", "sat_type": "Airbus Imaging", "wiki": "https://en.wikipedia.org/wiki/Pl%C3%A9iades_(satellite)"}),
("SPOT", {"country": "France", "mission": "commercial_imaging", "sat_type": "Airbus Medium-Res", "wiki": "https://en.wikipedia.org/wiki/SPOT_(satellite)"}),
("PLANET", {"country": "USA", "mission": "commercial_imaging", "sat_type": "PlanetScope", "wiki": "https://en.wikipedia.org/wiki/Planet_Labs"}),
("SKYSAT", {"country": "USA", "mission": "commercial_imaging", "sat_type": "Planet Video", "wiki": "https://en.wikipedia.org/wiki/SkySat"}),
("BLACKSKY", {"country": "USA", "mission": "commercial_imaging", "sat_type": "BlackSky Imaging", "wiki": "https://en.wikipedia.org/wiki/BlackSky"}),
("NROL", {"country": "USA", "mission": "sigint", "sat_type": "Classified NRO", "wiki": "https://en.wikipedia.org/wiki/National_Reconnaissance_Office"}),
("MENTOR", {"country": "USA", "mission": "sigint", "sat_type": "SIGINT / ELINT", "wiki": "https://en.wikipedia.org/wiki/Mentor_(satellite)"}),
("LUCH", {"country": "Russia", "mission": "sigint", "sat_type": "Relay / SIGINT", "wiki": "https://en.wikipedia.org/wiki/Luch_(satellite)"}),
("SHIJIAN", {"country": "China", "mission": "sigint", "sat_type": "ELINT / Tech Demo", "wiki": "https://en.wikipedia.org/wiki/Shijian"}),
("NAVSTAR", {"country": "USA", "mission": "navigation", "sat_type": "GPS", "wiki": "https://en.wikipedia.org/wiki/GPS_satellite_blocks"}),
("GLONASS", {"country": "Russia", "mission": "navigation", "sat_type": "GLONASS", "wiki": "https://en.wikipedia.org/wiki/GLONASS"}),
("BEIDOU", {"country": "China", "mission": "navigation", "sat_type": "BeiDou", "wiki": "https://en.wikipedia.org/wiki/BeiDou"}),
("GALILEO", {"country": "EU", "mission": "navigation", "sat_type": "Galileo", "wiki": "https://en.wikipedia.org/wiki/Galileo_(satellite_navigation)"}),
("SBIRS", {"country": "USA", "mission": "early_warning", "sat_type": "Missile Warning", "wiki": "https://en.wikipedia.org/wiki/Space-Based_Infrared_System"}),
("TUNDRA", {"country": "Russia", "mission": "early_warning", "sat_type": "Missile Warning", "wiki": "https://en.wikipedia.org/wiki/Tundra_(satellite)"}),
("ISS", {"country": "Intl", "mission": "space_station", "sat_type": "Space Station", "wiki": "https://en.wikipedia.org/wiki/International_Space_Station"}),
("TIANGONG", {"country": "China", "mission": "space_station", "sat_type": "Space Station", "wiki": "https://en.wikipedia.org/wiki/Tiangong_space_station"}),
]
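Classification works by case-insensitive substring match of these keys against catalog names (see the loop in fetch_satellites below); a hypothetical lookup:

# Hypothetical catalog name; any name containing a DB key matches.
name = "YAOGAN-41".upper()
intel = next((dict(meta) for key, meta in _SAT_INTEL_DB if key.upper() in name), None)
# intel == {"country": "China", "mission": "military_recon", ...}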
def _parse_tle_to_gp(name, norad_id, line1, line2):
"""Convert TLE two-line element to CelesTrak GP-style dict."""
try:
incl = float(line2[8:16].strip())
raan = float(line2[17:25].strip())
ecc = float("0." + line2[26:33].strip())
argp = float(line2[34:42].strip())
ma = float(line2[43:51].strip())
mm = float(line2[52:63].strip())
bstar_str = line1[53:61].strip()
if bstar_str:
mantissa = float(bstar_str[:-2]) / 1e5
exponent = int(bstar_str[-2:])
bstar = mantissa * (10 ** exponent)
else:
bstar = 0.0
epoch_yr = int(line1[18:20])
epoch_day = float(line1[20:32].strip())
year = 2000 + epoch_yr if epoch_yr < 57 else 1900 + epoch_yr
epoch_dt = datetime(year, 1, 1) + timedelta(days=epoch_day - 1)
return {
"OBJECT_NAME": name,
"NORAD_CAT_ID": norad_id,
"MEAN_MOTION": mm,
"ECCENTRICITY": ecc,
"INCLINATION": incl,
"RA_OF_ASC_NODE": raan,
"ARG_OF_PERICENTER": argp,
"MEAN_ANOMALY": ma,
"BSTAR": bstar,
"EPOCH": epoch_dt.strftime("%Y-%m-%dT%H:%M:%S"),
}
except (ValueError, TypeError, IndexError, KeyError):
return None
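A spot check of the fixed-column parsing above, using a historic ISS TLE (illustrative data only; the column layout is what matters):

l1 = "1 25544U 98067A   19343.69339541  .00001764  00000-0  40967-4 0  9999"
l2 = "2 25544  51.6439 211.2001 0007417  17.6667  85.6398 15.50103472202482"
gp = _parse_tle_to_gp("ISS (ZARYA)", 25544, l1, l2)
# gp["INCLINATION"] == 51.6439, gp["ECCENTRICITY"] == 0.0007417,
# gp["BSTAR"] == 4.0967e-05, gp["EPOCH"].startswith("2019-12-09")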
def _fetch_satellites_from_tle_api():
"""Fallback: fetch satellite TLEs from tle.ivanstanojevic.me when CelesTrak is blocked."""
search_terms = set()
for key, _ in _SAT_INTEL_DB:
term = key.split()[0] if len(key.split()) > 1 and key.split()[0] in ("USA", "NROL") else key
search_terms.add(term)
all_results = []
seen_ids = set()
for term in search_terms:
try:
url = f"https://tle.ivanstanojevic.me/api/tle/?search={term}&page_size=100&format=json"
response = fetch_with_curl(url, timeout=8)
if response.status_code != 200:
continue
data = response.json()
for member in data.get("member", []):
gp = _parse_tle_to_gp(
member.get("name", "UNKNOWN"),
member.get("satelliteId"),
member.get("line1", ""),
member.get("line2", ""),
)
if gp:
sat_id = gp.get("NORAD_CAT_ID")
if sat_id not in seen_ids:
seen_ids.add(sat_id)
all_results.append(gp)
time.sleep(1) # Polite delay between requests
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.debug(f"TLE fallback search '{term}' failed: {e}")
return all_results
def fetch_satellites():
sats = []
try:
now_ts = time.time()
if _sat_gp_cache["data"] is None or (now_ts - _sat_gp_cache["last_fetch"]) > _CELESTRAK_FETCH_INTERVAL:
gp_urls = [
"https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
"https://celestrak.com/NORAD/elements/gp.php?GROUP=active&FORMAT=json",
]
# Build conditional request headers (CelesTrak fair use)
headers = {}
if _sat_gp_cache.get("last_modified"):
headers["If-Modified-Since"] = _sat_gp_cache["last_modified"]
for url in gp_urls:
try:
response = fetch_with_curl(url, timeout=15, headers=headers)
if response.status_code == 304:
# Data unchanged — reset timer without re-downloading
_sat_gp_cache["last_fetch"] = now_ts
logger.info(f"Satellites: CelesTrak returned 304 Not Modified (data unchanged)")
break
if response.status_code == 200:
gp_data = response.json()
if isinstance(gp_data, list) and len(gp_data) > 100:
_sat_gp_cache["data"] = gp_data
_sat_gp_cache["last_fetch"] = now_ts
_sat_gp_cache["source"] = "celestrak"
# Store Last-Modified header for future conditional requests
if hasattr(response, 'headers'):
lm = response.headers.get("Last-Modified")
if lm:
_sat_gp_cache["last_modified"] = lm
_save_sat_cache(gp_data)
logger.info(f"Satellites: Downloaded {len(gp_data)} GP records from CelesTrak")
break
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.warning(f"Satellites: Failed to fetch from {url}: {e}")
continue
if _sat_gp_cache["data"] is None:
logger.info("Satellites: CelesTrak unreachable, trying TLE fallback API...")
try:
fallback_data = _fetch_satellites_from_tle_api()
if fallback_data and len(fallback_data) > 10:
_sat_gp_cache["data"] = fallback_data
_sat_gp_cache["last_fetch"] = now_ts
_sat_gp_cache["source"] = "tle_api"
_save_sat_cache(fallback_data)
logger.info(f"Satellites: Got {len(fallback_data)} records from TLE fallback API")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
logger.error(f"Satellites: TLE fallback also failed: {e}")
if _sat_gp_cache["data"] is None:
disk_data = _load_sat_cache()
if disk_data:
_sat_gp_cache["data"] = disk_data
_sat_gp_cache["last_fetch"] = now_ts - (_CELESTRAK_FETCH_INTERVAL - 300)
_sat_gp_cache["source"] = "disk_cache"
data = _sat_gp_cache["data"]
if not data:
logger.warning("No satellite GP data available from any source")
with _data_lock:
latest_data["satellites"] = sats
return
if _sat_classified_cache["gp_fetch_ts"] == _sat_gp_cache["last_fetch"] and _sat_classified_cache["data"]:
classified = _sat_classified_cache["data"]
logger.info(f"Satellites: Using cached classification ({len(classified)} sats, TLEs unchanged)")
else:
classified = []
for sat in data:
name = sat.get("OBJECT_NAME", "UNKNOWN").upper()
intel = None
for key, meta in _SAT_INTEL_DB:
if key.upper() in name:
intel = dict(meta)
break
if not intel:
continue
entry = {
"id": sat.get("NORAD_CAT_ID"),
"name": sat.get("OBJECT_NAME", "UNKNOWN"),
"MEAN_MOTION": sat.get("MEAN_MOTION"),
"ECCENTRICITY": sat.get("ECCENTRICITY"),
"INCLINATION": sat.get("INCLINATION"),
"RA_OF_ASC_NODE": sat.get("RA_OF_ASC_NODE"),
"ARG_OF_PERICENTER": sat.get("ARG_OF_PERICENTER"),
"MEAN_ANOMALY": sat.get("MEAN_ANOMALY"),
"BSTAR": sat.get("BSTAR"),
"EPOCH": sat.get("EPOCH"),
}
entry.update(intel)
classified.append(entry)
_sat_classified_cache["data"] = classified
_sat_classified_cache["gp_fetch_ts"] = _sat_gp_cache["last_fetch"]
logger.info(f"Satellites: {len(classified)} intel-classified out of {len(data)} total in catalog")
all_sats = classified
now = datetime.utcnow()
jd, fr = jday(now.year, now.month, now.day, now.hour, now.minute, now.second + now.microsecond / 1e6)
for s in all_sats:
try:
mean_motion = s.get('MEAN_MOTION')
ecc = s.get('ECCENTRICITY')
incl = s.get('INCLINATION')
raan = s.get('RA_OF_ASC_NODE')
argp = s.get('ARG_OF_PERICENTER')
ma = s.get('MEAN_ANOMALY')
bstar = s.get('BSTAR', 0)
epoch_str = s.get('EPOCH')
norad_id = s.get('id', 0)
if mean_motion is None or ecc is None or incl is None:
continue
epoch_dt = datetime.strptime(epoch_str[:19], '%Y-%m-%dT%H:%M:%S')
epoch_jd, epoch_fr = jday(epoch_dt.year, epoch_dt.month, epoch_dt.day,
epoch_dt.hour, epoch_dt.minute, epoch_dt.second)
sat_obj = Satrec()
sat_obj.sgp4init(
WGS72, 'i', norad_id,
(epoch_jd + epoch_fr) - 2433281.5,
bstar, 0.0, 0.0, ecc,
math.radians(argp), math.radians(incl),
math.radians(ma),
mean_motion * 2 * math.pi / 1440.0,
math.radians(raan)
)
e, r, v = sat_obj.sgp4(jd, fr)
if e != 0:
continue
x, y, z = r
gmst = _gmst(jd + fr)
lng_rad = math.atan2(y, x) - gmst
lat_rad = math.atan2(z, math.sqrt(x*x + y*y))
alt_km = math.sqrt(x*x + y*y + z*z) - 6371.0
s['lat'] = round(math.degrees(lat_rad), 4)
lng_deg = math.degrees(lng_rad) % 360
s['lng'] = round(lng_deg - 360 if lng_deg > 180 else lng_deg, 4)
s['alt_km'] = round(alt_km, 1)
vx, vy, vz = v
omega_e = 7.2921159e-5
vx_g = vx + omega_e * y
vy_g = vy - omega_e * x
vz_g = vz
cos_lat = math.cos(lat_rad)
sin_lat = math.sin(lat_rad)
cos_lng = math.cos(lng_rad + gmst)
sin_lng = math.sin(lng_rad + gmst)
v_east = -sin_lng * vx_g + cos_lng * vy_g
v_north = -sin_lat * cos_lng * vx_g - sin_lat * sin_lng * vy_g + cos_lat * vz_g
ground_speed_kms = math.sqrt(v_east**2 + v_north**2)
s['speed_knots'] = round(ground_speed_kms * 1943.84, 1)
heading_rad = math.atan2(v_east, v_north)
s['heading'] = round(math.degrees(heading_rad) % 360, 1)
sat_name = s.get('name', '')
usa_match = re.search(r'USA[\s\-]*(\d+)', sat_name)
if usa_match:
s['wiki'] = f"https://en.wikipedia.org/wiki/USA-{usa_match.group(1)}"
for k in ('MEAN_MOTION', 'ECCENTRICITY', 'INCLINATION',
'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
'BSTAR', 'EPOCH', 'tle1', 'tle2'):
s.pop(k, None)
sats.append(s)
except (ValueError, TypeError, KeyError, AttributeError, ZeroDivisionError):
continue
logger.info(f"Satellites: {len(classified)} classified, {len(sats)} positioned")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, json.JSONDecodeError, OSError) as e:
logger.error(f"Error fetching satellites: {e}")
if sats:
with _data_lock:
latest_data["satellites"] = sats
latest_data["satellite_source"] = _sat_gp_cache.get("source", "none")
_mark_fresh("satellites")
else:
with _data_lock:
if not latest_data.get("satellites"):
latest_data["satellites"] = []
latest_data["satellite_source"] = "none"
-62
View File
@@ -1,62 +0,0 @@
"""Yacht-Alert DB — load and enrich AIS vessels with tracked yacht metadata."""
import os
import json
import logging
logger = logging.getLogger("services.data_fetcher")
# Category -> color mapping
_CATEGORY_COLOR: dict[str, str] = {
"Tech Billionaire": "#FF69B4",
"Celebrity / Mogul": "#FF69B4",
"Oligarch Watch": "#FF2020",
}
def _category_to_color(cat: str) -> str:
"""Map category to display color. Defaults to hot pink."""
return _CATEGORY_COLOR.get(cat, "#FF69B4")
_YACHT_ALERT_DB: dict = {}
def _load_yacht_alert_db():
"""Load yacht_alert_db.json into memory at import time."""
global _YACHT_ALERT_DB
json_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
"data", "yacht_alert_db.json"
)
if not os.path.exists(json_path):
logger.warning(f"Yacht-Alert DB not found at {json_path}")
return
try:
with open(json_path, "r", encoding="utf-8") as fh:
raw = json.load(fh)
for mmsi_str, info in raw.items():
info["color"] = _category_to_color(info.get("category", ""))
_YACHT_ALERT_DB[mmsi_str] = info
logger.info(f"Yacht-Alert DB loaded: {len(_YACHT_ALERT_DB)} vessels")
except (IOError, OSError, json.JSONDecodeError, ValueError, KeyError) as e:
logger.error(f"Failed to load Yacht-Alert DB: {e}")
_load_yacht_alert_db()
def enrich_with_yacht_alert(ship: dict) -> dict:
"""If ship's MMSI is in the Yacht-Alert DB, attach owner/alert metadata."""
mmsi = str(ship.get("mmsi", "")).strip()
if mmsi and mmsi in _YACHT_ALERT_DB:
info = _YACHT_ALERT_DB[mmsi]
ship["yacht_alert"] = True
ship["yacht_owner"] = info["owner"]
ship["yacht_name"] = info["name"]
ship["yacht_category"] = info["category"]
ship["yacht_color"] = info["color"]
ship["yacht_builder"] = info.get("builder", "")
ship["yacht_length"] = info.get("length_m", 0)
ship["yacht_year"] = info.get("year", 0)
ship["yacht_link"] = info.get("link", "")
return ship
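A usage sketch (the MMSI and coordinates are hypothetical; enrichment only fires if the MMSI exists as a key in yacht_alert_db.json):

ship = {"mmsi": "319085900", "lat": 43.27, "lng": 6.64}
ship = enrich_with_yacht_alert(ship)
if ship.get("yacht_alert"):
    print(ship["yacht_name"], ship["yacht_owner"], ship["yacht_category"])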
+47 -219
View File
@@ -1,6 +1,5 @@
import requests
import logging
import zipfile
from cachetools import cached, TTLCache
from datetime import datetime
from services.network_utils import fetch_with_curl
@@ -66,7 +65,7 @@ def fetch_ukraine_frontlines():
logger.error(f"Failed to fetch parsed Github Raw GeoJSON: {res_geo.status_code}")
else:
logger.error(f"Failed to fetch Github Tree for Deepstatemap: {res_tree.status_code}")
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
except Exception as e:
logger.error(f"Error fetching DeepStateMap: {e}")
return None
@@ -82,15 +81,13 @@ def _extract_domain(url):
if host.startswith('www.'):
host = host[4:]
return host
except (ValueError, AttributeError, KeyError): # non-critical
except Exception:
return url[:40]
def _url_to_headline(url):
"""Extract a human-readable headline from a URL path.
e.g. 'https://nytimes.com/2026/03/us-strikes-iran-nuclear-sites.html' -> 'Us Strikes Iran Nuclear Sites'
Falls back to domain name if the URL slug is gibberish (hex IDs, UUIDs, etc.).
e.g. 'https://nytimes.com/2026/03/us-strikes-iran-nuclear-sites.html' -> 'Us Strikes Iran Nuclear Sites (nytimes.com)'
"""
import re
try:
from urllib.parse import urlparse, unquote
parsed = urlparse(url)
@@ -103,151 +100,43 @@ def _url_to_headline(url):
if not path:
return domain
# Try the last path segment first, then walk backwards
segments = [s for s in path.split('/') if s]
slug = ''
for seg in reversed(segments):
# Remove file extensions
for ext in ['.html', '.htm', '.php', '.asp', '.aspx', '.shtml']:
if seg.lower().endswith(ext):
seg = seg[:-len(ext)]
# Skip segments that are clearly not headlines
if _is_gibberish(seg):
continue
slug = seg
break
if not slug:
return domain
# Take the last path segment (usually the slug)
slug = path.split('/')[-1]
# Remove file extensions
for ext in ['.html', '.htm', '.php', '.asp', '.aspx', '.shtml']:
if slug.lower().endswith(ext):
slug = slug[:-len(ext)]
# If slug is purely numeric or a short ID, try the second-to-last segment
import re
if re.match(r'^[a-z]?\d{5,}$', slug, re.IGNORECASE):
segments = path.split('/')
if len(segments) >= 2:
slug = segments[-2]
for ext in ['.html', '.htm', '.php']:
if slug.lower().endswith(ext):
slug = slug[:-len(ext)]
# Remove common ID patterns at start/end
slug = re.sub(r'^[\d]+-', '', slug) # leading "13847569-"
slug = re.sub(r'-[\da-f]{6,}$', '', slug) # trailing hex IDs
slug = re.sub(r'[-_]c-\d+$', '', slug) # trailing "-c-21803431"
slug = re.sub(r'^p=\d+$', '', slug) # WordPress ?p=1234
slug = re.sub(r'^[\d]+-', '', slug) # leading numbers like "13847569-"
slug = re.sub(r'-[\da-f]{6,}$', '', slug) # trailing hex IDs
slug = re.sub(r'[-_]c-\d+$', '', slug) # trailing "-c-21803431"
slug = re.sub(r'^p=\d+$', '', slug) # WordPress ?p=1234
# Convert slug separators to spaces
slug = slug.replace('-', ' ').replace('_', ' ')
# Clean up multiple spaces
slug = re.sub(r'\s+', ' ', slug).strip()
# Final gibberish check after cleanup
if len(slug) < 8 or _is_gibberish(slug.replace(' ', '-')):
# If slug is still just a number or too short, fall back to domain
if len(slug) < 5 or re.match(r'^\d+$', slug):
return domain
# Title case and truncate
headline = slug.title()
if len(headline) > 90:
headline = headline[:87] + '...'
return headline
except (ValueError, AttributeError, KeyError): # non-critical
if len(headline) > 80:
headline = headline[:77] + '...'
return f"{headline} ({domain})"
except Exception:
return url[:60]
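Tracing the docstring's own example through the domain-suffix variant above (assuming the function's hidden opening lines strip 'www.' the way _extract_domain does):

_url_to_headline("https://nytimes.com/2026/03/us-strikes-iran-nuclear-sites.html")
# slug 'us-strikes-iran-nuclear-sites' title-cases to 'Us Strikes Iran Nuclear Sites'
# and is returned as 'Us Strikes Iran Nuclear Sites (nytimes.com)'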
def _is_gibberish(text):
"""Detect if a URL segment is gibberish (hex IDs, UUIDs, numeric IDs, etc.)
rather than a real human-readable slug like 'us-strikes-iran'."""
import re
t = text.strip()
if not t:
return True
# Pure numbers
if re.match(r'^\d+$', t):
return True
# UUID pattern (with or without dashes)
if re.match(r'^[0-9a-f]{8}[_-]?[0-9a-f]{4}[_-]?[0-9a-f]{4}[_-]?[0-9a-f]{4}[_-]?[0-9a-f]{12}$', t, re.I):
return True
# Hex-heavy string: more than 40% hex digits among alphanumeric chars
alnum = re.sub(r'[^a-zA-Z0-9]', '', t)
if alnum:
hex_chars = sum(1 for c in alnum if c in '0123456789abcdefABCDEF')
if hex_chars / len(alnum) > 0.4 and len(alnum) > 6:
return True
# Mostly digits with a few alpha (like "article8efa6c53")
digits = sum(1 for c in alnum if c.isdigit())
if alnum and digits / len(alnum) > 0.5:
return True
# Too short to be a headline slug
if len(t) < 5:
return True
# Query-param style segments
if '=' in t:
return True
return False
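A few spot checks of the heuristic above:

_is_gibberish("us-strikes-iran")    # False: readable slug
_is_gibberish("8f3c2a1b9e0d4c7a")   # True: hex-heavy ID
_is_gibberish("13847569")           # True: pure digits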
# Persistent cache for article titles — survives across GDELT cache refreshes
_article_title_cache = {}
def _fetch_article_title(url):
"""Fetch the real headline from an article's HTML <title> or og:title tag.
Returns the title string, or None if it can't be fetched.
Uses a persistent cache to avoid refetching."""
if url in _article_title_cache:
return _article_title_cache[url]
import re
try:
# Only read the first 32KB — the <title> is always in <head>
resp = requests.get(url, timeout=4, headers={
'User-Agent': 'Mozilla/5.0 (compatible; OSINT Dashboard/1.0)'
}, stream=True)
if resp.status_code != 200:
_article_title_cache[url] = None
return None
chunk = resp.raw.read(32768).decode('utf-8', errors='replace')
resp.close()
title = None
# Try og:title first (usually the cleanest)
og_match = re.search(r'<meta[^>]+property=["\']og:title["\'][^>]+content=["\']([^"\'>]+)["\']', chunk, re.I)
if not og_match:
og_match = re.search(r'<meta[^>]+content=["\']([^"\'>]+)["\'][^>]+property=["\']og:title["\']', chunk, re.I)
if og_match:
title = og_match.group(1).strip()
# Fall back to <title> tag
if not title:
title_match = re.search(r'<title[^>]*>([^<]+)</title>', chunk, re.I)
if title_match:
title = title_match.group(1).strip()
if title:
# Clean up HTML entities
import html as html_mod
title = html_mod.unescape(title)
# Remove site name suffixes like " | CNN" or " - BBC News"
title = re.sub(r'\s*[|\-–—]\s*[^|\-–—]{2,30}$', '', title).strip()
# Truncate very long titles
if len(title) > 120:
title = title[:117] + '...'
if len(title) > 10:
_article_title_cache[url] = title
return title
_article_title_cache[url] = None
return None
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, AttributeError): # non-critical
_article_title_cache[url] = None
return None
def _batch_fetch_titles(urls):
"""Fetch real article titles for a list of URLs in parallel.
Returns a dict of url -> title (or None if fetch failed)."""
from concurrent.futures import ThreadPoolExecutor
results = {}
with ThreadPoolExecutor(max_workers=16) as executor:
futures = {executor.submit(_fetch_article_title, u): u for u in urls}
for future in futures:
url = futures[future]
try:
results[url] = future.result()
except Exception: # non-critical: optional title enrichment
results[url] = None
return results
def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_index):
"""Parse a single GDELT export ZIP and append conflict features.
loc_index maps loc_key -> index in features list for fast duplicate merging.
@@ -309,7 +198,7 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
})
except (ValueError, IndexError):
continue
except (IOError, OSError, ValueError, KeyError, zipfile.BadZipFile) as e:
except Exception as e:
logger.warning(f"Failed to parse GDELT export zip: {e}")
def _download_gdelt_export(url):
@@ -318,72 +207,16 @@ def _download_gdelt_export(url):
res = fetch_with_curl(url, timeout=15)
if res.status_code == 200:
return res.content
except (ConnectionError, TimeoutError, OSError): # non-critical
except Exception:
pass
return None
def _build_feature_html(features, fetched_titles=None):
"""Build URL + headline arrays for frontend rendering.
Uses fetched_titles (real article titles) when available, falls back to URL slug parsing."""
import html as html_mod
for f in features:
urls = f["properties"].pop("_urls", [])
f["properties"].pop("_domains", None)
headlines = []
for u in urls:
real_title = fetched_titles.get(u) if fetched_titles else None
headlines.append(real_title if real_title else _url_to_headline(u))
f["properties"]["_urls_list"] = urls
f["properties"]["_headlines_list"] = headlines
if urls:
links = []
for u, h in zip(urls, headlines):
safe_url = u if u.startswith(('http://', 'https://')) else 'about:blank'
safe_h = html_mod.escape(h)
links.append(f'<div style="margin-bottom:6px;"><a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_h}</a></div>')
f["properties"]["html"] = ''.join(links)
else:
f["properties"]["html"] = html_mod.escape(f["properties"]["name"])
f.pop("_loc_key", None)
def _enrich_gdelt_titles_background(features, all_article_urls):
"""Background thread: fetch real article titles then update features in-place."""
import html as html_mod
try:
logger.info(f"[BG] Fetching real article titles for {len(all_article_urls)} URLs...")
fetched_titles = _batch_fetch_titles(all_article_urls)
fetched_count = sum(1 for v in fetched_titles.values() if v)
logger.info(f"[BG] Resolved {fetched_count}/{len(all_article_urls)} article titles")
# Update features in-place with real titles
for f in features:
urls = f["properties"].get("_urls_list", [])
if not urls:
continue
headlines = []
for u in urls:
real_title = fetched_titles.get(u)
headlines.append(real_title if real_title else _url_to_headline(u))
f["properties"]["_headlines_list"] = headlines
links = []
for u, h in zip(urls, headlines):
safe_url = u if u.startswith(('http://', 'https://')) else 'about:blank'
safe_h = html_mod.escape(h)
links.append(f'<div style="margin-bottom:6px;"><a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_h}</a></div>')
f["properties"]["html"] = ''.join(links)
logger.info(f"[BG] GDELT title enrichment complete")
except Exception as e:
logger.error(f"[BG] GDELT title enrichment failed: {e}")
@cached(gdelt_cache)
def fetch_global_military_incidents():
"""
Fetches global military/conflict incidents from GDELT Events Export files.
Aggregates the last ~8 hours of 15-minute exports to build ~1000 incidents.
Returns immediately with URL-slug headlines; enriches with real titles in background.
"""
import threading
from datetime import timedelta
from concurrent.futures import ThreadPoolExecutor
@@ -445,29 +278,24 @@ def fetch_global_military_incidents():
if zip_bytes:
_parse_gdelt_export_zip(zip_bytes, CONFLICT_CODES, seen_locs, features, loc_index)
# Collect all unique article URLs
all_article_urls = set()
# Build URL + headline arrays for frontend rendering
for f in features:
for u in f["properties"].get("_urls", []):
if u:
all_article_urls.add(u)
# Build HTML immediately with URL-slug headlines (instant, no network)
_build_feature_html(features)
logger.info(f"GDELT parsed: {len(features)} conflict locations from {successful} files (titles enriching in background)")
# Kick off background thread to enrich with real article titles
# Features list is shared — background thread updates in-place
t = threading.Thread(
target=_enrich_gdelt_titles_background,
args=(features, all_article_urls),
daemon=True,
)
t.start()
urls = f["properties"].pop("_urls", [])
f["properties"].pop("_domains", None)
headlines = [_url_to_headline(u) for u in urls]
f["properties"]["_urls_list"] = urls
f["properties"]["_headlines_list"] = headlines
# Keep html as fallback
if urls:
links = [f'<div style="margin-bottom:6px;"><a href="{u}" target="_blank">{h}</a></div>' for u, h in zip(urls, headlines)]
f["properties"]["html"] = ''.join(links)
else:
f["properties"]["html"] = f["properties"]["name"]
f.pop("_loc_key", None)
logger.info(f"GDELT multi-file parsed: {len(features)} conflict locations from {successful} files")
return features
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
except Exception as e:
logger.error(f"Error fetching GDELT data: {e}")
return []
-98
View File
@@ -1,98 +0,0 @@
"""
KiwiSDR public receiver list fetcher.
Scrapes the kiwisdr.com public page for active SDR receivers worldwide.
Data is embedded as HTML comments inside each entry div.
"""
import re
import logging
import requests
from cachetools import TTLCache, cached
logger = logging.getLogger(__name__)
kiwisdr_cache = TTLCache(maxsize=1, ttl=600) # 10-minute cache
def _parse_comment(html: str, field: str) -> str:
"""Extract a field value from HTML comment like <!-- field=value -->"""
m = re.search(rf'<!--\s*{field}=(.*?)\s*-->', html)
return m.group(1).strip() if m else ""
def _parse_gps(html: str):
"""Extract lat/lon from <!-- gps=(lat, lon) --> comment."""
m = re.search(r'<!--\s*gps=\(([^,]+),\s*([^)]+)\)\s*-->', html)
if m:
try:
return float(m.group(1)), float(m.group(2))
except ValueError:
return None, None
return None, None
@cached(kiwisdr_cache)
def fetch_kiwisdr_nodes() -> list[dict]:
"""Fetch and parse the KiwiSDR public receiver list."""
from services.network_utils import fetch_with_curl
try:
res = fetch_with_curl("http://kiwisdr.com/.public/", timeout=20)
if not res or res.status_code != 200:
logger.error(f"KiwiSDR fetch failed: HTTP {res.status_code if res else 'no response'}")
return []
html = res.text
# Split by entry divs
entries = re.findall(r"<div class='cl-entry[^']*'>(.*?)</div>\s*</div>", html, re.DOTALL)
nodes = []
for entry in entries:
lat, lon = _parse_gps(entry)
if lat is None or lon is None:
continue
if abs(lat) > 90 or abs(lon) > 180:
continue
offline = _parse_comment(entry, "offline")
if offline == "yes":
continue
name = _parse_comment(entry, "name") or "Unknown SDR"
users_str = _parse_comment(entry, "users")
users_max_str = _parse_comment(entry, "users_max")
bands = _parse_comment(entry, "bands")
antenna = _parse_comment(entry, "antenna")
location = _parse_comment(entry, "loc")
# Extract the URL from the href
url_match = re.search(r"href='(https?://[^']+)'", entry)
url = url_match.group(1) if url_match else ""
try:
users = int(users_str) if users_str else 0
except ValueError:
users = 0
try:
users_max = int(users_max_str) if users_max_str else 0
except ValueError:
users_max = 0
nodes.append({
"name": name[:120], # Truncate long names
"lat": round(lat, 5),
"lon": round(lon, 5),
"url": url,
"users": users,
"users_max": users_max,
"bands": bands,
"antenna": antenna[:200] if antenna else "",
"location": location[:100] if location else "",
})
logger.info(f"KiwiSDR: parsed {len(nodes)} online receivers")
return nodes
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
logger.error(f"KiwiSDR fetch exception: {e}")
return []
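The two helpers above pull fields out of the HTML-comment metadata that kiwisdr.com embeds in each entry; a minimal sketch on a made-up fragment:

entry = "<!-- name=Example SDR --><!-- users=3 --><!-- gps=(52.1, 4.3) -->"
_parse_comment(entry, "name")   # 'Example SDR'
_parse_gps(entry)               # (52.1, 4.3)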
+5 -5
View File
@@ -23,7 +23,7 @@ def fetch_liveuamap():
with sync_playwright() as p:
# Launching with a real user agent to bypass Turnstile
browser = p.chromium.launch(headless=True, args=["--disable-blink-features=AutomationControlled"])
browser = p.chromium.launch(headless=False, args=["--disable-blink-features=AutomationControlled"])
context = browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
viewport={"width": 1920, "height": 1080},
@@ -40,7 +40,7 @@ def fetch_liveuamap():
# Wait for the map canvas or markers script to load, max 10s wait
try:
page.wait_for_timeout(5000)
except (TimeoutError, OSError): # non-critical: page load delay
except:
pass
html = page.content()
@@ -56,8 +56,8 @@ def fetch_liveuamap():
# process below
html = f"var ovens={ovens_json};"
m = re.search(r"var\s+ovens=(.*?);", html, re.DOTALL)
except (ValueError, KeyError, OSError) as e: # non-critical: JS eval fallback
logger.debug(f"Could not evaluate ovens JS variable for {region['name']}: {e}")
except:
pass
if m:
json_str = m.group(1).strip()
@@ -81,7 +81,7 @@ def fetch_liveuamap():
"link": marker.get("link", region["url"]),
"region": region["name"]
})
except (json.JSONDecodeError, ValueError, KeyError) as e:
except Exception as e:
logger.error(f"Error parsing JSON for {region['name']}: {e}")
except Exception as e:
+33 -72
View File
@@ -3,21 +3,10 @@ import json
import subprocess
import shutil
import time
import threading
import requests
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
logger = logging.getLogger(__name__)
# Reusable session with connection pooling and retry logic.
# Only retry once (total=1) to fail fast — the curl fallback is the real safety net.
_session = requests.Session()
_retry = Retry(total=1, backoff_factor=0.3, status_forcelist=[502, 503, 504])
_session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
# Find bash for curl fallback — Git bash's curl has the TLS features
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
_BASH_PATH = shutil.which("bash") or "bash"
@@ -26,14 +15,6 @@ _BASH_PATH = shutil.which("bash") or "bash"
_domain_fail_cache: dict[str, float] = {}
_DOMAIN_FAIL_TTL = 300 # 5 minutes
# Circuit breaker: track domains where BOTH requests AND curl fail
# If a domain failed completely within the last 2 minutes, skip it entirely
_circuit_breaker: dict[str, float] = {}
_CIRCUIT_BREAKER_TTL = 120 # 2 minutes
# Lock protecting _domain_fail_cache and _circuit_breaker mutations
_cb_lock = threading.Lock()
class _DummyResponse:
"""Minimal response object matching requests.Response interface."""
def __init__(self, status_code, text):
@@ -64,66 +45,46 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None)
domain = urlparse(url).netloc
# Circuit breaker: if domain failed completely <2min ago, fail fast
with _cb_lock:
if domain in _circuit_breaker and (time.time() - _circuit_breaker[domain]) < _CIRCUIT_BREAKER_TTL:
raise Exception(f"Circuit breaker open for {domain} (failed <{_CIRCUIT_BREAKER_TTL}s ago)")
# Check if this domain recently failed with requests — skip straight to curl
with _cb_lock:
_skip_requests = domain in _domain_fail_cache and (time.time() - _domain_fail_cache[domain]) < _DOMAIN_FAIL_TTL
if not _skip_requests:
if domain in _domain_fail_cache and (time.time() - _domain_fail_cache[domain]) < _DOMAIN_FAIL_TTL:
pass # Fall through to curl below
else:
try:
# Use a short connect timeout (3s) so firewall blocks fail fast,
# but allow the full timeout for reading the response body.
req_timeout = (min(3, timeout), timeout)
import requests
if method == "POST":
res = _session.post(url, json=json_data, timeout=req_timeout, headers=default_headers)
res = requests.post(url, json=json_data, timeout=timeout, headers=default_headers)
else:
res = _session.get(url, timeout=req_timeout, headers=default_headers)
res = requests.get(url, timeout=timeout, headers=default_headers)
res.raise_for_status()
# Clear failure caches on success
with _cb_lock:
_domain_fail_cache.pop(domain, None)
_circuit_breaker.pop(domain, None)
# Clear failure cache on success
_domain_fail_cache.pop(domain, None)
return res
except (requests.RequestException, ConnectionError, TimeoutError, OSError) as e:
except Exception as e:
logger.warning(f"Python requests failed for {url} ({e}), falling back to bash curl...")
with _cb_lock:
_domain_fail_cache[domain] = time.time()
_domain_fail_cache[domain] = time.time()
# Curl fallback — reached from both _skip_requests and requests-exception paths
_CURL_PATH = shutil.which("curl") or "curl"
cmd = [_CURL_PATH, "-s", "-w", "\n%{http_code}"]
for k, v in default_headers.items():
cmd += ["-H", f"{k}: {v}"]
if method == "POST" and json_data:
cmd += ["-X", "POST", "-H", "Content-Type: application/json",
"--data-binary", "@-"]
cmd.append(url)
try:
stdin_data = json.dumps(json_data) if (method == "POST" and json_data) else None
res = subprocess.run(
cmd, capture_output=True, text=True, timeout=timeout + 5,
input=stdin_data
)
if res.returncode == 0 and res.stdout.strip():
# Parse HTTP status code from -w output (last line)
lines = res.stdout.rstrip().rsplit("\n", 1)
body = lines[0] if len(lines) > 1 else res.stdout
http_code = int(lines[-1]) if len(lines) > 1 and lines[-1].strip().isdigit() else 200
if http_code < 400:
with _cb_lock:
_circuit_breaker.pop(domain, None) # Clear circuit breaker on success
return _DummyResponse(http_code, body)
# Build curl command string for bash execution
header_flags = " ".join(f'-H "{k}: {v}"' for k, v in default_headers.items())
if method == "POST" and json_data:
payload = json.dumps(json_data).replace('"', '\\"')
curl_cmd = f'curl -s -w "\\n%{{http_code}}" {header_flags} -X POST -H "Content-Type: application/json" -d "{payload}" "{url}"'
else:
logger.error(f"bash curl fallback failed: exit={res.returncode} stderr={res.stderr[:200]}")
with _cb_lock:
_circuit_breaker[domain] = time.time()
curl_cmd = f'curl -s -w "\\n%{{http_code}}" {header_flags} "{url}"'
try:
res = subprocess.run(
[_BASH_PATH, "-c", curl_cmd],
capture_output=True, text=True, timeout=timeout + 5
)
if res.returncode == 0 and res.stdout.strip():
# Parse HTTP status code from -w output (last line)
lines = res.stdout.rstrip().rsplit("\n", 1)
body = lines[0] if len(lines) > 1 else res.stdout
http_code = int(lines[-1]) if len(lines) > 1 and lines[-1].strip().isdigit() else 200
return _DummyResponse(http_code, body)
else:
logger.error(f"bash curl fallback failed: exit={res.returncode} stderr={res.stderr[:200]}")
return _DummyResponse(500, "")
except Exception as curl_e:
logger.error(f"bash curl fallback exception: {curl_e}")
return _DummyResponse(500, "")
except (subprocess.SubprocessError, ConnectionError, TimeoutError, OSError) as curl_e:
logger.error(f"bash curl fallback exception: {curl_e}")
with _cb_lock:
_circuit_breaker[domain] = time.time()
return _DummyResponse(500, "")
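Call sites treat the requests path and the curl fallback identically, checking status_code and calling .json() either way; a minimal sketch (URL matches the CelesTrak usage elsewhere in this diff):

res = fetch_with_curl("https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=json", timeout=15)
if res.status_code == 200:
    records = res.json()   # works for both requests.Response and _DummyResponse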
-74
View File
@@ -1,74 +0,0 @@
"""
News feed configuration manages the user-customisable RSS feed list.
Feeds are stored in backend/config/news_feeds.json and persist across restarts.
"""
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
CONFIG_PATH = Path(__file__).parent.parent / "config" / "news_feeds.json"
MAX_FEEDS = 20
DEFAULT_FEEDS = [
{"name": "NPR", "url": "https://feeds.npr.org/1004/rss.xml", "weight": 4},
{"name": "BBC", "url": "http://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
{"name": "AlJazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml", "weight": 2},
{"name": "NYT", "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml", "weight": 1},
{"name": "GDACS", "url": "https://www.gdacs.org/xml/rss.xml", "weight": 5},
{"name": "NHK", "url": "https://www3.nhk.or.jp/nhkworld/rss/world.xml", "weight": 3},
{"name": "CNA", "url": "https://www.channelnewsasia.com/rssfeed/8395986", "weight": 3},
{"name": "Mercopress", "url": "https://en.mercopress.com/rss/", "weight": 3},
]
def get_feeds() -> list[dict]:
"""Load feeds from config file, falling back to defaults."""
try:
if CONFIG_PATH.exists():
data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
feeds = data.get("feeds", []) if isinstance(data, dict) else data
if isinstance(feeds, list) and len(feeds) > 0:
return feeds
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
logger.warning(f"Failed to read news feed config: {e}")
return list(DEFAULT_FEEDS)
def save_feeds(feeds: list[dict]) -> bool:
"""Validate and save feeds to config file. Returns True on success."""
if not isinstance(feeds, list):
return False
if len(feeds) > MAX_FEEDS:
return False
# Validate each feed entry
for f in feeds:
if not isinstance(f, dict):
return False
name = f.get("name", "").strip()
url = f.get("url", "").strip()
weight = f.get("weight", 3)
if not name or not url:
return False
if not isinstance(weight, (int, float)) or weight < 1 or weight > 5:
return False
# Normalise
f["name"] = name
f["url"] = url
f["weight"] = int(weight)
try:
CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
CONFIG_PATH.write_text(
json.dumps({"feeds": feeds}, indent=2, ensure_ascii=False),
encoding="utf-8",
)
return True
except (IOError, OSError) as e:
logger.error(f"Failed to write news feed config: {e}")
return False
def reset_feeds() -> bool:
"""Reset feeds to defaults."""
return save_feeds(list(DEFAULT_FEEDS))
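A hypothetical edit cycle through this config module (the feed URL is illustrative; save_feeds rejects lists over MAX_FEEDS or entries with a weight outside 1-5):

feeds = get_feeds()
feeds.append({"name": "Example Wire", "url": "https://news.example.com/rss.xml", "weight": 2})
if not save_feeds(feeds):
    print("Rejected: invalid entry or more than MAX_FEEDS feeds")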
+3 -3
View File
@@ -72,7 +72,7 @@ def get_top_broadcastify_feeds():
logger.info(f"Successfully scraped {len(feeds)} top feeds from Broadcastify.")
return feeds
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
except Exception as e:
logger.error(f"Broadcastify Scrape Exception: {e}")
return []
@@ -92,7 +92,7 @@ def get_openmhz_systems():
# Return list of systems
return data.get('systems', []) if isinstance(data, dict) else []
return []
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
except Exception as e:
logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
return []
@@ -112,7 +112,7 @@ def get_recent_openmhz_calls(sys_name: str):
data = res.json()
return data.get('calls', []) if isinstance(data, dict) else []
return []
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
except Exception as e:
logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
return []
+21 -43
View File
@@ -1,8 +1,6 @@
import logging
import time
import concurrent.futures
from urllib.parse import quote
import requests as _requests
from cachetools import TTLCache
from services.network_utils import fetch_with_curl
@@ -12,46 +10,26 @@ logger = logging.getLogger(__name__)
# Key: rounded lat/lng grid (0.1 degree ≈ 11km)
dossier_cache = TTLCache(maxsize=500, ttl=86400)
# Nominatim requires max 1 req/sec — track last call time
_nominatim_last_call = 0.0
def _reverse_geocode(lat: float, lng: float) -> dict:
global _nominatim_last_call
url = (
f"https://nominatim.openstreetmap.org/reverse?"
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
)
headers = {"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard; contact@shadowbroker.app)"}
for attempt in range(2):
# Enforce Nominatim's 1 req/sec policy
elapsed = time.time() - _nominatim_last_call
if elapsed < 1.1:
time.sleep(1.1 - elapsed)
_nominatim_last_call = time.time()
try:
# Use requests directly — fetch_with_curl raises on non-200 which breaks 429 handling
res = _requests.get(url, timeout=10, headers=headers)
if res.status_code == 200:
data = res.json()
addr = data.get("address", {})
return {
"city": addr.get("city") or addr.get("town") or addr.get("village") or addr.get("county") or "",
"state": addr.get("state") or addr.get("region") or "",
"country": addr.get("country") or "",
"country_code": (addr.get("country_code") or "").upper(),
"display_name": data.get("display_name", ""),
}
elif res.status_code == 429:
logger.warning(f"Nominatim 429 rate-limited, retrying after 2s (attempt {attempt+1})")
time.sleep(2)
continue
else:
logger.warning(f"Nominatim returned {res.status_code}")
except (_requests.RequestException, ConnectionError, TimeoutError, OSError) as e:
logger.warning(f"Reverse geocode failed: {e}")
try:
res = fetch_with_curl(url, timeout=10)
if res.status_code == 200:
data = res.json()
addr = data.get("address", {})
return {
"city": addr.get("city") or addr.get("town") or addr.get("village") or addr.get("county") or "",
"state": addr.get("state") or addr.get("region") or "",
"country": addr.get("country") or "",
"country_code": (addr.get("country_code") or "").upper(),
"display_name": data.get("display_name", ""),
}
except Exception as e:
logger.warning(f"Reverse geocode failed: {e}")
return {}
@@ -66,7 +44,7 @@ def _fetch_country_data(country_code: str) -> dict:
res = fetch_with_curl(url, timeout=10)
if res.status_code == 200:
return res.json()
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
except Exception as e:
logger.warning(f"RestCountries failed for {country_code}: {e}")
return {}
@@ -96,7 +74,7 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
"leader": r.get("leaderLabel", {}).get("value", "Unknown"),
"government_type": r.get("govTypeLabel", {}).get("value", "Unknown"),
}
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
except Exception as e:
logger.warning(f"Wikidata SPARQL failed for {country_name}: {e}")
return {"leader": "Unknown", "government_type": "Unknown"}
@@ -122,7 +100,7 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
"extract": data.get("extract", ""),
"thumbnail": data.get("thumbnail", {}).get("source", ""),
}
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError): # Intentional: optional enrichment
except Exception:
continue
return {}
@@ -158,22 +136,22 @@ def get_region_dossier(lat: float, lng: float) -> dict:
try:
country_data = country_fut.result(timeout=12)
except Exception: # Intentional: optional enrichment
except Exception:
logger.warning("Country data fetch timed out or failed")
country_data = {}
try:
leader_data = leader_fut.result(timeout=12)
except Exception: # Intentional: optional enrichment
except Exception:
logger.warning("Leader data fetch timed out or failed")
leader_data = {"leader": "Unknown", "government_type": "Unknown"}
try:
local_data = local_fut.result(timeout=12)
except Exception: # Intentional: optional enrichment
except Exception:
logger.warning("Local wiki fetch timed out or failed")
local_data = {}
try:
country_wiki_data = country_wiki_fut.result(timeout=12)
except Exception: # Intentional: optional enrichment
except Exception:
country_wiki_data = {}
# If no local data but we have country wiki summary, use that
-26
View File
@@ -1,26 +0,0 @@
from pydantic import BaseModel
from typing import Optional, Dict, List, Any
class HealthResponse(BaseModel):
status: str
last_updated: Optional[str] = None
sources: Dict[str, int]
freshness: Dict[str, str]
uptime_seconds: int
class RefreshResponse(BaseModel):
status: str
class AisFeedResponse(BaseModel):
status: str
ingested: int = 0
class RouteResponse(BaseModel):
orig_loc: Optional[list] = None
dest_loc: Optional[list] = None
origin_name: Optional[str] = None
dest_name: Optional[str] = None
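A hedged sketch of how a FastAPI route might use these models (the route path is an assumption, not shown in this diff):

# @app.get("/api/health", response_model=HealthResponse)
resp = HealthResponse(status="ok", sources={"satellites": 412},
                      freshness={"satellites": "fresh"}, uptime_seconds=3600)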
-82
View File
@@ -1,82 +0,0 @@
"""
Sentinel-2 satellite imagery search via Microsoft Planetary Computer STAC API.
Free, keyless search for metadata + thumbnails. Used in the right-click dossier.
"""
import logging
import requests
from datetime import datetime, timedelta
from cachetools import TTLCache
logger = logging.getLogger(__name__)
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
def search_sentinel2_scene(lat: float, lng: float) -> dict:
"""Search for the latest Sentinel-2 L2A scene covering a point."""
cache_key = f"{round(lat, 2)}_{round(lng, 2)}"
if cache_key in _sentinel_cache:
return _sentinel_cache[cache_key]
try:
from pystac_client import Client
catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
end = datetime.utcnow()
start = end - timedelta(days=30)
search = catalog.search(
collections=["sentinel-2-l2a"],
intersects={"type": "Point", "coordinates": [lng, lat]},
datetime=f"{start.isoformat()}Z/{end.isoformat()}Z",
sortby=[{"field": "datetime", "direction": "desc"}],
max_items=3,
query={"eo:cloud_cover": {"lt": 30}},
)
items = list(search.items())
if not items:
result = {"found": False, "message": "No clear scenes in last 30 days"}
_sentinel_cache[cache_key] = result
return result
item = items[0]
# Try to sign item first for Azure blob URLs
try:
import planetary_computer
item = planetary_computer.sign_item(item)
except ImportError:
pass # planetary_computer not installed, try unsigned URLs
except (ConnectionError, TimeoutError, ValueError) as e:
logger.warning(f"Sentinel-2 signing failed: {e}")
# Get the rendered_preview (full-res PNG) and thumbnail separately
rendered = item.assets.get("rendered_preview")
thumbnail = item.assets.get("thumbnail")
# Full-res image URL — what opens when user clicks
fullres_url = rendered.href if rendered else (thumbnail.href if thumbnail else None)
# Thumbnail URL — what shows in the popup card
thumb_url = thumbnail.href if thumbnail else (rendered.href if rendered else None)
result = {
"found": True,
"scene_id": item.id,
"datetime": item.datetime.isoformat() if item.datetime else None,
"cloud_cover": item.properties.get("eo:cloud_cover"),
"thumbnail_url": thumb_url,
"fullres_url": fullres_url,
"bbox": list(item.bbox) if item.bbox else None,
"platform": item.properties.get("platform", "Sentinel-2"),
}
_sentinel_cache[cache_key] = result
return result
except ImportError:
logger.warning("pystac-client not installed — Sentinel-2 search unavailable")
return {"found": False, "error": "pystac-client not installed"}
except (requests.RequestException, ConnectionError, TimeoutError, ValueError) as e:
logger.error(f"Sentinel-2 search failed for ({lat}, {lng}): {e}")
return {"found": False, "error": str(e)}
+17
View File
@@ -0,0 +1,17 @@
import sys
import logging
logging.basicConfig(level=logging.DEBUG)
# Add backend directory to sys path so we can import modules
sys.path.append(r'f:\Codebase\Oracle\live-risk-dashboard\backend')
from services.data_fetcher import fetch_flights, latest_data
print("Testing fetch_flights...")
try:
fetch_flights()
print("Commercial flights count:", len(latest_data.get('commercial_flights', [])))
print("Private jets count:", len(latest_data.get('private_jets', [])))
except Exception as e:
import traceback
traceback.print_exc()
+38
View File
@@ -0,0 +1,38 @@
import json
from playwright.sync_api import sync_playwright
def scrape_liveuamap():
print("Launching playwright...")
with sync_playwright() as p:
# User agents are important for headless browsing
browser = p.chromium.launch(headless=True)
page = browser.new_page(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
def handle_response(response):
try:
if not response.url.endswith(('js', 'css', 'png', 'jpg', 'woff2', 'svg', 'ico')):
print(f"Intercepted API Call: {response.url}")
except Exception:
pass
page.on("response", handle_response)
print("Navigating to liveuamap...")
try:
page.goto("https://liveuamap.com/", timeout=30000, wait_until="domcontentloaded")
page.wait_for_timeout(5000)
print("Grabbing all script tags...")
scripts = page.evaluate("() => Array.from(document.querySelectorAll('script')).map(s => s.innerText)")
for i, s in enumerate(scripts):
if 'JSON.parse' in s or 'markers' in s or 'JSON' in s:
with open(f"script_{i}.txt", "w", encoding="utf-8") as f:
f.write(s)
except Exception as e:
print("Playwright timeout or error:", e)
print("Closing browser...")
browser.close()
if __name__ == "__main__":
scrape_liveuamap()
+59
View File
@@ -0,0 +1,59 @@
import requests
import json
import time
import cloudscraper
def scrape_openmhz_systems():
print("Testing OpenMHZ undocumented API with Cloudscraper...")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'platform': 'windows', 'desktop': True})
try:
# Step 1: Hit the public systems list that the front-end map uses
res = scraper.get("https://api.openmhz.com/systems", headers=headers, timeout=15)
json_data = res.json()
systems = json_data.get('systems', []) if isinstance(json_data, dict) else []
print(f"Successfully spoofed OpenMHZ frontend. Found {len(systems)} active police/fire systems.")
if not systems:
return
# Inspect the first system (usually a major city)
city = systems[0]
sys_name = city.get('shortName')
print(f"Targeting System: {city.get('name')} ({sys_name})")
if not sys_name:
return
time.sleep(2) # Ethical delay
# Step 2: Query the recent calls for this specific system
# The frontend queries: https://api.openmhz.com/<system_name>/calls
calls_url = f"https://api.openmhz.com/{sys_name}/calls"
print(f"Fetching recent bursts: {calls_url}")
call_res = scraper.get(calls_url, headers=headers, timeout=15)
if call_res.status_code == 200:
call_json = call_res.json()
calls = call_json.get('calls', []) if isinstance(call_json, dict) else []
if calls and len(calls) > 0:
print(f"Intercepted {len(calls)} audio bursts.")
latest = calls[0]
print("LATEST INTERCEPT:")
print(f"Talkgroup: {latest.get('talkgroupNum')}")
print(f"Audio URL: {latest.get('url')}")
else:
print("No recent calls found for this system.")
else:
print(f"Failed to fetch calls. HTTP {call_res.status_code}")
except Exception as e:
print(f"Scrape Exception: {e}")
if __name__ == "__main__":
scrape_openmhz_systems()
+19
View File
@@ -0,0 +1,19 @@
import requests
def test_openmhz():
print("Testing OpenMHZ...")
res = requests.get("https://api.openmhz.com/systems")
if res.status_code == 200:
data = res.json()
# The endpoint returns {'systems': [...]} (see the cloudscraper script); unwrap before indexing
systems = data.get('systems', []) if isinstance(data, dict) else data
print(f"OpenMHZ returned {len(systems)} systems.")
if systems:
    print(f"Example: {systems[0]['name']} ({systems[0]['shortName']})")
else:
print(f"OpenMHZ Failed: {res.status_code}")
def test_scanner_radio():
print("Testing Scanner Radio...")
# Gordon Edwards app often uses something like this
# We will just try broadcastify public page scrape as a secondary fallback
pass
test_openmhz()
+55
View File
@@ -0,0 +1,55 @@
import feedparser
import requests
import re
feeds = {
"NPR": "https://feeds.npr.org/1004/rss.xml",
"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml"
}
keyword_coords = {
"venezuela": (7.119, -66.589), "brazil": (-14.235, -51.925), "argentina": (-38.416, -63.616),
"colombia": (4.570, -74.297), "mexico": (23.634, -102.552), "united states": (38.907, -77.036),
" usa ": (38.907, -77.036), " us ": (38.907, -77.036), "washington": (38.907, -77.036),
"canada": (56.130, -106.346), "ukraine": (49.487, 31.272), "kyiv": (50.450, 30.523),
"russia": (61.524, 105.318), "moscow": (55.755, 37.617), "israel": (31.046, 34.851),
"gaza": (31.416, 34.333), "iran": (32.427, 53.688), "lebanon": (33.854, 35.862),
"syria": (34.802, 38.996), "yemen": (15.552, 48.516), "china": (35.861, 104.195),
"beijing": (39.904, 116.407), "taiwan": (23.697, 120.960), "north korea": (40.339, 127.510),
"south korea": (35.907, 127.766), "pyongyang": (39.039, 125.762), "seoul": (37.566, 126.978),
"japan": (36.204, 138.252), "afghanistan": (33.939, 67.709), "pakistan": (30.375, 69.345),
"india": (20.593, 78.962), " uk ": (55.378, -3.435), "london": (51.507, -0.127),
"france": (46.227, 2.213), "paris": (48.856, 2.352), "germany": (51.165, 10.451),
"berlin": (52.520, 13.405), "sudan": (12.862, 30.217), "congo": (-4.038, 21.758),
"south africa": (-30.559, 22.937), "nigeria": (9.082, 8.675), "egypt": (26.820, 30.802),
"zimbabwe": (-19.015, 29.154), "australia": (-25.274, 133.775), "middle east": (31.500, 34.800),
"europe": (48.800, 2.300), "africa": (0.000, 25.000), "america": (38.900, -77.000),
"south america": (-14.200, -51.900), "asia": (34.000, 100.000),
"california": (36.778, -119.417), "texas": (31.968, -99.901), "florida": (27.994, -81.760),
"new york": (40.712, -74.006), "virginia": (37.431, -78.656),
"british columbia": (53.726, -127.647), "ontario": (51.253, -85.323), "quebec": (52.939, -73.549),
"delhi": (28.704, 77.102), "new delhi": (28.613, 77.209), "mumbai": (19.076, 72.877),
"shanghai": (31.230, 121.473), "hong kong": (22.319, 114.169), "istanbul": (41.008, 28.978),
"dubai": (25.204, 55.270), "singapore": (1.352, 103.819)
}
for name, url in feeds.items():
r = requests.get(url)
feed = feedparser.parse(r.text)
for entry in feed.entries[:10]:
title = entry.get('title', '')
summary = entry.get('summary', '')
text = (title + " " + summary).lower()
padded_text = f" {text} "
matched_kw = None
for kw, coords in keyword_coords.items():
if kw.startswith(" ") or kw.endswith(" "):
if kw in padded_text:
matched_kw = kw
break
else:
if re.search(r'\b' + re.escape(kw) + r'\b', text):
matched_kw = kw
break
print(f"[{name}] {title}\n Matched: {matched_kw}\n Text: {text}\n")
+67
View File
@@ -0,0 +1,67 @@
import requests
from bs4 import BeautifulSoup
import json
def scrape_broadcastify_top():
print("Scraping Broadcastify Top Feeds...")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
try:
# The top 50 feeds page provides a wealth of listening data
res = requests.get("https://www.broadcastify.com/listen/top", headers=headers, timeout=10)
if res.status_code != 200:
print(f"Failed HTTP {res.status_code}")
return []
soup = BeautifulSoup(res.text, 'html.parser')
# The feeds table is rendered with the "btable" class
table = soup.find('table', {'class': 'btable'})
if not table:
print("Could not find feeds table.")
return []
feeds = []
rows = table.find_all('tr')[1:] # Skip header
for row in rows:
cols = row.find_all('td')
if len(cols) >= 5:
# Top-50 column layout as parsed below: [Listeners, Location, Feed Name, Category, Genre]
listeners_str = cols[0].text.strip().replace(',', '')
listeners = int(listeners_str) if listeners_str.isdigit() else 0
# The link is usually in the Feed Name column
link_tag = cols[2].find('a')
if not link_tag:
continue
href = link_tag.get('href', '')
feed_id = href.split('/')[-1] if '/listen/feed/' in href else None
if not feed_id:
continue
location = cols[1].text.strip()
name = cols[2].text.strip()
feeds.append({
"id": feed_id,
"listeners": listeners,
"location": location,
"name": name,
"stream_url": f"https://broadcastify.cdnstream1.com/{feed_id}"
})
print(f"Successfully scraped {len(feeds)} top feeds.")
return feeds
except Exception as e:
print(f"Scrape error: {e}")
return []
if __name__ == "__main__":
top_feeds = scrape_broadcastify_top()
print(json.dumps(top_feeds[:3], indent=2))
-257
View File
@@ -1,257 +0,0 @@
"""Self-update module — downloads latest GitHub release, backs up current files,
extracts the update over the project, and restarts the app.
Public API:
perform_update(project_root) -> dict (download + backup + extract)
schedule_restart(project_root) (spawn detached start script, then exit)
"""
import os
import sys
import logging
import shutil
import subprocess
import tempfile
import time
import zipfile
from datetime import datetime
from pathlib import Path
import requests
logger = logging.getLogger(__name__)
GITHUB_RELEASES_URL = "https://api.github.com/repos/BigBodyCobain/Shadowbroker/releases/latest"
# ---------------------------------------------------------------------------
# Protected patterns — files/dirs that must NEVER be overwritten during update
# ---------------------------------------------------------------------------
_PROTECTED_DIRS = {"venv", "node_modules", ".next", "__pycache__", ".git"}
_PROTECTED_EXTENSIONS = {".db", ".sqlite"}
_PROTECTED_NAMES = {
".env",
"ais_cache.json",
"carrier_cache.json",
"geocode_cache.json",
}
def _is_protected(rel_path: str) -> bool:
"""Return True if *rel_path* (forward-slash separated) should be skipped."""
parts = rel_path.replace("\\", "/").split("/")
name = parts[-1]
# Check directory components
for part in parts[:-1]:
if part in _PROTECTED_DIRS:
return True
# Check filename
if name in _PROTECTED_NAMES:
return True
_, ext = os.path.splitext(name)
if ext.lower() in _PROTECTED_EXTENSIONS:
return True
return False
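# Examples: _is_protected("backend/.env") -> True (protected name);
# _is_protected("frontend/node_modules/pkg/index.js") -> True (protected dir);
# _is_protected("backend/main.py") -> False.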
# ---------------------------------------------------------------------------
# Download
# ---------------------------------------------------------------------------
def _download_release(temp_dir: str) -> tuple:
"""Fetch latest release info and download the zip asset.
Returns (zip_path, version_tag, download_url).
"""
logger.info("Fetching latest release info from GitHub...")
resp = requests.get(GITHUB_RELEASES_URL, timeout=15)
resp.raise_for_status()
release = resp.json()
tag = release.get("tag_name", "unknown")
assets = release.get("assets", [])
# Find the .zip asset
zip_url = None
for asset in assets:
url = asset.get("browser_download_url", "")
if url.endswith(".zip"):
zip_url = url
break
if not zip_url:
raise RuntimeError("No .zip asset found in the latest release")
logger.info(f"Downloading {zip_url} ...")
zip_path = os.path.join(temp_dir, "update.zip")
with requests.get(zip_url, stream=True, timeout=120) as dl:
dl.raise_for_status()
with open(zip_path, "wb") as f:
for chunk in dl.iter_content(chunk_size=1024 * 64):
f.write(chunk)
if not zipfile.is_zipfile(zip_path):
raise RuntimeError("Downloaded file is not a valid ZIP archive")
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
logger.info(f"Downloaded {size_mb:.1f} MB — ZIP validated OK")
return zip_path, tag, zip_url
# ---------------------------------------------------------------------------
# Backup
# ---------------------------------------------------------------------------
def _backup_current(project_root: str, temp_dir: str) -> str:
"""Create a backup zip of backend/ and frontend/ in temp_dir."""
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
backup_path = os.path.join(temp_dir, f"backup_{stamp}.zip")
logger.info(f"Backing up current files to {backup_path} ...")
dirs_to_backup = ["backend", "frontend"]
count = 0
with zipfile.ZipFile(backup_path, "w", zipfile.ZIP_DEFLATED) as zf:
for dir_name in dirs_to_backup:
dir_path = os.path.join(project_root, dir_name)
if not os.path.isdir(dir_path):
continue
for root, dirs, files in os.walk(dir_path):
# Prune protected directories from walk
dirs[:] = [d for d in dirs if d not in _PROTECTED_DIRS]
for fname in files:
full = os.path.join(root, fname)
rel = os.path.relpath(full, project_root)
if _is_protected(rel):
continue
try:
zf.write(full, rel)
count += 1
except (PermissionError, OSError) as e:
logger.warning(f"Backup skip (locked): {rel}{e}")
logger.info(f"Backup complete: {count} files archived")
return backup_path
# ---------------------------------------------------------------------------
# Extract & Copy
# ---------------------------------------------------------------------------
def _extract_and_copy(zip_path: str, project_root: str, temp_dir: str) -> int:
"""Extract the update zip and copy files over the project, skipping protected files.
Returns count of files copied.
"""
extract_dir = os.path.join(temp_dir, "extracted")
logger.info("Extracting update zip...")
with zipfile.ZipFile(zip_path, "r") as zf:
zf.extractall(extract_dir)
# Detect wrapper folder: if extracted root has a single directory that
# itself contains frontend/ or backend/, use it as the real base.
base = extract_dir
entries = [e for e in os.listdir(base) if not e.startswith(".")]
if len(entries) == 1:
candidate = os.path.join(base, entries[0])
if os.path.isdir(candidate):
sub = os.listdir(candidate)
if "frontend" in sub or "backend" in sub:
base = candidate
logger.info(f"Detected wrapper folder: {entries[0]}")
copied = 0
skipped = 0
for root, _dirs, files in os.walk(base):
for fname in files:
src = os.path.join(root, fname)
rel = os.path.relpath(src, base).replace("\\", "/")
if _is_protected(rel):
skipped += 1
continue
dst = os.path.join(project_root, rel)
os.makedirs(os.path.dirname(dst), exist_ok=True)
try:
shutil.copy2(src, dst)
copied += 1
except (PermissionError, OSError) as e:
logger.warning(f"Copy failed (skipping): {rel}{e}")
skipped += 1
logger.info(f"Update applied: {copied} files copied, {skipped} skipped/protected")
return copied
# ---------------------------------------------------------------------------
# Restart
# ---------------------------------------------------------------------------
def schedule_restart(project_root: str):
"""Spawn a detached process that re-runs start.bat / start.sh after a short
delay, then forcefully exit the current Python process."""
tmp = tempfile.mkdtemp(prefix="sb_restart_")
if sys.platform == "win32":
script = os.path.join(tmp, "restart.bat")
with open(script, "w") as f:
f.write("@echo off\n")
f.write("timeout /t 3 /nobreak >nul\n")
f.write(f'cd /d "{project_root}"\n')
f.write("call start.bat\n")
CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008
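# Windows process-creation flags: detaching the child from our console and
# process group lets the restart script outlive the os._exit(0) below.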
subprocess.Popen(
["cmd", "/c", script],
creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP,
close_fds=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
else:
script = os.path.join(tmp, "restart.sh")
with open(script, "w") as f:
f.write("#!/bin/bash\n")
f.write("sleep 3\n")
f.write(f'cd "{project_root}"\n')
f.write("bash start.sh\n")
os.chmod(script, 0o755)
subprocess.Popen(
["bash", script],
start_new_session=True,
close_fds=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
logger.info("Restart script spawned — exiting current process")
os._exit(0)
# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------
def perform_update(project_root: str) -> dict:
"""Download the latest release, back up current files, and extract the update.
Returns a dict with status info on success, or {"status": "error", "message": ...}
on failure. Does NOT trigger a restart — the caller should call schedule_restart()
separately after the HTTP response has been sent.
"""
temp_dir = tempfile.mkdtemp(prefix="sb_update_")
try:
zip_path, version, url = _download_release(temp_dir)
backup_path = _backup_current(project_root, temp_dir)
copied = _extract_and_copy(zip_path, project_root, temp_dir)
return {
"status": "ok",
"version": version,
"files_updated": copied,
"backup_path": backup_path,
"message": f"Updated to {version}{copied} files replaced. Restarting...",
}
except Exception as e:
logger.error(f"Update failed: {e}", exc_info=True)
return {
"status": "error",
"message": str(e),
}
+59
View File
@@ -0,0 +1,59 @@
import requests
import time
import math
import random
def test_fetch_and_triangulate():
t0 = time.time()
url = "https://api.adsb.lol/v2/lat/39.8/lon/-98.5/dist/1000"
try:
r = requests.get(url, timeout=10)
data = r.json()
print(f"Downloaded in {time.time() - t0:.2f}s")
if "ac" in data:
sampled = data["ac"]
print("Flights:", len(sampled))
else:
print("No 'ac' in response:", data)
# Load airports (mock for test)
airports = [{"lat": random.uniform(-90, 90), "lng": random.uniform(-180, 180), "iata": f"A{i}"} for i in range(4000)]
t1 = time.time()
for f in sampled:
lat = f.get("lat")
lng = f.get("lon")
heading = f.get("track", 0)
if lat is None or lng is None: continue
# Project 15 degrees (~1000 miles) backwards and forwards
dist_deg = 15.0
h_rad = math.radians(heading)
dy = math.cos(h_rad) * dist_deg
dx = math.sin(h_rad) * dist_deg
cos_lat = max(0.2, math.cos(math.radians(lat)))
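# Equirectangular approximation: a degree of longitude shrinks by cos(lat),
# so dx is divided by cos(lat); the 0.2 clamp avoids blow-up near the poles.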
origin_lat = lat - dy
origin_lng = lng - (dx / cos_lat)
dest_lat = lat + dy
dest_lng = lng + (dx / cos_lat)
# Find closest origin airport
best_o, min_o = None, float('inf')
for a in airports:
d = (a['lat'] - origin_lat)**2 + (a['lng'] - origin_lng)**2
if d < min_o: min_o = d; best_o = a
# Find closest dest airport
best_d, min_d = None, float('inf')
for a in airports:
d = (a['lat'] - dest_lat)**2 + (a['lng'] - dest_lng)**2
if d < min_d: min_d = d; best_d = a
print(f"Triangulated 500 flights against {len(airports)} airports in {time.time() - t1:.2f}s")
except Exception as e:
print("Error:", e)
test_fetch_and_triangulate()
+13
View File
@@ -0,0 +1,13 @@
from services.data_fetcher import fetch_airports, fetch_flights, cached_airports, latest_data
fetch_airports()
# Enable debug logging so we can see what happens inside fetch_flights
import logging
logging.basicConfig(level=logging.DEBUG)
# let's run fetch_flights
fetch_flights()
flights = latest_data.get('flights', [])
print(f"Total flights: {len(flights)}")
+45
View File
@@ -0,0 +1,45 @@
import json
import subprocess
import os
import time
proxy_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ais_proxy.js")
API_KEY = "75cc39af03c9cc23c90e8a7b3c3bc2b2a507c5fb"
print(f"Proxy script: {proxy_script}")
print(f"Exists: {os.path.exists(proxy_script)}")
process = subprocess.Popen(
['node', proxy_script, API_KEY],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, # Separate stderr!
text=True,
bufsize=1
)
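# The Node proxy emits one JSON object per stdout line (NDJSON), so the
# reader below can parse messages line-by-line.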
print("Process started, reading stdout...")
count = 0
start = time.time()
for line in iter(process.stdout.readline, ''):
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
msg_type = data.get("MessageType", "?")
mmsi = data.get("MetaData", {}).get("MMSI", 0)
count += 1
if count <= 5:
print(f" MSG {count}: type={msg_type} mmsi={mmsi}")
if count == 20:
elapsed = time.time() - start
print(f"\nReceived {count} messages in {elapsed:.1f}s — proxy is working!")
process.terminate()
break
except json.JSONDecodeError as e:
print(f" BAD JSON: {line[:100]}... err={e}")
if count == 0:
# Check stderr
stderr_out = process.stderr.read()
print(f"Zero messages received. stderr: {stderr_out[:500]}")
+54
View File
@@ -0,0 +1,54 @@
import json
import subprocess
import os
import time
import sys
proxy_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ais_proxy.js")
API_KEY = "75cc39af03c9cc23c90e8a7b3c3bc2b2a507c5fb"
print(f"Proxy script: {proxy_script}")
process = subprocess.Popen(
['node', proxy_script, API_KEY],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1
)
import threading
def read_stderr():
for line in iter(process.stderr.readline, ''):
print(f"[STDERR] {line.strip()}", file=sys.stderr)
t = threading.Thread(target=read_stderr, daemon=True)
t.start()
print("Process started, reading stdout for 15 seconds...")
count = 0
start = time.time()
while time.time() - start < 15:
line = process.stdout.readline()
if not line:
if process.poll() is not None:
print(f"Process exited with code {process.returncode}")
break
continue
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
msg_type = data.get("MessageType", "?")
mmsi = data.get("MetaData", {}).get("MMSI", 0)
count += 1
if count <= 5:
print(f" MSG {count}: type={msg_type} mmsi={mmsi}")
except json.JSONDecodeError:
print(f" BAD LINE: {line[:80]}...")
elapsed = time.time() - start
print(f"\nTotal {count} messages in {elapsed:.1f}s")
process.terminate()
+13
View File
@@ -0,0 +1,13 @@
import requests
import traceback
try:
print("Testing adsb.lol...")
r = requests.get("https://api.adsb.lol/v2/lat/39.8/lon/-98.5/dist/100", timeout=15)
print(f"Status: {r.status_code}")
d = r.json()
print(f"Aircraft: {len(d.get('ac', []))}")
except Exception as e:
print(f"Error type: {type(e).__name__}")
print(f"Error: {e}")
traceback.print_exc()
+11
View File
@@ -0,0 +1,11 @@
import json
import urllib.request
import time
time.sleep(5)
try:
data = urllib.request.urlopen('http://localhost:8000/api/live-data').read()
d = json.loads(data)
print(f"News: {len(d.get('news', []))} | Earthquakes: {len(d.get('earthquakes', []))} | Satellites: {len(d.get('satellites', []))} | CCTV: {len(d.get('cctv', []))}")
except Exception as e:
print(f"Error fetching API: {e}")
+56
View File
@@ -0,0 +1,56 @@
import requests
import json
# Step 1: Fetch some real flights from adsb.lol
print("Fetching real flights from adsb.lol...")
r = requests.get("https://api.adsb.lol/v2/lat/39.8/lon/-98.5/dist/250", timeout=10)
data = r.json()
ac = data.get("ac", [])
print("Got", len(ac), "aircraft")
# Step 2: Build a batch of real callsigns
planes = []
for f in ac[:20]: # Just 20 real flights
cs = str(f.get("flight", "")).strip()
lat = f.get("lat")
lon = f.get("lon")
if cs and lat is not None and lon is not None:
planes.append({"callsign": cs, "lat": lat, "lng": lon})
print("Built batch of", len(planes), "planes")
print("Sample plane:", json.dumps(planes[0]) if planes else "NONE")
# Step 3: Test routeset with real data
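# routeset contract as exercised here: POST {"planes": [{callsign, lat, lng}, ...]}
# and expect a list of route objects back for the callsigns the API recognizes.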
if planes:
payload = {"planes": planes}
print("Payload size:", len(json.dumps(payload)), "bytes")
r2 = requests.post("https://api.adsb.lol/api/0/routeset", json=payload, timeout=15)
print("Routeset HTTP:", r2.status_code)
if r2.status_code == 200:
result = r2.json()
print("Response type:", type(result).__name__)
print("Routes found:", len(result) if isinstance(result, list) else "dict")
if isinstance(result, list) and len(result) > 0:
print("First route:", json.dumps(result[0], indent=2))
else:
print("Error body:", r2.text[:500])
# Step 4: Test with bigger batch
print("\n--- Testing with 100 real flights ---")
planes100 = []
for f in ac[:120]:
cs = str(f.get("flight", "")).strip()
lat = f.get("lat")
lon = f.get("lon")
if cs and lat and lon:
planes100.append({"callsign": cs, "lat": lat, "lng": lon})
planes100 = planes100[:100]
print("Built batch of", len(planes100), "planes")
r3 = requests.post("https://api.adsb.lol/api/0/routeset", json={"planes": planes100}, timeout=15)
print("Routeset HTTP:", r3.status_code)
if r3.status_code == 200:
result3 = r3.json()
print("Routes found:", len(result3) if isinstance(result3, list) else "dict")
else:
print("Error body:", r3.text[:500])
+10
View File
@@ -0,0 +1,10 @@
from services.cctv_pipeline import init_db, TFLJamCamIngestor, LTASingaporeIngestor
init_db()
print("Initialized DB")
tfl = TFLJamCamIngestor()
print(f"TFL Cameras: {len(tfl.fetch_data())}")
sgp = LTASingaporeIngestor()
print(f"SGP Cameras: {len(sgp.fetch_data())}")
+24
View File
@@ -0,0 +1,24 @@
import requests
try:
print('Testing Seattle SDOT...')
r_sea = requests.get('https://data.seattle.gov/resource/65fc-btcc.json?$limit=5', headers={'X-App-Token': 'f2jdDBw5JMXPFOQyk64SKlPkn'}, timeout=10)
print(r_sea.status_code)
try:
print(r_sea.json()[0])
except Exception:
pass
except Exception as e:
print('Seattle error:', e)
try:
print('Testing NYC 511...')
r_nyc = requests.get('https://webcams.nyctmc.org/api/cameras', timeout=5)
print(r_nyc.status_code)
try:
print(len(r_nyc.json()))
print(r_nyc.json()[0])
except Exception:
pass
except Exception as e:
print('NYC error:', e)
+10
View File
@@ -0,0 +1,10 @@
import json, urllib.request
data = json.loads(urllib.request.urlopen('http://localhost:8000/api/live-data').read())
print(f"Commercial flights: {len(data.get('commercial_flights', []))}")
print(f"Private flights: {len(data.get('private_flights', []))}")
print(f"Private jets: {len(data.get('private_jets', []))}")
print(f"Military flights: {len(data.get('military_flights', []))}")
print(f"Tracked flights: {len(data.get('tracked_flights', []))}")
print(f"Ships: {len(data.get('ships', []))}")
print(f"CCTV: {len(data.get('cctv', []))}")
+38
View File
@@ -0,0 +1,38 @@
import json
import urllib.request
try:
data = json.loads(urllib.request.urlopen('http://localhost:8000/api/live-data').read())
# Tracked flights
tracked = data.get('tracked_flights', [])
print(f"=== TRACKED FLIGHTS: {len(tracked)} ===")
if tracked:
colors = {}
for t in tracked:
c = t.get('alert_color', 'NONE')
colors[c] = colors.get(c, 0) + 1
print(f" Colors: {colors}")
print(f" Sample: {json.dumps(tracked[0], indent=2)[:500]}")
# Ships
ships = data.get('ships', [])
print(f"\n=== SHIPS: {len(ships)} ===")
types = {}
for s in ships:
t = s.get('type', 'unknown')
types[t] = types.get(t, 0) + 1
print(f" Types: {types}")
if ships:
print(f" Sample: {json.dumps(ships[0], indent=2)[:300]}")
# News
news = data.get('news', [])
print(f"\n=== NEWS: {len(news)} ===")
# Earthquakes
quakes = data.get('earthquakes', [])
print(f"=== EARTHQUAKES: {len(quakes)} ===")
except Exception as e:
print(f"Error: {e}")
+23
View File
@@ -0,0 +1,23 @@
import json
import urllib.request
try:
data = json.loads(urllib.request.urlopen('http://localhost:8000/api/live-data').read())
tracked = data.get('tracked_flights', [])
colors = {}
for t in tracked:
c = t.get('alert_color', 'NONE')
colors[c] = colors.get(c, 0) + 1
print(f"TRACKED FLIGHTS: {len(tracked)} | Colors: {colors}")
ships = data.get('ships', [])
types = {}
for s in ships:
t = s.get('type', 'unknown')
types[t] = types.get(t, 0) + 1
print(f"SHIPS: {len(ships)} | Types: {types}")
print(f"NEWS: {len(data.get('news', []))} | EARTHQUAKES: {len(data.get('earthquakes', []))} | CCTV: {len(data.get('cctv', []))}")
except Exception as e:
print(f"Error: {e}")
+10
View File
@@ -0,0 +1,10 @@
import requests, json
url = "https://api.us.socrata.com/api/catalog/v1?domains=data.cityofnewyork.us&q=camera"
try:
r = requests.get(url)
res = r.json().get('results', [])
for d in res:
print(f"{d['resource']['id']} - {d['resource']['name']}")
except Exception as e:
print(e)
+36
View File
@@ -0,0 +1,36 @@
import json, urllib.request
data = json.loads(urllib.request.urlopen('http://localhost:8000/api/live-data').read())
# Check trail data
comm = data.get('commercial_flights', [])
mil = data.get('military_flights', [])
tracked = data.get('tracked_flights', [])
pvt = data.get('private_flights', [])
# Count flights with trails
comm_trails = [f for f in comm if f.get('trail') and len(f['trail']) > 0]
mil_trails = [f for f in mil if f.get('trail') and len(f['trail']) > 0]
tracked_trails = [f for f in tracked if f.get('trail') and len(f['trail']) > 0]
pvt_trails = [f for f in pvt if f.get('trail') and len(f['trail']) > 0]
print(f"Commercial: {len(comm)} total, {len(comm_trails)} with trails")
print(f"Military: {len(mil)} total, {len(mil_trails)} with trails")
print(f"Tracked: {len(tracked)} total, {len(tracked_trails)} with trails")
print(f"Private: {len(pvt)} total, {len(pvt_trails)} with trails")
# Show a sample trail
if mil_trails:
f = mil_trails[0]
print(f"\nSample trail ({f['callsign']}):")
print(f" Points: {len(f['trail'])}")
if f['trail']:
print(f" First: {f['trail'][0]}")
print(f" Last: {f['trail'][-1]}")
# Check for grounded planes
grounded = [f for f in comm if f.get('alt', 999) <= 500 and f.get('speed_knots', 999) < 30]
print(f"\nGrounded commercial: {len(grounded)}")
if grounded:
g = grounded[0]
print(f" Example: {g['callsign']} alt={g.get('alt')} speed={g.get('speed_knots')}")
+13
View File
@@ -0,0 +1,13 @@
import sqlite3
try:
conn = sqlite3.connect('cctv.db')
conn.row_factory = sqlite3.Row
cur = conn.cursor()
cur.execute("SELECT source_agency, COUNT(*) as count FROM cameras WHERE id LIKE 'OSM-%' GROUP BY source_agency")
rows = cur.fetchall()
print('OSM Cameras by City:')
for r in rows:
print(f"{r['source_agency']}: {r['count']}")
except Exception as e:
print('DB Error:', e)
+12
View File
@@ -0,0 +1,12 @@
import json
import urllib.request
import time
time.sleep(5)
try:
data = urllib.request.urlopen('http://localhost:8000/api/live-data').read()
d = json.loads(data)
ships = d.get('ships', [])
print(f"Ships: {len(ships)}")
except Exception as e:
print(f"Error fetching API: {e}")
+13
View File
@@ -0,0 +1,13 @@
import requests, json
print("Searching Socrata NYC/Seattle Cameras...")
try:
url = "https://api.us.socrata.com/api/catalog/v1?q=traffic cameras&limit=100"
r = requests.get(url)
res = r.json().get('results', [])
for d in res:
domain = d['metadata']['domain'].lower()
if 'seattle' in domain or 'newyork' in domain or 'nyc' in domain:
print(f"{d['resource']['id']} - {d['resource']['name']} ({domain})")
except Exception as e:
print(e)
+61
View File
@@ -0,0 +1,61 @@
"""Test trace endpoints with explicit output."""
import json, os, subprocess
hex_code = "a34bac" # DOJ166
from datetime import datetime, timezone
now = datetime.now(timezone.utc)
date_str = now.strftime("%Y/%m/%d")
hex_prefix = hex_code[-2:]
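# tar1090-style trace archives shard files by the LAST two characters of the
# ICAO hex: .../data/traces/YYYY/MM/DD/<last2>/trace_full_<hex>.json
# (layout assumed from the URLs constructed below).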
# Test 1: adsb.fi trace_full
url1 = f"https://globe.adsb.fi/data/traces/{date_str}/{hex_prefix}/trace_full_{hex_code}.json"
print(f"URL1: {url1}")
r = subprocess.run(["curl", "-s", "--max-time", "10", url1], capture_output=True, text=True, timeout=15)
if r.stdout.strip().startswith("{"):
data = json.loads(r.stdout)
print(f"SUCCESS! Keys: {list(data.keys())}")
if 'trace' in data:
pts = data['trace']
print(f"Trace points: {len(pts)}")
if pts:
print(f"FIRST (takeoff): {pts[0]}")
print(f"LAST (now): {pts[-1]}")
else:
print(f"Not JSON (first 100 chars): {r.stdout[:100]}")
# That endpoint can sit behind Cloudflare, so fall back to adsb.lol instead
# Test 2: adsb.lol hex lookup
url2 = f"https://api.adsb.lol/v2/hex/{hex_code}"
print(f"\nURL2: {url2}")
r2 = subprocess.run(["curl", "-s", "--max-time", "10", url2], capture_output=True, text=True, timeout=15)
if r2.stdout.strip().startswith("{"):
data = json.loads(r2.stdout)
if 'ac' in data and data['ac']:
ac = data['ac'][0]
keys = sorted(ac.keys())
print(f"All keys ({len(keys)}): {keys}")
else:
print(f"Not JSON: {r2.stdout[:100]}")
# Test 3: Try adsb.lol trace
url3 = f"https://api.adsb.lol/trace/{hex_code}"
print(f"\nURL3: {url3}")
r3 = subprocess.run(["curl", "-s", "-o", os.devnull, "-w", "%{http_code}", "--max-time", "10", url3], capture_output=True, text=True, timeout=15)
print(f"HTTP status: {r3.stdout}")
# Test 4: Try globe.adsb.lol format
url4 = f"https://globe.adsb.lol/data/traces/{date_str}/{hex_prefix}/trace_full_{hex_code}.json"
print(f"\nURL4: {url4}")
r4 = subprocess.run(["curl", "-s", "--max-time", "10", url4], capture_output=True, text=True, timeout=15)
if r4.stdout.strip().startswith("{"):
data = json.loads(r4.stdout)
print(f"SUCCESS! Keys: {list(data.keys())}")
if 'trace' in data:
pts = data['trace']
print(f"Trace points: {len(pts)}")
if pts:
print(f"FIRST (takeoff): {pts[0]}")
print(f"LAST (now): {pts[-1]}")
else:
print(f"Response: {r4.stdout[:150]}")
+8
View File
@@ -0,0 +1,8 @@
import asyncio, websockets
async def main():
try:
async with websockets.connect('wss://stream.aisstream.io/v0/stream') as ws:
print('Connected to AIS Stream!')
except Exception as e:
print(f"Error: {e}")
asyncio.run(main())
-50
View File
@@ -1,50 +0,0 @@
import pytest
from unittest.mock import patch
@pytest.fixture(autouse=True)
def _suppress_background_services():
"""Prevent real scheduler/stream/tracker from starting during tests."""
with patch("services.data_fetcher.start_scheduler"), \
patch("services.data_fetcher.stop_scheduler"), \
patch("services.ais_stream.start_ais_stream"), \
patch("services.ais_stream.stop_ais_stream"), \
patch("services.carrier_tracker.start_carrier_tracker"), \
patch("services.carrier_tracker.stop_carrier_tracker"):
yield
@pytest.fixture()
def client(_suppress_background_services):
"""HTTPX test client against the FastAPI app (no real network)."""
from httpx import ASGITransport, AsyncClient
from main import app
import asyncio
# Return a sync-usable wrapper (these test bodies are synchronous)
class SyncClient:
def __init__(self):
self._loop = asyncio.new_event_loop()
self._transport = ASGITransport(app=app)
def get(self, url, **kw):
return self._loop.run_until_complete(self._get(url, **kw))
async def _get(self, url, **kw):
async with AsyncClient(transport=self._transport, base_url="http://test") as ac:
return await ac.get(url, **kw)
def put(self, url, **kw):
return self._loop.run_until_complete(self._put(url, **kw))
async def _put(self, url, **kw):
async with AsyncClient(transport=self._transport, base_url="http://test") as ac:
return await ac.put(url, **kw)
return SyncClient()
-114
View File
@@ -1,114 +0,0 @@
"""Smoke tests for all API endpoints — verifies routes exist and return valid responses."""
import pytest
class TestHealthEndpoint:
def test_health_returns_200(self, client):
r = client.get("/api/health")
assert r.status_code == 200
data = r.json()
assert data["status"] == "ok"
assert "sources" in data
assert "freshness" in data
def test_health_has_uptime(self, client):
r = client.get("/api/health")
data = r.json()
assert "uptime_seconds" in data
assert isinstance(data["uptime_seconds"], (int, float))
class TestLiveDataEndpoints:
def test_live_data_returns_200(self, client):
r = client.get("/api/live-data")
assert r.status_code == 200
def test_live_data_fast_returns_200_or_304(self, client):
r = client.get("/api/live-data/fast")
assert r.status_code in (200, 304)
if r.status_code == 200:
data = r.json()
assert "freshness" in data
def test_live_data_slow_returns_200_or_304(self, client):
r = client.get("/api/live-data/slow")
assert r.status_code in (200, 304)
if r.status_code == 200:
data = r.json()
assert "freshness" in data
def test_fast_has_expected_keys(self, client):
r = client.get("/api/live-data/fast")
if r.status_code == 200:
data = r.json()
for key in ("commercial_flights", "military_flights", "ships", "satellites"):
assert key in data, f"Missing key: {key}"
def test_slow_has_expected_keys(self, client):
r = client.get("/api/live-data/slow")
if r.status_code == 200:
data = r.json()
for key in ("news", "stocks", "weather", "earthquakes"):
assert key in data, f"Missing key: {key}"
class TestDebugEndpoint:
def test_debug_latest_returns_list(self, client):
r = client.get("/api/debug-latest")
assert r.status_code == 200
data = r.json()
assert isinstance(data, list)
class TestSettingsEndpoints:
def test_get_api_keys(self, client):
r = client.get("/api/settings/api-keys")
assert r.status_code == 200
data = r.json()
assert isinstance(data, list)
def test_get_news_feeds(self, client):
r = client.get("/api/settings/news-feeds")
assert r.status_code == 200
data = r.json()
assert isinstance(data, list)
class TestRadioEndpoints:
def test_radio_top_returns_200(self, client):
r = client.get("/api/radio/top")
assert r.status_code == 200
def test_radio_openmhz_systems(self, client):
r = client.get("/api/radio/openmhz/systems")
assert r.status_code == 200
class TestQueryValidation:
def test_region_dossier_rejects_invalid_lat(self, client):
r = client.get("/api/region-dossier?lat=999&lng=0")
assert r.status_code == 422
def test_region_dossier_rejects_invalid_lng(self, client):
r = client.get("/api/region-dossier?lat=0&lng=999")
assert r.status_code == 422
def test_sentinel_rejects_invalid_coords(self, client):
r = client.get("/api/sentinel2/search?lat=-100&lng=0")
assert r.status_code == 422
def test_radio_nearest_rejects_invalid_lat(self, client):
r = client.get("/api/radio/nearest?lat=91&lng=0")
assert r.status_code == 422
class TestETagBehavior:
def test_fast_returns_etag_header(self, client):
r = client.get("/api/live-data/fast")
if r.status_code == 200:
assert "etag" in r.headers
def test_slow_returns_etag_header(self, client):
r = client.get("/api/live-data/slow")
if r.status_code == 200:
assert "etag" in r.headers
-159
View File
@@ -1,159 +0,0 @@
"""Tests for network_utils — fetch_with_curl, circuit breaker, domain fail cache."""
import time
import pytest
from unittest.mock import patch, MagicMock
from services.network_utils import fetch_with_curl, _circuit_breaker, _domain_fail_cache, _cb_lock, _DummyResponse
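# Under test: a per-domain circuit breaker (fail fast for ~120s after a failure)
# plus a domain fail cache that routes recently-failed domains straight to the
# curl fallback; TTL values are inferred from the test timings below.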
class TestDummyResponse:
"""Tests for the minimal response object used as curl fallback."""
def test_status_code_and_text(self):
resp = _DummyResponse(200, '{"ok": true}')
assert resp.status_code == 200
assert resp.text == '{"ok": true}'
def test_json_parsing(self):
resp = _DummyResponse(200, '{"key": "value", "num": 42}')
data = resp.json()
assert data["key"] == "value"
assert data["num"] == 42
def test_content_bytes(self):
resp = _DummyResponse(200, "hello")
assert resp.content == b"hello"
def test_raise_for_status_ok(self):
resp = _DummyResponse(200, "ok")
resp.raise_for_status() # Should not raise
def test_raise_for_status_error(self):
resp = _DummyResponse(500, "server error")
with pytest.raises(Exception, match="HTTP 500"):
resp.raise_for_status()
def test_raise_for_status_404(self):
resp = _DummyResponse(404, "not found")
with pytest.raises(Exception, match="HTTP 404"):
resp.raise_for_status()
class TestCircuitBreaker:
"""Tests for the circuit breaker and domain fail cache."""
def setup_method(self):
"""Clear caches before each test."""
with _cb_lock:
_circuit_breaker.clear()
_domain_fail_cache.clear()
def test_circuit_breaker_blocks_request(self):
"""If a domain is in circuit breaker, fetch_with_curl should fail fast."""
with _cb_lock:
_circuit_breaker["example.com"] = time.time()
with pytest.raises(Exception, match="Circuit breaker open"):
fetch_with_curl("https://example.com/test")
def test_circuit_breaker_expires_after_ttl(self):
"""Circuit breaker entries older than TTL should be ignored."""
with _cb_lock:
_circuit_breaker["expired.com"] = time.time() - 200 # > 120s TTL
# Should not raise — circuit breaker expired
# Will fail for other reasons (network) but won't raise circuit breaker
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = "ok"
mock_resp.raise_for_status = MagicMock()
with patch("services.network_utils._session") as mock_session:
mock_session.get.return_value = mock_resp
result = fetch_with_curl("https://expired.com/test")
assert result.status_code == 200
def test_domain_fail_cache_skips_to_curl(self):
"""If a domain recently failed with requests, skip straight to curl."""
with _cb_lock:
_domain_fail_cache["skip-to-curl.com"] = time.time()
# Mock subprocess to simulate curl success
mock_result = MagicMock()
mock_result.returncode = 0
mock_result.stdout = '{"data": true}\n200'
mock_result.stderr = ''
with patch("subprocess.run", return_value=mock_result) as mock_run:
result = fetch_with_curl("https://skip-to-curl.com/api")
assert result.status_code == 200
assert result.json()["data"] is True
# Verify subprocess.run was called (curl fallback)
mock_run.assert_called_once()
def test_successful_request_clears_caches(self):
"""Successful requests should clear both domain_fail_cache and circuit_breaker."""
domain = "success-clears.com"
with _cb_lock:
_domain_fail_cache[domain] = time.time() - 400 # Expired, won't skip
_circuit_breaker[domain] = time.time() - 200 # Expired, won't block
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = "ok"
mock_resp.raise_for_status = MagicMock()
with patch("services.network_utils._session") as mock_session:
mock_session.get.return_value = mock_resp
fetch_with_curl(f"https://{domain}/test")
with _cb_lock:
assert domain not in _domain_fail_cache
assert domain not in _circuit_breaker
class TestFetchWithCurl:
"""Tests for the primary fetch_with_curl function."""
def setup_method(self):
with _cb_lock:
_circuit_breaker.clear()
_domain_fail_cache.clear()
def test_successful_get_returns_response(self):
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = '{"result": 42}'
mock_resp.raise_for_status = MagicMock()
with patch("services.network_utils._session") as mock_session:
mock_session.get.return_value = mock_resp
result = fetch_with_curl("https://api.example.com/data")
assert result.status_code == 200
def test_post_with_json_data(self):
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = '{"created": true}'
mock_resp.raise_for_status = MagicMock()
with patch("services.network_utils._session") as mock_session:
mock_session.post.return_value = mock_resp
result = fetch_with_curl("https://api.example.com/create",
method="POST", json_data={"name": "test"})
assert result.status_code == 200
mock_session.post.assert_called_once()
def test_custom_headers_merged(self):
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = "ok"
mock_resp.raise_for_status = MagicMock()
with patch("services.network_utils._session") as mock_session:
mock_session.get.return_value = mock_resp
fetch_with_curl("https://api.example.com/data",
headers={"Authorization": "Bearer token123"})
call_args = mock_session.get.call_args
headers = call_args.kwargs.get("headers", {})
assert "Authorization" in headers
assert headers["Authorization"] == "Bearer token123"
-72
View File
@@ -1,72 +0,0 @@
"""Tests for Pydantic response schemas."""
import pytest
from pydantic import ValidationError
from services.schemas import HealthResponse, RefreshResponse, AisFeedResponse, RouteResponse
class TestHealthResponse:
def test_valid_health_response(self):
data = {
"status": "ok",
"last_updated": "2024-01-01T00:00:00",
"sources": {"flights": 150, "ships": 42},
"freshness": {"flights": "2024-01-01T00:00:00", "ships": "2024-01-01T00:00:00"},
"uptime_seconds": 3600
}
resp = HealthResponse(**data)
assert resp.status == "ok"
assert resp.sources["flights"] == 150
assert resp.uptime_seconds == 3600
def test_health_response_optional_last_updated(self):
data = {
"status": "ok",
"sources": {},
"freshness": {},
"uptime_seconds": 0
}
resp = HealthResponse(**data)
assert resp.last_updated is None
def test_health_response_missing_required_field(self):
with pytest.raises(ValidationError):
HealthResponse(status="ok") # Missing sources, freshness, uptime_seconds
class TestRefreshResponse:
def test_valid_refresh(self):
resp = RefreshResponse(status="refreshing")
assert resp.status == "refreshing"
def test_missing_status(self):
with pytest.raises(ValidationError):
RefreshResponse()
class TestAisFeedResponse:
def test_valid_ais_feed(self):
resp = AisFeedResponse(status="ok", ingested=42)
assert resp.ingested == 42
def test_default_ingested_zero(self):
resp = AisFeedResponse(status="ok")
assert resp.ingested == 0
class TestRouteResponse:
def test_valid_route(self):
resp = RouteResponse(
orig_loc=[40.6413, -73.7781],
dest_loc=[51.4700, -0.4543],
origin_name="JFK",
dest_name="LHR"
)
assert resp.origin_name == "JFK"
assert len(resp.orig_loc) == 2
def test_all_optional(self):
resp = RouteResponse()
assert resp.orig_loc is None
assert resp.dest_loc is None
assert resp.origin_name is None
assert resp.dest_name is None
-97
View File
@@ -1,97 +0,0 @@
"""Tests for the shared in-memory data store."""
import threading
import time
import pytest
from services.fetchers._store import latest_data, source_timestamps, _mark_fresh, _data_lock
class TestLatestDataStructure:
"""Verify the store has the expected keys and default values."""
def test_has_all_required_keys(self):
expected_keys = {
"last_updated", "news", "stocks", "oil", "flights", "ships",
"military_flights", "tracked_flights", "cctv", "weather",
"earthquakes", "uavs", "frontlines", "gdelt", "liveuamap",
"kiwisdr", "space_weather", "internet_outages", "firms_fires",
"datacenters"
}
assert expected_keys.issubset(set(latest_data.keys()))
def test_list_keys_default_to_empty_list(self):
list_keys = ["news", "flights", "ships", "military_flights",
"tracked_flights", "cctv", "earthquakes", "uavs",
"gdelt", "liveuamap", "kiwisdr", "internet_outages",
"firms_fires", "datacenters"]
for key in list_keys:
assert isinstance(latest_data[key], list), f"{key} should default to list"
def test_dict_keys_default_to_empty_dict(self):
dict_keys = ["stocks", "oil"]
for key in dict_keys:
assert isinstance(latest_data[key], dict), f"{key} should default to dict"
class TestMarkFresh:
"""Tests for _mark_fresh timestamp helper."""
def test_records_timestamp_for_single_key(self):
_mark_fresh("test_key_1")
assert "test_key_1" in source_timestamps
assert isinstance(source_timestamps["test_key_1"], str)
def test_records_timestamps_for_multiple_keys(self):
_mark_fresh("multi_a", "multi_b", "multi_c")
assert "multi_a" in source_timestamps
assert "multi_b" in source_timestamps
assert "multi_c" in source_timestamps
def test_timestamps_are_iso_format(self):
_mark_fresh("iso_test")
ts = source_timestamps["iso_test"]
# ISO format: YYYY-MM-DDTHH:MM:SS.ffffff
assert "T" in ts
assert len(ts) >= 19 # At least YYYY-MM-DDTHH:MM:SS
def test_successive_calls_update_timestamp(self):
_mark_fresh("update_test")
ts1 = source_timestamps["update_test"]
time.sleep(0.01)
_mark_fresh("update_test")
ts2 = source_timestamps["update_test"]
assert ts2 >= ts1
class TestDataLock:
"""Verify the data lock works for thread safety."""
def test_lock_exists_and_is_a_lock(self):
assert isinstance(_data_lock, type(threading.Lock()))
def test_concurrent_writes_dont_corrupt(self):
"""Simulate concurrent writes to latest_data through the lock."""
errors = []
def writer(key, value, iterations=100):
try:
for _ in range(iterations):
with _data_lock:
latest_data[key] = value
# Read back immediately — should be our value
assert latest_data[key] == value
except Exception as e:
errors.append(e)
threads = [
threading.Thread(target=writer, args=("test_concurrent", [1, 2, 3])),
threading.Thread(target=writer, args=("test_concurrent", [4, 5, 6])),
threading.Thread(target=writer, args=("test_concurrent", [7, 8, 9])),
]
for t in threads:
t.start()
for t in threads:
t.join()
assert len(errors) == 0, f"Thread safety errors: {errors}"
# Restore default
latest_data["test_concurrent"] = []
File diff suppressed because it is too large
+37
View File
@@ -0,0 +1,37 @@
import os
import zipfile
zip_name = 'ShadowBroker_v0.1.zip'
if os.path.exists(zip_name):
try:
os.remove(zip_name)
except Exception as e:
print(f"Failed to delete old zip: {e}")
def add_dir(zipf, dir_path, excludes):
for root, dirs, files in os.walk(dir_path):
dirs[:] = [d for d in dirs if d not in excludes]
for f in files:
file_path = os.path.join(root, f)
zipf.write(file_path, arcname=file_path)
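# arcname=file_path keeps the repo-relative path (backend/..., frontend/...)
# inside the archive, since add_dir is always called with a relative dir_path.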
try:
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
print("Zipping backend...")
add_dir(zipf, 'backend', {'venv', '__pycache__'})
print("Zipping frontend...")
add_dir(zipf, 'frontend', {'node_modules', '.next'})
print("Zipping root files...")
zipf.write('docker-compose.yml')
zipf.write('start.bat')
zipf.write('start.sh')
zipf.write('README.md')
final_size = os.path.getsize(zip_name) / (1024 * 1024)
print(f"\n✅ SUCCESS! Created {zip_name}. Final size: {final_size:.2f} MB")
except Exception as e:
print(f"\n❌ ERROR creating zip: {e}")
-116
View File
@@ -1,116 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMPOSE_FILE="$SCRIPT_DIR/docker-compose.yml"
ENGINE="${SHADOWBROKER_CONTAINER_ENGINE:-auto}"
COMPOSE_ARGS=()
COMPOSE_PROVIDER=""
find_docker_compose() {
if command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1; then
COMPOSE_CMD=(docker compose)
COMPOSE_PROVIDER="docker compose"
return 0
fi
if command -v docker-compose >/dev/null 2>&1; then
COMPOSE_CMD=(docker-compose)
COMPOSE_PROVIDER="docker-compose"
return 0
fi
return 1
}
find_podman_compose() {
if command -v podman >/dev/null 2>&1 && podman compose version >/dev/null 2>&1; then
COMPOSE_CMD=(podman compose)
COMPOSE_PROVIDER="podman compose"
return 0
fi
if command -v podman-compose >/dev/null 2>&1; then
COMPOSE_CMD=(podman-compose)
COMPOSE_PROVIDER="podman-compose"
return 0
fi
return 1
}
if [ ! -f "$COMPOSE_FILE" ]; then
echo "[!] ERROR: Missing compose file: $COMPOSE_FILE"
exit 1
fi
while [ "$#" -gt 0 ]; do
case "$1" in
--engine)
if [ "$#" -lt 2 ]; then
echo "[!] ERROR: --engine requires a value: docker, podman, or auto."
exit 1
fi
ENGINE="$2"
shift 2
;;
--engine=*)
ENGINE="${1#*=}"
shift
;;
*)
COMPOSE_ARGS+=("$1")
shift
;;
esac
done
if [ "${#COMPOSE_ARGS[@]}" -eq 0 ]; then
COMPOSE_ARGS=(up -d)
fi
if [ "${#COMPOSE_ARGS[@]}" -gt 0 ]; then
last_index=$((${#COMPOSE_ARGS[@]} - 1))
if [ "${COMPOSE_ARGS[$last_index]}" = "." ]; then
echo "[*] Ignoring trailing '.' argument."
unset "COMPOSE_ARGS[$last_index]"
fi
fi
if [ "${#COMPOSE_ARGS[@]}" -eq 0 ]; then
COMPOSE_ARGS=(up -d)
fi
COMPOSE_CMD=()
case "$ENGINE" in
auto)
find_docker_compose || find_podman_compose || true
;;
docker)
find_docker_compose || true
;;
podman)
find_podman_compose || true
;;
*)
echo "[!] ERROR: Unsupported engine '$ENGINE'. Use docker, podman, or auto."
exit 1
;;
esac
if [ "${#COMPOSE_CMD[@]}" -eq 0 ]; then
echo "[!] ERROR: No supported compose command found for engine '$ENGINE'."
echo " Install one of: docker compose, docker-compose, podman compose, or podman-compose."
exit 1
fi
if [ "$ENGINE" = "podman" ] && [ "$COMPOSE_PROVIDER" = "podman compose" ]; then
echo "[*] Using (podman): ${COMPOSE_CMD[*]}"
echo "[*] Note: 'podman compose' is Podman's wrapper command and may delegate to docker-compose based on your local Podman configuration."
else
echo "[*] Using ($ENGINE): ${COMPOSE_CMD[*]}"
fi
"${COMPOSE_CMD[@]}" -f "$COMPOSE_FILE" "${COMPOSE_ARGS[@]}"
+9 -33
View File
@@ -1,31 +1,21 @@
version: '3.8'
services:
backend:
build:
build:
context: ./backend
container_name: shadowbroker-backend
ports:
- "8000:8000"
environment:
- AIS_API_KEY=${AIS_API_KEY}
- OPENSKY_CLIENT_ID=${OPENSKY_CLIENT_ID}
- OPENSKY_CLIENT_SECRET=${OPENSKY_CLIENT_SECRET}
- AISSTREAM_API_KEY=${AISSTREAM_API_KEY}
- N2YO_API_KEY=${N2YO_API_KEY}
- OPENSKY_USERNAME=${OPENSKY_USERNAME}
- OPENSKY_PASSWORD=${OPENSKY_PASSWORD}
- LTA_ACCOUNT_KEY=${LTA_ACCOUNT_KEY}
# Override allowed CORS origins (comma-separated). Auto-detects LAN IPs if empty.
- CORS_ORIGINS=${CORS_ORIGINS:-}
volumes:
- backend_data:/app/data
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/api/live-data/fast"]
interval: 30s
timeout: 10s
retries: 3
start_period: 90s
deploy:
resources:
limits:
memory: 2G
cpus: '2'
frontend:
build:
@@ -34,24 +24,10 @@ services:
ports:
- "3000:3000"
environment:
# Points the Next.js server-side proxy at the backend container via Docker networking.
# Change this if your backend runs on a different host or port.
- BACKEND_URL=http://backend:8000
- NEXT_PUBLIC_API_URL=http://localhost:8000
depends_on:
backend:
condition: service_healthy
- backend
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
deploy:
resources:
limits:
memory: 512M
cpus: '1'
volumes:
backend_data:
+7 -7
View File
@@ -1,13 +1,13 @@
Dockerfile
.dockerignore
node_modules
npm-debug.log
README.md
.next
.git
.env
.env.local
.env.*
eslint.config.mjs
node_modules
npm-debug.log*
build_logs*.txt
build_output.txt
build_error.txt
errors.txt
server_logs*.txt
postcss.config.mjs
tailwind.config.ts
+6 -10
View File
@@ -1,4 +1,4 @@
FROM node:20-alpine AS base
FROM node:18-alpine AS base
FROM base AS deps
RUN apk add --no-cache libc6-compat
@@ -10,17 +10,13 @@ FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .
ENV NEXT_TELEMETRY_DISABLED=1
# NEXT_PUBLIC_* vars must exist at build time for Next.js to inline them.
# Default empty = auto-detect from browser hostname at runtime.
ARG NEXT_PUBLIC_API_URL=""
ENV NEXT_PUBLIC_API_URL=$NEXT_PUBLIC_API_URL
ENV NEXT_TELEMETRY_DISABLED 1
RUN npm run build
FROM base AS runner
WORKDIR /app
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
ENV NODE_ENV production
ENV NEXT_TELEMETRY_DISABLED 1
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs
@@ -36,7 +32,7 @@ COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
USER nextjs
EXPOSE 3000
ENV PORT=3000
ENV HOSTNAME="0.0.0.0"
ENV PORT 3000
ENV HOSTNAME "0.0.0.0"
CMD ["node", "server.js"]

Some files were not shown because too many files have changed in this diff.