mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-08 18:34:58 +02:00
90c2e90e2c
- Parallelized startup (60s → 15s) via ThreadPoolExecutor - Adaptive polling engine with ETag caching (no more bbox interrupts) - useCallback optimization for interpolation functions - Sliding LAYERS/INTEL edge panels replace bulky Record Panel - Modular fetcher architecture (flights, geo, infrastructure, financial, earth_observation) - Stable entity IDs for GDELT & News popups (PR #63, credit @csysp) - Admin auth (X-Admin-Key), rate limiting (slowapi), auto-updater - Docker Swarm secrets support, env_check.py validation - 85+ vitest tests, CI pipeline, geoJSON builder extraction - Server-side viewport bbox filtering reduces payloads 80%+ Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Former-commit-id: f2883150b5bc78ebc139d89cc966a76f7d7c0408
146 lines
6.1 KiB
Python
146 lines
6.1 KiB
Python
"""Data fetcher orchestrator — schedules and coordinates all data source modules.
|
||
|
||
Heavy logic has been extracted into services/fetchers/:
|
||
- _store.py — shared state (latest_data, locks, timestamps)
|
||
- plane_alert.py — aircraft enrichment DB
|
||
- flights.py — commercial flights, routes, trails, GPS jamming
|
||
- military.py — military flights, UAV detection
|
||
- satellites.py — satellite tracking (SGP4)
|
||
- news.py — RSS news fetching, clustering, risk assessment
|
||
- yacht_alert.py — superyacht alert enrichment
|
||
- financial.py — defense stocks, oil prices
|
||
- earth_observation.py — earthquakes, FIRMS fires, space weather, weather radar
|
||
- infrastructure.py — internet outages, data centers, CCTV, KiwiSDR
|
||
- geo.py — ships, airports, frontlines, GDELT, LiveUAMap
|
||
"""
|
||
import logging
|
||
import concurrent.futures
|
||
from datetime import datetime
|
||
from dotenv import load_dotenv
|
||
load_dotenv()
|
||
|
||
from apscheduler.schedulers.background import BackgroundScheduler
|
||
from services.cctv_pipeline import init_db
|
||
|
||
# Shared state — all fetcher modules read/write through this
|
||
from services.fetchers._store import (
|
||
latest_data, source_timestamps, _mark_fresh, _data_lock, # noqa: F401 — re-exported for main.py
|
||
)
|
||
|
||
# Domain-specific fetcher modules (already extracted)
|
||
from services.fetchers.flights import fetch_flights # noqa: F401
|
||
from services.fetchers.flights import _BLIND_SPOT_REGIONS # noqa: F401 — re-exported for tests
|
||
from services.fetchers.military import fetch_military_flights # noqa: F401
|
||
from services.fetchers.satellites import fetch_satellites # noqa: F401
|
||
from services.fetchers.news import fetch_news # noqa: F401
|
||
|
||
# Newly extracted fetcher modules
|
||
from services.fetchers.financial import fetch_defense_stocks, fetch_oil_prices # noqa: F401
|
||
from services.fetchers.earth_observation import ( # noqa: F401
|
||
fetch_earthquakes, fetch_firms_fires, fetch_space_weather, fetch_weather,
|
||
)
|
||
from services.fetchers.infrastructure import ( # noqa: F401
|
||
fetch_internet_outages, fetch_datacenters, fetch_cctv, fetch_kiwisdr,
|
||
)
|
||
from services.fetchers.geo import ( # noqa: F401
|
||
fetch_ships, fetch_airports, find_nearest_airport, cached_airports,
|
||
fetch_frontlines, fetch_gdelt, fetch_geopolitics, update_liveuamap,
|
||
)
|
||
|
||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Scheduler & Orchestration
|
||
# ---------------------------------------------------------------------------
|
||
def update_fast_data():
    """Fast-tier: moving entities that need frequent updates (every 60s).

    Runs the flight, military-flight, ship and satellite fetchers concurrently
    (one worker thread per fetcher), waits for all of them, then stamps
    ``latest_data['last_updated']`` under ``_data_lock``.

    A fetcher that raises is logged with its traceback and does not prevent
    the remaining fetchers from completing or the timestamp from being set.
    """
    logger.info("Fast-tier data update starting...")
    fast_funcs = [
        fetch_flights,
        fetch_military_flights,
        fetch_ships,
        fetch_satellites,
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(fast_funcs)) as executor:
        # Remember which callable each future belongs to so failures can be named.
        futures = {executor.submit(func): func for func in fast_funcs}
        concurrent.futures.wait(futures)

    # wait() only blocks; an exception raised inside a worker stays stored on
    # its Future and would vanish silently unless it is inspected here.
    for future, func in futures.items():
        exc = future.exception()
        if exc is not None:
            logger.error(
                "Fast-tier fetcher %s failed: %s",
                getattr(func, "__name__", repr(func)),
                exc,
                exc_info=exc,
            )

    with _data_lock:
        # Naive UTC timestamp (ISO-8601 without an offset suffix).
        latest_data['last_updated'] = datetime.utcnow().isoformat()
    logger.info("Fast-tier update complete.")
|
||
|
||
def update_slow_data():
    """Slow-tier: contextual + enrichment data that refreshes less often (every 5–10 min).

    Submits every slow-tier fetcher to a thread pool sized to the number of
    fetchers and blocks until all of them have finished.

    A fetcher that raises is logged with its traceback and does not prevent
    the remaining fetchers from completing.
    """
    logger.info("Slow-tier data update starting...")
    slow_funcs = [
        fetch_news,
        fetch_earthquakes,
        fetch_firms_fires,
        fetch_defense_stocks,
        fetch_oil_prices,
        fetch_weather,
        fetch_space_weather,
        fetch_internet_outages,
        fetch_cctv,
        fetch_kiwisdr,
        fetch_frontlines,
        fetch_gdelt,
        fetch_datacenters,
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(slow_funcs)) as executor:
        # Remember which callable each future belongs to so failures can be named.
        futures = {executor.submit(func): func for func in slow_funcs}
        concurrent.futures.wait(futures)

    # wait() only blocks; an exception raised inside a worker stays stored on
    # its Future and would vanish silently unless it is inspected here.
    for future, func in futures.items():
        exc = future.exception()
        if exc is not None:
            logger.error(
                "Slow-tier fetcher %s failed: %s",
                getattr(func, "__name__", repr(func)),
                exc,
                exc_info=exc,
            )

    logger.info("Slow-tier update complete.")
|
||
|
||
def update_all_data():
    """Full refresh — all tiers run IN PARALLEL for fastest startup.

    Runs ``fetch_airports``, ``update_fast_data`` and ``update_slow_data``
    concurrently on a three-worker pool and blocks until all three finish.

    A task that raises is logged with its traceback; the other tasks still
    run to completion.
    """
    logger.info("Full data update starting (parallel)...")
    tasks = (fetch_airports, update_fast_data, update_slow_data)
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as pool:
        futures = {pool.submit(task): task for task in tasks}
        concurrent.futures.wait(futures)

    # wait() only blocks; an exception raised inside a worker stays stored on
    # its Future and would vanish silently unless it is inspected here.
    for future, task in futures.items():
        exc = future.exception()
        if exc is not None:
            logger.error(
                "Full update task %s failed: %s",
                getattr(task, "__name__", repr(task)),
                exc,
                exc_info=exc,
            )

    logger.info("Full data update complete.")
|
||
|
||
# Handle to the background scheduler; None until start_scheduler() assigns it.
_scheduler = None
|
||
|
||
def start_scheduler():
    """Initialise the CCTV database and start the background job scheduler.

    Registers these interval jobs (each with ``max_instances=1``):

    - ``fast_tier``: ``update_fast_data`` every 60 seconds
    - ``slow_tier``: ``update_slow_data`` every 5 minutes
    - ``gdelt`` / ``liveuamap``: every 15 minutes
    - ``cctv_tfl`` / ``cctv_lta`` / ``cctv_atx`` / ``cctv_nyc``: every 10 minutes

    Calling this while a scheduler is already running is a no-op, so a
    repeated start cannot leak a second scheduler or duplicate the jobs.
    """
    global _scheduler

    # Without this guard a second call would overwrite the module-level handle
    # while the first scheduler keeps running with its own copy of every job.
    if _scheduler is not None and _scheduler.running:
        logger.warning("Scheduler already running; start_scheduler() call ignored.")
        return

    init_db()
    _scheduler = BackgroundScheduler(daemon=True)

    # Fast tier — every 60 seconds
    _scheduler.add_job(update_fast_data, 'interval', seconds=60, id='fast_tier', max_instances=1, misfire_grace_time=30)

    # Slow tier — every 5 minutes
    _scheduler.add_job(update_slow_data, 'interval', minutes=5, id='slow_tier', max_instances=1, misfire_grace_time=120)

    # Very slow — every 15 minutes
    # NOTE(review): fetch_gdelt is also in update_slow_data's fetcher list, so
    # it is triggered by both the slow_tier job and this one — confirm intended.
    _scheduler.add_job(fetch_gdelt, 'interval', minutes=15, id='gdelt', max_instances=1, misfire_grace_time=120)
    _scheduler.add_job(update_liveuamap, 'interval', minutes=15, id='liveuamap', max_instances=1, misfire_grace_time=120)

    # CCTV pipeline refresh — every 10 minutes
    # Instantiate once and reuse — avoids re-creating DB connections on every tick
    from services.cctv_pipeline import (
        TFLJamCamIngestor, LTASingaporeIngestor,
        AustinTXIngestor, NYCDOTIngestor,
    )
    # All ingestors are constructed first, then registered in the same order.
    cctv_ingestors = (
        ('cctv_tfl', TFLJamCamIngestor()),
        ('cctv_lta', LTASingaporeIngestor()),
        ('cctv_atx', AustinTXIngestor()),
        ('cctv_nyc', NYCDOTIngestor()),
    )
    for job_id, ingestor in cctv_ingestors:
        _scheduler.add_job(ingestor.ingest, 'interval', minutes=10, id=job_id, max_instances=1, misfire_grace_time=120)

    _scheduler.start()
    logger.info("Scheduler started.")
|
||
|
||
def stop_scheduler():
    """Shut down the background scheduler if it is currently running.

    Safe to call before ``start_scheduler()`` or more than once: the shutdown
    is skipped when there is no scheduler or it is not running, because
    APScheduler's ``shutdown()`` raises ``SchedulerNotRunningError`` in that
    state. ``wait=False`` requests shutdown without blocking on in-flight jobs.
    """
    if _scheduler is not None and _scheduler.running:
        _scheduler.shutdown(wait=False)
|
||
|
||
def get_latest_data():
    """Return a snapshot of the shared ``latest_data`` store.

    The copy is taken while holding ``_data_lock``. It is a shallow copy: the
    returned dict is new, but the values inside it are the same objects held
    by the store.
    """
    with _data_lock:
        snapshot = dict(latest_data)
    return snapshot
|