mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-31 19:41:37 +02:00
fc9eff865e
New features: - In-app auto-updater with confirmation dialog, manual download fallback, restart polling, and protected file safety net - Ship layers split into 4 independent toggles (Military/Carriers, Cargo/Tankers, Civilian, Cruise/Passenger) with per-category counts - Stable entity IDs using MMSI/callsign instead of volatile array indices - Dismissible threat alert bubbles (session-scoped, survives data refresh) Performance: - GDELT title fetching is now non-blocking (background enrichment) - Removed duplicate startup fetch jobs - Docker healthcheck start_period 15s → 90s Bug fixes: - Removed fake intelligence assessment generator (OSINT-only policy) - Fixed carrier tracker GDELT 429/TypeError crash - Fixed ETag collision (full payload hash) - Added concurrent /api/refresh guard Contributors: @imqdcr (ship split + stable IDs), @csysp (dismissible alerts, PR #48) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Former-commit-id: a2c4c67da54345393f70a9b33b52e7e4fd6c049f
599 lines
23 KiB
Python
599 lines
23 KiB
Python
"""Data fetcher orchestrator — schedules and coordinates all data source modules.

Heavy logic has been extracted into services/fetchers/:
  - _store.py — shared state (latest_data, locks, timestamps)
  - plane_alert.py — aircraft enrichment DB
  - flights.py — commercial flights, routes, trails, GPS jamming
  - military.py — military flights, UAV detection
  - satellites.py — satellite tracking (SGP4)
  - news.py — RSS news fetching, clustering, risk assessment
"""
|
|
import yfinance as yf
|
|
import csv
|
|
import io
|
|
import json
|
|
import time
|
|
import math
|
|
import logging
|
|
import heapq
|
|
import concurrent.futures
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from cachetools import TTLCache
|
|
from apscheduler.schedulers.background import BackgroundScheduler
|
|
from dotenv import load_dotenv
|
|
load_dotenv()
|
|
|
|
from services.network_utils import fetch_with_curl
|
|
from services.cctv_pipeline import (
|
|
init_db, TFLJamCamIngestor, LTASingaporeIngestor,
|
|
AustinTXIngestor, NYCDOTIngestor, get_all_cameras,
|
|
)
|
|
|
|
# Shared state — all fetcher modules read/write through this
|
|
from services.fetchers._store import (
|
|
latest_data, source_timestamps, _mark_fresh, _data_lock, # noqa: F401 — source_timestamps re-exported for main.py
|
|
)
|
|
|
|
# Domain-specific fetcher modules
|
|
from services.fetchers.flights import fetch_flights
|
|
from services.fetchers.military import fetch_military_flights
|
|
from services.fetchers.satellites import fetch_satellites
|
|
from services.fetchers.news import fetch_news
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Financial data
|
|
# ---------------------------------------------------------------------------
|
|
def _fetch_single_ticker(symbol: str, period: str = "2d"):
    """Look up recent price history for one yfinance symbol.

    Returns ``(symbol, quote)``. ``quote`` is a dict with the latest close
    (``price``), the percent move relative to the first close in ``period``
    (``change_percent``) and an ``up`` flag. ``quote`` is ``None`` when the
    history is empty or the lookup raised (the error is logged as a warning).
    """
    try:
        history = yf.Ticker(symbol).history(period=period)
        if len(history) >= 1:
            closes = history['Close']
            latest = closes.iloc[-1]
            # With a single row there is nothing to compare against, so the
            # baseline is the latest close itself (0% change).
            baseline = closes.iloc[0] if len(history) > 1 else latest
            if baseline:
                pct = ((latest - baseline) / baseline) * 100
            else:
                pct = 0
            quote = {
                "price": round(float(latest), 2),
                "change_percent": round(float(pct), 2),
                "up": bool(pct >= 0),
            }
            return symbol, quote
    except Exception as e:
        logger.warning(f"Could not fetch data for {symbol}: {e}")
    return symbol, None
|
|
|
|
def fetch_defense_stocks():
    """Refresh ``latest_data['stocks']`` with quotes for a fixed set of defense tickers.

    Each symbol is fetched on a small thread pool with a 2-day history window.
    Symbols whose lookup produced no data are left out of the stored mapping.
    Any failure is logged and leaves the previously stored value untouched.
    """
    symbols = ["RTX", "LMT", "NOC", "GD", "BA", "PLTR"]
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
            pending = [pool.submit(_fetch_single_ticker, sym, "2d") for sym in symbols]
            quotes = {}
            # Collect in submission order so the mapping keeps the ticker order.
            for job in pending:
                sym, data = job.result()
                if data:
                    quotes[sym] = data
        with _data_lock:
            latest_data['stocks'] = quotes
        _mark_fresh("stocks")
    except Exception as e:
        logger.error(f"Error fetching stocks: {e}")
|
|
|
|
def fetch_oil_prices():
    """Refresh ``latest_data['oil']`` with WTI and Brent crude futures quotes.

    The stored mapping is keyed by the human-readable contract name; contracts
    whose lookup produced no data are omitted. Quotes use a 5-day history window.
    """
    contracts = {"WTI Crude": "CL=F", "Brent Crude": "BZ=F"}
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
            jobs = [
                (label, pool.submit(_fetch_single_ticker, ticker_symbol, "5d"))
                for label, ticker_symbol in contracts.items()
            ]
            oil_data = {}
            for label, job in jobs:
                # _fetch_single_ticker returns (symbol, quote); only the quote is kept.
                _, quote = job.result()
                if quote:
                    oil_data[label] = quote
        with _data_lock:
            latest_data['oil'] = oil_data
        _mark_fresh("oil")
    except Exception as e:
        logger.error(f"Error fetching oil: {e}")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Weather
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_weather():
    """Store the newest RainViewer radar frame timestamp and tile host.

    Reads the RainViewer weather-maps index and, when it contains past radar
    frames, writes ``{"time", "host"}`` to ``latest_data["weather"]``. A non-200
    response or a payload without radar history leaves the stored value as is.
    """
    try:
        response = fetch_with_curl("https://api.rainviewer.com/public/weather-maps.json", timeout=10)
        if response.status_code != 200:
            return
        payload = response.json()
        if "radar" not in payload or "past" not in payload["radar"]:
            return
        # The last entry of the "past" list is used as the most recent frame.
        newest_frame = payload["radar"]["past"][-1]["time"]
        tile_host = payload.get("host", "https://tilecache.rainviewer.com")
        with _data_lock:
            latest_data["weather"] = {"time": newest_frame, "host": tile_host}
        _mark_fresh("weather")
    except Exception as e:
        logger.error(f"Error fetching weather: {e}")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CCTV
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_cctv():
    """Publish the camera list from the CCTV pipeline as ``latest_data["cctv"]``.

    On any error the stored list is reset to empty and the error is logged.
    """
    try:
        camera_rows = get_all_cameras()
        with _data_lock:
            latest_data["cctv"] = camera_rows
        _mark_fresh("cctv")
    except Exception as e:
        logger.error(f"Error fetching cctv from DB: {e}")
        # Fall back to an empty list rather than keeping a possibly stale one.
        with _data_lock:
            latest_data["cctv"] = []
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# KiwiSDR
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_kiwisdr():
    """Publish the KiwiSDR node list as ``latest_data["kiwisdr"]``.

    The fetcher module is imported lazily inside the ``try`` so an import
    failure is handled like any other error: it is logged and the stored
    list is reset to empty.
    """
    try:
        from services.kiwisdr_fetcher import fetch_kiwisdr_nodes

        receiver_nodes = fetch_kiwisdr_nodes()
        with _data_lock:
            latest_data["kiwisdr"] = receiver_nodes
        _mark_fresh("kiwisdr")
    except Exception as e:
        logger.error(f"Error fetching KiwiSDR nodes: {e}")
        with _data_lock:
            latest_data["kiwisdr"] = []
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# NASA FIRMS Fires
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_firms_fires():
    """Fetch global fire/thermal anomalies from NASA FIRMS (NOAA-20 VIIRS, 24h, no key needed).

    Parses the CSV feed, skips rows whose numeric columns do not parse, and
    keeps the 5000 rows with the highest ``frp`` value. The result (possibly
    empty) always replaces ``latest_data["firms_fires"]``; the source is only
    marked fresh when at least one hotspot was kept.
    """
    fires = []
    try:
        response = fetch_with_curl(
            "https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_24h.csv",
            timeout=30,
        )
        if response.status_code == 200:

            def _hotspots():
                """Yield one normalized dict per CSV row that parses cleanly."""
                for row in csv.DictReader(io.StringIO(response.text)):
                    try:
                        hotspot = {
                            "lat": float(row.get("latitude", 0)),
                            "lng": float(row.get("longitude", 0)),
                            "frp": float(row.get("frp", 0)),
                            "brightness": float(row.get("bright_ti4", 0)),
                            "confidence": row.get("confidence", "nominal"),
                            "daynight": row.get("daynight", ""),
                            "acq_date": row.get("acq_date", ""),
                            "acq_time": row.get("acq_time", ""),
                        }
                    except (ValueError, TypeError):
                        # Unparseable numeric field: drop the row.
                        continue
                    yield hotspot

            fires = heapq.nlargest(5000, _hotspots(), key=lambda spot: spot["frp"])
        logger.info(f"FIRMS fires: {len(fires)} hotspots (from {response.status_code})")
    except Exception as e:
        logger.error(f"Error fetching FIRMS fires: {e}")

    with _data_lock:
        latest_data["firms_fires"] = fires
    if fires:
        _mark_fresh("firms_fires")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Space Weather
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_space_weather():
    """Fetch NOAA SWPC Kp index and recent solar events.

    Stores ``{"kp_index", "kp_text", "events"}`` in
    ``latest_data["space_weather"]``. ``kp_index`` stays ``None`` and
    ``kp_text`` stays ``"QUIET"`` when the Kp feed is unavailable or empty.
    ``events`` holds the last ten entries of the edited-events feed.
    """
    try:
        kp_value = None
        kp_text = "QUIET"
        kp_resp = fetch_with_curl("https://services.swpc.noaa.gov/json/planetary_k_index_1m.json", timeout=10)
        if kp_resp.status_code == 200:
            kp_data = kp_resp.json()
            if kp_data:
                # The last element of the feed is used as the current reading.
                kp_value = float(kp_data[-1].get("kp_index", 0))
                if kp_value >= 5:
                    # Kp 5..9 maps to storm level 1..5 (capped at 5).
                    kp_text = f"STORM G{min(int(kp_value) - 4, 5)}"
                elif kp_value >= 4:
                    kp_text = "ACTIVE"
                elif kp_value >= 3:
                    kp_text = "UNSETTLED"

        events = []
        ev_resp = fetch_with_curl("https://services.swpc.noaa.gov/json/edited_events.json", timeout=10)
        if ev_resp.status_code == 200:
            events = [
                {
                    "type": ev.get("type", ""),
                    "begin": ev.get("begin", ""),
                    "end": ev.get("end", ""),
                    "classtype": ev.get("classtype", ""),
                }
                for ev in ev_resp.json()[-10:]
            ]

        summary = {
            "kp_index": kp_value,
            "kp_text": kp_text,
            "events": events,
        }
        with _data_lock:
            latest_data["space_weather"] = summary
        _mark_fresh("space_weather")
        logger.info(f"Space weather: Kp={kp_value} ({kp_text}), {len(events)} events")
    except Exception as e:
        logger.error(f"Error fetching space weather: {e}")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internet Outages (IODA)
|
|
# ---------------------------------------------------------------------------
|
|
# Geocoding results keyed by "region|country"; entries expire after 24 hours.
_region_geocode_cache: TTLCache = TTLCache(maxsize=2000, ttl=86400)


def _geocode_region(region_name: str, country_name: str) -> "tuple | None":
    """Resolve a region to ``(lat, lon)`` via OpenStreetMap Nominatim, with caching.

    Args:
        region_name: Region name as reported by the outage feed.
        country_name: Country the region belongs to.

    Returns:
        ``(lat, lon)`` as floats, or ``None`` when the region could not be
        resolved.

    Successful lookups and definitive misses (Nominatim answered with an empty
    result list) are cached. Transient failures (an exception or a non-200
    response) are logged and NOT cached, so a temporary network problem does
    not hide a region for the whole cache TTL. No request throttling is done
    here; callers issue lookups sequentially.
    """
    import urllib.parse

    cache_key = f"{region_name}|{country_name}"
    # EAFP lookup: a separate `in` check followed by indexing can raise
    # KeyError if the entry expires between the two operations.
    try:
        return _region_geocode_cache[cache_key]
    except KeyError:
        pass

    try:
        query = urllib.parse.quote(f"{region_name}, {country_name}")
        url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
        response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
        if response.status_code != 200:
            logger.warning(f"Geocoding {cache_key} returned HTTP {response.status_code}")
            return None
        results = response.json()
        if results:
            coords = (float(results[0]["lat"]), float(results[0]["lon"]))
            _region_geocode_cache[cache_key] = coords
            return coords
    except Exception as e:
        logger.warning(f"Geocoding failed for {cache_key}: {e}")
        return None

    # The service answered but knows no such place: remember the miss.
    _region_geocode_cache[cache_key] = None
    return None
|
|
|
|
def fetch_internet_outages():
    """Fetch regional internet outage alerts from IODA (Georgia Tech).

    Considers the last 24 hours of alerts, keeps only non-"normal" alerts for
    region entities from the datasources listed in ``RELIABLE_DATASOURCES``,
    and scores each by how far the current value dropped below its historical
    value (0-100). Alerts scoring under 10 are ignored. Per region code only
    the highest-scoring alert is kept. Regions that can be geocoded get
    ``lat``/``lng`` attached, and the 100 most severe are stored in
    ``latest_data["internet_outages"]``.
    """
    RELIABLE_DATASOURCES = {"bgp", "ping-slash24"}
    outages = []
    try:
        now = int(time.time())
        start = now - 86400
        url = f"https://api.ioda.inetintel.cc.gatech.edu/v2/outages/alerts?from={start}&until={now}&limit=500"
        response = fetch_with_curl(url, timeout=15)
        if response.status_code == 200:
            worst_by_region = {}
            for alert in response.json().get("data", []):
                entity = alert.get("entity", {})
                entity_kind = entity.get("type", "")
                level = alert.get("level", "")
                if level == "normal" or entity_kind != "region":
                    continue
                datasource = alert.get("datasource", "")
                if datasource not in RELIABLE_DATASOURCES:
                    continue

                code = entity.get("code", "")
                name = entity.get("name", "")
                attrs = entity.get("attrs", {})
                country_code = attrs.get("country_code", "")
                country_name = attrs.get("country_name", "")
                value = alert.get("value", 0)
                history_value = alert.get("historyValue", 0)

                # Percentage drop relative to the historical baseline; without
                # a positive baseline the alert scores 0 and is discarded below.
                if history_value and history_value > 0:
                    severity = round((1 - value / history_value) * 100)
                else:
                    severity = 0
                severity = max(0, min(severity, 100))
                if severity < 10:
                    continue

                current = worst_by_region.get(code)
                if current is not None and severity <= current["severity"]:
                    continue
                worst_by_region[code] = {
                    "region_code": code,
                    "region_name": name,
                    "country_code": country_code,
                    "country_name": country_name,
                    "level": level,
                    "datasource": datasource,
                    "severity": severity,
                }

            located = []
            for region in worst_by_region.values():
                coords = _geocode_region(region["region_name"], region["country_name"])
                if not coords:
                    continue
                region["lat"], region["lng"] = coords[0], coords[1]
                located.append(region)
            outages = heapq.nlargest(100, located, key=lambda item: item["severity"])
        logger.info(f"Internet outages: {len(outages)} regions affected")
    except Exception as e:
        logger.error(f"Error fetching internet outages: {e}")

    with _data_lock:
        latest_data["internet_outages"] = outages
    if outages:
        _mark_fresh("internet_outages")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data Centers
|
|
# ---------------------------------------------------------------------------
|
|
# JSON file with pre-geocoded data centers, located in the "data" directory
# two levels above this module.
_DC_GEOCODED_PATH = Path(__file__).parent.parent / "data" / "datacenters_geocoded.json"


def fetch_datacenters():
    """Load geocoded data centers (5K+ street-level precise locations).

    Entries without coordinates or with out-of-range coordinates are skipped.
    When the file is missing a warning is logged and nothing is written to
    ``latest_data``; otherwise the collected list always replaces
    ``latest_data["datacenters"]``.
    """
    dcs = []
    # (key, default) pairs copied verbatim from each entry.
    copied_fields = (
        ("name", "Unknown"),
        ("company", ""),
        ("street", ""),
        ("city", ""),
        ("country", ""),
        ("zip", ""),
    )
    try:
        if not _DC_GEOCODED_PATH.exists():
            logger.warning(f"Geocoded DC file not found: {_DC_GEOCODED_PATH}")
            return
        entries = json.loads(_DC_GEOCODED_PATH.read_text(encoding="utf-8"))
        for entry in entries:
            lat, lng = entry.get("lat"), entry.get("lng")
            if lat is None or lng is None:
                continue
            within_bounds = -90 <= lat <= 90 and -180 <= lng <= 180
            if not within_bounds:
                continue
            site = {key: entry.get(key, default) for key, default in copied_fields}
            site["lat"] = lat
            site["lng"] = lng
            dcs.append(site)
        logger.info(f"Data centers: {len(dcs)} geocoded locations loaded")
    except Exception as e:
        logger.error(f"Error loading data centers: {e}")

    with _data_lock:
        latest_data["datacenters"] = dcs
    if dcs:
        _mark_fresh("datacenters")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Earthquakes
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_earthquakes():
    """Store up to 50 earthquakes from the USGS magnitude-2.5+ daily feed.

    Only the first 50 features of the feed are read. The collected list
    (possibly empty or partial if parsing failed midway) always replaces
    ``latest_data["earthquakes"]``; the source is marked fresh only when the
    list is non-empty.
    """
    quakes = []
    try:
        response = fetch_with_curl(
            "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson",
            timeout=10,
        )
        if response.status_code == 200:
            for feature in response.json().get("features", [])[:50]:
                props = feature["properties"]
                magnitude = props["mag"]
                # GeoJSON coordinate order is longitude, latitude, depth.
                lng, lat, _depth = feature["geometry"]["coordinates"]
                quakes.append({
                    "id": feature["id"],
                    "mag": magnitude,
                    "lat": lat,
                    "lng": lng,
                    "place": props["place"],
                })
    except Exception as e:
        logger.error(f"Error fetching earthquakes: {e}")

    with _data_lock:
        latest_data["earthquakes"] = quakes
    if quakes:
        _mark_fresh("earthquakes")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Ships (AIS + Carriers)
|
|
# ---------------------------------------------------------------------------
|
|
def fetch_ships():
    """Fetch real-time AIS vessel data and combine with OSINT carrier positions.

    The two sources are queried independently: a failure in one is logged as
    non-fatal and contributes no vessels, while the other is still used.
    Carriers come first in the combined list, followed by AIS vessels.
    """
    from services.ais_stream import get_ais_vessels
    from services.carrier_tracker import get_carrier_positions

    ships = []

    # Source 1: carrier positions.
    try:
        carriers = get_carrier_positions()
        ships += carriers
    except Exception as e:
        logger.error(f"Carrier tracker error (non-fatal): {e}")
        carriers = []

    # Source 2: live AIS vessels.
    try:
        ais_vessels = get_ais_vessels()
        ships += ais_vessels
    except Exception as e:
        logger.error(f"AIS stream error (non-fatal): {e}")
        ais_vessels = []

    logger.info(f"Ships: {len(carriers)} carriers + {len(ais_vessels)} AIS vessels")
    with _data_lock:
        latest_data['ships'] = ships
    _mark_fresh("ships")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Airports
|
|
# ---------------------------------------------------------------------------
|
|
# Large airports loaded once by fetch_airports(); each entry is a dict with at
# least "iata", "name", "lat" and "lng".
cached_airports = []


def find_nearest_airport(lat, lng, max_distance_nm=200):
    """Find the nearest large airport to a given lat/lng using haversine distance.

    Args:
        lat: Latitude of the query point in degrees.
        lng: Longitude of the query point in degrees.
        max_distance_nm: Maximum accepted distance in nautical miles.

    Returns:
        A dict with ``iata``, ``name``, ``lat``, ``lng`` and ``distance_nm``
        (rounded to one decimal) for the closest cached airport, or ``None``
        when the cache is empty or the closest airport is farther away than
        ``max_distance_nm``.
    """
    if not cached_airports:
        return None

    earth_radius_nm = 3440.065
    lat_r = math.radians(lat)
    lng_r = math.radians(lng)
    cos_lat = math.cos(lat_r)  # loop-invariant part of the haversine term

    best = None
    best_dist = float('inf')
    for apt in cached_airports:
        apt_lat_r = math.radians(apt['lat'])
        dlat = apt_lat_r - lat_r
        dlng = math.radians(apt['lng']) - lng_r
        a = math.sin(dlat / 2) ** 2 + cos_lat * math.cos(apt_lat_r) * math.sin(dlng / 2) ** 2
        # Floating-point rounding can push `a` marginally outside [0, 1] for
        # near-antipodal points, which would make sqrt(1 - a) raise ValueError.
        a = min(1.0, max(0.0, a))
        dist_nm = earth_radius_nm * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        if dist_nm < best_dist:
            best_dist = dist_nm
            best = apt

    if best is None or best_dist > max_distance_nm:
        return None
    return {
        "iata": best['iata'],
        "name": best['name'],
        "lat": best['lat'],
        "lng": best['lng'],
        "distance_nm": round(best_dist, 1),
    }
|
|
|
|
def fetch_airports():
    """Load large airports with an IATA code into the module cache (once).

    The ourairports.com CSV is downloaded only while ``cached_airports`` is
    empty. Rows are parsed into a local list first and published to the shared
    cache in a single step, so:

    * readers never observe a half-filled cache, and
    * a failure halfway through parsing cannot leave a partial cache behind
      that later calls would mistake for a complete one.

    Rows with missing columns or non-numeric coordinates are skipped instead
    of aborting the whole load. ``latest_data['airports']`` is always pointed
    at the cache list afterwards.
    """
    if not cached_airports:
        logger.info("Downloading global airports database from ourairports.com...")
        try:
            url = "https://ourairports.com/data/airports.csv"
            response = fetch_with_curl(url, timeout=15)
            if response.status_code == 200:
                loaded = []
                for row in csv.DictReader(io.StringIO(response.text)):
                    if row.get('type') != 'large_airport' or not row.get('iata_code'):
                        continue
                    try:
                        airport = {
                            "id": row['ident'],
                            "name": row['name'],
                            "iata": row['iata_code'],
                            "lat": float(row['latitude_deg']),
                            "lng": float(row['longitude_deg']),
                            "type": "airport",
                        }
                    except (KeyError, TypeError, ValueError):
                        # Malformed row: skip it, keep the rest of the file.
                        continue
                    loaded.append(airport)
                # In-place replacement keeps the list object identity, so any
                # existing references to the cache see the new contents.
                cached_airports[:] = loaded
                logger.info(f"Loaded {len(cached_airports)} large airports into cache.")
            else:
                logger.warning(f"Airports download returned HTTP {response.status_code}")
        except Exception as e:
            logger.error(f"Error fetching airports: {e}")

    with _data_lock:
        latest_data['airports'] = cached_airports
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Geopolitics & Liveuamap
|
|
# ---------------------------------------------------------------------------
|
|
from services.geopolitics import fetch_ukraine_frontlines, fetch_global_military_incidents
|
|
|
|
def fetch_frontlines():
    """Fetch Ukraine frontline data (fast — single GitHub API call).

    An empty/falsy result leaves the previously stored frontlines in place.
    """
    try:
        frontlines = fetch_ukraine_frontlines()
        if not frontlines:
            return
        with _data_lock:
            latest_data['frontlines'] = frontlines
        _mark_fresh("frontlines")
    except Exception as e:
        logger.error(f"Error fetching frontlines: {e}")
|
|
|
|
|
|
def fetch_gdelt():
    """Fetch GDELT global military incidents (slow — downloads 32 ZIP files).

    Only a ``None`` result is treated as "no update"; an empty collection is
    stored like any other result.
    """
    try:
        incidents = fetch_global_military_incidents()
        if incidents is None:
            return
        with _data_lock:
            latest_data['gdelt'] = incidents
        _mark_fresh("gdelt")
    except Exception as e:
        logger.error(f"Error fetching GDELT: {e}")
|
|
|
|
|
|
def fetch_geopolitics():
    """Legacy wrapper — runs both sequentially. Used by recurring scheduler."""
    # Frontlines first (quick), then the slower GDELT download.
    for step in (fetch_frontlines, fetch_gdelt):
        step()
|
|
|
|
def update_liveuamap():
    """Run the Liveuamap scraper and store its result in ``latest_data['liveuamap']``.

    The scraper module is imported lazily inside the ``try`` so an import
    failure is logged like any other scraper error. A falsy result leaves the
    previously stored data untouched.
    """
    logger.info("Running scheduled Liveuamap scraper...")
    try:
        from services.liveuamap_scraper import fetch_liveuamap

        scraped = fetch_liveuamap()
        if not scraped:
            return
        with _data_lock:
            latest_data['liveuamap'] = scraped
        _mark_fresh("liveuamap")
    except Exception as e:
        logger.error(f"Liveuamap scraper error: {e}")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Scheduler & Orchestration
|
|
# ---------------------------------------------------------------------------
|
|
def update_fast_data():
    """Fast-tier: moving entities that need frequent updates (every 60s).

    Runs the fast fetchers concurrently, one thread each, and waits for all of
    them. An exception escaping a fetcher is logged with the fetcher's name
    instead of being silently dropped with its future; the remaining fetchers
    are unaffected. ``latest_data['last_updated']`` is stamped afterwards.
    """
    logger.info("Fast-tier data update starting...")
    fast_funcs = [
        fetch_flights,
        fetch_military_flights,
        fetch_ships,
        fetch_satellites,
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(fast_funcs)) as executor:
        futures = {executor.submit(func): func.__name__ for func in fast_funcs}
        for future in concurrent.futures.as_completed(futures):
            exc = future.exception()
            if exc is not None:
                logger.error(f"Fast-tier fetcher {futures[future]} failed: {exc}")
    with _data_lock:
        # NOTE(review): naive UTC timestamp kept as-is because consumers may
        # depend on this exact format; datetime.utcnow() is deprecated in 3.12.
        latest_data['last_updated'] = datetime.utcnow().isoformat()
    logger.info("Fast-tier update complete.")
|
|
|
|
def update_slow_data():
    """Slow-tier: feeds that change infrequently (every 30min).

    Each fetcher writes to latest_data independently as it finishes,
    so the frontend sees results progressively — no all-or-nothing barrier.

    An exception escaping a fetcher is logged with the fetcher's name instead
    of being silently dropped with its future.
    """
    logger.info("Slow-tier data update starting...")
    slow_funcs = [
        fetch_news,
        fetch_defense_stocks,
        fetch_oil_prices,
        fetch_weather,
        fetch_cctv,
        fetch_earthquakes,
        fetch_frontlines,        # fast — single GitHub API call
        fetch_gdelt,             # slow — 32 ZIP downloads (runs in parallel, won't block frontlines)
        fetch_kiwisdr,
        fetch_space_weather,
        fetch_internet_outages,
        fetch_firms_fires,
        fetch_datacenters,
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(slow_funcs)) as executor:
        futures = {executor.submit(func): func.__name__ for func in slow_funcs}
        for future in concurrent.futures.as_completed(futures):
            exc = future.exception()
            if exc is not None:
                logger.error(f"Slow-tier fetcher {futures[future]} failed: {exc}")
    logger.info("Slow-tier update complete.")
|
|
|
|
def update_all_data():
    """Full update — runs on startup. All tiers run IN PARALLEL for fastest startup.

    The airport download, the fast tier and the slow tier each get their own
    thread. An exception escaping any of the three stages is logged with the
    stage name instead of being silently dropped with its future.
    """
    logger.info("Full data update starting (parallel)...")
    stages = (fetch_airports, update_fast_data, update_slow_data)
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as pool:
        futures = {pool.submit(stage): stage.__name__ for stage in stages}
        for future in concurrent.futures.as_completed(futures):
            exc = future.exception()
            if exc is not None:
                logger.error(f"Full update stage {futures[future]} failed: {exc}")
    logger.info("Full data update complete.")
|
|
|
|
# Module-wide scheduler instance shared by start_scheduler() and stop_scheduler().
scheduler = BackgroundScheduler()


def start_scheduler():
    """Initialise the CCTV database, register the recurring jobs and start the scheduler.

    Registered interval jobs, in order: fast tier (60 s), slow tier (30 min),
    CCTV ingestion (1 min) and the Liveuamap scraper (12 h).
    """
    init_db()

    def update_cctvs():
        """Run every CCTV ingestor, then republish the camera list."""
        logger.info("Running CCTV Pipeline Ingestion...")
        for ingestor in (TFLJamCamIngestor, LTASingaporeIngestor, AustinTXIngestor, NYCDOTIngestor):
            # One failing ingestor must not prevent the others from running.
            try:
                ingestor().ingest()
            except Exception as e:
                logger.error(f"Failed {ingestor.__name__} cctv ingest: {e}")
        fetch_cctv()

    # NOTE: initial update_all_data() is called synchronously in main.py lifespan
    # before start_scheduler(). These are only the RECURRING interval jobs.
    recurring_jobs = (
        (update_fast_data, {"seconds": 60}),
        (update_slow_data, {"minutes": 30}),
        (update_cctvs, {"minutes": 1}),
        (update_liveuamap, {"hours": 12}),
    )
    for job, every in recurring_jobs:
        scheduler.add_job(job, 'interval', **every)

    scheduler.start()
|
|
|
|
def stop_scheduler():
    """Shut the background scheduler down if it is running.

    Calling ``shutdown()`` on a scheduler that was never started raises, which
    would turn a failed or skipped startup into a second error during
    application teardown; the ``running`` check makes this call safe to issue
    unconditionally.
    """
    if scheduler.running:
        scheduler.shutdown()
|
|
|
|
def get_latest_data():
    """Return a shallow copy of ``latest_data`` taken while holding the data lock.

    Only the top-level mapping is copied; the values are the same objects the
    fetchers stored.
    """
    with _data_lock:
        snapshot = dict(latest_data)
    return snapshot
|