mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-07 18:06:49 +02:00
239 lines
8.4 KiB
Python
239 lines
8.4 KiB
Python
"""Feed Ingester — background daemon that refreshes feed-backed pin layers.
|
|
|
|
Layers with a non-empty `feed_url` are polled at their `feed_interval`
(seconds, minimum 60). The feed is expected to return one of:

  1. GeoJSON FeatureCollection (or a single Feature / bare list of Features)
     — features are converted to pins
  2. JSON array of pin objects — used directly
  3. JSON object wrapping such an array under "data", "results", "pins"
     or "items"

Each refresh atomically replaces the layer's pins with the new data.
|
|
"""
|
|
|
|
import logging
|
|
import threading
|
|
import time
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# State
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_running = False
|
|
_thread: threading.Thread | None = None
|
|
_CHECK_INTERVAL = 30 # seconds between scanning for layers that need refresh
|
|
_last_fetched: dict[str, float] = {} # layer_id → last fetch timestamp
|
|
_FETCH_TIMEOUT = 20 # seconds
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GeoJSON → pin conversion
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _geojson_features_to_pins(features: list[dict]) -> list[dict[str, Any]]:
    """Convert GeoJSON Feature objects to pin dicts.

    Only features whose geometry is a ``Point`` with at least two numeric
    coordinates (``[lng, lat, ...]``) inside the valid lat/lng range are
    converted. Anything else — non-dict entries, other geometry types,
    malformed or non-numeric coordinates — is skipped, so one bad feature
    never aborts the whole feed.

    Args:
        features: Raw entries from a feed; each is expected to be a GeoJSON
            Feature dict, but arbitrary JSON values are tolerated.

    Returns:
        A list of pin dicts with keys ``lat``, ``lng``, ``label``,
        ``category``, ``color``, ``description``, ``source``, ``source_url``
        and ``confidence``, plus ``entity_attachment`` when the feature's
        properties carry both ``entity_type`` and ``entity_id``.
    """
    pins: list[dict[str, Any]] = []
    for feat in features:
        if not isinstance(feat, dict):
            continue

        geom = feat.get("geometry")
        if not isinstance(geom, dict) or geom.get("type") != "Point":
            continue

        # A missing or malformed "properties" member is treated as empty.
        props = feat.get("properties")
        if not isinstance(props, dict):
            props = {}

        # Extract coordinates (GeoJSON order is longitude, latitude).
        coords = geom.get("coordinates")
        if not isinstance(coords, (list, tuple)) or len(coords) < 2:
            continue
        try:
            lng, lat = float(coords[0]), float(coords[1])
        except (TypeError, ValueError, OverflowError):
            continue
        # NaN fails both comparisons, so it is rejected here as well.
        if not (-90 <= lat <= 90 and -180 <= lng <= 180):
            continue

        # An unparseable confidence falls back to the same default as a missing one.
        try:
            confidence = float(props.get("confidence", 1.0))
        except (TypeError, ValueError, OverflowError):
            confidence = 1.0

        pin: dict[str, Any] = {
            "lat": lat,
            "lng": lng,
            "label": str(props.get("label", props.get("name", props.get("title", ""))))[:200],
            "category": str(props.get("category", "custom"))[:50],
            "color": str(props.get("color", ""))[:20],
            "description": str(props.get("description", props.get("summary", "")))[:2000],
            "source": "feed",
            "source_url": str(props.get("source_url", props.get("url", props.get("link", ""))))[:500],
            "confidence": confidence,
        }

        # Entity attachment if present
        entity_type = props.get("entity_type", "")
        entity_id = props.get("entity_id", "")
        if entity_type and entity_id:
            pin["entity_attachment"] = {
                "entity_type": str(entity_type),
                "entity_id": str(entity_id),
                "entity_label": str(props.get("entity_label", "")),
            }

        pins.append(pin)
    return pins
|
|
|
|
|
|
def _parse_feed_response(data: Any) -> list[dict[str, Any]]:
    """Turn a decoded feed payload into a list of pin dicts.

    Recognised shapes:
      * a list — treated as GeoJSON Features when its first element is a
        dict with ``"type": "Feature"``, otherwise as raw pin objects;
      * a dict that is a GeoJSON FeatureCollection or a single Feature;
      * a dict wrapping a pin list under ``data``, ``results``, ``pins`` or
        ``items`` (the first truthy one wins).

    Any other payload yields an empty list.
    """
    if isinstance(data, list):
        # Sniff only the first element to decide between GeoJSON and raw pins.
        if data:
            head = data[0]
            if isinstance(head, dict) and head.get("type") == "Feature":
                return _geojson_features_to_pins(data)
        return _normalize_pin_list(data)

    if not isinstance(data, dict):
        return []

    kind = data.get("type")

    # GeoJSON FeatureCollection
    if kind == "FeatureCollection":
        features = data.get("features")
        if isinstance(features, list):
            return _geojson_features_to_pins(features)

    # Single Feature
    if kind == "Feature":
        return _geojson_features_to_pins([data])

    # Wrapped response like {"ok": true, "data": [...]}: take the first
    # truthy candidate, or the last looked-up value if none is truthy.
    inner = None
    for key in ("data", "results", "pins", "items"):
        inner = data.get(key)
        if inner:
            break
    if isinstance(inner, list):
        return _normalize_pin_list(inner)

    return []
|
|
|
|
|
|
def _normalize_pin_list(items: list) -> list[dict[str, Any]]:
    """Normalize a list of raw pin objects, ensuring lat/lng are present.

    Latitude is read from ``lat`` or ``latitude``; longitude from ``lng``,
    ``lon`` or ``longitude``. A coordinate of ``0`` is a valid value (equator
    / prime meridian) and is kept; only ``None`` and ``""`` count as missing.
    Entries that are not dicts, lack a coordinate, have non-numeric
    coordinates, or fall outside the valid lat/lng range are skipped.

    Args:
        items: Raw entries from a feed; arbitrary JSON values are tolerated.

    Returns:
        A list of pin dicts with keys ``lat``, ``lng``, ``label``,
        ``category``, ``color``, ``description``, ``source``, ``source_url``
        and ``confidence``, plus ``entity_attachment`` when the item carries
        both ``entity_type`` and ``entity_id``.
    """
    pins: list[dict[str, Any]] = []
    for item in items:
        if not isinstance(item, dict):
            continue

        lat = _first_present(item, ("lat", "latitude"))
        lng = _first_present(item, ("lng", "lon", "longitude"))
        if lat is None or lng is None:
            continue
        try:
            lat, lng = float(lat), float(lng)
        except (ValueError, TypeError, OverflowError):
            continue
        # NaN fails both comparisons, so it is rejected here as well.
        if not (-90 <= lat <= 90 and -180 <= lng <= 180):
            continue

        # An unparseable confidence falls back to the same default as a missing one.
        try:
            confidence = float(item.get("confidence", 1.0))
        except (ValueError, TypeError, OverflowError):
            confidence = 1.0

        pin: dict[str, Any] = {
            "lat": lat,
            "lng": lng,
            "label": str(item.get("label", item.get("name", item.get("title", ""))))[:200],
            "category": str(item.get("category", "custom"))[:50],
            "color": str(item.get("color", ""))[:20],
            "description": str(item.get("description", item.get("summary", "")))[:2000],
            "source": "feed",
            "source_url": str(item.get("source_url", item.get("url", item.get("link", ""))))[:500],
            "confidence": confidence,
        }

        entity_type = item.get("entity_type", "")
        entity_id = item.get("entity_id", "")
        if entity_type and entity_id:
            pin["entity_attachment"] = {
                "entity_type": str(entity_type),
                "entity_id": str(entity_id),
                "entity_label": str(item.get("entity_label", "")),
            }

        pins.append(pin)
    return pins


def _first_present(item: dict, keys: tuple) -> Any:
    """Return the first value among *keys* that is neither ``None`` nor ``""``."""
    for key in keys:
        value = item.get(key)
        # Compare against None/"" explicitly: a plain truthiness test would
        # wrongly discard a legitimate coordinate of 0.
        if value is not None and value != "":
            return value
    return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fetch a single layer
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _fetch_layer_feed(layer: dict[str, Any]) -> None:
    """Fetch a feed URL and replace the layer's pins.

    Downloads ``layer["feed_url"]``, decodes it as JSON, converts the payload
    with ``_parse_feed_response`` and hands the resulting pins to
    ``replace_layer_pins``. On success the fetch time is stored both on the
    layer (``feed_last_fetched``) and in the in-memory ``_last_fetched`` map.

    Download and decode failures are logged as warnings and the function
    returns without touching the layer or ``_last_fetched``.

    Args:
        layer: Layer record; must contain ``id`` and ``feed_url``. ``name`` is
            optional and only used for log messages.

    Raises:
        KeyError: If ``id`` or ``feed_url`` is missing from *layer*.
    """
    layer_id = layer["id"]
    feed_url = layer["feed_url"]
    layer_name = layer.get("name", layer_id)

    # Step 1: transport / HTTP status errors.
    try:
        resp = requests.get(
            feed_url,
            timeout=_FETCH_TIMEOUT,
            headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.warning("Feed fetch failed for layer '%s' (%s): %s", layer_name, feed_url, e)
        return

    # Step 2: JSON decoding, kept in its own try block. Recent versions of
    # requests raise a JSONDecodeError that is *also* a RequestException, so
    # sharing one try with the download would report bad JSON as a fetch
    # failure and leave the parse-failure branch unreachable.
    try:
        data = resp.json()
    except (ValueError, TypeError) as e:
        logger.warning("Feed parse failed for layer '%s' (%s): %s", layer_name, feed_url, e)
        return

    pins = _parse_feed_response(data)

    # Imported lazily, as in the rest of this module.
    from services.ai_pin_store import replace_layer_pins, update_layer

    # The return value is logged below as the number of pins.
    count = replace_layer_pins(layer_id, pins)

    # Use a single timestamp so the persisted layer metadata and the
    # in-memory scheduler state agree exactly.
    fetched_at = time.time()
    update_layer(layer_id, feed_last_fetched=fetched_at)
    _last_fetched[layer_id] = fetched_at

    logger.info("Feed refresh for layer '%s': %d pins from %s", layer_name, count, feed_url)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main loop
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _ingest_loop() -> None:
    """Daemon loop: scan for feed layers and refresh those that are due.

    Runs until the module-level ``_running`` flag becomes false. Each pass
    asks ``get_feed_layers`` for the current feed layers and calls
    ``_fetch_layer_feed`` for every layer whose last recorded fetch is at
    least ``feed_interval`` seconds old (default 300, floor 60). Layers with
    no recorded fetch are treated as due immediately.

    Every layer is processed inside its own ``try`` so a malformed record
    (missing ``id``, non-numeric ``feed_interval``) or a failing refresh only
    affects that layer, not the ones after it.
    """
    while _running:
        try:
            from services.ai_pin_store import get_feed_layers

            layers = get_feed_layers()
            now = time.time()

            for layer in layers:
                try:
                    layer_id = layer["id"]

                    # Tolerate numeric strings; fall back to the default
                    # when the stored interval cannot be interpreted.
                    try:
                        interval = max(60, float(layer.get("feed_interval", 300)))
                    except (TypeError, ValueError, OverflowError):
                        interval = 300

                    last = _last_fetched.get(layer_id, 0)
                    if now - last >= interval:
                        _fetch_layer_feed(layer)
                except Exception as e:
                    # Build the label defensively: the record itself may be
                    # what is malformed.
                    if isinstance(layer, dict):
                        label = layer.get("name", layer.get("id"))
                    else:
                        label = repr(layer)
                    logger.warning("Feed ingestion error for layer %s: %s", label, e)

        except Exception as e:
            logger.error("Feed ingester loop error: %s", e)

        # Sleep in short increments so we can stop cleanly
        for _ in range(int(_CHECK_INTERVAL)):
            if not _running:
                break
            time.sleep(1)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Start / stop
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def start_feed_ingester() -> None:
    """Launch the background ingest thread unless one is already alive.

    Sets the module-level ``_running`` flag, creates a daemon thread named
    ``feed-ingester`` that runs ``_ingest_loop``, stores it in ``_thread``
    and starts it. Calling this while the previous thread is still alive is
    a no-op.
    """
    global _running, _thread

    already_alive = bool(_thread) and _thread.is_alive()
    if already_alive:
        return

    # The flag must be raised before the thread starts, because the loop
    # checks it on entry.
    _running = True
    worker = threading.Thread(name="feed-ingester", target=_ingest_loop, daemon=True)
    _thread = worker
    worker.start()

    logger.info("Feed ingester daemon started (check interval=%ds)", _CHECK_INTERVAL)
|
|
|
|
|
|
def stop_feed_ingester() -> None:
    """Ask the feed ingester daemon to stop.

    Only clears the module-level ``_running`` flag; it does not join or
    otherwise wait for the worker thread.
    """
    global _running
    _running = False
|