mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-08 02:16:41 +02:00
2255 lines
81 KiB
Python
2255 lines
81 KiB
Python
"""Telemetry facade — provides cached telemetry snapshots for the command channel.
|
|
|
|
Wraps services.fetchers._store so the openclaw_channel and watchdog can access
|
|
all dashboard data (fast + slow tiers) through a single import.
|
|
|
|
The data returned includes ALL enrichment — plane_alert tags, tracked names,
|
|
alert_category, alert_operator, etc. — because _store holds the post-enrichment
|
|
data that the fetchers have already processed.
|
|
"""
|
|
|
|
import math
|
|
import re
|
|
import threading
|
|
from difflib import get_close_matches
|
|
from typing import Any
|
|
|
|
from services.fetchers._store import (
|
|
get_data_version,
|
|
get_layer_versions,
|
|
get_latest_data_subset,
|
|
get_latest_data_subset_refs,
|
|
latest_data,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fast-tier: flights, ships, sigint, satellites, CCTV, etc.
|
|
# Same keys as /api/live-data/fast
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_FAST_KEYS = (
|
|
"last_updated",
|
|
"commercial_flights",
|
|
"military_flights",
|
|
"private_flights",
|
|
"private_jets",
|
|
"tracked_flights",
|
|
"ships",
|
|
"cctv",
|
|
"uavs",
|
|
"liveuamap",
|
|
"gps_jamming",
|
|
"satellites",
|
|
"satellite_source",
|
|
"satellite_analysis",
|
|
"sigint",
|
|
"sigint_totals",
|
|
"trains",
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Slow-tier: news, prediction markets, GDELT, earthquakes, weather, etc.
|
|
# Same keys as /api/live-data/slow
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_SLOW_KEYS = (
|
|
"last_updated",
|
|
"news",
|
|
"stocks",
|
|
"financial_source",
|
|
"oil",
|
|
"weather",
|
|
"traffic",
|
|
"earthquakes",
|
|
"frontlines",
|
|
"gdelt",
|
|
"airports",
|
|
"kiwisdr",
|
|
"satnogs_stations",
|
|
"satnogs_observations",
|
|
"tinygs_satellites",
|
|
"space_weather",
|
|
"internet_outages",
|
|
"firms_fires",
|
|
"datacenters",
|
|
"military_bases",
|
|
"power_plants",
|
|
"viirs_change_nodes",
|
|
"scanners",
|
|
"weather_alerts",
|
|
"ukraine_alerts",
|
|
"air_quality",
|
|
"volcanoes",
|
|
"fishing_activity",
|
|
"psk_reporter",
|
|
"crowdthreat",
|
|
"correlations",
|
|
"prediction_markets",
|
|
"threat_level",
|
|
"trending_markets",
|
|
"uap_sightings",
|
|
"wastewater",
|
|
"sar_scenes",
|
|
"sar_anomalies",
|
|
"sar_aoi_coverage",
|
|
)
|
|
|
|
|
|
def get_cached_telemetry() -> dict[str, Any]:
|
|
"""Return a deep-copy snapshot of fast-tier telemetry data.
|
|
|
|
Includes enriched fields: alert_category, alert_operator, alert_color,
|
|
alert_socials, etc. — all the 'Tracked Aircraft — People' data is here
|
|
in the tracked_flights list.
|
|
"""
|
|
return get_latest_data_subset(*_FAST_KEYS)
|
|
|
|
|
|
def get_cached_slow_telemetry() -> dict[str, Any]:
|
|
"""Return a deep-copy snapshot of slow-tier telemetry data.
|
|
|
|
Includes news, GDELT, prediction markets, earthquakes, weather, etc.
|
|
"""
|
|
return get_latest_data_subset(*_SLOW_KEYS)
|
|
|
|
|
|
def get_cached_telemetry_refs() -> dict[str, Any]:
|
|
"""Return zero-copy refs to fast-tier telemetry (read-only callers only).
|
|
|
|
Callers MUST NOT mutate the returned data. Safe because writers replace
|
|
top-level values atomically under the data lock.
|
|
"""
|
|
return get_latest_data_subset_refs(*_FAST_KEYS)
|
|
|
|
|
|
def get_cached_slow_telemetry_refs() -> dict[str, Any]:
|
|
"""Return zero-copy refs to slow-tier telemetry (read-only callers only)."""
|
|
return get_latest_data_subset_refs(*_SLOW_KEYS)
|
|
|
|
|
|
_FLIGHT_LAYER_ALIASES = {
|
|
"commercial": "commercial_flights",
|
|
"commercial_flights": "commercial_flights",
|
|
"private": "private_flights",
|
|
"private_flights": "private_flights",
|
|
"jets": "private_jets",
|
|
"private_jets": "private_jets",
|
|
"military": "military_flights",
|
|
"military_flights": "military_flights",
|
|
"tracked": "tracked_flights",
|
|
"tracked_flights": "tracked_flights",
|
|
"flights": "flights",
|
|
}
|
|
|
|
_ENTITY_LAYER_ALIASES = {
|
|
**_FLIGHT_LAYER_ALIASES,
|
|
"ships": "ships",
|
|
"fishing": "fishing_activity",
|
|
"fishing_activity": "fishing_activity",
|
|
"global_fishing_watch": "fishing_activity",
|
|
"gfw": "fishing_activity",
|
|
"uavs": "uavs",
|
|
"satellites": "satellites",
|
|
"earthquakes": "earthquakes",
|
|
"news": "news",
|
|
"uap": "uap_sightings",
|
|
"ufo": "uap_sightings",
|
|
"uap_sightings": "uap_sightings",
|
|
"wastewater": "wastewater",
|
|
"pins": "pins",
|
|
}
|
|
|
|
_SLICEABLE_LAYERS = tuple(dict.fromkeys(_FAST_KEYS + _SLOW_KEYS))
|
|
_LAYER_ALIASES = {
|
|
**{key: key for key in _SLICEABLE_LAYERS},
|
|
**_ENTITY_LAYER_ALIASES,
|
|
"global_incidents": "gdelt",
|
|
"prediction_markets": "prediction_markets",
|
|
"markets": "prediction_markets",
|
|
"weather_alerts": "weather_alerts",
|
|
"internet_outages": "internet_outages",
|
|
"military_bases": "military_bases",
|
|
"power_plants": "power_plants",
|
|
"datacenters": "datacenters",
|
|
"scanners": "scanners",
|
|
"air_quality": "air_quality",
|
|
"volcanoes": "volcanoes",
|
|
"crowdthreat": "crowdthreat",
|
|
"correlations": "correlations",
|
|
"psk_reporter": "psk_reporter",
|
|
"ukraine_alerts": "ukraine_alerts",
|
|
"frontlines": "frontlines",
|
|
# SAR (Synthetic Aperture Radar)
|
|
"sar": "sar_anomalies",
|
|
"sar_scenes": "sar_scenes",
|
|
"sar_anomalies": "sar_anomalies",
|
|
"sar_aoi_coverage": "sar_aoi_coverage",
|
|
"sar_coverage": "sar_aoi_coverage",
|
|
# Satellite analysis (maneuvers, decay, Starlink)
|
|
"satellite_analysis": "satellite_analysis",
|
|
}
|
|
|
|
_UNIVERSAL_SEARCH_DEFAULT_LAYERS = (
|
|
"tracked_flights",
|
|
"military_flights",
|
|
"private_jets",
|
|
"private_flights",
|
|
"commercial_flights",
|
|
"ships",
|
|
"fishing_activity",
|
|
"news",
|
|
"gdelt",
|
|
"crowdthreat",
|
|
"frontlines",
|
|
"liveuamap",
|
|
"uap_sightings",
|
|
"wastewater",
|
|
"prediction_markets",
|
|
"earthquakes",
|
|
"weather_alerts",
|
|
"internet_outages",
|
|
"datacenters",
|
|
"military_bases",
|
|
"power_plants",
|
|
"scanners",
|
|
"air_quality",
|
|
"volcanoes",
|
|
"sigint",
|
|
"cctv",
|
|
"satellites",
|
|
"trains",
|
|
"kiwisdr",
|
|
"satnogs_stations",
|
|
"satnogs_observations",
|
|
"tinygs_satellites",
|
|
"psk_reporter",
|
|
"ukraine_alerts",
|
|
)
|
|
|
|
_GENERIC_QUERY_STOPWORDS = {
|
|
"where",
|
|
"is",
|
|
"the",
|
|
"a",
|
|
"an",
|
|
"of",
|
|
"to",
|
|
"for",
|
|
"at",
|
|
"in",
|
|
"on",
|
|
"right",
|
|
"now",
|
|
"current",
|
|
"currently",
|
|
"latest",
|
|
"recent",
|
|
"show",
|
|
"find",
|
|
"look",
|
|
"lookup",
|
|
"track",
|
|
"tracking",
|
|
}
|
|
|
|
_GENERIC_LAYER_HINTS: dict[str, tuple[str, ...]] = {
|
|
"jet": ("tracked_flights", "private_jets", "private_flights", "military_flights", "commercial_flights"),
|
|
"plane": ("tracked_flights", "private_jets", "private_flights", "military_flights", "commercial_flights"),
|
|
"aircraft": ("tracked_flights", "private_jets", "private_flights", "military_flights", "commercial_flights"),
|
|
"flight": ("tracked_flights", "private_jets", "private_flights", "military_flights", "commercial_flights"),
|
|
"helicopter": ("tracked_flights", "military_flights", "private_flights"),
|
|
"yacht": ("ships", "fishing_activity"),
|
|
"ship": ("ships", "fishing_activity"),
|
|
"boat": ("ships", "fishing_activity"),
|
|
"vessel": ("ships", "fishing_activity"),
|
|
"satellite": ("satellites", "tinygs_satellites", "satnogs_stations", "satnogs_observations"),
|
|
"uap": ("uap_sightings",),
|
|
"ufo": ("uap_sightings",),
|
|
"protest": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
|
"riot": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
|
"event": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
|
"news": ("news", "gdelt", "crowdthreat", "frontlines", "liveuamap"),
|
|
"plant": ("power_plants", "wastewater"),
|
|
"datacenter": ("datacenters",),
|
|
"data": ("datacenters",),
|
|
"base": ("military_bases",),
|
|
"scanner": ("scanners",),
|
|
"camera": ("cctv",),
|
|
"radio": ("sigint", "kiwisdr", "psk_reporter"),
|
|
}
|
|
|
|
_SEARCH_GROUP_BY_LAYER = {
|
|
"tracked_flights": "aircraft",
|
|
"military_flights": "aircraft",
|
|
"private_jets": "aircraft",
|
|
"private_flights": "aircraft",
|
|
"commercial_flights": "aircraft",
|
|
"ships": "maritime",
|
|
"fishing_activity": "maritime",
|
|
"satellites": "space",
|
|
"tinygs_satellites": "space",
|
|
"satnogs_stations": "space",
|
|
"satnogs_observations": "space",
|
|
"uap_sightings": "anomalies",
|
|
"wastewater": "biosurveillance",
|
|
"news": "events",
|
|
"gdelt": "events",
|
|
"crowdthreat": "events",
|
|
"frontlines": "events",
|
|
"liveuamap": "events",
|
|
"prediction_markets": "markets",
|
|
"weather_alerts": "hazards",
|
|
"earthquakes": "hazards",
|
|
"internet_outages": "infrastructure",
|
|
"datacenters": "infrastructure",
|
|
"military_bases": "infrastructure",
|
|
"power_plants": "infrastructure",
|
|
"scanners": "signals",
|
|
"air_quality": "environment",
|
|
"volcanoes": "environment",
|
|
"sigint": "signals",
|
|
"cctv": "surveillance",
|
|
"trains": "transport",
|
|
"kiwisdr": "signals",
|
|
"psk_reporter": "signals",
|
|
"ukraine_alerts": "events",
|
|
}
|
|
|
|
_SEARCH_QUERY_SYNONYMS: dict[str, tuple[str, ...]] = {
|
|
"jets": ("jet",),
|
|
"planes": ("plane", "aircraft"),
|
|
"boats": ("boat", "ship", "vessel"),
|
|
"ships": ("ship", "vessel"),
|
|
"yachts": ("yacht",),
|
|
"ufos": ("ufo", "uap"),
|
|
"protests": ("protest",),
|
|
"riots": ("riot", "protest"),
|
|
"plants": ("plant",),
|
|
"cameras": ("camera",),
|
|
"radios": ("radio",),
|
|
}
|
|
|
|
_SEARCH_INDEX_LOCK = threading.Lock()
|
|
# The live index reference — swapped atomically so readers never block.
|
|
# Readers grab the reference once; writers build a new dict and swap.
|
|
_SEARCH_INDEX_REF: dict[str, Any] = {
|
|
"version": None,
|
|
"docs": [],
|
|
"vocabulary": set(),
|
|
"postings": {},
|
|
"built_at": 0.0,
|
|
}
|
|
# Minimum seconds between full index rebuilds. ADS-B / AIS bump the data
|
|
# version every few seconds, but the search index doesn't need to be
|
|
# perfectly real-time — a 10-second staleness window avoids rebuilding
|
|
# 50K+ docs on every single query while keeping results fresh enough.
|
|
_SEARCH_INDEX_MIN_AGE: float = 10.0
|
|
|
|
_UNIVERSAL_SEARCH_SPECS: dict[str, dict[str, Any]] = {
|
|
"tracked_flights": {
|
|
"fields": ("callsign", "flight", "call", "registration", "r", "icao24", "owner", "operator", "alert_operator", "type", "alert_category", "category", "intel_tags", "name"),
|
|
"primary_fields": ("callsign", "registration", "owner", "operator", "alert_operator", "name"),
|
|
"label_fields": ("callsign", "flight", "call", "registration"),
|
|
"summary_fields": ("owner", "operator", "alert_operator", "category", "type", "alert_category", "intel_tags"),
|
|
"type_fields": ("category", "type", "alert_category"),
|
|
"id_fields": ("icao24", "registration"),
|
|
"time_fields": ("last_seen", "updated", "timestamp"),
|
|
},
|
|
"military_flights": {
|
|
"fields": ("callsign", "flight", "call", "registration", "r", "icao24", "owner", "operator", "alert_operator", "type"),
|
|
"primary_fields": ("callsign", "registration", "icao24"),
|
|
"label_fields": ("callsign", "flight", "call", "registration"),
|
|
"summary_fields": ("owner", "operator", "type"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("icao24", "registration"),
|
|
"time_fields": ("last_seen", "updated", "timestamp"),
|
|
},
|
|
"private_jets": {
|
|
"fields": ("callsign", "registration", "r", "icao24", "owner", "operator", "type"),
|
|
"primary_fields": ("callsign", "registration", "owner"),
|
|
"label_fields": ("callsign", "registration"),
|
|
"summary_fields": ("owner", "operator", "type"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("icao24", "registration"),
|
|
"time_fields": ("last_seen", "updated", "timestamp"),
|
|
},
|
|
"private_flights": {
|
|
"fields": ("callsign", "registration", "r", "icao24", "owner", "operator", "type"),
|
|
"primary_fields": ("callsign", "registration", "owner"),
|
|
"label_fields": ("callsign", "registration"),
|
|
"summary_fields": ("owner", "operator", "type"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("icao24", "registration"),
|
|
"time_fields": ("last_seen", "updated", "timestamp"),
|
|
},
|
|
"commercial_flights": {
|
|
"fields": ("callsign", "flight", "call", "registration", "r", "icao24", "operator", "airline", "type"),
|
|
"primary_fields": ("callsign", "registration", "operator", "airline"),
|
|
"label_fields": ("callsign", "flight", "call", "registration"),
|
|
"summary_fields": ("operator", "airline", "type"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("icao24", "registration"),
|
|
"time_fields": ("last_seen", "updated", "timestamp"),
|
|
},
|
|
"ships": {
|
|
"fields": ("name", "shipName", "mmsi", "imo", "callsign", "shipType", "type", "yacht_owner", "yacht_name", "yacht_category", "owner"),
|
|
"primary_fields": ("name", "shipName", "yacht_owner", "yacht_name", "mmsi", "imo"),
|
|
"label_fields": ("yacht_name", "name", "shipName"),
|
|
"summary_fields": ("yacht_owner", "shipType", "type", "yacht_category", "callsign"),
|
|
"type_fields": ("yacht_category", "shipType", "type"),
|
|
"id_fields": ("mmsi", "imo"),
|
|
"time_fields": ("updated", "timestamp", "last_seen"),
|
|
},
|
|
"fishing_activity": {
|
|
"fields": ("name", "vessel_name", "flag", "type", "id", "vessel_id", "vessel_ssvid", "region", "country"),
|
|
"primary_fields": ("name", "vessel_name", "vessel_ssvid", "vessel_id"),
|
|
"label_fields": ("vessel_name", "name", "id"),
|
|
"summary_fields": ("flag", "type", "region", "country"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("id", "vessel_ssvid", "vessel_id"),
|
|
"time_fields": ("end", "start", "timestamp"),
|
|
},
|
|
"news": {
|
|
"fields": ("title", "summary", "description", "source"),
|
|
"primary_fields": ("title",),
|
|
"label_fields": ("title",),
|
|
"summary_fields": ("summary", "description", "source"),
|
|
"type_fields": ("source",),
|
|
"id_fields": ("link", "url"),
|
|
"time_fields": ("published", "pub_date", "timestamp"),
|
|
},
|
|
"gdelt": {
|
|
"fields": ("title", "name", "sourceurl", "actor1name", "actor2name"),
|
|
"primary_fields": ("title", "name"),
|
|
"label_fields": ("title", "name"),
|
|
"summary_fields": ("actor1name", "actor2name"),
|
|
"type_fields": ("eventcode", "eventrootcode"),
|
|
"id_fields": ("sourceurl",),
|
|
"time_fields": ("sqldate", "date"),
|
|
},
|
|
"crowdthreat": {
|
|
"fields": ("title", "summary", "description", "category", "city", "state", "region"),
|
|
"primary_fields": ("title", "category", "city", "state"),
|
|
"label_fields": ("title",),
|
|
"summary_fields": ("summary", "description", "category", "city", "state"),
|
|
"type_fields": ("category",),
|
|
"id_fields": ("id", "link", "url"),
|
|
"time_fields": ("date", "timestamp", "created_at", "updated_at"),
|
|
},
|
|
"frontlines": {
|
|
"fields": ("title", "name", "description", "category", "source"),
|
|
"primary_fields": ("title", "name"),
|
|
"label_fields": ("title", "name"),
|
|
"summary_fields": ("description", "category", "source"),
|
|
"type_fields": ("category",),
|
|
"id_fields": ("id", "sourceurl", "url"),
|
|
"time_fields": ("date", "timestamp", "updated_at"),
|
|
},
|
|
"liveuamap": {
|
|
"fields": ("title", "description", "place", "category", "source"),
|
|
"primary_fields": ("title", "place"),
|
|
"label_fields": ("title", "place"),
|
|
"summary_fields": ("description", "category", "source"),
|
|
"type_fields": ("category",),
|
|
"id_fields": ("id", "url", "link"),
|
|
"time_fields": ("time", "date", "timestamp"),
|
|
},
|
|
"uap_sightings": {
|
|
"fields": ("city", "state", "country", "shape", "shape_raw", "summary", "duration"),
|
|
"primary_fields": ("city", "state", "shape", "shape_raw"),
|
|
"label_fields": ("city", "state", "shape_raw"),
|
|
"summary_fields": ("summary", "duration", "country"),
|
|
"type_fields": ("shape", "shape_raw"),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("date_time", "posted"),
|
|
},
|
|
"wastewater": {
|
|
"fields": ("name", "site_name", "city", "state", "pathogen", "status", "signal", "county"),
|
|
"primary_fields": ("name", "site_name", "city", "state", "pathogen"),
|
|
"label_fields": ("name", "site_name"),
|
|
"summary_fields": ("city", "state", "pathogen", "status", "signal"),
|
|
"type_fields": ("pathogen", "status"),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp", "date"),
|
|
},
|
|
"prediction_markets": {
|
|
"fields": ("title", "question", "category", "status", "source"),
|
|
"primary_fields": ("title", "question"),
|
|
"label_fields": ("title", "question"),
|
|
"summary_fields": ("category", "status", "source"),
|
|
"type_fields": ("category", "status"),
|
|
"id_fields": ("id", "slug"),
|
|
"time_fields": ("end_date", "updated_at", "timestamp"),
|
|
},
|
|
"earthquakes": {
|
|
"fields": ("place", "title", "id", "mag"),
|
|
"primary_fields": ("place", "title"),
|
|
"label_fields": ("place", "title"),
|
|
"summary_fields": ("mag",),
|
|
"type_fields": ("mag",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("time", "timestamp", "updated"),
|
|
},
|
|
"weather_alerts": {
|
|
"fields": ("event", "headline", "area", "severity", "sender"),
|
|
"primary_fields": ("event", "headline", "area"),
|
|
"label_fields": ("headline", "event", "area"),
|
|
"summary_fields": ("area", "severity", "sender"),
|
|
"type_fields": ("event", "severity"),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("sent", "effective", "onset", "timestamp"),
|
|
},
|
|
"internet_outages": {
|
|
"fields": ("name", "region", "country", "provider", "status"),
|
|
"primary_fields": ("name", "region", "country"),
|
|
"label_fields": ("name", "region"),
|
|
"summary_fields": ("country", "provider", "status"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp", "date"),
|
|
},
|
|
"datacenters": {
|
|
"fields": ("name", "company", "city", "state", "country"),
|
|
"primary_fields": ("name", "company", "city", "state"),
|
|
"label_fields": ("name", "company"),
|
|
"summary_fields": ("city", "state", "country"),
|
|
"type_fields": ("company",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"military_bases": {
|
|
"fields": ("name", "branch", "country", "state", "city"),
|
|
"primary_fields": ("name", "branch", "city", "state"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("branch", "city", "state", "country"),
|
|
"type_fields": ("branch",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"power_plants": {
|
|
"fields": ("name", "owner", "fuel", "city", "state", "country"),
|
|
"primary_fields": ("name", "owner", "fuel"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("owner", "fuel", "city", "state", "country"),
|
|
"type_fields": ("fuel",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"scanners": {
|
|
"fields": ("name", "county", "state", "city", "agency"),
|
|
"primary_fields": ("name", "county", "state", "city"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("agency", "city", "state", "county"),
|
|
"type_fields": ("agency",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"air_quality": {
|
|
"fields": ("name", "city", "state", "country", "category"),
|
|
"primary_fields": ("name", "city", "state"),
|
|
"label_fields": ("name", "city"),
|
|
"summary_fields": ("category", "state", "country"),
|
|
"type_fields": ("category",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"volcanoes": {
|
|
"fields": ("name", "country", "region", "status"),
|
|
"primary_fields": ("name", "country", "region"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("country", "region", "status"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"sigint": {
|
|
"fields": ("call", "callsign", "name", "msg", "message", "symbol_name", "type"),
|
|
"primary_fields": ("call", "callsign", "name"),
|
|
"label_fields": ("call", "callsign", "name"),
|
|
"summary_fields": ("msg", "message", "symbol_name", "type"),
|
|
"type_fields": ("type", "symbol_name"),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("timestamp", "heard_at", "last_seen"),
|
|
},
|
|
"cctv": {
|
|
"fields": ("id", "source_agency", "direction_facing", "location", "name"),
|
|
"primary_fields": ("direction_facing", "location", "source_agency", "name"),
|
|
"label_fields": ("name", "direction_facing", "id"),
|
|
"summary_fields": ("source_agency", "location"),
|
|
"type_fields": ("source_agency",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"satellites": {
|
|
"fields": ("name", "id", "norad_id", "country", "type"),
|
|
"primary_fields": ("name", "id", "norad_id"),
|
|
"label_fields": ("name", "norad_id", "id"),
|
|
"summary_fields": ("country", "type"),
|
|
"type_fields": ("type",),
|
|
"id_fields": ("norad_id", "id"),
|
|
"time_fields": ("epoch", "updated_at", "timestamp"),
|
|
},
|
|
"trains": {
|
|
"fields": ("name", "train_no", "route", "operator", "status"),
|
|
"primary_fields": ("name", "train_no", "route"),
|
|
"label_fields": ("name", "train_no", "route"),
|
|
"summary_fields": ("operator", "status"),
|
|
"type_fields": ("operator", "status"),
|
|
"id_fields": ("id", "train_no"),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"kiwisdr": {
|
|
"fields": ("name", "city", "state", "country", "owner"),
|
|
"primary_fields": ("name", "city", "state", "country"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("city", "state", "country", "owner"),
|
|
"type_fields": ("country",),
|
|
"id_fields": ("id", "url"),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"satnogs_stations": {
|
|
"fields": ("name", "location", "city", "country", "status"),
|
|
"primary_fields": ("name", "location", "city", "country"),
|
|
"label_fields": ("name",),
|
|
"summary_fields": ("location", "city", "country", "status"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"satnogs_observations": {
|
|
"fields": ("satellite", "ground_station", "name", "status"),
|
|
"primary_fields": ("satellite", "ground_station", "name"),
|
|
"label_fields": ("satellite", "name"),
|
|
"summary_fields": ("ground_station", "status"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("timestamp", "start", "end"),
|
|
},
|
|
"tinygs_satellites": {
|
|
"fields": ("name", "norad_id", "status", "country"),
|
|
"primary_fields": ("name", "norad_id"),
|
|
"label_fields": ("name", "norad_id"),
|
|
"summary_fields": ("status", "country"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("norad_id", "id"),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
"psk_reporter": {
|
|
"fields": ("sender", "receiver", "mode", "band", "country"),
|
|
"primary_fields": ("sender", "receiver"),
|
|
"label_fields": ("sender", "receiver"),
|
|
"summary_fields": ("mode", "band", "country"),
|
|
"type_fields": ("mode", "band"),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("timestamp", "updated_at"),
|
|
},
|
|
"ukraine_alerts": {
|
|
"fields": ("name", "region", "status", "description"),
|
|
"primary_fields": ("name", "region"),
|
|
"label_fields": ("name", "region"),
|
|
"summary_fields": ("status", "description"),
|
|
"type_fields": ("status",),
|
|
"id_fields": ("id",),
|
|
"time_fields": ("updated_at", "timestamp"),
|
|
},
|
|
}
|
|
|
|
|
|
def _norm_text(value: Any) -> str:
|
|
return str(value or "").strip().lower()
|
|
|
|
|
|
def _norm_key(value: Any) -> str:
|
|
return str(value or "").strip().lower().replace("-", "_").replace(" ", "_")
|
|
|
|
|
|
def _query_tokens(value: Any) -> list[str]:
|
|
return re.findall(r"[a-z0-9]+", _norm_text(value))
|
|
|
|
|
|
def _coerce_float(value: Any) -> float | None:
|
|
try:
|
|
if value is None or value == "":
|
|
return None
|
|
return float(value)
|
|
except (TypeError, ValueError):
|
|
return None
|
|
|
|
|
|
def _coerce_limit(value: Any, default: int = 25, maximum: int = 100) -> int:
|
|
try:
|
|
parsed = int(value)
|
|
except (TypeError, ValueError):
|
|
parsed = default
|
|
return max(1, min(maximum, parsed))
|
|
|
|
|
|
def _coerce_optional_limit(value: Any) -> int | None:
|
|
try:
|
|
if value is None or value == "":
|
|
return None
|
|
parsed = int(value)
|
|
except (TypeError, ValueError):
|
|
return None
|
|
if parsed <= 0:
|
|
return None
|
|
return parsed
|
|
|
|
|
|
def _matches_query(candidate: dict[str, Any], query: str, fields: tuple[str, ...]) -> bool:
|
|
normalized = _norm_text(query)
|
|
if not normalized:
|
|
return True
|
|
haystack = " ".join(_norm_text(candidate.get(field)) for field in fields)
|
|
if normalized in haystack:
|
|
return True
|
|
tokens = _query_tokens(normalized)
|
|
return bool(tokens) and all(token in haystack for token in tokens)
|
|
|
|
|
|
def _first_present(candidate: dict[str, Any], fields: tuple[str, ...]) -> Any:
|
|
for field in fields:
|
|
value = candidate.get(field)
|
|
if value not in (None, ""):
|
|
return value
|
|
return None
|
|
|
|
|
|
def _extract_coords(candidate: dict[str, Any]) -> tuple[float | None, float | None]:
|
|
lat = _coerce_float(
|
|
candidate.get("lat")
|
|
or candidate.get("latitude")
|
|
or candidate.get("y")
|
|
)
|
|
lng = _coerce_float(
|
|
candidate.get("lng")
|
|
or candidate.get("lon")
|
|
or candidate.get("longitude")
|
|
or candidate.get("x")
|
|
)
|
|
geometry = candidate.get("geometry")
|
|
if (lat is None or lng is None) and isinstance(geometry, dict):
|
|
coords = geometry.get("coordinates") or []
|
|
if isinstance(coords, (list, tuple)) and len(coords) >= 2:
|
|
lng = lng if lng is not None else _coerce_float(coords[0])
|
|
lat = lat if lat is not None else _coerce_float(coords[1])
|
|
return lat, lng
|
|
|
|
|
|
def _score_text_match(query: str, value: Any, *, exact_weight: int, prefix_weight: int, contains_weight: int) -> int:
|
|
normalized = _norm_text(value)
|
|
if not normalized or not query:
|
|
return 0
|
|
if normalized == query:
|
|
return exact_weight
|
|
if normalized.startswith(query):
|
|
return prefix_weight
|
|
if query in normalized:
|
|
return contains_weight
|
|
tokens = _query_tokens(query)
|
|
if tokens and all(token in normalized for token in tokens):
|
|
return contains_weight
|
|
return 0
|
|
|
|
|
|
def _text_matches_query(query: str, text: Any) -> bool:
|
|
normalized_query = _norm_text(query)
|
|
normalized_text = _norm_text(text)
|
|
if not normalized_query:
|
|
return True
|
|
if normalized_query in normalized_text:
|
|
return True
|
|
tokens = _query_tokens(normalized_query)
|
|
return bool(tokens) and all(token in normalized_text for token in tokens)
|
|
|
|
|
|
def _dedupe_tokens(tokens: list[str]) -> list[str]:
|
|
return list(dict.fromkeys(token for token in tokens if token))
|
|
|
|
|
|
def _iter_searchable_scalars(value: Any, *, depth: int = 0) -> list[str]:
|
|
if depth > 3:
|
|
return []
|
|
if value in (None, "", False):
|
|
return []
|
|
if isinstance(value, dict):
|
|
out: list[str] = []
|
|
for nested in value.values():
|
|
out.extend(_iter_searchable_scalars(nested, depth=depth + 1))
|
|
return out
|
|
if isinstance(value, (list, tuple, set)):
|
|
out: list[str] = []
|
|
for nested in value:
|
|
out.extend(_iter_searchable_scalars(nested, depth=depth + 1))
|
|
return out
|
|
if isinstance(value, (str, int, float)):
|
|
normalized = _norm_text(value)
|
|
return [normalized] if normalized else []
|
|
return []
|
|
|
|
|
|
def _document_text(candidate: dict[str, Any], fields: tuple[str, ...]) -> str:
|
|
parts: list[str] = []
|
|
for value in _iter_searchable_scalars(candidate):
|
|
if value and value not in parts:
|
|
parts.append(value)
|
|
for field in fields:
|
|
value = _norm_text(candidate.get(field))
|
|
if value and value not in parts:
|
|
parts.insert(0, value)
|
|
return " ".join(parts)
|
|
|
|
|
|
def _normalize_search_token(token: str) -> list[str]:
|
|
normalized = _norm_text(token)
|
|
variants = [normalized] if normalized else []
|
|
if normalized.endswith("ies") and len(normalized) > 4:
|
|
variants.append(f"{normalized[:-3]}y")
|
|
elif normalized.endswith("es") and len(normalized) > 4:
|
|
variants.append(normalized[:-2])
|
|
elif normalized.endswith("s") and len(normalized) > 3:
|
|
variants.append(normalized[:-1])
|
|
return _dedupe_tokens(variants)
|
|
|
|
|
|
def _expand_query_terms(tokens: list[str], vocabulary: set[str]) -> list[str]:
|
|
expanded: list[str] = []
|
|
for token in tokens:
|
|
variants = _normalize_search_token(token)
|
|
variants.extend(_SEARCH_QUERY_SYNONYMS.get(token, ()))
|
|
for variant in list(variants):
|
|
if variant in vocabulary:
|
|
expanded.append(variant)
|
|
elif len(variant) >= 4 and vocabulary:
|
|
expanded.extend(get_close_matches(variant, sorted(vocabulary), n=2, cutoff=0.84))
|
|
else:
|
|
expanded.append(variant)
|
|
return _dedupe_tokens(expanded)
|
|
|
|
|
|
def _layer_group(layer: str) -> str:
|
|
return _SEARCH_GROUP_BY_LAYER.get(layer, "other")
|
|
|
|
|
|
def _build_search_document(doc_id: int, layer: str, candidate: dict[str, Any], spec: dict[str, Any]) -> dict[str, Any]:
|
|
fields = tuple(spec.get("fields", ()))
|
|
text = _document_text(candidate, fields)
|
|
tokens = _dedupe_tokens(_query_tokens(text))
|
|
return {
|
|
"id": doc_id,
|
|
"layer": layer,
|
|
"group": _layer_group(layer),
|
|
"candidate": candidate,
|
|
"spec": spec,
|
|
"text": text,
|
|
"tokens": tokens,
|
|
}
|
|
|
|
|
|
def _get_search_index() -> dict[str, Any]:
|
|
global _SEARCH_INDEX_REF
|
|
import time as _time
|
|
|
|
version = get_data_version()
|
|
# Grab ref once — readers use this snapshot, no lock needed.
|
|
current = _SEARCH_INDEX_REF
|
|
now = _time.monotonic()
|
|
|
|
# Fast path: version unchanged OR index is fresh enough (within TTL).
|
|
# ADS-B/AIS bump the version every few seconds, but we don't need to
|
|
# rebuild a 50K-doc inverted index on every tick.
|
|
if current["version"] == version:
|
|
return current
|
|
if current["version"] is not None and (now - current["built_at"]) < _SEARCH_INDEX_MIN_AGE:
|
|
return current
|
|
|
|
with _SEARCH_INDEX_LOCK:
|
|
# Double-check under lock (another thread may have rebuilt)
|
|
current = _SEARCH_INDEX_REF
|
|
if current["version"] == version:
|
|
return current
|
|
if current["version"] is not None and (_time.monotonic() - current["built_at"]) < _SEARCH_INDEX_MIN_AGE:
|
|
return current
|
|
|
|
layers = [layer for layer in _UNIVERSAL_SEARCH_DEFAULT_LAYERS if layer in _UNIVERSAL_SEARCH_SPECS]
|
|
snap = get_latest_data_subset_refs(*layers)
|
|
docs: list[dict[str, Any]] = []
|
|
postings: dict[str, set[int]] = {}
|
|
vocabulary: set[str] = set()
|
|
|
|
for layer in layers:
|
|
spec = _UNIVERSAL_SEARCH_SPECS[layer]
|
|
items = snap.get(layer) or []
|
|
if isinstance(items, dict):
|
|
items = items.get("items", []) or items.get("results", []) or items.get("vessels", [])
|
|
if not isinstance(items, list):
|
|
continue
|
|
for item in items:
|
|
if not isinstance(item, dict):
|
|
continue
|
|
doc = _build_search_document(len(docs), layer, item, spec)
|
|
if not doc["tokens"]:
|
|
continue
|
|
docs.append(doc)
|
|
for token in doc["tokens"]:
|
|
vocabulary.add(token)
|
|
postings.setdefault(token, set()).add(doc["id"])
|
|
|
|
# Atomic swap — readers grabbing _SEARCH_INDEX_REF after this line
|
|
# see the new index; readers who grabbed it before still see the old
|
|
# one (safe, just stale). No reader ever sees partial state.
|
|
_SEARCH_INDEX_REF = {
|
|
"version": version,
|
|
"docs": docs,
|
|
"vocabulary": vocabulary,
|
|
"postings": postings,
|
|
"built_at": _time.monotonic(),
|
|
}
|
|
return _SEARCH_INDEX_REF
|
|
|
|
|
|
def _parse_search_query(query: str, searchable_layers: list[str]) -> dict[str, Any]:
|
|
normalized = _norm_text(query)
|
|
raw_tokens = _query_tokens(normalized)
|
|
entity_tokens: list[str] = []
|
|
hint_tokens: list[str] = []
|
|
preferred_layers: list[str] = []
|
|
|
|
for token in raw_tokens:
|
|
if token in _GENERIC_QUERY_STOPWORDS:
|
|
continue
|
|
hinted_layers = _GENERIC_LAYER_HINTS.get(token)
|
|
if hinted_layers:
|
|
hint_tokens.append(token)
|
|
for layer in hinted_layers:
|
|
if layer in searchable_layers and layer not in preferred_layers:
|
|
preferred_layers.append(layer)
|
|
continue
|
|
entity_tokens.append(token)
|
|
|
|
fallback_tokens = [token for token in raw_tokens if token not in _GENERIC_QUERY_STOPWORDS]
|
|
entity_tokens = _dedupe_tokens(entity_tokens or fallback_tokens or raw_tokens)
|
|
hint_tokens = _dedupe_tokens(hint_tokens)
|
|
anchor_tokens = sorted(
|
|
[token for token in entity_tokens if len(token) >= 3],
|
|
key=lambda token: (-len(token), token),
|
|
)[:3]
|
|
anchor_tokens = _dedupe_tokens(anchor_tokens or entity_tokens[:2] or entity_tokens)
|
|
|
|
return {
|
|
"normalized": normalized,
|
|
"raw_tokens": raw_tokens,
|
|
"entity_tokens": entity_tokens,
|
|
"hint_tokens": hint_tokens,
|
|
"anchor_tokens": anchor_tokens,
|
|
"entity_phrase": " ".join(entity_tokens).strip(),
|
|
"preferred_layers": preferred_layers,
|
|
}
|
|
|
|
|
|
def _field_texts(candidate: dict[str, Any], fields: tuple[str, ...]) -> dict[str, str]:
|
|
texts: dict[str, str] = {}
|
|
for field in fields:
|
|
normalized = _norm_text(candidate.get(field))
|
|
if normalized:
|
|
texts[field] = normalized
|
|
return texts
|
|
|
|
|
|
def _match_tokens(tokens: list[str], texts: dict[str, str], *, preferred_fields: tuple[str, ...]) -> tuple[list[str], int]:
|
|
matched: list[str] = []
|
|
score = 0
|
|
for token in tokens:
|
|
token_score = 0
|
|
for field in preferred_fields:
|
|
value = texts.get(field, "")
|
|
if not value:
|
|
continue
|
|
if value == token:
|
|
token_score = max(token_score, 120)
|
|
elif value.startswith(token):
|
|
token_score = max(token_score, 90)
|
|
elif token in value:
|
|
token_score = max(token_score, 70)
|
|
if token_score <= 0:
|
|
for value in texts.values():
|
|
if value == token:
|
|
token_score = max(token_score, 70)
|
|
elif value.startswith(token):
|
|
token_score = max(token_score, 50)
|
|
elif token in value:
|
|
token_score = max(token_score, 35)
|
|
if token_score > 0:
|
|
matched.append(token)
|
|
score += token_score
|
|
return matched, score
|
|
|
|
|
|
def _score_candidate(candidate: dict[str, Any], query_info: dict[str, Any], spec: dict[str, Any], layer: str) -> dict[str, Any] | None:
|
|
fields = tuple(spec.get("fields", ()))
|
|
primary_fields = tuple(spec.get("primary_fields", ()))
|
|
texts = _field_texts(candidate, fields)
|
|
document_text = _document_text(candidate, fields)
|
|
if not texts and not document_text:
|
|
return None
|
|
|
|
combined = " ".join([*texts.values(), document_text]).strip()
|
|
entity_tokens = list(query_info.get("entity_tokens") or [])
|
|
hint_tokens = list(query_info.get("hint_tokens") or [])
|
|
anchor_tokens = list(query_info.get("anchor_tokens") or [])
|
|
entity_phrase = str(query_info.get("entity_phrase") or "")
|
|
normalized_query = str(query_info.get("normalized") or "")
|
|
|
|
matched_entity_tokens, score = _match_tokens(entity_tokens, texts, preferred_fields=primary_fields)
|
|
document_hits = [token for token in entity_tokens if token in document_text and token not in matched_entity_tokens]
|
|
matched_entity_tokens.extend(document_hits)
|
|
score += 20 * len(document_hits)
|
|
entity_match_count = len(matched_entity_tokens)
|
|
entity_token_count = len(entity_tokens)
|
|
anchor_match_count = sum(1 for token in anchor_tokens if token in document_text)
|
|
|
|
if entity_phrase:
|
|
for field in primary_fields:
|
|
value = texts.get(field, "")
|
|
if entity_phrase and entity_phrase in value:
|
|
score += 140
|
|
break
|
|
else:
|
|
if entity_phrase in combined:
|
|
score += 80
|
|
elif normalized_query and normalized_query in combined:
|
|
score += 60
|
|
|
|
if entity_token_count:
|
|
if entity_match_count == 0 or (anchor_tokens and anchor_match_count == 0):
|
|
return None
|
|
score += 20 * entity_match_count
|
|
if entity_match_count == entity_token_count:
|
|
score += 40
|
|
else:
|
|
score += 10 * anchor_match_count
|
|
elif normalized_query and normalized_query not in combined and not matched_entity_tokens:
|
|
return None
|
|
|
|
matched_hint_tokens: list[str] = []
|
|
if hint_tokens:
|
|
if layer in query_info.get("preferred_layers", []):
|
|
score += 25 + (5 * len(hint_tokens))
|
|
matched_hint_tokens.extend(hint_tokens)
|
|
type_text = " ".join(
|
|
_norm_text(candidate.get(field))
|
|
for field in tuple(spec.get("type_fields", ())) + tuple(spec.get("summary_fields", ()))
|
|
)
|
|
for token in hint_tokens:
|
|
if token in type_text and token not in matched_hint_tokens:
|
|
matched_hint_tokens.append(token)
|
|
score += 15
|
|
|
|
matched_tokens = _dedupe_tokens(matched_entity_tokens + matched_hint_tokens)
|
|
confidence = min(0.99, max(0.1, score / 220.0))
|
|
|
|
return {
|
|
"score": score,
|
|
"matched_tokens": matched_tokens,
|
|
"confidence": round(confidence, 2),
|
|
}
|
|
|
|
|
|
def _compact_search_result(
|
|
layer: str,
|
|
candidate: dict[str, Any],
|
|
spec: dict[str, Any],
|
|
score: int,
|
|
*,
|
|
matched_tokens: list[str] | None = None,
|
|
confidence: float | None = None,
|
|
) -> dict[str, Any]:
|
|
label = _first_present(candidate, tuple(spec.get("label_fields", ()))) or ""
|
|
summary_parts = []
|
|
for field in tuple(spec.get("summary_fields", ())):
|
|
value = candidate.get(field)
|
|
if value in (None, ""):
|
|
continue
|
|
rendered = str(value).strip()
|
|
if rendered and rendered not in summary_parts:
|
|
summary_parts.append(rendered)
|
|
if len(summary_parts) >= 3:
|
|
break
|
|
lat, lng = _extract_coords(candidate)
|
|
time_value = _first_present(candidate, tuple(spec.get("time_fields", ())))
|
|
result = {
|
|
"source_layer": layer,
|
|
"group": _layer_group(layer),
|
|
"label": str(label),
|
|
"summary": " | ".join(summary_parts),
|
|
"type": str(_first_present(candidate, tuple(spec.get("type_fields", ()))) or ""),
|
|
"id": str(_first_present(candidate, tuple(spec.get("id_fields", ()))) or ""),
|
|
"score": score,
|
|
}
|
|
if matched_tokens:
|
|
result["matched_tokens"] = matched_tokens
|
|
if confidence is not None:
|
|
result["confidence"] = confidence
|
|
if lat is not None:
|
|
result["lat"] = lat
|
|
if lng is not None:
|
|
result["lng"] = lng
|
|
if time_value not in (None, ""):
|
|
result["time"] = str(time_value)
|
|
return result
|
|
|
|
|
|
def _haversine_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
|
|
r = 6371.0
|
|
dlat = math.radians(lat2 - lat1)
|
|
dlng = math.radians(lng2 - lng1)
|
|
a = (
|
|
math.sin(dlat / 2) ** 2
|
|
+ math.cos(math.radians(lat1))
|
|
* math.cos(math.radians(lat2))
|
|
* math.sin(dlng / 2) ** 2
|
|
)
|
|
return r * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
|
|
|
|
|
def _resolve_layers(
|
|
requested: list[str] | tuple[str, ...] | None,
|
|
alias_map: dict[str, str],
|
|
defaults: tuple[str, ...],
|
|
) -> list[str]:
|
|
if not requested:
|
|
return list(defaults)
|
|
resolved: list[str] = []
|
|
seen: set[str] = set()
|
|
for layer in requested:
|
|
canonical = alias_map.get(_norm_key(layer))
|
|
if canonical and canonical not in seen:
|
|
seen.add(canonical)
|
|
resolved.append(canonical)
|
|
return resolved or list(defaults)
|
|
|
|
|
|
def _available_layer_names() -> list[str]:
|
|
return [key for key in latest_data.keys() if key != "last_updated"]
|
|
|
|
|
|
def get_telemetry_summary() -> dict[str, Any]:
|
|
"""Return lightweight counts and discovery metadata for all telemetry layers."""
|
|
version = get_data_version()
|
|
layer_names = _available_layer_names()
|
|
snap = get_latest_data_subset_refs("last_updated", *layer_names)
|
|
counts: dict[str, Any] = {}
|
|
non_empty_layers: list[str] = []
|
|
|
|
for layer in layer_names:
|
|
value = snap.get(layer)
|
|
if isinstance(value, list):
|
|
counts[layer] = len(value)
|
|
if value:
|
|
non_empty_layers.append(layer)
|
|
elif isinstance(value, dict):
|
|
counts[layer] = len(value)
|
|
if value:
|
|
non_empty_layers.append(layer)
|
|
elif value is None:
|
|
counts[layer] = 0
|
|
else:
|
|
counts[layer] = 1
|
|
non_empty_layers.append(layer)
|
|
|
|
alias_examples = {
|
|
"gfw": "fishing_activity",
|
|
"global_fishing_watch": "fishing_activity",
|
|
"fishing": "fishing_activity",
|
|
"uap": "uap_sightings",
|
|
"ufo": "uap_sightings",
|
|
"tracked": "tracked_flights",
|
|
"military": "military_flights",
|
|
"jets": "private_jets",
|
|
}
|
|
|
|
return {
|
|
"counts": counts,
|
|
"available_layers": layer_names,
|
|
"non_empty_layers": non_empty_layers,
|
|
"layer_aliases": alias_examples,
|
|
"last_updated": snap.get("last_updated"),
|
|
"version": version,
|
|
}
|
|
|
|
|
|
def find_flights(
|
|
*,
|
|
query: str = "",
|
|
callsign: str = "",
|
|
registration: str = "",
|
|
icao24: str = "",
|
|
owner: str = "",
|
|
categories: list[str] | tuple[str, ...] | None = None,
|
|
limit: int = 25,
|
|
) -> dict[str, Any]:
|
|
"""Search flight layers without returning the full telemetry snapshot."""
|
|
layers = _resolve_layers(
|
|
categories,
|
|
_FLIGHT_LAYER_ALIASES,
|
|
("tracked_flights", "military_flights", "private_jets", "private_flights", "commercial_flights"),
|
|
)
|
|
snap = get_latest_data_subset_refs(*layers)
|
|
out: list[dict[str, Any]] = []
|
|
limit = _coerce_limit(limit)
|
|
query_norm = _norm_text(query)
|
|
callsign_norm = _norm_text(callsign)
|
|
registration_norm = _norm_text(registration)
|
|
icao24_norm = _norm_text(icao24)
|
|
owner_norm = _norm_text(owner)
|
|
|
|
for layer in layers:
|
|
items = snap.get(layer) or []
|
|
if not isinstance(items, list):
|
|
continue
|
|
for flight in items:
|
|
if not isinstance(flight, dict):
|
|
continue
|
|
flight_callsign = _norm_text(
|
|
flight.get("callsign") or flight.get("flight") or flight.get("call")
|
|
)
|
|
flight_registration = _norm_text(
|
|
flight.get("registration") or flight.get("r")
|
|
)
|
|
flight_icao24 = _norm_text(flight.get("icao24"))
|
|
flight_owner = _norm_text(
|
|
flight.get("owner")
|
|
or flight.get("operator")
|
|
or flight.get("alert_operator")
|
|
)
|
|
if callsign_norm and callsign_norm not in flight_callsign:
|
|
continue
|
|
if registration_norm and registration_norm not in flight_registration:
|
|
continue
|
|
if icao24_norm and icao24_norm != flight_icao24:
|
|
continue
|
|
if owner_norm and owner_norm not in flight_owner:
|
|
continue
|
|
if query_norm and not _matches_query(
|
|
flight,
|
|
query_norm,
|
|
(
|
|
"callsign",
|
|
"flight",
|
|
"call",
|
|
"registration",
|
|
"r",
|
|
"icao24",
|
|
"owner",
|
|
"operator",
|
|
"alert_operator",
|
|
"type",
|
|
"t",
|
|
"aircraft_type",
|
|
),
|
|
):
|
|
continue
|
|
out.append(
|
|
{
|
|
"source_layer": layer,
|
|
"callsign": flight.get("callsign") or flight.get("flight") or flight.get("call") or "",
|
|
"registration": flight.get("registration") or flight.get("r") or "",
|
|
"icao24": flight.get("icao24") or "",
|
|
"owner": flight.get("owner") or flight.get("operator") or flight.get("alert_operator") or "",
|
|
"type": flight.get("type") or flight.get("t") or flight.get("aircraft_type") or "",
|
|
"lat": flight.get("lat") or flight.get("latitude"),
|
|
"lng": flight.get("lng") or flight.get("lon") or flight.get("longitude"),
|
|
"altitude": flight.get("altitude") or flight.get("alt_baro") or flight.get("alt"),
|
|
"speed": flight.get("speed") or flight.get("gs"),
|
|
"heading": flight.get("heading") or flight.get("track"),
|
|
"alert_category": flight.get("alert_category") or "",
|
|
"alert_operator": flight.get("alert_operator") or "",
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
return {"results": out, "version": get_data_version(), "truncated": False}
|
|
|
|
|
|
def find_ships(
|
|
*,
|
|
query: str = "",
|
|
mmsi: str = "",
|
|
imo: str = "",
|
|
name: str = "",
|
|
limit: int = 25,
|
|
) -> dict[str, Any]:
|
|
"""Search ships without returning the entire ship layer."""
|
|
snap = get_latest_data_subset_refs("ships")
|
|
items = snap.get("ships") or []
|
|
out: list[dict[str, Any]] = []
|
|
limit = _coerce_limit(limit)
|
|
query_norm = _norm_text(query)
|
|
mmsi_norm = _norm_text(mmsi)
|
|
imo_norm = _norm_text(imo)
|
|
name_norm = _norm_text(name)
|
|
if isinstance(items, dict):
|
|
items = items.get("vessels", []) or items.get("items", [])
|
|
|
|
for ship in items if isinstance(items, list) else []:
|
|
if not isinstance(ship, dict):
|
|
continue
|
|
ship_mmsi = _norm_text(ship.get("mmsi"))
|
|
ship_imo = _norm_text(ship.get("imo"))
|
|
ship_name = _norm_text(ship.get("name") or ship.get("shipName"))
|
|
if mmsi_norm and mmsi_norm != ship_mmsi:
|
|
continue
|
|
if imo_norm and imo_norm != ship_imo:
|
|
continue
|
|
if name_norm and name_norm not in ship_name:
|
|
continue
|
|
if query_norm and not _matches_query(
|
|
ship,
|
|
query_norm,
|
|
(
|
|
"name",
|
|
"shipName",
|
|
"mmsi",
|
|
"imo",
|
|
"callsign",
|
|
"shipType",
|
|
"type",
|
|
"yacht_owner",
|
|
"yacht_name",
|
|
"yacht_category",
|
|
"owner",
|
|
),
|
|
):
|
|
continue
|
|
out.append(
|
|
{
|
|
"mmsi": ship.get("mmsi") or "",
|
|
"imo": ship.get("imo") or "",
|
|
"name": ship.get("name") or ship.get("shipName") or "",
|
|
"owner": ship.get("yacht_owner") or ship.get("owner") or "",
|
|
"tracked_name": ship.get("yacht_name") or "",
|
|
"tracked_category": ship.get("yacht_category") or "",
|
|
"callsign": ship.get("callsign") or "",
|
|
"type": ship.get("shipType") or ship.get("type") or "",
|
|
"lat": ship.get("lat") or ship.get("latitude"),
|
|
"lng": ship.get("lng") or ship.get("lon") or ship.get("longitude"),
|
|
"speed": ship.get("speed") or ship.get("sog"),
|
|
"heading": ship.get("heading") or ship.get("course"),
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
return {"results": out, "version": get_data_version(), "truncated": False}
|
|
|
|
|
|
def _entity_layers_for_type(entity_type: str) -> list[str] | None:
|
|
kind = _norm_key(entity_type)
|
|
if not kind:
|
|
return None
|
|
if kind in {"aircraft", "plane", "flight", "jet", "helicopter"}:
|
|
return ["tracked_flights", "military_flights", "private_jets", "private_flights", "commercial_flights"]
|
|
if kind in {"ship", "ships", "vessel", "boat", "yacht", "maritime"}:
|
|
return ["ships", "fishing_activity"]
|
|
if kind in {"event", "incident", "news", "protest"}:
|
|
return ["news", "gdelt", "crowdthreat", "frontlines", "liveuamap"]
|
|
if kind in {"satellite", "space"}:
|
|
return ["satellites", "tinygs_satellites", "satnogs_observations", "satnogs_stations"]
|
|
if kind in {"signal", "sigint", "radio"}:
|
|
return ["sigint", "kiwisdr", "psk_reporter"]
|
|
canonical = _LAYER_ALIASES.get(kind)
|
|
return [canonical] if canonical else None
|
|
|
|
|
|
def _entity_key(item: dict[str, Any]) -> str:
|
|
layer = str(item.get("source_layer") or item.get("layer") or "")
|
|
ident = str(item.get("id") or item.get("icao24") or item.get("registration") or item.get("mmsi") or item.get("imo") or "")
|
|
label = str(item.get("label") or item.get("callsign") or item.get("name") or "")
|
|
return f"{layer}:{ident or label}".lower()
|
|
|
|
|
|
def _normalize_entity_result(item: dict[str, Any], *, group: str = "") -> dict[str, Any]:
|
|
out = dict(item)
|
|
layer = str(out.get("source_layer") or out.get("layer") or "")
|
|
if layer and "source_layer" not in out:
|
|
out["source_layer"] = layer
|
|
if not group:
|
|
group = str(out.get("group") or _layer_group(layer))
|
|
out["group"] = group or "other"
|
|
if "label" not in out:
|
|
out["label"] = (
|
|
out.get("callsign")
|
|
or out.get("name")
|
|
or out.get("tracked_name")
|
|
or out.get("registration")
|
|
or out.get("mmsi")
|
|
or out.get("id")
|
|
or ""
|
|
)
|
|
if "id" not in out:
|
|
out["id"] = out.get("icao24") or out.get("registration") or out.get("mmsi") or out.get("imo") or ""
|
|
return out
|
|
|
|
|
|
def find_entity(
|
|
*,
|
|
query: str = "",
|
|
entity_type: str = "",
|
|
callsign: str = "",
|
|
registration: str = "",
|
|
icao24: str = "",
|
|
mmsi: str = "",
|
|
imo: str = "",
|
|
name: str = "",
|
|
owner: str = "",
|
|
layers: list[str] | tuple[str, ...] | None = None,
|
|
limit: int = 10,
|
|
) -> dict[str, Any]:
|
|
"""Find a named entity across aircraft, maritime, and general telemetry.
|
|
|
|
This is an intent-level lookup for agents. It tries high-precision
|
|
aircraft/ship fields first, then falls back to the universal search index.
|
|
"""
|
|
effective_query = str(query or name or owner or callsign or registration or icao24 or mmsi or imo or "").strip()
|
|
if not effective_query:
|
|
return {
|
|
"results": [],
|
|
"best_match": None,
|
|
"version": get_data_version(),
|
|
"truncated": False,
|
|
"searched_layers": [],
|
|
"strategy": "empty_query",
|
|
}
|
|
|
|
limit = _coerce_limit(limit, default=10, maximum=50)
|
|
requested_layers = list(layers or _entity_layers_for_type(entity_type) or [])
|
|
results: list[dict[str, Any]] = []
|
|
seen: set[str] = set()
|
|
strategies: list[str] = []
|
|
|
|
aircraft_hint = bool(callsign or registration or icao24) or _norm_key(entity_type) in {
|
|
"aircraft",
|
|
"plane",
|
|
"flight",
|
|
"jet",
|
|
"helicopter",
|
|
}
|
|
maritime_hint = bool(mmsi or imo) or _norm_key(entity_type) in {
|
|
"ship",
|
|
"ships",
|
|
"vessel",
|
|
"boat",
|
|
"yacht",
|
|
"maritime",
|
|
}
|
|
|
|
if aircraft_hint or not maritime_hint:
|
|
flight_result = find_flights(
|
|
query=effective_query if not (callsign or registration or icao24 or owner) else "",
|
|
callsign=callsign,
|
|
registration=registration,
|
|
icao24=icao24,
|
|
owner=owner,
|
|
categories=requested_layers or None,
|
|
limit=limit,
|
|
)
|
|
if flight_result.get("results"):
|
|
strategies.append("aircraft_exact_fields")
|
|
for item in flight_result.get("results") or []:
|
|
normalized = _normalize_entity_result(item, group="aircraft")
|
|
normalized.setdefault("score", 1000)
|
|
normalized.setdefault("confidence", 0.99)
|
|
key = _entity_key(normalized)
|
|
if key not in seen:
|
|
seen.add(key)
|
|
results.append(normalized)
|
|
|
|
if maritime_hint or not aircraft_hint:
|
|
ship_result = find_ships(
|
|
query=effective_query if not (mmsi or imo or name) else "",
|
|
mmsi=mmsi,
|
|
imo=imo,
|
|
name=name,
|
|
limit=limit,
|
|
)
|
|
if ship_result.get("results"):
|
|
strategies.append("maritime_exact_fields")
|
|
for item in ship_result.get("results") or []:
|
|
normalized = _normalize_entity_result(item, group="maritime")
|
|
normalized.setdefault("score", 1000)
|
|
normalized.setdefault("confidence", 0.99)
|
|
key = _entity_key(normalized)
|
|
if key not in seen:
|
|
seen.add(key)
|
|
results.append(normalized)
|
|
|
|
search_layers = requested_layers or _entity_layers_for_type(entity_type)
|
|
search_result = search_telemetry(query=effective_query, layers=search_layers, limit=limit)
|
|
if search_result.get("results"):
|
|
strategies.append("universal_index")
|
|
for item in search_result.get("results") or []:
|
|
normalized = _normalize_entity_result(item)
|
|
key = _entity_key(normalized)
|
|
if key not in seen:
|
|
seen.add(key)
|
|
results.append(normalized)
|
|
|
|
results.sort(
|
|
key=lambda item: (
|
|
int(item.get("score", 0) or 0),
|
|
float(item.get("confidence", 0.0) or 0.0),
|
|
bool(item.get("lat") is not None and item.get("lng") is not None),
|
|
),
|
|
reverse=True,
|
|
)
|
|
truncated = len(results) > limit
|
|
limited = results[:limit]
|
|
return {
|
|
"query": effective_query,
|
|
"entity_type": entity_type or "",
|
|
"best_match": limited[0] if limited else None,
|
|
"results": limited,
|
|
"version": get_data_version(),
|
|
"truncated": truncated,
|
|
"searched_layers": search_result.get("searched_layers", search_layers or []),
|
|
"strategy": "+".join(strategies) if strategies else "no_match",
|
|
}
|
|
|
|
|
|
def _project_context_item(layer: str, item: dict[str, Any], distance_km: float) -> dict[str, Any]:
|
|
label = (
|
|
item.get("label")
|
|
or item.get("callsign")
|
|
or item.get("flight")
|
|
or item.get("name")
|
|
or item.get("shipName")
|
|
or item.get("title")
|
|
or item.get("headline")
|
|
or item.get("event")
|
|
or item.get("place")
|
|
or item.get("id")
|
|
or item.get("anomaly_id")
|
|
or ""
|
|
)
|
|
summary = (
|
|
item.get("summary")
|
|
or item.get("description")
|
|
or item.get("drivers")
|
|
or item.get("area")
|
|
or item.get("source")
|
|
or ""
|
|
)
|
|
if isinstance(summary, list):
|
|
summary = "; ".join(str(part) for part in summary[:4])
|
|
lat, lng = _extract_coords(item)
|
|
return {
|
|
"source_layer": layer,
|
|
"label": label,
|
|
"summary": str(summary or "")[:500],
|
|
"lat": lat,
|
|
"lng": lng,
|
|
"distance_km": round(distance_km, 2),
|
|
"type": item.get("type") or item.get("kind") or item.get("category") or item.get("event") or "",
|
|
"severity": item.get("severity") or item.get("level") or item.get("score") or item.get("risk_score"),
|
|
"id": (
|
|
item.get("id")
|
|
or item.get("anomaly_id")
|
|
or item.get("mmsi")
|
|
or item.get("icao24")
|
|
or item.get("sourceurl")
|
|
or item.get("link")
|
|
or ""
|
|
),
|
|
"time": item.get("timestamp") or item.get("updated") or item.get("time") or item.get("date") or item.get("published") or "",
|
|
}
|
|
|
|
|
|
def _nearby_items_from_layers(
|
|
*,
|
|
lat: float,
|
|
lng: float,
|
|
radius_km: float,
|
|
layers: tuple[str, ...],
|
|
limit_per_layer: int,
|
|
) -> dict[str, list[dict[str, Any]]]:
|
|
snap = get_latest_data_subset_refs(*layers)
|
|
out: dict[str, list[dict[str, Any]]] = {}
|
|
for layer in layers:
|
|
value = snap.get(layer) or []
|
|
if isinstance(value, dict):
|
|
if layer == "gdelt" and isinstance(value.get("features"), list):
|
|
items = value.get("features") or []
|
|
else:
|
|
items = value.get("items") or value.get("features") or value.get("vessels") or []
|
|
else:
|
|
items = value
|
|
if not isinstance(items, list):
|
|
continue
|
|
matches: list[dict[str, Any]] = []
|
|
for item in items:
|
|
if not isinstance(item, dict):
|
|
continue
|
|
item_lat, item_lng = _extract_coords(item)
|
|
if item_lat is None or item_lng is None:
|
|
continue
|
|
distance = _haversine_km(lat, lng, item_lat, item_lng)
|
|
if distance > radius_km:
|
|
continue
|
|
matches.append(_project_context_item(layer, item, distance))
|
|
matches.sort(key=lambda entry: entry.get("distance_km", 0))
|
|
if matches:
|
|
out[layer] = matches[:limit_per_layer]
|
|
return out
|
|
|
|
|
|
def _entity_same_as_context(entity: dict[str, Any], context: dict[str, Any]) -> bool:
|
|
entity_ids = {
|
|
_norm_key(entity.get("id")),
|
|
_norm_key(entity.get("icao24")),
|
|
_norm_key(entity.get("registration")),
|
|
_norm_key(entity.get("mmsi")),
|
|
_norm_key(entity.get("imo")),
|
|
_norm_key(entity.get("callsign")),
|
|
_norm_key(entity.get("label")),
|
|
_norm_key(entity.get("name")),
|
|
}
|
|
context_ids = {
|
|
_norm_key(context.get("id")),
|
|
_norm_key(context.get("label")),
|
|
}
|
|
entity_ids.discard("")
|
|
context_ids.discard("")
|
|
return bool(entity_ids & context_ids)
|
|
|
|
|
|
def correlate_entity(
|
|
*,
|
|
query: str = "",
|
|
entity_type: str = "",
|
|
callsign: str = "",
|
|
registration: str = "",
|
|
icao24: str = "",
|
|
mmsi: str = "",
|
|
imo: str = "",
|
|
name: str = "",
|
|
owner: str = "",
|
|
radius_km: float = 100,
|
|
limit: int = 10,
|
|
) -> dict[str, Any]:
|
|
"""Build an evidence pack around a resolved entity.
|
|
|
|
This is intentionally not a verdict engine. It resolves the entity, finds
|
|
nearby live context, and labels correlation signals as hypotheses that an
|
|
agent or user can inspect.
|
|
"""
|
|
lookup = find_entity(
|
|
query=query,
|
|
entity_type=entity_type,
|
|
callsign=callsign,
|
|
registration=registration,
|
|
icao24=icao24,
|
|
mmsi=mmsi,
|
|
imo=imo,
|
|
name=name,
|
|
owner=owner,
|
|
limit=5,
|
|
)
|
|
best = lookup.get("best_match") if isinstance(lookup.get("best_match"), dict) else None
|
|
if not best:
|
|
return {
|
|
"status": "unresolved",
|
|
"claim_level": "no_entity_match",
|
|
"lookup": lookup,
|
|
"entity": None,
|
|
"center": None,
|
|
"signals": [],
|
|
"evidence": {},
|
|
"recommended_next": ["Try a callsign, tail number, MMSI, IMO, owner, or exact vessel/aircraft name."],
|
|
"version": get_data_version(),
|
|
}
|
|
|
|
lat = _coerce_float(best.get("lat") or best.get("latitude"))
|
|
lng = _coerce_float(best.get("lng") or best.get("lon") or best.get("longitude"))
|
|
if lat is None or lng is None:
|
|
return {
|
|
"status": "resolved_without_current_position",
|
|
"claim_level": "identity_only",
|
|
"lookup": lookup,
|
|
"entity": best,
|
|
"center": None,
|
|
"signals": [],
|
|
"evidence": {},
|
|
"recommended_next": ["Install a track_entity watch so the system can alert when this entity reappears with coordinates."],
|
|
"version": get_data_version(),
|
|
}
|
|
|
|
radius = _coerce_float(radius_km)
|
|
if radius is None:
|
|
radius = 100.0
|
|
radius = max(1.0, min(1000.0, radius))
|
|
limit = _coerce_limit(limit, default=10, maximum=50)
|
|
|
|
nearby = entities_near(
|
|
lat=lat,
|
|
lng=lng,
|
|
radius_km=radius,
|
|
entity_types=[
|
|
"tracked",
|
|
"military",
|
|
"jets",
|
|
"private",
|
|
"commercial",
|
|
"ships",
|
|
"uavs",
|
|
"satellites",
|
|
],
|
|
limit=limit + 5,
|
|
)
|
|
proximate_entities = [
|
|
item for item in nearby.get("results", [])
|
|
if not _entity_same_as_context(best, item)
|
|
][:limit]
|
|
|
|
context = _nearby_items_from_layers(
|
|
lat=lat,
|
|
lng=lng,
|
|
radius_km=radius,
|
|
layers=(
|
|
"correlations",
|
|
"sar_anomalies",
|
|
"internet_outages",
|
|
"weather_alerts",
|
|
"earthquakes",
|
|
"gps_jamming",
|
|
"news",
|
|
"gdelt",
|
|
"crowdthreat",
|
|
"frontlines",
|
|
"liveuamap",
|
|
"military_bases",
|
|
"datacenters",
|
|
"power_plants",
|
|
),
|
|
limit_per_layer=min(limit, 25),
|
|
)
|
|
|
|
signals: list[dict[str, Any]] = []
|
|
if context.get("correlations"):
|
|
signals.append({
|
|
"type": "existing_correlation_near_entity",
|
|
"confidence": 0.75,
|
|
"reason": f"{len(context['correlations'])} active correlation alert(s) within {radius:g} km",
|
|
"evidence_layers": ["correlations"],
|
|
})
|
|
if context.get("sar_anomalies"):
|
|
signals.append({
|
|
"type": "sar_anomaly_near_entity",
|
|
"confidence": 0.65,
|
|
"reason": f"{len(context['sar_anomalies'])} SAR anomaly record(s) within {radius:g} km",
|
|
"evidence_layers": ["sar_anomalies"],
|
|
})
|
|
if context.get("internet_outages"):
|
|
signals.append({
|
|
"type": "infrastructure_disruption_near_entity",
|
|
"confidence": 0.6,
|
|
"reason": f"{len(context['internet_outages'])} internet outage record(s) within {radius:g} km",
|
|
"evidence_layers": ["internet_outages"],
|
|
})
|
|
hazard_layers = [layer for layer in ("weather_alerts", "earthquakes", "gps_jamming") if context.get(layer)]
|
|
if hazard_layers:
|
|
signals.append({
|
|
"type": "environment_or_rf_hazard_near_entity",
|
|
"confidence": 0.55,
|
|
"reason": "Environmental or RF hazard context is nearby",
|
|
"evidence_layers": hazard_layers,
|
|
})
|
|
if proximate_entities:
|
|
signals.append({
|
|
"type": "nearby_live_entities",
|
|
"confidence": 0.5,
|
|
"reason": f"{len(proximate_entities)} other live tracked entities within {radius:g} km",
|
|
"evidence_layers": sorted({str(item.get("source_layer") or "") for item in proximate_entities if item.get("source_layer")}),
|
|
})
|
|
|
|
event_count = sum(len(context.get(layer, [])) for layer in ("news", "gdelt", "crowdthreat", "frontlines", "liveuamap"))
|
|
if event_count:
|
|
signals.append({
|
|
"type": "nearby_event_reporting",
|
|
"confidence": 0.45,
|
|
"reason": f"{event_count} nearby event/news record(s) within {radius:g} km",
|
|
"evidence_layers": [layer for layer in ("news", "gdelt", "crowdthreat", "frontlines", "liveuamap") if context.get(layer)],
|
|
})
|
|
|
|
status = "context_found" if signals else "no_nearby_context"
|
|
return {
|
|
"status": status,
|
|
"claim_level": "evidence_pack_not_verdict",
|
|
"lookup": lookup,
|
|
"entity": best,
|
|
"center": {"lat": lat, "lng": lng},
|
|
"radius_km": radius,
|
|
"signals": signals,
|
|
"evidence": {
|
|
"proximate_entities": proximate_entities,
|
|
"context_layers": context,
|
|
},
|
|
"recommended_next": [
|
|
"Use track_entity to keep monitoring this exact entity.",
|
|
"Use watch_area on the returned center if the area matters more than the entity.",
|
|
"Treat co-location as a lead, not proof of intent or causation.",
|
|
],
|
|
"version": get_data_version(),
|
|
}
|
|
|
|
|
|
def search_news(
|
|
*,
|
|
query: str,
|
|
limit: int = 10,
|
|
include_gdelt: bool = True,
|
|
) -> dict[str, Any]:
|
|
"""Search news and event layers server-side and return a compact result set."""
|
|
query_norm = _norm_text(query)
|
|
if not query_norm:
|
|
return {"results": [], "version": get_data_version(), "truncated": False}
|
|
|
|
snap = get_latest_data_subset_refs("news", "gdelt", "crowdthreat", "liveuamap", "frontlines")
|
|
out: list[dict[str, Any]] = []
|
|
limit = _coerce_limit(limit, default=10, maximum=50)
|
|
|
|
for article in snap.get("news") or []:
|
|
if not isinstance(article, dict):
|
|
continue
|
|
text = " ".join(
|
|
(
|
|
_norm_text(article.get("title")),
|
|
_norm_text(article.get("summary")),
|
|
_norm_text(article.get("description")),
|
|
_norm_text(article.get("source")),
|
|
)
|
|
)
|
|
if not _text_matches_query(query_norm, text):
|
|
continue
|
|
out.append(
|
|
{
|
|
"source_layer": "news",
|
|
"title": article.get("title") or "",
|
|
"summary": article.get("summary") or article.get("description") or "",
|
|
"source": article.get("source") or "",
|
|
"link": article.get("link") or article.get("url") or "",
|
|
"lat": article.get("lat"),
|
|
"lng": article.get("lng"),
|
|
"risk_score": article.get("risk_score"),
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
if include_gdelt:
|
|
for event in snap.get("gdelt") or []:
|
|
if not isinstance(event, dict):
|
|
continue
|
|
props = event.get("properties") if isinstance(event.get("properties"), dict) else event
|
|
text = " ".join(
|
|
(
|
|
_norm_text(props.get("title")),
|
|
_norm_text(props.get("name")),
|
|
_norm_text(props.get("sourceurl")),
|
|
)
|
|
)
|
|
if not _text_matches_query(query_norm, text):
|
|
continue
|
|
coords = []
|
|
geometry = event.get("geometry")
|
|
if isinstance(geometry, dict):
|
|
coords = geometry.get("coordinates") or []
|
|
out.append(
|
|
{
|
|
"source_layer": "gdelt",
|
|
"title": props.get("title") or props.get("name") or "",
|
|
"summary": "",
|
|
"source": "GDELT",
|
|
"link": props.get("sourceurl") or "",
|
|
"lat": coords[1] if len(coords) >= 2 else None,
|
|
"lng": coords[0] if len(coords) >= 2 else None,
|
|
"risk_score": props.get("count"),
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
for event in snap.get("crowdthreat") or []:
|
|
if not isinstance(event, dict):
|
|
continue
|
|
text = " ".join(
|
|
(
|
|
_norm_text(event.get("title")),
|
|
_norm_text(event.get("summary")),
|
|
_norm_text(event.get("description")),
|
|
_norm_text(event.get("category")),
|
|
_norm_text(event.get("city")),
|
|
_norm_text(event.get("state")),
|
|
)
|
|
)
|
|
if not _text_matches_query(query_norm, text):
|
|
continue
|
|
out.append(
|
|
{
|
|
"source_layer": "crowdthreat",
|
|
"title": event.get("title") or "",
|
|
"summary": event.get("summary") or event.get("description") or "",
|
|
"source": event.get("category") or "CrowdThreat",
|
|
"link": event.get("link") or event.get("url") or "",
|
|
"lat": event.get("lat") or event.get("latitude"),
|
|
"lng": event.get("lng") or event.get("lon") or event.get("longitude"),
|
|
"risk_score": event.get("risk_score") or event.get("severity") or event.get("score"),
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
for layer in ("liveuamap", "frontlines"):
|
|
for event in snap.get(layer) or []:
|
|
if not isinstance(event, dict):
|
|
continue
|
|
text = " ".join(
|
|
(
|
|
_norm_text(event.get("title")),
|
|
_norm_text(event.get("name")),
|
|
_norm_text(event.get("description")),
|
|
_norm_text(event.get("category")),
|
|
_norm_text(event.get("place")),
|
|
)
|
|
)
|
|
if not _text_matches_query(query_norm, text):
|
|
continue
|
|
lat, lng = _extract_coords(event)
|
|
out.append(
|
|
{
|
|
"source_layer": layer,
|
|
"title": event.get("title") or event.get("name") or "",
|
|
"summary": event.get("description") or "",
|
|
"source": event.get("category") or layer,
|
|
"link": event.get("link") or event.get("url") or "",
|
|
"lat": lat,
|
|
"lng": lng,
|
|
"risk_score": event.get("severity") or event.get("score"),
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
return {"results": out, "version": get_data_version(), "truncated": False}
|
|
|
|
|
|
def search_telemetry(
|
|
*,
|
|
query: str,
|
|
layers: list[str] | tuple[str, ...] | None = None,
|
|
limit: int = 25,
|
|
) -> dict[str, Any]:
|
|
"""Search compactly across the telemetry store without pulling whole layers."""
|
|
query_norm = _norm_text(query)
|
|
if not query_norm:
|
|
return {"results": [], "version": get_data_version(), "truncated": False, "searched_layers": []}
|
|
|
|
requested_layers = _resolve_layers(
|
|
layers,
|
|
_LAYER_ALIASES,
|
|
_UNIVERSAL_SEARCH_DEFAULT_LAYERS,
|
|
)
|
|
searchable_layers = [
|
|
layer for layer in requested_layers
|
|
if layer in _UNIVERSAL_SEARCH_SPECS
|
|
]
|
|
if not searchable_layers:
|
|
searchable_layers = [layer for layer in _UNIVERSAL_SEARCH_DEFAULT_LAYERS if layer in _UNIVERSAL_SEARCH_SPECS]
|
|
query_info = _parse_search_query(query_norm, searchable_layers)
|
|
preferred_layers = list(query_info.get("preferred_layers") or [])
|
|
if preferred_layers:
|
|
searchable_layers = preferred_layers + [layer for layer in searchable_layers if layer not in preferred_layers]
|
|
search_index = _get_search_index()
|
|
docs = list(search_index.get("docs") or [])
|
|
postings = dict(search_index.get("postings") or {})
|
|
vocabulary = set(search_index.get("vocabulary") or set())
|
|
layer_set = set(searchable_layers)
|
|
query_info["entity_tokens"] = _expand_query_terms(list(query_info.get("entity_tokens") or []), vocabulary)
|
|
query_info["anchor_tokens"] = _expand_query_terms(list(query_info.get("anchor_tokens") or []), vocabulary)
|
|
limit = _coerce_limit(limit, default=25, maximum=100)
|
|
out: list[dict[str, Any]] = []
|
|
candidate_ids: set[int] = set()
|
|
anchor_tokens = list(query_info.get("anchor_tokens") or [])
|
|
entity_tokens = list(query_info.get("entity_tokens") or [])
|
|
for token in anchor_tokens + entity_tokens:
|
|
candidate_ids.update(postings.get(token, set()))
|
|
if not candidate_ids:
|
|
candidate_ids = {
|
|
int(doc["id"])
|
|
for doc in docs
|
|
if doc.get("layer") in layer_set
|
|
}
|
|
|
|
for doc_id in candidate_ids:
|
|
if doc_id >= len(docs):
|
|
continue
|
|
doc = docs[doc_id]
|
|
layer = str(doc.get("layer") or "")
|
|
if layer not in layer_set:
|
|
continue
|
|
item = doc.get("candidate")
|
|
spec = doc.get("spec")
|
|
if not isinstance(item, dict) or not isinstance(spec, dict):
|
|
continue
|
|
match = _score_candidate(item, query_info, spec, layer)
|
|
if not match:
|
|
continue
|
|
out.append(
|
|
_compact_search_result(
|
|
layer,
|
|
item,
|
|
spec,
|
|
int(match["score"]),
|
|
matched_tokens=list(match.get("matched_tokens") or []),
|
|
confidence=float(match.get("confidence", 0.0) or 0.0),
|
|
)
|
|
)
|
|
|
|
out.sort(
|
|
key=lambda result: (
|
|
int(result.get("score", 0) or 0),
|
|
float(result.get("confidence", 0.0) or 0.0),
|
|
str(result.get("time", "")),
|
|
str(result.get("label", "")),
|
|
),
|
|
reverse=True,
|
|
)
|
|
truncated = len(out) > limit
|
|
limited = out[:limit]
|
|
grouped: dict[str, list[dict[str, Any]]] = {}
|
|
for result in limited:
|
|
grouped.setdefault(str(result.get("group") or "other"), []).append(result)
|
|
return {
|
|
"results": limited,
|
|
"groups": [
|
|
{
|
|
"group": group,
|
|
"count": len(results),
|
|
"results": results,
|
|
}
|
|
for group, results in sorted(grouped.items(), key=lambda item: (-len(item[1]), item[0]))
|
|
],
|
|
"version": get_data_version(),
|
|
"truncated": truncated,
|
|
"searched_layers": searchable_layers,
|
|
}
|
|
|
|
|
|
def get_layer_slice(
|
|
*,
|
|
layers: list[str] | tuple[str, ...],
|
|
limit_per_layer: int | None = None,
|
|
since_version: int | None = None,
|
|
since_layer_versions: dict[str, int] | None = None,
|
|
) -> dict[str, Any]:
|
|
"""Return only the requested top-level telemetry layers, optionally version-gated.
|
|
|
|
Two incremental modes (``since_layer_versions`` takes precedence):
|
|
|
|
1. **Global** (``since_version``): cheap all-or-nothing check against a
|
|
single monotonic counter. Almost never returns "no change" because
|
|
*any* layer update bumps the counter.
|
|
|
|
2. **Per-layer** (``since_layer_versions``): the agent sends a dict of
|
|
``{layer_name: version}`` representing the versions it already holds.
|
|
Only layers whose server-side version is *newer* than the agent's
|
|
version are serialized and returned. Layers the agent is already
|
|
current on are omitted entirely — zero serialization, zero transfer.
|
|
This is the preferred mode for SSE-connected agents.
|
|
"""
|
|
current_version = get_data_version()
|
|
current_layer_versions = get_layer_versions()
|
|
limit_per_layer = _coerce_optional_limit(limit_per_layer)
|
|
available_layers = set(_available_layer_names())
|
|
requested: list[str] = []
|
|
seen: set[str] = set()
|
|
for layer in layers or []:
|
|
canonical = _LAYER_ALIASES.get(_norm_key(layer), _norm_key(layer))
|
|
if canonical in available_layers and canonical not in seen:
|
|
seen.add(canonical)
|
|
requested.append(canonical)
|
|
|
|
# --- Per-layer incremental (preferred) ---
|
|
if since_layer_versions is not None and isinstance(since_layer_versions, dict):
|
|
# Determine which requested layers actually changed
|
|
stale_layers: list[str] = []
|
|
for layer in requested:
|
|
agent_ver = since_layer_versions.get(layer)
|
|
server_ver = current_layer_versions.get(layer, 0)
|
|
if agent_ver is None or int(agent_ver) < server_ver:
|
|
stale_layers.append(layer)
|
|
|
|
if not stale_layers:
|
|
return {
|
|
"version": current_version,
|
|
"layer_versions": {l: current_layer_versions.get(l, 0) for l in requested},
|
|
"changed": False,
|
|
"layers": {},
|
|
"requested_layers": requested,
|
|
"missing_layers": [],
|
|
"truncated": {},
|
|
}
|
|
# Only serialize the stale layers
|
|
requested_to_serialize = stale_layers
|
|
else:
|
|
# --- Global incremental (legacy fallback) ---
|
|
if since_version is not None:
|
|
try:
|
|
requested_version = int(since_version)
|
|
except (TypeError, ValueError):
|
|
requested_version = -1
|
|
if requested_version == current_version:
|
|
return {
|
|
"version": current_version,
|
|
"layer_versions": {l: current_layer_versions.get(l, 0) for l in requested},
|
|
"changed": False,
|
|
"layers": {},
|
|
"requested_layers": requested,
|
|
"missing_layers": [],
|
|
"truncated": {},
|
|
}
|
|
requested_to_serialize = requested
|
|
|
|
if not requested:
|
|
return {
|
|
"version": current_version,
|
|
"layer_versions": current_layer_versions,
|
|
"changed": True,
|
|
"layers": {},
|
|
"requested_layers": [],
|
|
"missing_layers": list(layers or []),
|
|
"available_layers": sorted(available_layers),
|
|
"truncated": {},
|
|
}
|
|
|
|
snap = get_latest_data_subset_refs(*requested_to_serialize)
|
|
result: dict[str, Any] = {}
|
|
truncated: dict[str, int] = {}
|
|
for layer in requested_to_serialize:
|
|
value = snap.get(layer)
|
|
if isinstance(value, list):
|
|
if limit_per_layer is None:
|
|
result[layer] = list(value)
|
|
else:
|
|
result[layer] = list(value[:limit_per_layer])
|
|
if len(value) > limit_per_layer:
|
|
truncated[layer] = len(value) - limit_per_layer
|
|
continue
|
|
if isinstance(value, dict):
|
|
compact: dict[str, Any] = {}
|
|
for key, item in value.items():
|
|
if isinstance(item, list):
|
|
if limit_per_layer is None:
|
|
compact[key] = list(item)
|
|
else:
|
|
compact[key] = list(item[:limit_per_layer])
|
|
if len(item) > limit_per_layer:
|
|
truncated[f"{layer}.{key}"] = len(item) - limit_per_layer
|
|
else:
|
|
compact[key] = item
|
|
result[layer] = compact
|
|
continue
|
|
result[layer] = value
|
|
|
|
missing = [
|
|
layer for layer in layers or []
|
|
if _LAYER_ALIASES.get(_norm_key(layer), _norm_key(layer)) not in requested
|
|
]
|
|
return {
|
|
"version": current_version,
|
|
"layer_versions": {l: current_layer_versions.get(l, 0) for l in requested},
|
|
"changed": True,
|
|
"layers": result,
|
|
"requested_layers": requested,
|
|
"missing_layers": missing,
|
|
"available_layers": sorted(available_layers),
|
|
"truncated": truncated,
|
|
}
|
|
|
|
|
|
def entities_near(
|
|
*,
|
|
lat: float,
|
|
lng: float,
|
|
radius_km: float = 50,
|
|
entity_types: list[str] | tuple[str, ...] | None = None,
|
|
limit: int = 25,
|
|
) -> dict[str, Any]:
|
|
"""Return a compact proximity search across selected telemetry layers."""
|
|
center_lat = _coerce_float(lat)
|
|
center_lng = _coerce_float(lng)
|
|
radius = _coerce_float(radius_km)
|
|
if center_lat is None or center_lng is None:
|
|
return {"results": [], "version": get_data_version(), "truncated": False}
|
|
if radius is None:
|
|
radius = 50.0
|
|
radius = max(1.0, min(5000.0, radius))
|
|
limit = _coerce_limit(limit)
|
|
layers = _resolve_layers(
|
|
entity_types,
|
|
_ENTITY_LAYER_ALIASES,
|
|
("tracked_flights", "military_flights", "private_jets", "ships", "uavs", "satellites"),
|
|
)
|
|
snap = get_latest_data_subset_refs(*layers)
|
|
out: list[dict[str, Any]] = []
|
|
|
|
for layer in layers:
|
|
items = snap.get(layer) or []
|
|
if isinstance(items, dict):
|
|
items = items.get("vessels", []) or items.get("items", [])
|
|
if not isinstance(items, list):
|
|
continue
|
|
for item in items:
|
|
if not isinstance(item, dict):
|
|
continue
|
|
item_lat = _coerce_float(item.get("lat") or item.get("latitude"))
|
|
item_lng = _coerce_float(item.get("lng") or item.get("lon") or item.get("longitude"))
|
|
if item_lat is None or item_lng is None:
|
|
continue
|
|
distance = _haversine_km(center_lat, center_lng, item_lat, item_lng)
|
|
if distance > radius:
|
|
continue
|
|
out.append(
|
|
{
|
|
"source_layer": layer,
|
|
"label": item.get("callsign")
|
|
or item.get("flight")
|
|
or item.get("name")
|
|
or item.get("shipName")
|
|
or item.get("title")
|
|
or item.get("id")
|
|
or item.get("norad_id")
|
|
or "",
|
|
"lat": item_lat,
|
|
"lng": item_lng,
|
|
"distance_km": round(distance, 2),
|
|
"type": item.get("type")
|
|
or item.get("shipType")
|
|
or item.get("category")
|
|
or item.get("t")
|
|
or "",
|
|
"id": item.get("icao24")
|
|
or item.get("mmsi")
|
|
or item.get("id")
|
|
or item.get("norad_id")
|
|
or "",
|
|
}
|
|
)
|
|
if len(out) >= limit:
|
|
out.sort(key=lambda entry: entry.get("distance_km", 0))
|
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
|
|
|
out.sort(key=lambda entry: entry.get("distance_km", 0))
|
|
return {"results": out, "version": get_data_version(), "truncated": False}
|