mirror of https://github.com/BigBodyCobain/Shadowbroker.git (synced 2026-05-07 18:06:49 +02:00)
"""
|
|
Emergent Intelligence — Cross-layer correlation engine.
|
|
|
|
Scans co-located events across multiple data layers and emits composite
|
|
alerts that no single source could generate alone.
|
|
|
|
Correlation types:
|
|
- RF Anomaly: GPS jamming + internet outage (both required)
|
|
- Military Buildup: Military flights + naval vessels + GDELT conflict events
|
|
- Infrastructure Cascade: Internet outage + KiwiSDR offline in same zone
|
|
- Possible Contradiction: Official denial/statement + infrastructure disruption
|
|
in same region — hypothesis generator, NOT verdict
|
|
"""

import logging
import math
import re
from collections import defaultdict

logger = logging.getLogger(__name__)

# Grid cell size in degrees — 1° ≈ 111 km at equator.
# Tighter than the previous 2° to reduce false co-locations.
_CELL_SIZE = 1

# Quality gates for RF anomaly correlation — only high-confidence inputs.
# GPS jamming + internet outage overlap in a 111 km cell is easily a coincidence
# (IODA returns ~100 regional outages; GPS NACp dips are common in busy airspace).
# Only fire when the evidence is strong enough to indicate deliberate RF interference.
_RF_CORR_MIN_GPS_RATIO = 0.60  # Need strong jamming signal, not marginal NACp dips
_RF_CORR_MIN_OUTAGE_PCT = 40   # Need a serious outage, not routine BGP fluctuation
_RF_CORR_MIN_INDICATORS = 3    # Require 3+ corroborating signals (not just GPS+outage)


def _cell_key(lat: float, lng: float) -> str:
    """Convert lat/lng to a grid cell key."""
    clat = int(lat // _CELL_SIZE) * _CELL_SIZE
    clng = int(lng // _CELL_SIZE) * _CELL_SIZE
    return f"{clat},{clng}"


def _cell_center(key: str) -> tuple[float, float]:
    """Get center lat/lng from a cell key."""
    parts = key.split(",")
    return float(parts[0]) + _CELL_SIZE / 2, float(parts[1]) + _CELL_SIZE / 2
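# A quick sanity check of the grid mapping (illustrative values, not a test suite):
#     _cell_key(48.85, 2.35)    -> "48,2"
#     _cell_key(-33.87, 151.21) -> "-34,151"   (floor division, so negatives round down)
#     _cell_center("48,2")      -> (48.5, 2.5)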


def _severity(indicator_count: int) -> str:
    if indicator_count >= 3:
        return "high"
    if indicator_count >= 2:
        return "medium"
    return "low"


def _severity_score(sev: str) -> float:
    return {"high": 90, "medium": 60, "low": 30}.get(sev, 0)


def _outage_pct(outage: dict) -> float:
    """Extract outage severity percentage from an outage dict."""
    return float(outage.get("severity", 0) or outage.get("severity_pct", 0) or 0)


# ---------------------------------------------------------------------------
# RF Anomaly: GPS jamming + internet outage (both must be present)
# ---------------------------------------------------------------------------


def _detect_rf_anomalies(data: dict) -> list[dict]:
    gps_jamming = data.get("gps_jamming") or []
    internet_outages = data.get("internet_outages") or []

    if not gps_jamming:
        return []  # No GPS jamming → no RF anomalies possible

    # Build grid of indicators
    cells: dict[str, dict] = defaultdict(lambda: {
        "gps_jam": False, "gps_ratio": 0.0,
        "outage": False, "outage_pct": 0.0,
    })

    for z in gps_jamming:
        lat, lng = z.get("lat"), z.get("lng")
        if lat is None or lng is None:
            continue
        ratio = z.get("ratio", 0)
        if ratio < _RF_CORR_MIN_GPS_RATIO:
            continue  # Skip marginal jamming zones
        key = _cell_key(lat, lng)
        cells[key]["gps_jam"] = True
        cells[key]["gps_ratio"] = max(cells[key]["gps_ratio"], ratio)

    for o in internet_outages:
        lat = o.get("lat") or o.get("latitude")
        lng = o.get("lng") or o.get("lon") or o.get("longitude")
        if lat is None or lng is None:
            continue
        pct = _outage_pct(o)
        if pct < _RF_CORR_MIN_OUTAGE_PCT:
            continue  # Skip minor outages (ISP maintenance noise)
        key = _cell_key(float(lat), float(lng))
        cells[key]["outage"] = True
        cells[key]["outage_pct"] = max(cells[key]["outage_pct"], pct)

    # PSK Reporter: presence = healthy RF. Only used as a bonus indicator,
    # NOT as a standalone trigger (absence is normal in most cells).
    psk_reporter = data.get("psk_reporter") or []
    psk_cells: set[str] = set()
    for s in psk_reporter:
        lat, lng = s.get("lat"), s.get("lon")
        if lat is not None and lng is not None:
            psk_cells.add(_cell_key(lat, lng))

    # When PSK data is unavailable, we can't get a 3rd indicator, so require
    # an even higher GPS jamming ratio to compensate (real EW shows 75%+).
    psk_available = len(psk_reporter) > 0

    alerts: list[dict] = []
    for key, c in cells.items():
        # GPS jamming is the anchor — required for every RF anomaly alert
        if not c["gps_jam"]:
            continue
        if not c["outage"]:
            continue  # Both GPS jamming AND outage are always required

        indicators = 2  # GPS jamming + outage
        drivers: list[str] = [f"GPS jamming {int(c['gps_ratio'] * 100)}%"]
        pct = c["outage_pct"]
        drivers.append(f"Internet outage{f' {pct:.0f}%' if pct else ''}")

        # PSK absence confirms RF environment is disrupted
        if psk_available and key not in psk_cells:
            indicators += 1
            drivers.append("No HF digital activity (PSK Reporter)")

        if indicators < _RF_CORR_MIN_INDICATORS:
            # Without PSK data, only allow through if GPS ratio is extreme
            # (75%+ indicates deliberate, sustained jamming — not noise)
            if not psk_available and c["gps_ratio"] >= 0.75 and pct >= 50:
                pass  # Allow this high-confidence 2-indicator alert through
            else:
                continue

        lat, lng = _cell_center(key)
        sev = _severity(indicators)
        alerts.append({
            "lat": lat,
            "lng": lng,
            "type": "rf_anomaly",
            "severity": sev,
            "score": _severity_score(sev),
            "drivers": drivers[:3],
            "cell_size": _CELL_SIZE,
        })

    return alerts
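# Every detector in this module emits the same alert shape, e.g. (values illustrative):
#     {"lat": 50.5, "lng": 30.5, "type": "rf_anomaly", "severity": "high",
#      "score": 90, "drivers": ["GPS jamming 82%", ...], "cell_size": 1}
# Contradiction alerts add extra keys (context, alternatives, headlines, ...).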


# ---------------------------------------------------------------------------
# Military Buildup: flights + ships + GDELT conflict
# ---------------------------------------------------------------------------


def _detect_military_buildups(data: dict) -> list[dict]:
    mil_flights = data.get("military_flights") or []
    ships = data.get("ships") or []
    gdelt = data.get("gdelt") or []

    cells: dict[str, dict] = defaultdict(lambda: {
        "mil_flights": 0, "mil_ships": 0, "gdelt_events": 0,
    })

    for f in mil_flights:
        lat = f.get("lat") or f.get("latitude")
        lng = f.get("lng") or f.get("lon") or f.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["mil_flights"] += 1
        except (ValueError, TypeError):
            continue

    mil_ship_types = {"military_vessel", "military", "warship", "patrol", "destroyer",
                      "frigate", "corvette", "carrier", "submarine", "cruiser"}
    for s in ships:
        stype = (s.get("type") or s.get("ship_type") or "").lower()
        if not any(mt in stype for mt in mil_ship_types):
            continue
        lat = s.get("lat") or s.get("latitude")
        lng = s.get("lng") or s.get("lon") or s.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["mil_ships"] += 1
        except (ValueError, TypeError):
            continue

    for g in gdelt:
        lat = g.get("lat") or g.get("latitude") or g.get("actionGeo_Lat")
        lng = g.get("lng") or g.get("lon") or g.get("longitude") or g.get("actionGeo_Long")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["gdelt_events"] += 1
        except (ValueError, TypeError):
            continue

    alerts: list[dict] = []
    for key, c in cells.items():
        mil_total = c["mil_flights"] + c["mil_ships"]
        has_gdelt = c["gdelt_events"] > 0

        # Need meaningful military presence AND a conflict indicator
        if mil_total < 3 or not has_gdelt:
            continue

        drivers: list[str] = []
        if c["mil_flights"]:
            drivers.append(f"{c['mil_flights']} military aircraft")
        if c["mil_ships"]:
            drivers.append(f"{c['mil_ships']} military vessels")
        if c["gdelt_events"]:
            drivers.append(f"{c['gdelt_events']} conflict events")

        if mil_total >= 11:
            sev = "high"
        elif mil_total >= 6:
            sev = "medium"
        else:
            sev = "low"

        lat, lng = _cell_center(key)
        alerts.append({
            "lat": lat,
            "lng": lng,
            "type": "military_buildup",
            "severity": sev,
            "score": _severity_score(sev),
            "drivers": drivers[:3],
            "cell_size": _CELL_SIZE,
        })

    return alerts


# ---------------------------------------------------------------------------
# Infrastructure Cascade: outage + KiwiSDR co-location
#
# Power plants are removed from this detector — with 35K plants globally,
# virtually every 2° cell (the engine's previous grid size) contains one,
# making every outage a false hit.
# KiwiSDR receivers (~300 worldwide) are sparse enough to be meaningful:
# an outage in the same cell as a KiwiSDR indicates real infrastructure
# disruption affecting radio monitoring capability.
# ---------------------------------------------------------------------------


def _detect_infra_cascades(data: dict) -> list[dict]:
    internet_outages = data.get("internet_outages") or []
    kiwisdr = data.get("kiwisdr") or []

    if not kiwisdr:
        return []

    # Build set of cells with KiwiSDR receivers
    kiwi_cells: set[str] = set()
    for k in kiwisdr:
        lat, lng = k.get("lat"), k.get("lon") or k.get("lng")
        if lat is not None and lng is not None:
            try:
                kiwi_cells.add(_cell_key(float(lat), float(lng)))
            except (ValueError, TypeError):
                pass

    if not kiwi_cells:
        return []

    alerts: list[dict] = []
    for o in internet_outages:
        lat = o.get("lat") or o.get("latitude")
        lng = o.get("lng") or o.get("lon") or o.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
        except (ValueError, TypeError):
            continue

        if key not in kiwi_cells:
            continue

        pct = _outage_pct(o)
        drivers = [f"Internet outage{f' {pct:.0f}%' if pct else ''}",
                   "KiwiSDR receivers in affected zone"]

        lat_c, lng_c = _cell_center(key)
        alerts.append({
            "lat": lat_c,
            "lng": lng_c,
            "type": "infra_cascade",
            "severity": "medium",
            "score": _severity_score("medium"),
            "drivers": drivers,
            "cell_size": _CELL_SIZE,
        })

    return alerts


# ---------------------------------------------------------------------------
# Possible Contradiction: official denial/statement + infra disruption
#
# This is a HYPOTHESIS GENERATOR, not a verdict engine. It says "LOOK HERE"
# when an official statement (denial, clarification, refusal) co-locates with
# infrastructure disruption (internet outage, sigint change). The human or
# higher-order reasoning decides what actually happened.
#
# Context ratings:
#   STRONG        — denial + outage + prediction market movement in same region
#   MODERATE      — denial + outage (no market signal)
#   WEAK          — denial + minor outage or distant co-location
#   DETECTION_GAP — denial found but NO telemetry to verify (equally valuable)
# ---------------------------------------------------------------------------

# Denial / official-statement patterns in headlines and URL slugs
_DENIAL_PATTERNS = [
    re.compile(p, re.IGNORECASE) for p in [
        r"\bden(?:y|ies|ied|ial)\b",
        r"\brefut(?:e[ds]?|ing)\b",
        r"\breject(?:s|ed|ing)?\b",
        r"\bclarif(?:y|ies|ied|ication)\b",
        r"\bdismiss(?:es|ed|ing)?\b",
        r"\bno\s+attack\b",
        r"\bdid\s+not\s+(?:attack|strike|bomb|target|order|invade|kill)\b",
        r"\bnever\s+(?:attack|strike|bomb|target|order|invade|happen)\b",
        r"\bfalse\s+(?:report|claim|allegation|rumor|narrative)\b",
        r"\bmisinformation\b",
        r"\bdisinformation\b",
        r"\bpropaganda\b",
        r"\b(?:army|military|government|ministry|official)\s+(?:says|clarifies|denies|refutes)\b",
        r"\brumor[s]?\b.*\buntrue\b",
        r"\bcategorically\b",
        r"\bbaseless\b",
    ]
]

# Broader search radius for sparse telemetry regions (Africa, Central Asia, etc.)
# These regions have fewer IODA/RIPE probes, so outage data is sparser.
_SPARSE_REGIONS_LAT_RANGES = [
    (-35, 37),  # Africa roughly
    (25, 50),   # Central Asia band (when lng 40-90)
]


def _is_sparse_region(lat: float, lng: float) -> bool:
    """Check if coordinates fall in a region with sparse telemetry coverage."""
    # Africa
    if -35 <= lat <= 37 and -20 <= lng <= 55:
        return True
    # Central Asia
    if 25 <= lat <= 50 and 40 <= lng <= 90:
        return True
    # South America interior
    if -55 <= lat <= 12 and -80 <= lng <= -35:
        return True
    return False
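# Illustrative checks (values picked by hand, not an exhaustive test):
#     _is_sparse_region(9.1, 7.4)   -> True   (West Africa band)
#     _is_sparse_region(48.9, 2.3)  -> False  (Western Europe, dense probe coverage)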


def _haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance in km."""
    R = 6371.0
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2 +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dlon / 2) ** 2)
    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
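# Sanity check: one degree of longitude along the equator is ~111.2 km, i.e.
#     _haversine_km(0.0, 0.0, 0.0, 1.0) ≈ 111.19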


def _matches_denial(text: str) -> bool:
    """Check if text matches any denial/official-statement pattern."""
    return any(p.search(text) for p in _DENIAL_PATTERNS)
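# Examples against the patterns above (illustrative strings, not from any feed):
#     _matches_denial("Ministry denies strike on depot")     -> True
#     _matches_denial("Officials call the report baseless")  -> True
#     _matches_denial("Flooding closes coastal highway")     -> False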


def _detect_contradictions(data: dict) -> list[dict]:
    """Detect possible contradictions between official statements and telemetry.

    Scans GDELT headlines for denial language, then checks whether internet
    outages or other infrastructure disruptions exist in the same geographic
    region. Scores confidence and lists alternative explanations.
    """
    gdelt = data.get("gdelt") or []
    internet_outages = data.get("internet_outages") or []
    news = data.get("news") or []
    prediction_markets = data.get("prediction_markets") or []

    # ── Step 1: Find GDELT events with denial/official-statement language ──
    denial_events: list[dict] = []

    # GDELT comes as GeoJSON features
    gdelt_features = gdelt
    if isinstance(gdelt, dict):
        gdelt_features = gdelt.get("features", [])

    for feature in gdelt_features:
        # Handle both GeoJSON features and flat dicts
        if "properties" in feature and "geometry" in feature:
            props = feature.get("properties", {})
            geom = feature.get("geometry", {})
            coords = geom.get("coordinates", [])
            if len(coords) >= 2:
                lng, lat = float(coords[0]), float(coords[1])
            else:
                continue
            headlines = props.get("_headlines_list", [])
            urls = props.get("_urls_list", [])
            name = props.get("name", "")
            count = props.get("count", 1)
        else:
            lat = feature.get("lat") or feature.get("actionGeo_Lat")
            lng = feature.get("lng") or feature.get("lon") or feature.get("actionGeo_Long")
            if lat is None or lng is None:
                continue
            lat, lng = float(lat), float(lng)
            headlines = [feature.get("title", "")]
            urls = [feature.get("sourceurl", "")]
            name = feature.get("name", "")
            count = 1

        # Check all headlines + URL slugs for denial patterns
        all_text = " ".join(str(h) for h in headlines if h)
        all_text += " " + " ".join(str(u) for u in urls if u)

        if _matches_denial(all_text):
            denial_events.append({
                "lat": lat,
                "lng": lng,
                "headlines": [h for h in headlines if h][:5],
                "urls": [u for u in urls if u][:3],
                "location_name": name,
                "event_count": count,
            })

    # Also scan news articles for denial language
    for article in news:
        title = str(article.get("title", "") or "")
        desc = str(article.get("description", "") or article.get("summary", "") or "")
        if not _matches_denial(title + " " + desc):
            continue
        # News articles often lack coordinates — try to match to GDELT locations.
        # For now, only include if we have coordinates.
        lat = article.get("lat") or article.get("latitude")
        lng = article.get("lng") or article.get("lon") or article.get("longitude")
        if lat is not None and lng is not None:
            denial_events.append({
                "lat": float(lat),
                "lng": float(lng),
                "headlines": [title],
                "urls": [article.get("url") or article.get("link") or ""],
                "location_name": "",
                "event_count": 1,
            })

    if not denial_events:
        return []

    # ── Step 2: Cross-reference with internet outages ──
    alerts: list[dict] = []

    for denial in denial_events:
        d_lat, d_lng = denial["lat"], denial["lng"]
        sparse = _is_sparse_region(d_lat, d_lng)
        search_radius_km = 1500.0 if sparse else 500.0

        # Find nearby outages
        nearby_outages: list[dict] = []
        for outage in internet_outages:
            o_lat = outage.get("lat") or outage.get("latitude")
            o_lng = outage.get("lng") or outage.get("lon") or outage.get("longitude")
            if o_lat is None or o_lng is None:
                continue
            try:
                dist = _haversine_km(d_lat, d_lng, float(o_lat), float(o_lng))
            except (ValueError, TypeError):
                continue
            if dist <= search_radius_km:
                nearby_outages.append({
                    "region": outage.get("region_name") or outage.get("country_name", ""),
                    "severity": _outage_pct(outage),
                    "distance_km": round(dist, 0),
                    "level": outage.get("level", ""),
                })

        # ── Step 3: Check prediction markets for related movements ──
        denial_text = " ".join(denial["headlines"]).lower()
        denial_words = set(re.findall(r"[a-z]{4,}", denial_text))
        related_markets: list[dict] = []
        for market in prediction_markets:
            m_title = str(market.get("title", "") or market.get("question", "") or "").lower()
            # Look for keyword overlap between denial and market
            market_words = set(re.findall(r"[a-z]{4,}", m_title))
            overlap = (denial_words & market_words) - {"that", "this", "with", "from", "have", "been", "were", "will", "says", "said"}
            if len(overlap) >= 2:
                prob = market.get("probability") or market.get("lastTradePrice") or market.get("yes_price")
                if prob is not None:
                    related_markets.append({
                        "title": market.get("title") or market.get("question"),
                        "probability": float(prob),
                    })

        # ── Step 4: Score confidence and assign context rating ──
        indicators = 1  # denial itself
        drivers: list[str] = []

        # Primary driver: the denial headline
        headline_display = denial["headlines"][0] if denial["headlines"] else "Official statement"
        if len(headline_display) > 80:
            headline_display = headline_display[:77] + "..."
        drivers.append(f'"{headline_display}"')

        # Outage co-location
        has_outage = False
        if nearby_outages:
            best_outage = max(nearby_outages, key=lambda o: o["severity"])
            if best_outage["severity"] >= 10:
                indicators += 1
                has_outage = True
                drivers.append(
                    f"Internet outage {best_outage['severity']:.0f}% "
                    f"({best_outage['region']}, {best_outage['distance_km']:.0f}km away)"
                )
            elif best_outage["severity"] > 0:
                indicators += 0.5  # minor outage, partial indicator
                has_outage = True
                drivers.append(
                    f"Minor outage ({best_outage['region']}, "
                    f"{best_outage['distance_km']:.0f}km away)"
                )

        # Prediction market signal
        has_market = False
        if related_markets:
            indicators += 1
            has_market = True
            top_market = related_markets[0]
            drivers.append(
                f"Market: \"{top_market['title'][:50]}\" "
                f"at {top_market['probability']:.0%}"
            )

        # Multiple denial sources strengthen the signal
        if denial["event_count"] > 1:
            indicators += 0.5
            drivers.append(f"{denial['event_count']} sources reporting")

        # Context rating
        if has_outage and has_market:
            context = "STRONG"
        elif has_outage:
            context = "MODERATE"
        elif has_market:
            context = "WEAK"  # market signal without infra disruption
        else:
            context = "DETECTION_GAP"

        # Severity mapping
        if context == "STRONG":
            sev = "high"
        elif context == "MODERATE":
            sev = "medium"
        else:
            sev = "low"

        # Alternative explanations (always present — this is a hypothesis generator)
        alternatives: list[str] = []
        if has_outage:
            alternatives.append("Routine infrastructure maintenance or cable damage")
            alternatives.append("Weather-related outage coinciding with news cycle")
        if not has_outage and context == "DETECTION_GAP":
            alternatives.append("Statement may be truthful — no contradicting telemetry found")
            alternatives.append("Telemetry coverage gap in this region")
        alternatives.append("Denial may be responding to social media rumors, not real events")

        lat_c, lng_c = _cell_center(_cell_key(d_lat, d_lng))
        alerts.append({
            "lat": lat_c,
            "lng": lng_c,
            "type": "contradiction",
            "severity": sev,
            "score": _severity_score(sev),
            "drivers": drivers[:4],
            "cell_size": _CELL_SIZE,
            "context": context,
            "alternatives": alternatives[:3],
            "location_name": denial.get("location_name", ""),
            "headlines": denial["headlines"][:3],
            "related_markets": related_markets[:3],
            "nearby_outages": nearby_outages[:5],
        })

    # Deduplicate: keep highest-scored alert per cell
    seen_cells: dict[str, dict] = {}
    for alert in alerts:
        key = _cell_key(alert["lat"], alert["lng"])
        if key not in seen_cells or alert["score"] > seen_cells[key]["score"]:
            seen_cells[key] = alert

    result = list(seen_cells.values())
    if result:
        by_context = defaultdict(int)
        for a in result:
            by_context[a["context"]] += 1
        logger.info(
            "Contradictions: %d possible (%s)",
            len(result),
            ", ".join(f"{v} {k}" for k, v in sorted(by_context.items())),
        )

    return result


# ---------------------------------------------------------------------------
# Correlation → Pin bridge
# ---------------------------------------------------------------------------

# Types and their pin categories
_CORR_PIN_CATEGORIES = {
    "rf_anomaly": "anomaly",
    "military_buildup": "military",
    "infra_cascade": "infrastructure",
    "contradiction": "research",
}

# Deduplicate: don't re-pin the same cell within this window (seconds).
_CORR_PIN_DEDUP_WINDOW = 600  # 10 minutes
_recent_corr_pins: dict[str, float] = {}


def _auto_pin_correlations(alerts: list[dict]) -> int:
    """Create AI Intel pins for high-severity correlation alerts.

    Only pins alerts with severity >= medium. Uses cell-key dedup so the
    same grid cell doesn't get re-pinned every fetch cycle.

    Returns the number of pins created this cycle.
    """
    import time as _time

    now = _time.time()

    # Evict stale dedup entries
    expired = [k for k, ts in _recent_corr_pins.items() if now - ts > _CORR_PIN_DEDUP_WINDOW]
    for k in expired:
        _recent_corr_pins.pop(k, None)

    created = 0
    for alert in alerts:
        sev = alert.get("severity", "low")
        if sev == "low":
            continue  # Don't pin low-severity noise

        lat = alert.get("lat")
        lng = alert.get("lng")
        if lat is None or lng is None:
            continue

        # Dedup key: type + cell
        dedup_key = f"{alert['type']}:{_cell_key(lat, lng)}"
        if dedup_key in _recent_corr_pins:
            continue

        category = _CORR_PIN_CATEGORIES.get(alert["type"], "anomaly")
        drivers = alert.get("drivers", [])
        atype = alert["type"]

        if atype == "contradiction":
            ctx = alert.get("context", "")
            label = f"[{ctx}] Possible Contradiction"
            parts = list(drivers)
            if alert.get("alternatives"):
                parts.append("Alternatives: " + "; ".join(alert["alternatives"][:2]))
            description = " | ".join(parts) if parts else "Narrative contradiction detected"
        else:
            label = f"[{sev.upper()}] {atype.replace('_', ' ').title()}"
            description = "; ".join(drivers) if drivers else "Multi-layer correlation alert"

        try:
            from services.ai_pin_store import create_pin

            meta = {
                "correlation_type": atype,
                "severity": sev,
                "drivers": drivers,
                "cell_size": alert.get("cell_size", _CELL_SIZE),
            }
            # Add contradiction-specific metadata
            if atype == "contradiction":
                meta["context_rating"] = alert.get("context", "")
                meta["alternatives"] = alert.get("alternatives", [])
                meta["headlines"] = alert.get("headlines", [])
                meta["location_name"] = alert.get("location_name", "")
                if alert.get("related_markets"):
                    meta["related_markets"] = alert["related_markets"]

            create_pin(
                lat=lat,
                lng=lng,
                label=label,
                category=category,
                description=description,
                source="correlation_engine",
                confidence=alert.get("score", 60) / 100.0,
                ttl_hours=2.0,  # Auto-expire correlation pins after 2 hours
                metadata=meta,
            )
            _recent_corr_pins[dedup_key] = now
            created += 1
        except Exception as exc:
            logger.warning("Failed to auto-pin correlation: %s", exc)

    if created:
        logger.info("Correlation engine auto-pinned %d alerts", created)
    return created
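# Note: _auto_pin_correlations is intentionally NOT called from compute_correlations
# (see the comment at the end of that function). A scheduler that wanted auto-pinning
# back could wire it up itself, along the lines of:
#     _auto_pin_correlations(compute_correlations(data))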


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def compute_correlations(data: dict) -> list[dict]:
    """Run all correlation detectors and return merged alert list."""
    alerts: list[dict] = []

    try:
        alerts.extend(_detect_rf_anomalies(data))
    except Exception as e:
        logger.error("Correlation engine RF anomaly error: %s", e)

    try:
        alerts.extend(_detect_military_buildups(data))
    except Exception as e:
        logger.error("Correlation engine military buildup error: %s", e)

    try:
        alerts.extend(_detect_infra_cascades(data))
    except Exception as e:
        logger.error("Correlation engine infra cascade error: %s", e)

    # Contradiction detection removed from automated engine — too many false
    # positives from regex headline matching. Contradiction/analysis alerts are
    # now placed by OpenClaw agents via place_analysis_zone, which lets an LLM
    # reason about the evidence rather than pattern-matching keywords.
    try:
        from services.analysis_zone_store import get_live_zones
        alerts.extend(get_live_zones())
    except Exception as e:
        logger.error("Analysis zone merge error: %s", e)

    rf = sum(1 for a in alerts if a["type"] == "rf_anomaly")
    mil = sum(1 for a in alerts if a["type"] == "military_buildup")
    infra = sum(1 for a in alerts if a["type"] == "infra_cascade")
    contra = sum(1 for a in alerts if a["type"] == "contradiction")
    if alerts:
        logger.info(
            "Correlations: %d alerts (%d rf, %d mil, %d infra, %d contra)",
            len(alerts), rf, mil, infra, contra,
        )

    # Correlation alerts are returned in the correlations data feed only.
    # They are NOT auto-pinned to AI Intel — that layer is reserved for
    # user / OpenClaw pins. Correlations are visualised via the dedicated
    # correlations overlay on the map.

    return alerts
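

# A minimal smoke test when running this module directly. The feed shapes below
# are assumptions inferred from the detectors above, not a documented schema;
# the analysis-zone merge will simply log an error if that service is absent.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample = {
        # Strong jamming plus a serious outage in the same 1° cell -> rf_anomaly
        "gps_jamming": [{"lat": 50.4, "lng": 30.5, "ratio": 0.82}],
        "internet_outages": [{"lat": 50.6, "lng": 30.3, "severity": 55}],
        # Four aircraft, two warships and one conflict event in one cell -> military_buildup
        "military_flights": [{"lat": 35.1, "lon": 33.3} for _ in range(4)],
        "ships": [{"lat": 35.2, "lon": 33.4, "type": "frigate"} for _ in range(2)],
        "gdelt": [{"lat": 35.15, "lng": 33.35}],
    }
    for a in compute_correlations(sample):
        print(a["type"], a["severity"], a["drivers"])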