""" Emergent Intelligence — Cross-layer correlation engine. Scans co-located events across multiple data layers and emits composite alerts that no single source could generate alone. Correlation types: - RF Anomaly: GPS jamming + internet outage (both required) - Military Buildup: Military flights + naval vessels + GDELT conflict events - Infrastructure Cascade: Internet outage + KiwiSDR offline in same zone - Possible Contradiction: Official denial/statement + infrastructure disruption in same region — hypothesis generator, NOT verdict """ import logging import math import re from collections import defaultdict logger = logging.getLogger(__name__) # Grid cell size in degrees — 1° ≈ 111 km at equator. # Tighter than the previous 2° to reduce false co-locations. _CELL_SIZE = 1 # Quality gates for RF anomaly correlation — only high-confidence inputs. # GPS jamming + internet outage overlap in a 111km cell is easily a coincidence # (IODA returns ~100 regional outages; GPS NACp dips are common in busy airspace). # Only fire when the evidence is strong enough to indicate deliberate RF interference. 
_RF_CORR_MIN_GPS_RATIO = 0.60  # Need strong jamming signal, not marginal NACp dips
_RF_CORR_MIN_OUTAGE_PCT = 40  # Need a serious outage, not routine BGP fluctuation
_RF_CORR_MIN_INDICATORS = 3  # Require 3+ corroborating signals (not just GPS+outage)


def _cell_key(lat: float, lng: float) -> str:
    """Convert lat/lng to a grid cell key.

    Floor-snaps both coordinates to the _CELL_SIZE grid (floor division
    handles negative coordinates correctly) and returns "lat,lng" of the
    cell's south-west corner.
    """
    clat = int(lat // _CELL_SIZE) * _CELL_SIZE
    clng = int(lng // _CELL_SIZE) * _CELL_SIZE
    return f"{clat},{clng}"


def _cell_center(key: str) -> tuple[float, float]:
    """Get center lat/lng from a cell key (inverse of _cell_key, +half cell)."""
    parts = key.split(",")
    return float(parts[0]) + _CELL_SIZE / 2, float(parts[1]) + _CELL_SIZE / 2


def _severity(indicator_count: int) -> str:
    """Map a corroborating-indicator count to a severity label (3+→high, 2→medium)."""
    if indicator_count >= 3:
        return "high"
    if indicator_count >= 2:
        return "medium"
    return "low"


def _severity_score(sev: str) -> float:
    """Map a severity label to a numeric score (unknown labels score 0)."""
    return {"high": 90, "medium": 60, "low": 30}.get(sev, 0)


def _outage_pct(outage: dict) -> float:
    """Extract outage severity percentage from an outage dict.

    Tries "severity" first, then "severity_pct"; a falsy value (0, None, "")
    falls through to the next key, defaulting to 0.0.
    """
    return float(outage.get("severity", 0) or outage.get("severity_pct", 0) or 0)


# ---------------------------------------------------------------------------
# RF Anomaly: GPS jamming + internet outage (both must be present)
# ---------------------------------------------------------------------------

def _detect_rf_anomalies(data: dict) -> list[dict]:
    """Correlate GPS jamming zones with internet outages per grid cell.

    Returns a list of "rf_anomaly" alert dicts. GPS jamming above
    _RF_CORR_MIN_GPS_RATIO is the required anchor; an outage above
    _RF_CORR_MIN_OUTAGE_PCT in the same cell is also required. PSK Reporter
    silence in the cell (when PSK data exists) supplies the third indicator.
    """
    gps_jamming = data.get("gps_jamming") or []
    internet_outages = data.get("internet_outages") or []
    if not gps_jamming:
        return []  # No GPS jamming → no RF anomalies possible

    # Build grid of indicators
    cells: dict[str, dict] = defaultdict(lambda: {
        "gps_jam": False,
        "gps_ratio": 0.0,
        "outage": False,
        "outage_pct": 0.0,
    })

    for z in gps_jamming:
        lat, lng = z.get("lat"), z.get("lng")
        if lat is None or lng is None:
            continue
        ratio = z.get("ratio", 0)
        if ratio < _RF_CORR_MIN_GPS_RATIO:
            continue  # Skip marginal jamming zones
        # NOTE(review): no float() here, unlike the outage loop below —
        # assumes the jamming feed always supplies numeric lat/lng; confirm.
        key = _cell_key(lat, lng)
        cells[key]["gps_jam"] = True
        cells[key]["gps_ratio"] = max(cells[key]["gps_ratio"], ratio)

    for o in internet_outages:
        # NOTE(review): `or`-chaining skips a legitimate 0.0 coordinate
        # (equator / prime meridian) to the next key; verify feeds never
        # emit 0.0 for a real location.
        lat = o.get("lat") or o.get("latitude")
        lng = o.get("lng") or o.get("lon") or o.get("longitude")
        if lat is None or lng is None:
            continue
        pct = _outage_pct(o)
        if pct < _RF_CORR_MIN_OUTAGE_PCT:
            continue  # Skip minor outages (ISP maintenance noise)
        key = _cell_key(float(lat), float(lng))
        cells[key]["outage"] = True
        cells[key]["outage_pct"] = max(cells[key]["outage_pct"], pct)

    # PSK Reporter: presence = healthy RF. Only used as a bonus indicator,
    # NOT as a standalone trigger (absence is normal in most cells).
    psk_reporter = data.get("psk_reporter") or []
    psk_cells: set[str] = set()
    for s in psk_reporter:
        lat, lng = s.get("lat"), s.get("lon")
        if lat is not None and lng is not None:
            psk_cells.add(_cell_key(lat, lng))

    # When PSK data is unavailable, we can't get a 3rd indicator, so require
    # an even higher GPS jamming ratio to compensate (real EW shows 75%+).
    psk_available = len(psk_reporter) > 0

    alerts: list[dict] = []
    for key, c in cells.items():
        # GPS jamming is the anchor — required for every RF anomaly alert
        if not c["gps_jam"]:
            continue
        if not c["outage"]:
            continue  # Both GPS jamming AND outage are always required

        indicators = 2  # GPS jamming + outage
        drivers: list[str] = [f"GPS jamming {int(c['gps_ratio'] * 100)}%"]
        pct = c["outage_pct"]
        drivers.append(f"Internet outage{f' {pct:.0f}%' if pct else ''}")

        # PSK absence confirms RF environment is disrupted
        if psk_available and key not in psk_cells:
            indicators += 1
            drivers.append("No HF digital activity (PSK Reporter)")

        if indicators < _RF_CORR_MIN_INDICATORS:
            # Without PSK data, only allow through if GPS ratio is extreme
            # (75%+ indicates deliberate, sustained jamming — not noise)
            if not psk_available and c["gps_ratio"] >= 0.75 and pct >= 50:
                pass  # Allow this high-confidence 2-indicator alert through
            else:
                continue

        lat, lng = _cell_center(key)
        sev = _severity(indicators)
        alerts.append({
            "lat": lat,
            "lng": lng,
            "type": "rf_anomaly",
            "severity": sev,
            "score": _severity_score(sev),
            "drivers": drivers[:3],
            "cell_size": _CELL_SIZE,
        })

    return alerts


# ---------------------------------------------------------------------------
# Military Buildup: flights + ships + GDELT conflict
# ---------------------------------------------------------------------------

def _detect_military_buildups(data: dict) -> list[dict]:
    """Correlate military flights, military vessels and GDELT conflict events.

    A cell fires when (flights + ships) >= 3 AND at least one GDELT conflict
    event is co-located. Severity scales with total military asset count
    (>=11 high, >=6 medium, else low).
    """
    mil_flights = data.get("military_flights") or []
    ships = data.get("ships") or []
    gdelt = data.get("gdelt") or []

    cells: dict[str, dict] = defaultdict(lambda: {
        "mil_flights": 0,
        "mil_ships": 0,
        "gdelt_events": 0,
    })

    for f in mil_flights:
        lat = f.get("lat") or f.get("latitude")
        lng = f.get("lng") or f.get("lon") or f.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["mil_flights"] += 1
        except (ValueError, TypeError):
            continue

    # Substring match against the vessel's reported type — e.g. "Patrol Boat"
    # matches "patrol".
    mil_ship_types = {"military_vessel", "military", "warship", "patrol",
                      "destroyer", "frigate", "corvette", "carrier",
                      "submarine", "cruiser"}
    for s in ships:
        stype = (s.get("type") or s.get("ship_type") or "").lower()
        if not any(mt in stype for mt in mil_ship_types):
            continue
        lat = s.get("lat") or s.get("latitude")
        lng = s.get("lng") or s.get("lon") or s.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["mil_ships"] += 1
        except (ValueError, TypeError):
            continue

    for g in gdelt:
        lat = g.get("lat") or g.get("latitude") or g.get("actionGeo_Lat")
        lng = g.get("lng") or g.get("lon") or g.get("longitude") or g.get("actionGeo_Long")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
            cells[key]["gdelt_events"] += 1
        except (ValueError, TypeError):
            continue

    alerts: list[dict] = []
    for key, c in cells.items():
        mil_total = c["mil_flights"] + c["mil_ships"]
        has_gdelt = c["gdelt_events"] > 0
        # Need meaningful military presence AND a conflict indicator
        if mil_total < 3 or not has_gdelt:
            continue
        drivers: list[str] = []
        if c["mil_flights"]:
            drivers.append(f"{c['mil_flights']} military aircraft")
        if c["mil_ships"]:
            drivers.append(f"{c['mil_ships']} military vessels")
        if c["gdelt_events"]:
            drivers.append(f"{c['gdelt_events']} conflict events")
        if mil_total >= 11:
            sev = "high"
        elif mil_total >= 6:
            sev = "medium"
        else:
            sev = "low"
        lat, lng = _cell_center(key)
        alerts.append({
            "lat": lat,
            "lng": lng,
            "type": "military_buildup",
            "severity": sev,
            "score": _severity_score(sev),
            "drivers": drivers[:3],
            "cell_size": _CELL_SIZE,
        })
    return alerts


# ---------------------------------------------------------------------------
# Infrastructure Cascade: outage + KiwiSDR co-location
#
# Power plants are removed from this detector — with 35K plants globally,
# virtually every 2° cell contains one, making every outage a false hit.
# KiwiSDR receivers (~300 worldwide) are sparse enough to be meaningful:
# an outage in the same cell as a KiwiSDR indicates real infrastructure
# disruption affecting radio monitoring capability.
# ---------------------------------------------------------------------------

def _detect_infra_cascades(data: dict) -> list[dict]:
    """Flag internet outages that land in the same cell as a KiwiSDR receiver.

    Emits one fixed-severity "infra_cascade" alert per matching outage
    (no per-cell dedup — multiple outages in a KiwiSDR cell each alert).
    """
    internet_outages = data.get("internet_outages") or []
    kiwisdr = data.get("kiwisdr") or []
    if not kiwisdr:
        return []

    # Build set of cells with KiwiSDR receivers
    kiwi_cells: set[str] = set()
    for k in kiwisdr:
        # `or` binds inside the tuple: lng falls back from "lon" to "lng"
        lat, lng = k.get("lat"), k.get("lon") or k.get("lng")
        if lat is not None and lng is not None:
            try:
                kiwi_cells.add(_cell_key(float(lat), float(lng)))
            except (ValueError, TypeError):
                pass
    if not kiwi_cells:
        return []

    alerts: list[dict] = []
    for o in internet_outages:
        lat = o.get("lat") or o.get("latitude")
        lng = o.get("lng") or o.get("lon") or o.get("longitude")
        if lat is None or lng is None:
            continue
        try:
            key = _cell_key(float(lat), float(lng))
        except (ValueError, TypeError):
            continue
        if key not in kiwi_cells:
            continue
        pct = _outage_pct(o)
        drivers = [f"Internet outage{f' {pct:.0f}%' if pct else ''}",
                   "KiwiSDR receivers in affected zone"]
        lat_c, lng_c = _cell_center(key)
        alerts.append({
            "lat": lat_c,
            "lng": lng_c,
            "type": "infra_cascade",
            "severity": "medium",
            "score": _severity_score("medium"),
            "drivers": drivers,
            "cell_size": _CELL_SIZE,
        })
    return alerts


# ---------------------------------------------------------------------------
# Possible Contradiction: official denial/statement + infra disruption
#
# This is a HYPOTHESIS GENERATOR, not a verdict engine. It says "LOOK HERE"
# when an official statement (denial, clarification, refusal) co-locates with
# infrastructure disruption (internet outage, sigint change). The human or
# higher-order reasoning decides what actually happened.
#
# Context ratings:
#   STRONG        — denial + outage + prediction market movement in same region
#   MODERATE      — denial + outage (no market signal)
#   WEAK          — denial + minor outage or distant co-location
#   DETECTION_GAP — denial found but NO telemetry to verify (equally valuable)
# ---------------------------------------------------------------------------

# Denial / official-statement patterns in headlines and URL slugs
_DENIAL_PATTERNS = [
    re.compile(p, re.IGNORECASE) for p in [
        r"\bden(?:y|ies|ied|ial)\b",
        r"\brefut(?:e[ds]?|ing)\b",
        r"\breject(?:s|ed|ing)?\b",
        r"\bclarif(?:y|ies|ied|ication)\b",
        r"\bdismiss(?:es|ed|ing)?\b",
        r"\bno\s+attack\b",
        r"\bdid\s+not\s+(?:attack|strike|bomb|target|order|invade|kill)\b",
        r"\bnever\s+(?:attack|strike|bomb|target|order|invade|happen)\b",
        r"\bfalse\s+(?:report|claim|allegation|rumor|narrative)\b",
        r"\bmisinformation\b",
        r"\bdisinformation\b",
        r"\bpropaganda\b",
        r"\b(?:army|military|government|ministry|official)\s+(?:says|clarifies|denies|refutes)\b",
        r"\brumor[s]?\b.*\buntrue\b",
        r"\bcategorically\b",
        r"\bbaseless\b",
    ]
]

# Broader cell radius for sparse telemetry regions (Africa, Central Asia, etc.)
# These regions have fewer IODA/RIPE probes so outage data is sparser _SPARSE_REGIONS_LAT_RANGES = [ (-35, 37), # Africa roughly (25, 50), # Central Asia band (when lng 40-90) ] def _is_sparse_region(lat: float, lng: float) -> bool: """Check if coordinates fall in a region with sparse telemetry coverage.""" # Africa if -35 <= lat <= 37 and -20 <= lng <= 55: return True # Central Asia if 25 <= lat <= 50 and 40 <= lng <= 90: return True # South America interior if -55 <= lat <= 12 and -80 <= lng <= -35: return True return False def _haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float: """Great-circle distance in km.""" R = 6371.0 dlat = math.radians(lat2 - lat1) dlon = math.radians(lon2 - lon1) a = (math.sin(dlat / 2) ** 2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2) return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) def _matches_denial(text: str) -> bool: """Check if text matches any denial/official-statement pattern.""" return any(p.search(text) for p in _DENIAL_PATTERNS) def _detect_contradictions(data: dict) -> list[dict]: """Detect possible contradictions between official statements and telemetry. Scans GDELT headlines for denial language, then checks whether internet outages or other infrastructure disruptions exist in the same geographic region. Scores confidence and lists alternative explanations. 
""" gdelt = data.get("gdelt") or [] internet_outages = data.get("internet_outages") or [] news = data.get("news") or [] prediction_markets = data.get("prediction_markets") or [] # ── Step 1: Find GDELT events with denial/official-statement language ── denial_events: list[dict] = [] # GDELT comes as GeoJSON features gdelt_features = gdelt if isinstance(gdelt, dict): gdelt_features = gdelt.get("features", []) for feature in gdelt_features: # Handle both GeoJSON features and flat dicts if "properties" in feature and "geometry" in feature: props = feature.get("properties", {}) geom = feature.get("geometry", {}) coords = geom.get("coordinates", []) if len(coords) >= 2: lng, lat = float(coords[0]), float(coords[1]) else: continue headlines = props.get("_headlines_list", []) urls = props.get("_urls_list", []) name = props.get("name", "") count = props.get("count", 1) else: lat = feature.get("lat") or feature.get("actionGeo_Lat") lng = feature.get("lng") or feature.get("lon") or feature.get("actionGeo_Long") if lat is None or lng is None: continue lat, lng = float(lat), float(lng) headlines = [feature.get("title", "")] urls = [feature.get("sourceurl", "")] name = feature.get("name", "") count = 1 # Check all headlines + URL slugs for denial patterns all_text = " ".join(str(h) for h in headlines if h) all_text += " " + " ".join(str(u) for u in urls if u) if _matches_denial(all_text): denial_events.append({ "lat": lat, "lng": lng, "headlines": [h for h in headlines if h][:5], "urls": [u for u in urls if u][:3], "location_name": name, "event_count": count, }) # Also scan news articles for denial language for article in news: title = str(article.get("title", "") or "") desc = str(article.get("description", "") or article.get("summary", "") or "") if not _matches_denial(title + " " + desc): continue # News articles often lack coordinates — try to match to GDELT locations # For now, only include if we have coordinates lat = article.get("lat") or article.get("latitude") lng = 
article.get("lng") or article.get("lon") or article.get("longitude") if lat is not None and lng is not None: denial_events.append({ "lat": float(lat), "lng": float(lng), "headlines": [title], "urls": [article.get("url") or article.get("link") or ""], "location_name": "", "event_count": 1, }) if not denial_events: return [] # ── Step 2: Cross-reference with internet outages ── alerts: list[dict] = [] for denial in denial_events: d_lat, d_lng = denial["lat"], denial["lng"] sparse = _is_sparse_region(d_lat, d_lng) search_radius_km = 1500.0 if sparse else 500.0 # Find nearby outages nearby_outages: list[dict] = [] for outage in internet_outages: o_lat = outage.get("lat") or outage.get("latitude") o_lng = outage.get("lng") or outage.get("lon") or outage.get("longitude") if o_lat is None or o_lng is None: continue try: dist = _haversine_km(d_lat, d_lng, float(o_lat), float(o_lng)) except (ValueError, TypeError): continue if dist <= search_radius_km: nearby_outages.append({ "region": outage.get("region_name") or outage.get("country_name", ""), "severity": _outage_pct(outage), "distance_km": round(dist, 0), "level": outage.get("level", ""), }) # ── Step 3: Check prediction markets for related movements ── denial_text = " ".join(denial["headlines"]).lower() related_markets: list[dict] = [] for market in prediction_markets: m_title = str(market.get("title", "") or market.get("question", "") or "").lower() # Look for keyword overlap between denial and market denial_words = set(re.findall(r"[a-z]{4,}", denial_text)) market_words = set(re.findall(r"[a-z]{4,}", m_title)) overlap = denial_words & market_words - {"that", "this", "with", "from", "have", "been", "were", "will", "says", "said"} if len(overlap) >= 2: prob = market.get("probability") or market.get("lastTradePrice") or market.get("yes_price") if prob is not None: related_markets.append({ "title": market.get("title") or market.get("question"), "probability": float(prob), }) # ── Step 4: Score confidence and assign 
context rating ── indicators = 1 # denial itself drivers: list[str] = [] # Primary driver: the denial headline headline_display = denial["headlines"][0] if denial["headlines"] else "Official statement" if len(headline_display) > 80: headline_display = headline_display[:77] + "..." drivers.append(f'"{headline_display}"') # Outage co-location has_outage = False if nearby_outages: best_outage = max(nearby_outages, key=lambda o: o["severity"]) if best_outage["severity"] >= 10: indicators += 1 has_outage = True drivers.append( f"Internet outage {best_outage['severity']:.0f}% " f"({best_outage['region']}, {best_outage['distance_km']:.0f}km away)" ) elif best_outage["severity"] > 0: indicators += 0.5 # minor outage, partial indicator has_outage = True drivers.append( f"Minor outage ({best_outage['region']}, " f"{best_outage['distance_km']:.0f}km away)" ) # Prediction market signal has_market = False if related_markets: indicators += 1 has_market = True top_market = related_markets[0] drivers.append( f"Market: \"{top_market['title'][:50]}\" " f"at {top_market['probability']:.0%}" ) # Multiple denial sources strengthen the signal if denial["event_count"] > 1: indicators += 0.5 drivers.append(f"{denial['event_count']} sources reporting") # Context rating if has_outage and has_market: context = "STRONG" elif has_outage: context = "MODERATE" elif has_market: context = "WEAK" # market signal without infra disruption else: context = "DETECTION_GAP" # Severity mapping if context == "STRONG": sev = "high" elif context == "MODERATE": sev = "medium" else: sev = "low" # Alternative explanations (always present — this is a hypothesis generator) alternatives: list[str] = [] if has_outage: alternatives.append("Routine infrastructure maintenance or cable damage") alternatives.append("Weather-related outage coinciding with news cycle") if not has_outage and context == "DETECTION_GAP": alternatives.append("Statement may be truthful — no contradicting telemetry found") 
alternatives.append("Telemetry coverage gap in this region") alternatives.append("Denial may be responding to social media rumors, not real events") lat_c, lng_c = _cell_center(_cell_key(d_lat, d_lng)) alerts.append({ "lat": lat_c, "lng": lng_c, "type": "contradiction", "severity": sev, "score": _severity_score(sev), "drivers": drivers[:4], "cell_size": _CELL_SIZE, "context": context, "alternatives": alternatives[:3], "location_name": denial.get("location_name", ""), "headlines": denial["headlines"][:3], "related_markets": related_markets[:3], "nearby_outages": nearby_outages[:5], }) # Deduplicate: keep highest-scored alert per cell seen_cells: dict[str, dict] = {} for alert in alerts: key = _cell_key(alert["lat"], alert["lng"]) if key not in seen_cells or alert["score"] > seen_cells[key]["score"]: seen_cells[key] = alert result = list(seen_cells.values()) if result: by_context = defaultdict(int) for a in result: by_context[a["context"]] += 1 logger.info( "Contradictions: %d possible (%s)", len(result), ", ".join(f"{v} {k}" for k, v in sorted(by_context.items())), ) return result # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- # --------------------------------------------------------------------------- # Correlation → Pin bridge # --------------------------------------------------------------------------- # Types and their pin categories _CORR_PIN_CATEGORIES = { "rf_anomaly": "anomaly", "military_buildup": "military", "infra_cascade": "infrastructure", "contradiction": "research", } # Deduplicate: don't re-pin the same cell within this window (seconds). _CORR_PIN_DEDUP_WINDOW = 600 # 10 minutes _recent_corr_pins: dict[str, float] = {} def _auto_pin_correlations(alerts: list[dict]) -> int: """Create AI Intel pins for high-severity correlation alerts. Only pins alerts with severity >= medium. 
Uses cell-key dedup so the same grid cell doesn't get re-pinned every fetch cycle. Returns the number of pins created this cycle. """ import time as _time now = _time.time() # Evict stale dedup entries expired = [k for k, ts in _recent_corr_pins.items() if now - ts > _CORR_PIN_DEDUP_WINDOW] for k in expired: _recent_corr_pins.pop(k, None) created = 0 for alert in alerts: sev = alert.get("severity", "low") if sev == "low": continue # Don't pin low-severity noise lat = alert.get("lat") lng = alert.get("lng") if lat is None or lng is None: continue # Dedup key: type + cell dedup_key = f"{alert['type']}:{_cell_key(lat, lng)}" if dedup_key in _recent_corr_pins: continue category = _CORR_PIN_CATEGORIES.get(alert["type"], "anomaly") drivers = alert.get("drivers", []) atype = alert["type"] if atype == "contradiction": ctx = alert.get("context", "") label = f"[{ctx}] Possible Contradiction" parts = list(drivers) if alert.get("alternatives"): parts.append("Alternatives: " + "; ".join(alert["alternatives"][:2])) description = " | ".join(parts) if parts else "Narrative contradiction detected" else: label = f"[{sev.upper()}] {atype.replace('_', ' ').title()}" description = "; ".join(drivers) if drivers else "Multi-layer correlation alert" try: from services.ai_pin_store import create_pin meta = { "correlation_type": atype, "severity": sev, "drivers": drivers, "cell_size": alert.get("cell_size", _CELL_SIZE), } # Add contradiction-specific metadata if atype == "contradiction": meta["context_rating"] = alert.get("context", "") meta["alternatives"] = alert.get("alternatives", []) meta["headlines"] = alert.get("headlines", []) meta["location_name"] = alert.get("location_name", "") if alert.get("related_markets"): meta["related_markets"] = alert["related_markets"] create_pin( lat=lat, lng=lng, label=label, category=category, description=description, source="correlation_engine", confidence=alert.get("score", 60) / 100.0, ttl_hours=2.0, # Auto-expire correlation pins after 2 hours 
metadata=meta, ) _recent_corr_pins[dedup_key] = now created += 1 except Exception as exc: logger.warning("Failed to auto-pin correlation: %s", exc) if created: logger.info("Correlation engine auto-pinned %d alerts", created) return created # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- def compute_correlations(data: dict) -> list[dict]: """Run all correlation detectors and return merged alert list.""" alerts: list[dict] = [] try: alerts.extend(_detect_rf_anomalies(data)) except Exception as e: logger.error("Correlation engine RF anomaly error: %s", e) try: alerts.extend(_detect_military_buildups(data)) except Exception as e: logger.error("Correlation engine military buildup error: %s", e) try: alerts.extend(_detect_infra_cascades(data)) except Exception as e: logger.error("Correlation engine infra cascade error: %s", e) # Contradiction detection removed from automated engine — too many false # positives from regex headline matching. Contradiction/analysis alerts are # now placed by OpenClaw agents via place_analysis_zone, which lets an LLM # reason about the evidence rather than pattern-matching keywords. try: from services.analysis_zone_store import get_live_zones alerts.extend(get_live_zones()) except Exception as e: logger.error("Analysis zone merge error: %s", e) rf = sum(1 for a in alerts if a["type"] == "rf_anomaly") mil = sum(1 for a in alerts if a["type"] == "military_buildup") infra = sum(1 for a in alerts if a["type"] == "infra_cascade") contra = sum(1 for a in alerts if a["type"] == "contradiction") if alerts: logger.info( "Correlations: %d alerts (%d rf, %d mil, %d infra, %d contra)", len(alerts), rf, mil, infra, contra, ) # Correlation alerts are returned in the correlations data feed only. # They are NOT auto-pinned to AI Intel — that layer is reserved for # user / OpenClaw pins. 
Correlations are visualised via the dedicated # correlations overlay on the map. return alerts