"""Oracle Service — deterministic intelligence ranking for news items. Enriches news items with: - oracle_score: risk_score weighted by source confidence (0–10) - sentiment: VADER compound score (-1.0 to +1.0) - prediction_odds: matched prediction market probabilities (or None) - machine_assessment: structured human-readable analysis string """ import logging logger = logging.getLogger(__name__) _analyzer = None def _get_analyzer(): global _analyzer if _analyzer is None: from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer _analyzer = SentimentIntensityAnalyzer() return _analyzer def compute_sentiment(headline: str) -> float: """VADER compound sentiment score for a headline. Range: -1.0 to +1.0.""" if not headline: return 0.0 return _get_analyzer().polarity_scores(headline)["compound"] def compute_oracle_score(risk_score: int, source_weight: float) -> float: """Weighted oracle score: risk_score scaled by source confidence. source_weight is 1–5 (from feed config). Normalised to 0.2–1.0 multiplier. Result range: 0.0–10.0. """ multiplier = source_weight / 5.0 # 1→0.2, 5→1.0 return round(risk_score * multiplier, 1) _STOP_WORDS = frozenset({ "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "shall", "can", "this", "that", "these", "those", "it", "its", "if", "not", "no", "so", "as", "up", "out", "about", "into", "over", "after", "before", "between", "under", "than", "then", "more", "most", "other", "some", "such", "only", "own", "same", "also", "just", "how", "what", "which", "who", "whom", "when", "where", "why", "all", "each", "every", "both", "few", "many", "much", "any", "very", "too", "here", "there", "now", "new", "says", "said", "-", "--", "—", "vs", "vs.", "&", "he", "she", "they", "we", "you", "his", "her", "my", "our", "your", "their", "him", "us", "them", }) def _tokenize(text: str) -> set[str]: """Lowercase, strip punctuation, remove stop words.""" import re words = re.findall(r"[a-z0-9]+(?:'[a-z]+)?", text.lower()) return {w for w in words if w not in _STOP_WORDS and len(w) > 1} def _match_prediction_markets(title: str, markets: list[dict]) -> dict | None: """Find best-matching prediction market for a news headline. Uses Jaccard similarity on meaningful tokens (stop words removed). Requires at least 2 meaningful keyword overlaps AND Jaccard >= 0.15. """ if not markets or not title: return None title_words = _tokenize(title) if len(title_words) < 2: return None best_match = None best_score = 0.0 for market in markets: market_title = market.get("title", "") market_words = _tokenize(market_title) if len(market_words) < 2: continue intersection = title_words & market_words if len(intersection) < 2: continue union = title_words | market_words jaccard = len(intersection) / len(union) if union else 0.0 if jaccard > best_score and jaccard >= 0.15: best_score = jaccard best_match = market if not best_match: return None return { "title": best_match.get("title", ""), "polymarket_pct": best_match.get("polymarket_pct"), "kalshi_pct": best_match.get("kalshi_pct"), "consensus_pct": best_match.get("consensus_pct"), "match_score": round(best_score, 2), "slug": best_match.get("slug", ""), "kalshi_ticker": best_match.get("kalshi_ticker", ""), } def _build_assessment(oracle_score: float, sentiment: float, prediction: dict | None) -> str: """Build structured machine_assessment string.""" parts = [] # Oracle tier if oracle_score >= 7: tier = "CRITICAL" elif oracle_score >= 4: tier = "ELEVATED" else: tier = "ROUTINE" parts.append(f"ORACLE: {oracle_score}/10 [{tier}]") # Sentiment if sentiment >= 0.05: sdir = "POSITIVE" elif sentiment <= -0.05: sdir = "NEGATIVE" else: sdir = "NEUTRAL" parts.append(f"SENTIMENT: {sentiment:+.2f} [{sdir}]") # Prediction market if prediction: consensus = prediction.get("consensus_pct") if consensus is not None: parts.append(f"MKT CONSENSUS: {consensus}%") poly = prediction.get("polymarket_pct") kalshi = prediction.get("kalshi_pct") sources = [] if poly is not None: sources.append(f"Polymarket {poly}%") if kalshi is not None: sources.append(f"Kalshi {kalshi}%") if sources: parts.append(f" Sources: {' | '.join(sources)}") return " // ".join(parts[:3]) + ("\n" + parts[3] if len(parts) > 3 else "") def enrich_news_items( news_items: list[dict], source_weights: dict[str, float], markets: list[dict] | None = None ) -> list[dict]: """Enrich news items with oracle scores, sentiment, and prediction market odds. Args: news_items: list of news item dicts (modified in-place) source_weights: {source_name: weight} from feed config (1–5 scale) markets: merged prediction market events list (or None) Returns: The same list, enriched with oracle_score, sentiment, prediction_odds, machine_assessment. """ if markets is None: markets = [] for item in news_items: title = item.get("title", "") source = item.get("source", "") risk_score = item.get("risk_score", 1) weight = source_weights.get(source, 3) # default weight 3 (mid-range) sentiment = compute_sentiment(title) oracle_score = compute_oracle_score(risk_score, weight) prediction = _match_prediction_markets(title, markets) item["sentiment"] = sentiment item["oracle_score"] = oracle_score item["prediction_odds"] = prediction item["machine_assessment"] = _build_assessment(oracle_score, sentiment, prediction) return news_items # --------------------------------------------------------------------------- # Global threat level # --------------------------------------------------------------------------- _THREAT_TIERS = [ (80, "SEVERE", "#ef4444"), # red (60, "HIGH", "#f97316"), # orange (40, "ELEVATED", "#eab308"), # yellow (20, "GUARDED", "#3b82f6"), # blue (0, "GREEN", "#22c55e"), # green ] def compute_global_threat_level( news_items: list[dict], markets: list[dict] | None = None, military_flights: list[dict] | None = None, gps_jamming: list[dict] | None = None, ships: list[dict] | None = None, correlations: list[dict] | None = None, ) -> dict: """Fuse news sentiment, prediction-market conflict odds, event frequency, military activity, GPS jamming, and cross-layer correlations into a single 0-100 threat score. Formula (weights sum to 1.0): 0.25 × negative_sentiment_intensity 0.25 × conflict_market_avg_probability 0.10 × high_risk_event_ratio 0.10 × max_oracle_score (normalised to 0-100) 0.10 × military_activity_anomaly 0.10 × gps_jamming_indicator 0.10 × correlation_alerts """ if not news_items: return {"score": 0, "level": "GREEN", "color": "#22c55e", "drivers": []} # --- Component 1: negative sentiment intensity (0-100) --- neg_scores = [abs(it.get("sentiment", 0)) for it in news_items if (it.get("sentiment") or 0) <= -0.05] neg_intensity = (sum(neg_scores) / len(news_items)) * 100 if news_items else 0 neg_intensity = min(100, neg_intensity * 2.5) # scale up — avg abs sentiment rarely > 0.4 # --- Component 2: conflict market avg probability (0-100) --- conflict_probs: list[float] = [] for m in (markets or []): if m.get("category") == "CONFLICT": pct = m.get("consensus_pct") or m.get("polymarket_pct") or m.get("kalshi_pct") if pct is not None: conflict_probs.append(float(pct)) conflict_avg = sum(conflict_probs) / len(conflict_probs) if conflict_probs else 0 # --- Component 3: high-risk event ratio (0-100) --- high_risk = sum(1 for it in news_items if (it.get("risk_score") or 0) >= 7) event_ratio = (high_risk / len(news_items)) * 100 if news_items else 0 # --- Component 4: max oracle score (0-100) --- max_oracle = max((it.get("oracle_score") or 0) for it in news_items) max_oracle_pct = max_oracle * 10 # 0-10 → 0-100 # --- Component 5: military activity anomaly (0-100) --- mil_count = len(military_flights or []) # Baseline: ~20-50 military flights is normal. Spike above 80 is anomalous. mil_anomaly = min(100, max(0, (mil_count - 30) * 2)) if mil_count > 30 else 0 # --- Component 6: GPS jamming indicator (0-100) --- jam_zones = gps_jamming or [] high_jam = sum(1 for z in jam_zones if z.get("severity") == "high") med_jam = sum(1 for z in jam_zones if z.get("severity") == "medium") jam_score = min(100, high_jam * 25 + med_jam * 10) # --- Component 7: cross-layer correlation alerts (0-100) --- corr_list: list[dict] = correlations if correlations else [] corr_points = sum( 15 if a.get("severity") == "high" else 8 if a.get("severity") == "medium" else 3 for a in corr_list ) corr_score = min(100, corr_points) # --- Weighted fusion --- score = ( 0.25 * neg_intensity + 0.25 * conflict_avg + 0.10 * event_ratio + 0.10 * max_oracle_pct + 0.10 * mil_anomaly + 0.10 * jam_score + 0.10 * corr_score ) score = max(0, min(100, round(score))) # --- Tier --- level, color = "GREEN", "#22c55e" for threshold, name, c in _THREAT_TIERS: if score >= threshold: level, color = name, c break # --- Drivers (top reasons for current level) --- drivers: list[str] = [] if high_risk: drivers.append(f"{high_risk} CRITICAL-tier news item{'s' if high_risk != 1 else ''}") if conflict_avg >= 30: drivers.append(f"CONFLICT markets avg {conflict_avg:.0f}%") if neg_intensity >= 40: drivers.append(f"Negative sentiment intensity {neg_intensity:.0f}/100") if max_oracle >= 7: drivers.append(f"Max oracle score {max_oracle}/10") if mil_anomaly >= 30: drivers.append(f"Military flight spike: {mil_count} tracked") if jam_score >= 25: drivers.append(f"GPS jamming: {high_jam} HIGH + {med_jam} MED zones") if corr_score >= 15: corr_high = sum(1 for a in corr_list if a.get("severity") == "high") corr_med = sum(1 for a in corr_list if a.get("severity") == "medium") drivers.append(f"Cross-layer correlations: {corr_high} HIGH + {corr_med} MED") if not drivers: drivers.append("Baseline — no significant threat indicators") return { "score": score, "level": level, "color": color, "drivers": drivers[:4], } def detect_breaking_events(news_items: list[dict]) -> None: """Mark news items as 'breaking' when multiple credible sources converge. Criteria: cluster_count >= 3 AND risk_score >= 7. Modifies items in-place by setting ``breaking = True``. """ for item in news_items: cluster = item.get("cluster_count", 1) risk = item.get("risk_score", 0) if cluster >= 3 and risk >= 7: item["breaking"] = True # --------------------------------------------------------------------------- # Region oracle intel (for map entity tooltips) # --------------------------------------------------------------------------- _region_cache: dict[str, tuple[float, dict]] = {} # "lat,lng" -> (timestamp, result) _REGION_CACHE_TTL = 60 # seconds _REGION_RADIUS_DEG = 5.0 # ~500km at equator def get_region_oracle_intel(lat: float, lng: float, news_items: list[dict]) -> dict: """Get oracle intelligence summary for a geographic region. Finds news items within ~5 degrees, returns top oracle_score item, average sentiment, and best market match. Cached on 0.5-degree grid. """ import time # Grid-snap for cache key (0.5 degree grid) grid_lat = round(lat * 2) / 2 grid_lng = round(lng * 2) / 2 cache_key = f"{grid_lat},{grid_lng}" now = time.time() if cache_key in _region_cache: ts, cached_result = _region_cache[cache_key] if now - ts < _REGION_CACHE_TTL: return cached_result # Find nearby news items nearby = [] for item in news_items: coords = item.get("coords") if not coords or len(coords) < 2: continue ilat, ilng = coords[0], coords[1] if abs(ilat - lat) <= _REGION_RADIUS_DEG and abs(ilng - lng) <= _REGION_RADIUS_DEG: nearby.append(item) if not nearby: result = {"found": False} _region_cache[cache_key] = (now, result) return result # Top oracle score item top = max(nearby, key=lambda x: x.get("oracle_score", 0)) avg_sentiment = sum(it.get("sentiment", 0) for it in nearby) / len(nearby) # Best market match from nearby items best_market = None for it in nearby: po = it.get("prediction_odds") if po and po.get("consensus_pct") is not None: if best_market is None or (po.get("consensus_pct") or 0) > (best_market.get("consensus_pct") or 0): best_market = po # Oracle tier oracle_score = top.get("oracle_score", 0) tier = "CRITICAL" if oracle_score >= 7 else "ELEVATED" if oracle_score >= 4 else "ROUTINE" result = { "found": True, "top_headline": top.get("title", ""), "oracle_score": oracle_score, "tier": tier, "avg_sentiment": round(avg_sentiment, 2), "nearby_count": len(nearby), "market": { "title": best_market.get("title", ""), "consensus_pct": best_market.get("consensus_pct"), } if best_market else None, } _region_cache[cache_key] = (now, result) return result