mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-04-23 19:16:06 +02:00
668ce16dc7
Gate messages now propagate via the Infonet hashchain as encrypted blobs — every node syncs them through normal chain sync while only Gate members with MLS keys can decrypt. Added mesh reputation system, peer push workers, voluntary Wormhole opt-in for node participation, fork recovery, killwormhole scripts, obfuscated terminology, and hardened the self-updater to protect encryption keys and chain state during updates. New features: Shodan search, train tracking, Sentinel Hub imagery, 8 new intelligence layers, CCTV expansion to 11,000+ cameras across 6 countries, Mesh Terminal CLI, prediction markets, desktop-shell scaffold, and comprehensive mesh test suite (215 frontend + backend tests passing). Community contributors: @wa1id, @AlborzNazari, @adust09, @Xpirix, @imqdcr, @csysp, @suranyami, @chr0n1x, @johan-martensson, @singularfailure, @smithbh, @OrfeoTerkuci, @deuza, @tm-const, @Elhard1, @ttulttul
396 lines
14 KiB
Python
396 lines
14 KiB
Python
"""Oracle Service — deterministic intelligence ranking for news items.
|
||
|
||
Enriches news items with:
|
||
- oracle_score: risk_score weighted by source confidence (0–10)
|
||
- sentiment: VADER compound score (-1.0 to +1.0)
|
||
- prediction_odds: matched prediction market probabilities (or None)
|
||
- machine_assessment: structured human-readable analysis string
|
||
"""
|
||
|
||
import logging
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
_analyzer = None
|
||
|
||
|
||
def _get_analyzer():
|
||
global _analyzer
|
||
if _analyzer is None:
|
||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||
_analyzer = SentimentIntensityAnalyzer()
|
||
return _analyzer
|
||
|
||
|
||
def compute_sentiment(headline: str) -> float:
|
||
"""VADER compound sentiment score for a headline. Range: -1.0 to +1.0."""
|
||
if not headline:
|
||
return 0.0
|
||
return _get_analyzer().polarity_scores(headline)["compound"]
|
||
|
||
|
||
def compute_oracle_score(risk_score: int, source_weight: float) -> float:
|
||
"""Weighted oracle score: risk_score scaled by source confidence.
|
||
|
||
source_weight is 1–5 (from feed config). Normalised to 0.2–1.0 multiplier.
|
||
Result range: 0.0–10.0.
|
||
"""
|
||
multiplier = source_weight / 5.0 # 1→0.2, 5→1.0
|
||
return round(risk_score * multiplier, 1)
|
||
|
||
|
||
_STOP_WORDS = frozenset({
|
||
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
|
||
"of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
|
||
"being", "have", "has", "had", "do", "does", "did", "will", "would",
|
||
"could", "should", "may", "might", "shall", "can", "this", "that",
|
||
"these", "those", "it", "its", "if", "not", "no", "so", "as", "up",
|
||
"out", "about", "into", "over", "after", "before", "between", "under",
|
||
"than", "then", "more", "most", "other", "some", "such", "only", "own",
|
||
"same", "also", "just", "how", "what", "which", "who", "whom", "when",
|
||
"where", "why", "all", "each", "every", "both", "few", "many", "much",
|
||
"any", "very", "too", "here", "there", "now", "new", "says", "said",
|
||
"-", "--", "—", "vs", "vs.", "&", "he", "she", "they", "we", "you",
|
||
"his", "her", "my", "our", "your", "their", "him", "us", "them",
|
||
})
|
||
|
||
|
||
def _tokenize(text: str) -> set[str]:
|
||
"""Lowercase, strip punctuation, remove stop words."""
|
||
import re
|
||
words = re.findall(r"[a-z0-9]+(?:'[a-z]+)?", text.lower())
|
||
return {w for w in words if w not in _STOP_WORDS and len(w) > 1}
|
||
|
||
|
||
def _match_prediction_markets(title: str, markets: list[dict]) -> dict | None:
|
||
"""Find best-matching prediction market for a news headline.
|
||
|
||
Uses Jaccard similarity on meaningful tokens (stop words removed).
|
||
Requires at least 2 meaningful keyword overlaps AND Jaccard >= 0.15.
|
||
"""
|
||
if not markets or not title:
|
||
return None
|
||
|
||
title_words = _tokenize(title)
|
||
if len(title_words) < 2:
|
||
return None
|
||
|
||
best_match = None
|
||
best_score = 0.0
|
||
|
||
for market in markets:
|
||
market_title = market.get("title", "")
|
||
market_words = _tokenize(market_title)
|
||
if len(market_words) < 2:
|
||
continue
|
||
|
||
intersection = title_words & market_words
|
||
if len(intersection) < 2:
|
||
continue
|
||
|
||
union = title_words | market_words
|
||
jaccard = len(intersection) / len(union) if union else 0.0
|
||
|
||
if jaccard > best_score and jaccard >= 0.15:
|
||
best_score = jaccard
|
||
best_match = market
|
||
|
||
if not best_match:
|
||
return None
|
||
|
||
return {
|
||
"title": best_match.get("title", ""),
|
||
"polymarket_pct": best_match.get("polymarket_pct"),
|
||
"kalshi_pct": best_match.get("kalshi_pct"),
|
||
"consensus_pct": best_match.get("consensus_pct"),
|
||
"match_score": round(best_score, 2),
|
||
}
|
||
|
||
|
||
def _build_assessment(oracle_score: float, sentiment: float, prediction: dict | None) -> str:
|
||
"""Build structured machine_assessment string."""
|
||
parts = []
|
||
|
||
# Oracle tier
|
||
if oracle_score >= 7:
|
||
tier = "CRITICAL"
|
||
elif oracle_score >= 4:
|
||
tier = "ELEVATED"
|
||
else:
|
||
tier = "ROUTINE"
|
||
parts.append(f"ORACLE: {oracle_score}/10 [{tier}]")
|
||
|
||
# Sentiment
|
||
if sentiment >= 0.05:
|
||
sdir = "POSITIVE"
|
||
elif sentiment <= -0.05:
|
||
sdir = "NEGATIVE"
|
||
else:
|
||
sdir = "NEUTRAL"
|
||
parts.append(f"SENTIMENT: {sentiment:+.2f} [{sdir}]")
|
||
|
||
# Prediction market
|
||
if prediction:
|
||
consensus = prediction.get("consensus_pct")
|
||
if consensus is not None:
|
||
parts.append(f"MKT CONSENSUS: {consensus}%")
|
||
poly = prediction.get("polymarket_pct")
|
||
kalshi = prediction.get("kalshi_pct")
|
||
sources = []
|
||
if poly is not None:
|
||
sources.append(f"Polymarket {poly}%")
|
||
if kalshi is not None:
|
||
sources.append(f"Kalshi {kalshi}%")
|
||
if sources:
|
||
parts.append(f" Sources: {' | '.join(sources)}")
|
||
|
||
return " // ".join(parts[:3]) + ("\n" + parts[3] if len(parts) > 3 else "")
|
||
|
||
|
||
def enrich_news_items(
|
||
news_items: list[dict], source_weights: dict[str, float], markets: list[dict] | None = None
|
||
) -> list[dict]:
|
||
"""Enrich news items with oracle scores, sentiment, and prediction market odds.
|
||
|
||
Args:
|
||
news_items: list of news item dicts (modified in-place)
|
||
source_weights: {source_name: weight} from feed config (1–5 scale)
|
||
markets: merged prediction market events list (or None)
|
||
|
||
Returns:
|
||
The same list, enriched with oracle_score, sentiment, prediction_odds, machine_assessment.
|
||
"""
|
||
if markets is None:
|
||
markets = []
|
||
|
||
for item in news_items:
|
||
title = item.get("title", "")
|
||
source = item.get("source", "")
|
||
risk_score = item.get("risk_score", 1)
|
||
weight = source_weights.get(source, 3) # default weight 3 (mid-range)
|
||
|
||
sentiment = compute_sentiment(title)
|
||
oracle_score = compute_oracle_score(risk_score, weight)
|
||
prediction = _match_prediction_markets(title, markets)
|
||
|
||
item["sentiment"] = sentiment
|
||
item["oracle_score"] = oracle_score
|
||
item["prediction_odds"] = prediction
|
||
item["machine_assessment"] = _build_assessment(oracle_score, sentiment, prediction)
|
||
|
||
return news_items
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Global threat level
|
||
# ---------------------------------------------------------------------------
|
||
|
||
_THREAT_TIERS = [
|
||
(80, "SEVERE", "#ef4444"), # red
|
||
(60, "HIGH", "#f97316"), # orange
|
||
(40, "ELEVATED", "#eab308"), # yellow
|
||
(20, "GUARDED", "#3b82f6"), # blue
|
||
(0, "GREEN", "#22c55e"), # green
|
||
]
|
||
|
||
|
||
def compute_global_threat_level(
|
||
news_items: list[dict],
|
||
markets: list[dict] | None = None,
|
||
military_flights: list[dict] | None = None,
|
||
gps_jamming: list[dict] | None = None,
|
||
ships: list[dict] | None = None,
|
||
correlations: list[dict] | None = None,
|
||
) -> dict:
|
||
"""Fuse news sentiment, prediction-market conflict odds, event frequency,
|
||
military activity, GPS jamming, and cross-layer correlations into a single
|
||
0-100 threat score.
|
||
|
||
Formula (weights sum to 1.0):
|
||
0.25 × negative_sentiment_intensity
|
||
0.25 × conflict_market_avg_probability
|
||
0.10 × high_risk_event_ratio
|
||
0.10 × max_oracle_score (normalised to 0-100)
|
||
0.10 × military_activity_anomaly
|
||
0.10 × gps_jamming_indicator
|
||
0.10 × correlation_alerts
|
||
"""
|
||
if not news_items:
|
||
return {"score": 0, "level": "GREEN", "color": "#22c55e", "drivers": []}
|
||
|
||
# --- Component 1: negative sentiment intensity (0-100) ---
|
||
neg_scores = [abs(it.get("sentiment", 0)) for it in news_items if (it.get("sentiment") or 0) <= -0.05]
|
||
neg_intensity = (sum(neg_scores) / len(news_items)) * 100 if news_items else 0
|
||
neg_intensity = min(100, neg_intensity * 2.5) # scale up — avg abs sentiment rarely > 0.4
|
||
|
||
# --- Component 2: conflict market avg probability (0-100) ---
|
||
conflict_probs: list[float] = []
|
||
for m in (markets or []):
|
||
if m.get("category") == "CONFLICT":
|
||
pct = m.get("consensus_pct") or m.get("polymarket_pct") or m.get("kalshi_pct")
|
||
if pct is not None:
|
||
conflict_probs.append(float(pct))
|
||
conflict_avg = sum(conflict_probs) / len(conflict_probs) if conflict_probs else 0
|
||
|
||
# --- Component 3: high-risk event ratio (0-100) ---
|
||
high_risk = sum(1 for it in news_items if (it.get("risk_score") or 0) >= 7)
|
||
event_ratio = (high_risk / len(news_items)) * 100 if news_items else 0
|
||
|
||
# --- Component 4: max oracle score (0-100) ---
|
||
max_oracle = max((it.get("oracle_score") or 0) for it in news_items)
|
||
max_oracle_pct = max_oracle * 10 # 0-10 → 0-100
|
||
|
||
# --- Component 5: military activity anomaly (0-100) ---
|
||
mil_count = len(military_flights or [])
|
||
# Baseline: ~20-50 military flights is normal. Spike above 80 is anomalous.
|
||
mil_anomaly = min(100, max(0, (mil_count - 30) * 2)) if mil_count > 30 else 0
|
||
|
||
# --- Component 6: GPS jamming indicator (0-100) ---
|
||
jam_zones = gps_jamming or []
|
||
high_jam = sum(1 for z in jam_zones if z.get("severity") == "high")
|
||
med_jam = sum(1 for z in jam_zones if z.get("severity") == "medium")
|
||
jam_score = min(100, high_jam * 25 + med_jam * 10)
|
||
|
||
# --- Component 7: cross-layer correlation alerts (0-100) ---
|
||
corr_list: list[dict] = correlations if correlations else []
|
||
corr_points = sum(
|
||
15 if a.get("severity") == "high" else 8 if a.get("severity") == "medium" else 3
|
||
for a in corr_list
|
||
)
|
||
corr_score = min(100, corr_points)
|
||
|
||
# --- Weighted fusion ---
|
||
score = (
|
||
0.25 * neg_intensity
|
||
+ 0.25 * conflict_avg
|
||
+ 0.10 * event_ratio
|
||
+ 0.10 * max_oracle_pct
|
||
+ 0.10 * mil_anomaly
|
||
+ 0.10 * jam_score
|
||
+ 0.10 * corr_score
|
||
)
|
||
score = max(0, min(100, round(score)))
|
||
|
||
# --- Tier ---
|
||
level, color = "GREEN", "#22c55e"
|
||
for threshold, name, c in _THREAT_TIERS:
|
||
if score >= threshold:
|
||
level, color = name, c
|
||
break
|
||
|
||
# --- Drivers (top reasons for current level) ---
|
||
drivers: list[str] = []
|
||
if high_risk:
|
||
drivers.append(f"{high_risk} CRITICAL-tier news item{'s' if high_risk != 1 else ''}")
|
||
if conflict_avg >= 30:
|
||
drivers.append(f"CONFLICT markets avg {conflict_avg:.0f}%")
|
||
if neg_intensity >= 40:
|
||
drivers.append(f"Negative sentiment intensity {neg_intensity:.0f}/100")
|
||
if max_oracle >= 7:
|
||
drivers.append(f"Max oracle score {max_oracle}/10")
|
||
if mil_anomaly >= 30:
|
||
drivers.append(f"Military flight spike: {mil_count} tracked")
|
||
if jam_score >= 25:
|
||
drivers.append(f"GPS jamming: {high_jam} HIGH + {med_jam} MED zones")
|
||
if corr_score >= 15:
|
||
corr_high = sum(1 for a in corr_list if a.get("severity") == "high")
|
||
corr_med = sum(1 for a in corr_list if a.get("severity") == "medium")
|
||
drivers.append(f"Cross-layer correlations: {corr_high} HIGH + {corr_med} MED")
|
||
if not drivers:
|
||
drivers.append("Baseline — no significant threat indicators")
|
||
|
||
return {
|
||
"score": score,
|
||
"level": level,
|
||
"color": color,
|
||
"drivers": drivers[:4],
|
||
}
|
||
|
||
|
||
def detect_breaking_events(news_items: list[dict]) -> None:
|
||
"""Mark news items as 'breaking' when multiple credible sources converge.
|
||
|
||
Criteria: cluster_count >= 3 AND risk_score >= 7.
|
||
Modifies items in-place by setting ``breaking = True``.
|
||
"""
|
||
for item in news_items:
|
||
cluster = item.get("cluster_count", 1)
|
||
risk = item.get("risk_score", 0)
|
||
if cluster >= 3 and risk >= 7:
|
||
item["breaking"] = True
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Region oracle intel (for map entity tooltips)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
_region_cache: dict[str, tuple[float, dict]] = {} # "lat,lng" -> (timestamp, result)
|
||
_REGION_CACHE_TTL = 60 # seconds
|
||
_REGION_RADIUS_DEG = 5.0 # ~500km at equator
|
||
|
||
|
||
def get_region_oracle_intel(lat: float, lng: float, news_items: list[dict]) -> dict:
|
||
"""Get oracle intelligence summary for a geographic region.
|
||
|
||
Finds news items within ~5 degrees, returns top oracle_score item,
|
||
average sentiment, and best market match. Cached on 0.5-degree grid.
|
||
"""
|
||
import time
|
||
|
||
# Grid-snap for cache key (0.5 degree grid)
|
||
grid_lat = round(lat * 2) / 2
|
||
grid_lng = round(lng * 2) / 2
|
||
cache_key = f"{grid_lat},{grid_lng}"
|
||
|
||
now = time.time()
|
||
if cache_key in _region_cache:
|
||
ts, cached_result = _region_cache[cache_key]
|
||
if now - ts < _REGION_CACHE_TTL:
|
||
return cached_result
|
||
|
||
# Find nearby news items
|
||
nearby = []
|
||
for item in news_items:
|
||
coords = item.get("coords")
|
||
if not coords or len(coords) < 2:
|
||
continue
|
||
ilat, ilng = coords[0], coords[1]
|
||
if abs(ilat - lat) <= _REGION_RADIUS_DEG and abs(ilng - lng) <= _REGION_RADIUS_DEG:
|
||
nearby.append(item)
|
||
|
||
if not nearby:
|
||
result = {"found": False}
|
||
_region_cache[cache_key] = (now, result)
|
||
return result
|
||
|
||
# Top oracle score item
|
||
top = max(nearby, key=lambda x: x.get("oracle_score", 0))
|
||
avg_sentiment = sum(it.get("sentiment", 0) for it in nearby) / len(nearby)
|
||
|
||
# Best market match from nearby items
|
||
best_market = None
|
||
for it in nearby:
|
||
po = it.get("prediction_odds")
|
||
if po and po.get("consensus_pct") is not None:
|
||
if best_market is None or (po.get("consensus_pct") or 0) > (best_market.get("consensus_pct") or 0):
|
||
best_market = po
|
||
|
||
# Oracle tier
|
||
oracle_score = top.get("oracle_score", 0)
|
||
tier = "CRITICAL" if oracle_score >= 7 else "ELEVATED" if oracle_score >= 4 else "ROUTINE"
|
||
|
||
result = {
|
||
"found": True,
|
||
"top_headline": top.get("title", ""),
|
||
"oracle_score": oracle_score,
|
||
"tier": tier,
|
||
"avg_sentiment": round(avg_sentiment, 2),
|
||
"nearby_count": len(nearby),
|
||
"market": {
|
||
"title": best_market.get("title", ""),
|
||
"consensus_pct": best_market.get("consensus_pct"),
|
||
} if best_market else None,
|
||
}
|
||
_region_cache[cache_key] = (now, result)
|
||
return result
|