diff --git a/.env.example b/.env.example index fb69c7b..cb36053 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,20 @@ AIS_API_KEY= # Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token). # TELEGRAM_OSINT_ENABLED=true # TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor +# TELEGRAM_OSINT_TRANSLATE=true +# TELEGRAM_OSINT_TRANSLATE_TO=en + +# Strategic Risk Analytics (experimental derived OSINT — off by default) +# GT_ANALYTICS_ENABLED=false +# GT_ANALYTICS_PROFILE=lean +# On 1 vCPU nodes (fleet VPS), leave disabled or set profile=lean. Scheduled ingest +# and Louvain clustering stay off until GT_ANALYTICS_ACK_LOW_CPU=true. +# GT_ANALYTICS_ACK_LOW_CPU=false +# GT_ANALYTICS_BASE_PRIOR=0.15 +# GT_ANALYTICS_HIGH_RISK_THRESHOLD=0.6 +# GT_ANALYTICS_SIGNAL_WEIGHTS=payroll_loan=3.0,purge=3.5,troop_movement=3.0 +# GT_ANALYTICS_WATCHED_CHANNELS=osintdefender,war_monitor,nexta_live +# GT_ANALYTICS_LOUVAIN_INTERVAL_MINUTES=30 # Admin key to protect sensitive endpoints (settings, updates). # If blank, loopback/localhost requests still work for local single-host dev. 
diff --git a/backend/analytics/__init__.py b/backend/analytics/__init__.py new file mode 100644 index 0000000..116ff8b --- /dev/null +++ b/backend/analytics/__init__.py @@ -0,0 +1,21 @@ +"""Strategic Risk Analytics — game-theoretic early warning layer.""" + +from analytics.backtest import ( + DEFAULT_BACKTEST_ALERT_THRESHOLD, + BacktestReport, + run_historical_backtest, + tune_alert_threshold, +) +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.integration import get_gt_engine, process_feed_item, refresh_from_latest_data + +__all__ = [ + "BacktestReport", + "DEFAULT_BACKTEST_ALERT_THRESHOLD", + "GT_EarlyWarning", + "get_gt_engine", + "process_feed_item", + "refresh_from_latest_data", + "run_historical_backtest", + "tune_alert_threshold", +] \ No newline at end of file diff --git a/backend/analytics/backtest.py b/backend/analytics/backtest.py new file mode 100644 index 0000000..ca8601e --- /dev/null +++ b/backend/analytics/backtest.py @@ -0,0 +1,287 @@ +"""Historical backtesting for Strategic Risk Analytics. + +This is **benchmark validation**, not forward-weeks prediction on live feeds. + +The suite scores whether costly-signal patterns + Bayesian updating correctly +classify curated pre-crisis text snippets (positive cases) vs cheap-talk +controls (negative cases) at a tuned alert threshold. A high accuracy on this +labeled corpus does **not** imply the engine will score 100% on messy, +adversarial, or weeks-ahead production telemetry — opponents adapt, labels are +easier here than in the wild, and the window is retrospective. + +Reports accuracy and a conservative Wilson 95% confidence lower bound on the +benchmark only. Treat 100% here as "classifier fits the benchmark," not "ship +it for multi-week forecasting." For live week-over-week scoring with delayed +labels, see ``rolling_backtest.py``. 
@dataclass(frozen=True)
class CaseResult:
    """Outcome of replaying one labeled historical case through a fresh engine."""

    # Identity and labels copied from the HistoricalCase being evaluated.
    case_id: str
    name: str
    kind: str
    region: str
    domain: str
    # True when the case is labeled kind == "positive".
    expected_alert: bool
    # True when the case's score reached the alert threshold.
    alerted: bool
    # alerted == expected_alert.
    correct: bool
    # Highest per-domain / composite risk observed while feeding the case's
    # items; both start from the configured base prior.
    peak_domain_risk: float
    peak_composite_risk: float
    # Sorted names of the signals reported while processing the case's items.
    costly_signals: list[str]
    tags: tuple[str, ...] = field(default_factory=tuple)
def wilson_interval(
    successes: int,
    total: int,
    z: float = 1.96,
) -> tuple[float, float]:
    """Return the Wilson score interval for a binomial proportion.

    Args:
        successes: Number of successful trials. Values outside ``[0, total]``
            are clamped into that range.
        total: Number of trials. Non-positive totals yield ``(0.0, 0.0)``.
        z: Normal quantile for the desired confidence (1.96 for ~95%).

    Returns:
        ``(lower, upper)`` bounds, each clipped to ``[0.0, 1.0]``.
    """
    if total <= 0:
        return 0.0, 0.0
    # An inconsistent count would push phat outside [0, 1], make
    # phat * (1 - phat) negative and surface as an opaque
    # "math domain error" from sqrt(); clamp instead.
    successes = min(max(successes, 0), total)
    phat = successes / total
    z2 = z * z
    denom = 1.0 + z2 / total
    center = (phat + z2 / (2.0 * total)) / denom
    margin = (
        z
        * math.sqrt((phat * (1.0 - phat) + z2 / (4.0 * total)) / total)
        / denom
    )
    return max(0.0, center - margin), min(1.0, center + margin)
detected_signals: set[str] = set() + + for item in case.to_feed_dicts(): + result = engine.process_feed_item(item) + for sig in (result or {}).get("signals") or {}: + detected_signals.add(str(sig)) + domain_risk = _domain_risk(engine, case.region, case.domain) + composite = engine.composite_risk(case.region) + peak_domain = max(peak_domain, domain_risk) + peak_composite = max(peak_composite, composite) + + # Domain-specific score for labeled events; composite as secondary for conflict. + score = peak_domain + if case.domain == "conflict": + score = max(peak_domain, peak_composite * 0.95) + alerted = score >= alert_threshold + expected_alert = case.kind == "positive" + + return CaseResult( + case_id=case.case_id, + name=case.name, + kind=case.kind, + region=case.region, + domain=case.domain, + expected_alert=expected_alert, + alerted=alerted, + correct=alerted == expected_alert, + peak_domain_risk=peak_domain, + peak_composite_risk=peak_composite, + costly_signals=sorted(detected_signals), + tags=case.tags, + ) + + +def run_historical_backtest( + cases: tuple[HistoricalCase, ...] | None = None, + *, + settings: GTAnalyticsSettings | None = None, + alert_threshold: float | None = None, + target_confidence: float = 0.80, + use_expanded_suite: bool = True, +) -> BacktestReport: + """ + Run labeled historical cases and compute accuracy + Wilson 95% CI. + + ``confidence_rate`` is the conservative Wilson lower bound — the metric + used for pass/fail against ``target_confidence``. 
def tune_alert_threshold(
    cases: tuple[HistoricalCase, ...] | None = None,
    *,
    settings: GTAnalyticsSettings | None = None,
    min_threshold: float = 0.20,
    max_threshold: float = 0.65,
    step: float = 0.01,
    target_confidence: float = 0.95,
) -> tuple[float, BacktestReport]:
    """Grid-search the alert threshold that maximizes the Wilson lower bound.

    Candidates are ``min_threshold + i * step`` for ``i = 0..steps`` where
    ``steps = round((max_threshold - min_threshold) / step)``. A candidate
    replaces the current best when it has a higher ``confidence_rate``; on a
    tie, when it has higher accuracy; on a full tie, when its threshold is
    higher.

    Args:
        cases: Labeled cases to score; defaults to the expanded suite.
        settings: Engine settings forwarded to ``run_historical_backtest``.
        min_threshold: First grid point, and the result for a degenerate grid.
        max_threshold: Last grid point (subject to rounding of the step count).
        step: Grid spacing. A zero step evaluates only ``min_threshold``.
        target_confidence: Pass/fail target forwarded to each backtest run.

    Returns:
        ``(best_threshold, best_report)``.
    """
    suite = cases if cases is not None else expanded_historical_cases()

    def _score(candidate: float) -> BacktestReport:
        return run_historical_backtest(
            suite,
            settings=settings,
            alert_threshold=candidate,
            target_confidence=target_confidence,
        )

    best_threshold = min_threshold
    best_report = _score(min_threshold)

    # A zero step used to raise ZeroDivisionError; treat it as a one-point grid.
    steps = 0 if step == 0 else int(round((max_threshold - min_threshold) / step))

    # i == 0 is min_threshold itself, already scored above. It could never
    # replace the incumbent (every comparison ties and the threshold is not
    # higher), so starting at 1 saves one full backtest run.
    for i in range(1, steps + 1):
        threshold = min_threshold + i * step
        report = _score(threshold)

        better_confidence = report.confidence_rate > best_report.confidence_rate
        tied_confidence = math.isclose(
            report.confidence_rate, best_report.confidence_rate, rel_tol=0.0, abs_tol=1e-9
        )
        better_accuracy = report.accuracy > best_report.accuracy
        tied_accuracy = math.isclose(
            report.accuracy, best_report.accuracy, rel_tol=0.0, abs_tol=1e-9
        )
        prefer_higher_threshold = (
            tied_confidence and tied_accuracy and threshold > best_threshold
        )
        if better_confidence or (tied_confidence and better_accuracy) or prefer_higher_threshold:
            best_threshold = threshold
            best_report = report

    return best_threshold, best_report
logger = logging.getLogger(__name__)

# Default on-disk location, relative to this module's parent package.
_DAILY_DIR = Path(__file__).parent.parent / "data" / "gt_rolling" / "daily"
# Serializes the write-then-rename sequence in save_daily().
_store_lock = threading.Lock()


def daily_store_dir() -> Path:
    """Return the snapshot directory, honoring the GT_DAILY_STORE_DIR override."""
    override = str(os.environ.get("GT_DAILY_STORE_DIR", "")).strip()
    if override:
        return Path(override)
    return _DAILY_DIR


def utc_today() -> date:
    """Return the current date in UTC."""
    return datetime.now(timezone.utc).date()


def date_id(when: date | datetime | None = None) -> str:
    """Return the ISO-format day id for *when* (today in UTC when omitted)."""
    if when is None:
        when = utc_today()
    if isinstance(when, datetime):
        when = when.date()
    return when.isoformat()


@dataclass
class DailyRegionReading:
    """One region's risk reading inside a daily snapshot."""

    region: str
    composite_risk: float
    financial: float
    unrest: float
    conflict: float
    peak_score: float
    readings: int = 1
    last_captured_at: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the reading as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> DailyRegionReading:
        """Build a reading from a decoded JSON object.

        Missing or falsy numeric fields become 0.0 (``readings`` becomes 1);
        the region key is stripped and lower-cased.
        """
        return cls(
            region=str(raw.get("region") or "").strip().lower(),
            composite_risk=float(raw.get("composite_risk") or 0.0),
            financial=float(raw.get("financial") or 0.0),
            unrest=float(raw.get("unrest") or 0.0),
            conflict=float(raw.get("conflict") or 0.0),
            peak_score=float(raw.get("peak_score") or 0.0),
            readings=int(raw.get("readings") or 1),
            last_captured_at=str(raw.get("last_captured_at") or ""),
        )


@dataclass
class DailySnapshot:
    """All region readings stored for one day id."""

    date: str
    regions: dict[str, DailyRegionReading] = field(default_factory=dict)
    last_updated_at: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as a JSON-serializable dict."""
        return {
            "date": self.date,
            "last_updated_at": self.last_updated_at,
            "regions": {key: row.to_dict() for key, row in self.regions.items()},
        }

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> DailySnapshot:
        """Build a snapshot from a decoded JSON object; non-dict region rows are skipped."""
        regions: dict[str, DailyRegionReading] = {}
        for key, row in (raw.get("regions") or {}).items():
            if isinstance(row, dict):
                reading = DailyRegionReading.from_dict(row)
                regions[str(key).strip().lower()] = reading
        return cls(
            date=str(raw.get("date") or ""),
            regions=regions,
            last_updated_at=str(raw.get("last_updated_at") or ""),
        )


def _daily_path(day_id: str) -> Path:
    """Return the JSON path for *day_id* with "/" and ".." neutralized."""
    safe = day_id.replace("/", "-").replace("..", "")
    return daily_store_dir() / f"{safe}.json"


def _ensure_dir() -> None:
    """Create the snapshot directory (and parents) if it does not exist."""
    daily_store_dir().mkdir(parents=True, exist_ok=True)


def list_daily_ids(*, newest_first: bool = True, limit: int | None = None) -> list[str]:
    """Return stored day ids (file stems), sorted lexicographically."""
    _ensure_dir()
    ids = sorted(
        (path.stem for path in daily_store_dir().glob("*.json")),
        reverse=newest_first,
    )
    if limit is not None:
        return ids[:limit]
    return ids


def load_daily(day: date | str | None = None) -> DailySnapshot | None:
    """Load the snapshot for *day*, or ``None`` when absent or unreadable.

    Args:
        day: A ``date``/``datetime``, an already-formatted day id string (for
            example a value returned by ``list_daily_ids``), or ``None`` for
            today in UTC.

    Returns:
        The decoded snapshot, or ``None`` if the file does not exist, is not a
        JSON object, or cannot be decoded (the failure is logged).
    """
    # A string is already a day id. date_id() only understands date/datetime
    # and would raise AttributeError on str.isoformat().
    day_id = day if isinstance(day, str) else date_id(day)
    path = _daily_path(day_id)
    if not path.is_file():
        return None
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            return None
        return DailySnapshot.from_dict(raw)
    except (OSError, json.JSONDecodeError, TypeError, ValueError, AttributeError):
        # AttributeError covers a structurally wrong payload, e.g. "regions"
        # stored as a list, which has no .items().
        logger.exception("Failed to load GT daily reading %s", day_id)
        return None


def save_daily(snapshot: DailySnapshot) -> None:
    """Write *snapshot* to its day file via a temp file and rename."""
    _ensure_dir()
    path = _daily_path(snapshot.date)
    tmp = path.with_suffix(".json.tmp")
    payload = json.dumps(snapshot.to_dict(), indent=2, sort_keys=True)
    with _store_lock:
        tmp.write_text(payload, encoding="utf-8")
        tmp.replace(path)


def utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()
_DOMAIN_CONFLICT = "conflict"
_DOMAIN_UNREST = "unrest"
_DOMAIN_FINANCIAL = "financial"

_CONFLICT_HINTS = re.compile(
    r"\b(war|missile|strike|attack|military|invasion|troop|shelling|drone|bomb|nuclear)\b",
    re.I,
)
# "mobiliz" and "demonstrat" are stems. Wrapped in \b...\b without a suffix
# they could only match those exact tokens, never "mobilization" or
# "demonstrators", so they carry \w* here.
# NOTE(review): whole-word alternatives still miss plurals ("protests",
# "troops", "riots") — confirm whether that is intended.
_UNREST_HINTS = re.compile(
    r"\b(protest|rally|strike|riot|unrest|mobiliz\w*|demonstrat\w*|curfew|purge|coup)\b",
    re.I,
)
_FINANCIAL_HINTS = re.compile(
    r"\b(payroll|loan|default|bankruptcy|liquidity|sanction|supply\s+chain|delay|shortage)\b",
    re.I,
)


def _infer_domain(text: str, explicit: str | None = None) -> str:
    """Pick the GT domain for a feed item.

    Args:
        text: Item text searched for domain hint words.
        explicit: Domain supplied by the record; used as-is when it is one of
            the three known domains.

    Returns:
        ``"conflict"`` if a conflict hint matches, else ``"unrest"`` if an
        unrest hint matches, else ``"financial"``. Conflict is checked first,
        so shared words such as "strike" resolve to conflict.
    """
    # Tuple membership: a non-hashable explicit value (e.g. a list from a
    # malformed record) falls through instead of raising TypeError.
    if explicit in (_DOMAIN_CONFLICT, _DOMAIN_UNREST, _DOMAIN_FINANCIAL):
        return explicit
    if _CONFLICT_HINTS.search(text):
        return _DOMAIN_CONFLICT
    if _UNREST_HINTS.search(text):
        return _DOMAIN_UNREST
    # Financial is both the hinted and the fallback domain, so searching
    # _FINANCIAL_HINTS here could not change the result.
    return _DOMAIN_FINANCIAL
def _entities_from_record(record: dict[str, Any]) -> list[str]:
    """Collect entity strings from a feed record.

    Reads the ``entities``, ``tags`` and ``keywords`` fields (each either a
    sequence of values or one comma-separated string), then appends
    ``channel:<name>`` and ``source:<name>`` markers when those fields are set.

    Args:
        record: Raw feed record.

    Returns:
        Stripped, non-empty entity strings in field order. ``None`` items are
        skipped rather than stringified to ``"None"``.
    """
    entities: list[str] = []
    for key in ("entities", "tags", "keywords"):
        raw = record.get(key)
        if isinstance(raw, str):
            candidates = raw.split(",")
        elif isinstance(raw, (list, tuple)):
            candidates = [value for value in raw if value is not None]
        else:
            continue
        # Strip once per item, then drop the blanks.
        entities.extend(
            text for text in (str(value).strip() for value in candidates) if text
        )
    channel = str(record.get("channel") or "").strip()
    if channel:
        entities.append(f"channel:{channel}")
    source = str(record.get("source") or "").strip()
    if source:
        entities.append(f"source:{source}")
    return entities
"domain": domain, + "entities": _entities_from_record(record), + "coords": [lat, lng] if lat is not None and lng is not None else None, + "published": record.get("published"), + "risk_score": record.get("risk_score"), + } + + +def iter_telegram_posts(payload: dict[str, Any] | None) -> Iterable[dict[str, Any]]: + from services.telegram_translate import apply_post_translation, telegram_translate_enabled + + posts = list((payload or {}).get("posts") or []) + for post in posts: + if not isinstance(post, dict): + continue + if not (post.get("description") or post.get("title")): + continue + enriched = ( + apply_post_translation(post) + if telegram_translate_enabled() + else post + ) + yield normalize_feed_item(enriched, source_type="telegram_osint") + + +def iter_news_items(payload: list[dict[str, Any]] | None) -> Iterable[dict[str, Any]]: + for item in list(payload or []): + if not isinstance(item, dict): + continue + yield normalize_feed_item(item, source_type="news") + for article in list(item.get("articles") or []): + if isinstance(article, dict): + yield normalize_feed_item(article, source_type="news_cluster") + + +def iter_gdelt_features(payload: list[dict[str, Any]] | None) -> Iterable[dict[str, Any]]: + for feature in list(payload or []): + if not isinstance(feature, dict): + continue + props = dict(feature.get("properties") or {}) + geometry = dict(feature.get("geometry") or {}) + coords = None + if geometry.get("type") == "Point": + raw = geometry.get("coordinates") + if isinstance(raw, (list, tuple)) and len(raw) >= 2: + coords = [float(raw[1]), float(raw[0])] + record = { + "title": props.get("name") or props.get("title"), + "description": props.get("snippet") or props.get("description"), + "source": props.get("source") or "gdelt", + "coords": coords, + "published": props.get("date") or props.get("published"), + "region": props.get("location") or props.get("country"), + } + if record["title"] or record["description"]: + yield normalize_feed_item(record, 
source_type="gdelt") \ No newline at end of file diff --git a/backend/analytics/gt_alerts.py b/backend/analytics/gt_alerts.py new file mode 100644 index 0000000..f966960 --- /dev/null +++ b/backend/analytics/gt_alerts.py @@ -0,0 +1,128 @@ +"""Top strategic-risk alerts — ranked regions with map coordinates.""" + +from __future__ import annotations + +from typing import Any + +from analytics.integration import get_gt_engine +from analytics.settings import get_gt_settings + + +def _peak_score(props: dict[str, Any]) -> float: + composite = float(props.get("risk") or 0.0) + financial = float(props.get("financial") or 0.0) + unrest = float(props.get("unrest") or 0.0) + conflict = float(props.get("conflict") or 0.0) + return max(composite, financial, unrest, conflict) + + +def _valid_coords(coords: Any) -> tuple[float, float] | None: + if not isinstance(coords, (list, tuple)) or len(coords) < 2: + return None + try: + lng = float(coords[0]) + lat = float(coords[1]) + except (TypeError, ValueError): + return None + if not (-90.0 <= lat <= 90.0 and -180.0 <= lng <= 180.0): + return None + if abs(lat) < 0.001 and abs(lng) < 0.001: + return None + return lat, lng + + +def _region_label(region: str) -> str: + text = str(region or "").strip() + if not text: + return "unknown" + if "," in text: + parts = [piece.strip() for piece in text.split(",") if piece.strip()] + if len(parts) >= 2: + try: + lat = float(parts[0]) + lng = float(parts[-1]) + return f"{lat:.2f}°, {lng:.2f}°" + except ValueError: + pass + return text.replace("_", " ") + + +def parse_heatmap_alerts( + heatmap: dict[str, Any] | None, + *, + limit: int = 8, +) -> tuple[list[dict[str, Any]], int]: + """Return ranked alerts and count of regions plottable on the map.""" + features = (heatmap or {}).get("features") or [] + rows: list[dict[str, Any]] = [] + + for feature in features: + if not isinstance(feature, dict): + continue + geometry = feature.get("geometry") or {} + coords = 
def top_gt_alerts(*, limit: int = 8) -> dict[str, Any]:
    """Build the ranked top-regions payload for API / OpenClaw.

    Args:
        limit: Maximum number of alerts, forwarded to ``parse_heatmap_alerts``.

    Returns:
        A dict with the ranked ``alerts``, the heatmap feature count
        (``tracked_regions``), the engine's region count (``engine_regions``),
        the number of plottable regions (``plotted_regions``), the configured
        feature cap (``max_regions``) and an explanatory ``note``.
    """
    cfg = get_gt_settings()
    gt = get_gt_engine()

    if gt is None:
        # No engine available: report an empty collection.
        heatmap: dict[str, Any] = {"type": "FeatureCollection", "features": []}
        region_total = 0
    else:
        heatmap = gt.get_risk_heatmap()
        with gt._lock:  # noqa: SLF001 — intentional meta read
            region_total = len(gt._regions)

    ranked, plottable = parse_heatmap_alerts(heatmap, limit=limit)
    feature_total = len(heatmap.get("features") or [])
    cap = cfg.max_heatmap_features

    note = (
        f"Layer count is tracked GT regions (cap {cap}), not raw feed events. "
        "Only regions with valid coordinates appear on the map."
    )
    return {
        "alerts": ranked,
        "tracked_regions": feature_total,
        "engine_regions": region_total,
        "plotted_regions": plottable,
        "max_regions": cap,
        "note": note,
    }
+_SIGNAL_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "payroll_loan": [ + re.compile(r"payroll\s+loan", re.I), + re.compile(r"merchant\s+cash\s+advance", re.I), + re.compile(r"working\s+capital\s+loan", re.I), + ], + "supply_delay": [ + re.compile(r"supply\s+(chain\s+)?delay", re.I), + re.compile(r"shipping\s+delay", re.I), + re.compile(r"logistics\s+backlog", re.I), + re.compile(r"port\s+congestion", re.I), + ], + "elite_relocation": [ + re.compile(r"elite\s+(asset\s+)?relocation", re.I), + re.compile(r"oligarch\s+jet", re.I), + re.compile(r"private\s+jet\s+exodus", re.I), + re.compile(r"capital\s+flight", re.I), + ], + "purge": [ + re.compile(r"\bpurge\b", re.I), + re.compile(r"political\s+purge", re.I), + re.compile(r"security\s+apparatus\s+reshuffle", re.I), + ], + "protest_mobilize": [ + re.compile(r"protest\s+mobil", re.I), + re.compile(r"mass\s+rally", re.I), + re.compile(r"general\s+strike", re.I), + re.compile(r"\bstrike\b", re.I), + re.compile(r"\brally\b", re.I), + ], + "gps_jamming": [ + re.compile(r"gps\s+jam", re.I), + re.compile(r"gnss\s+interference", re.I), + re.compile(r"spoofing\s+spike", re.I), + ], + "troop_movement": [ + re.compile(r"troop\s+movement", re.I), + re.compile(r"military\s+mobil", re.I), + re.compile(r"armored\s+convoy", re.I), + re.compile(r"troop\s+buildup", re.I), + ], + "bank_run": [ + re.compile(r"bank\s+run", re.I), + re.compile(r"deposit\s+flight", re.I), + re.compile(r"liquidity\s+crunch", re.I), + ], + "sanctions_escalation": [ + re.compile(r"sanctions?\s+escalat", re.I), + re.compile(r"new\s+sanctions?", re.I), + re.compile(r"export\s+controls?\s+tighten", re.I), + ], + "ceasefire_break": [ + re.compile(r"ceasefire\s+(broken|violated|collapse)", re.I), + re.compile(r"truce\s+end", re.I), + ], +} + +_SIGNAL_DOMAINS: dict[str, DomainName] = { + "payroll_loan": "financial", + "supply_delay": "financial", + "bank_run": "financial", + "sanctions_escalation": "financial", + "protest_mobilize": "unrest", + "purge": "unrest", 
def __init__(self, settings: GTAnalyticsSettings | None = None) -> None:
    """Create an engine with empty region state and default tables.

    Args:
        settings: Analytics settings; falls back to ``get_gt_settings()``.
    """
    self.settings = settings or get_gt_settings()
    self.G: nx.Graph = nx.Graph()
    self._regions: dict[str, RegionState] = {}
    self._history: dict[str, list[HistoryEntry]] = defaultdict(list)
    self._seen_item_ids: set[str] = set()
    self._lock = threading.RLock()

    # Copy the inner tables too. dict(_DEFAULT_LIKELIHOODS) alone would leave
    # every engine sharing, and able to mutate, the module-level defaults.
    self.likelihoods = {
        domain: dict(table) for domain, table in _DEFAULT_LIKELIHOODS.items()
    }
    self.signal_weights = dict(_DEFAULT_SIGNAL_WEIGHTS)
    self.signal_weights.update(self.settings.signal_weight_overrides)

    self._base_prior = float(self.settings.base_prior)
def composite_risk(self, region: str) -> float:
    """Return the weighted mean of the region's three domain beliefs.

    Weights are financial 0.25, unrest 0.35 and conflict 0.40. A domain with
    no stored belief contributes the engine's base prior.
    """
    domain_weights = (("financial", 0.25), ("unrest", 0.35), ("conflict", 0.40))
    with self._lock:
        beliefs = self._region_state(region).priors
        weighted = 0.0
        norm = 0.0
        for name, share in domain_weights:
            weighted += float(beliefs.get(name, self._base_prior)) * share
            norm += share
        if not norm:
            return self._base_prior
        return float(weighted / norm)
+ if source and "t.me/" in source.lower() and signals: + for key in list(signals): + signals[key] = round(signals[key] * 1.05, 3) + + return signals + + def _deviation_score(self, region: str, domain: DomainName, strength: float) -> float: + """Deviation from rolling regional norm — herding/coordination detector input.""" + with self._lock: + state = self._region_state(region) + baseline = max(state.signal_volume[domain], 1.0) + state.signal_volume[domain] += strength + state.update_count += 1 + return float(strength / baseline) + + def bayesian_update( + self, + region: str, + domain: DomainName, + evidence_strength: float = 1.0, + ) -> float: + """ + Bayesian update: P(distress|evidence) from likelihood table and prior. + + evidence_strength scales how far belief moves toward the likelihood posterior. + """ + domain = domain if domain in _DOMAINS else "financial" + lik = self.likelihoods.get(domain, self.likelihoods["financial"]) + + with self._lock: + state = self._region_state(region) + prior = float(state.priors.get(domain, self._base_prior)) + + p_e_given_d = lik["distress"] + p_e_given_not_d = lik["normal"] + p_e = (p_e_given_d * prior) + (p_e_given_not_d * (1.0 - prior)) + + if p_e <= 0: + posterior = prior + else: + posterior = (p_e_given_d * prior) / p_e + + scaled = prior + (posterior - prior) * float(evidence_strength) + clipped = float(np.clip(scaled, self.settings.min_prob, self.settings.max_prob)) + state.priors[domain] = clipped + return clipped + + def _update_graph( + self, + region: str, + entities: list[str], + strength: float, + coords: list[float] | None, + ) -> None: + region_key = str(region or "global").strip().lower() or "global" + self.G.add_node(region_key, node_type="region", region=region_key) + if coords and len(coords) >= 2: + self.G.nodes[region_key]["coords"] = coords + + for entity in entities: + entity_key = str(entity).strip() + if not entity_key: + continue + self.G.add_node(entity_key, node_type="entity", region=region_key) + 
self.G.add_edge( + region_key, + entity_key, + weight=float(strength), + timestamp=self._utcnow(), + ) + + for i, e1 in enumerate(entities): + for e2 in entities[i + 1 :]: + k1, k2 = str(e1).strip(), str(e2).strip() + if not k1 or not k2: + continue + self.G.add_edge( + k1, + k2, + weight=float(strength), + timestamp=self._utcnow(), + ) + + def process_feed_item(self, item: dict[str, Any]) -> dict[str, Any]: + """Process one normalized feed item and update beliefs + contagion graph.""" + region = str(item.get("region") or item.get("geotag") or "global").strip().lower() + text = str(item.get("text") or "") + source = str(item.get("source") or "unknown") + explicit_domain = str(item.get("domain") or "").strip().lower() + entities = list(item.get("entities") or []) + coords = item.get("coords") + item_id = str(item.get("id") or f"{source}|{hash(text)}") + + if self.settings.watched_channels: + channel = "" + for entity in entities: + if str(entity).startswith("channel:"): + channel = str(entity).split(":", 1)[-1].lower() + break + if channel and channel not in {c.lower() for c in self.settings.watched_channels}: + return { + "region": region, + "skipped": True, + "reason": "channel_not_watched", + "risk_score": self.composite_risk(region), + "signals": {}, + } + + with self._lock: + if item_id and item_id in self._seen_item_ids: + return { + "region": region, + "skipped": True, + "reason": "duplicate", + "risk_score": self.composite_risk(region), + "signals": {}, + } + if item_id: + self._seen_item_ids.add(item_id) + + signals = self.classify_signals(text, source) + total_strength = float(sum(signals.values())) + + if total_strength <= 0: + return { + "region": region, + "risk_score": self.composite_risk(region), + "signals": {}, + "contagion_potential": self._get_contagion_score(region), + } + + domains_touched: set[DomainName] = set() + if explicit_domain in _DOMAINS: + domains_touched.add(explicit_domain) + for signal_name in signals: + 
domains_touched.add(_SIGNAL_DOMAINS.get(signal_name, explicit_domain or "financial")) + if not domains_touched: + domains_touched.add("financial") + + evidence_strength = min( + total_strength / max(self.settings.evidence_scale, 0.1), + self.settings.evidence_cap, + ) + + posteriors: dict[str, float] = {} + deviation = 0.0 + for domain in domains_touched: + prior = self.get_prior(region, domain) + deviation = max(deviation, self._deviation_score(region, domain, total_strength)) + posterior = self.bayesian_update( + region=region, + domain=domain, + evidence_strength=evidence_strength * (1.0 + 0.15 * deviation), + ) + posteriors[domain] = posterior + + if isinstance(coords, (list, tuple)) and len(coords) >= 2: + with self._lock: + state = self._region_state(region) + try: + state.coords = [float(coords[0]), float(coords[1])] + except (TypeError, ValueError): + pass + + self._update_graph(region, entities, total_strength, coords if isinstance(coords, list) else None) + + composite = self.composite_risk(region) + entry = HistoryEntry( + timestamp=self._utcnow(), + domain=explicit_domain if explicit_domain in _DOMAINS else next(iter(domains_touched)), + signals=signals, + strength=total_strength, + prior=self._base_prior, + posterior=composite, + source=source, + deviation_score=deviation, + ) + with self._lock: + history = self._history[region] + history.append(entry) + max_hist = max(10, int(self.settings.max_history_per_region)) + if len(history) > max_hist: + self._history[region] = history[-max_hist:] + + logger.info( + "GT update region=%s domains=%s composite=%.3f signals=%d deviation=%.2f", + region, + ",".join(sorted(domains_touched)), + composite, + len(signals), + deviation, + ) + + return { + "region": region, + "domains": sorted(domains_touched), + "domain_posteriors": posteriors, + "risk_score": composite, + "signals": signals, + "deviation_score": deviation, + "contagion_potential": self._get_contagion_score(region), + "interpretation": 
self._interpret_risk(composite), + } + + def _interpret_risk(self, risk: float) -> str: + threshold = float(self.settings.high_risk_threshold) + if risk >= threshold: + return ( + f"Elevated strategic risk ({risk:.2f} ≥ {threshold:.2f}). " + "Watch for costly-signal clustering and cross-region contagion." + ) + if risk >= threshold * 0.7: + return "Moderate risk — monitor for herding and repeated costly signals." + return "Baseline risk — no strong costly-signal cluster detected." + + def _get_contagion_score(self, region: str) -> float: + """Graph-based contagion: mean composite risk of graph neighbors.""" + region_key = str(region or "global").strip().lower() or "global" + with self._lock: + if region_key not in self.G: + return 0.0 + try: + neighbors = list(self.G.neighbors(region_key)) + except nx.NetworkXError: + return 0.0 + if not neighbors: + return 0.0 + neighbor_risks = [self.composite_risk(str(n)) for n in neighbors] + return float(np.mean(neighbor_risks)) + + def compute_herding_clusters(self) -> list[dict[str, Any]]: + """Louvain community detection on entity graph (coordination/herding proxy).""" + with self._lock: + if self.G.number_of_edges() == 0: + return [] + + weighted = nx.Graph() + for u, v, data in self.G.edges(data=True): + weight = float(data.get("weight") or 0.0) + if weight < self.settings.louvain_min_weight: + continue + if weighted.has_edge(u, v): + weighted[u][v]["weight"] = weighted[u][v].get("weight", 0.0) + weight + else: + weighted.add_edge(u, v, weight=weight) + + if weighted.number_of_edges() == 0: + return [] + + try: + communities = list(nx.community.louvain_communities(weighted, weight="weight", seed=42)) + except Exception as exc: + logger.warning("Louvain clustering failed: %s", exc) + return [] + + clusters: list[dict[str, Any]] = [] + for idx, community in enumerate(communities): + members = sorted(str(node) for node in community) + region_members = [m for m in members if m in self._regions] + risks = 
[self.composite_risk(r) for r in region_members] + clusters.append( + { + "cluster_id": idx, + "size": len(members), + "members": members[:50], + "mean_risk": float(np.mean(risks)) if risks else self._base_prior, + "regions": region_members, + } + ) + clusters.sort(key=lambda row: row["mean_risk"], reverse=True) + return clusters + + def get_risk_heatmap(self) -> dict[str, Any]: + """GeoJSON FeatureCollection for frontend risk overlay.""" + features: list[dict[str, Any]] = [] + with self._lock: + items = list(self._regions.items())[: max(1, self.settings.max_heatmap_features)] + + for region, state in items: + coords = state.coords + geometry: dict[str, Any] + if coords and len(coords) >= 2: + geometry = {"type": "Point", "coordinates": [float(coords[1]), float(coords[0])]} + else: + geometry = {"type": "Point", "coordinates": [0.0, 0.0]} + + composite = self.composite_risk(region) + features.append( + { + "type": "Feature", + "properties": { + "region": region, + "risk": round(composite, 4), + "financial": round(float(state.priors.get("financial", self._base_prior)), 4), + "unrest": round(float(state.priors.get("unrest", self._base_prior)), 4), + "conflict": round(float(state.priors.get("conflict", self._base_prior)), 4), + "contagion": round(self._get_contagion_score(region), 4), + "updates": state.update_count, + }, + "geometry": geometry, + } + ) + + return {"type": "FeatureCollection", "features": features} + + def get_dossier(self, region: str) -> dict[str, Any]: + """Explainable GT rationale and recent signal history for a region.""" + region_key = str(region or "global").strip().lower() or "global" + with self._lock: + state = self._region_state(region_key) + recent = list(self._history.get(region_key, [])[-10:]) + + composite = self.composite_risk(region_key) + return { + "region": region_key, + "current_risk": round(composite, 4), + "domain_risks": { + domain: round(float(state.priors.get(domain, self._base_prior)), 4) + for domain in _DOMAINS + }, + 
"recent_signals": [ + { + "timestamp": entry.timestamp, + "domain": entry.domain, + "signals": entry.signals, + "strength": entry.strength, + "posterior": round(entry.posterior, 4), + "source": entry.source, + "deviation_score": round(entry.deviation_score, 3), + } + for entry in recent + ], + "contagion_risk": round(self._get_contagion_score(region_key), 4), + "herding_clusters": self.compute_herding_clusters()[:5], + "interpretation": self._interpret_risk(composite), + "scenarios": self._build_scenarios(region_key, composite), + } + + def _build_scenarios(self, region: str, composite: float) -> list[dict[str, str]]: + threshold = float(self.settings.high_risk_threshold) + if composite < threshold * 0.7: + return [ + { + "name": "Status quo", + "summary": "Signals remain diffuse; no coordinated costly-signal cascade.", + } + ] + if composite < threshold: + return [ + { + "name": "Escalation watch", + "summary": "Rising costly-signal density — coordination risk within 4-8 weeks.", + }, + { + "name": "False alarm", + "summary": "Cheap-talk amplification without follow-on costly signals.", + }, + ] + return [ + { + "name": "Contagion spread", + "summary": "High posterior + graph coupling — adjacent regions likely to update upward.", + }, + { + "name": "Localized shock", + "summary": "Region-specific distress; contagion limited if graph neighbors stay quiet.", + }, + ] + + def snapshot(self) -> dict[str, Any]: + """Serialize engine state for debugging or persistence.""" + with self._lock: + return { + "regions": { + region: { + "priors": dict(state.priors), + "coords": state.coords, + "updates": state.update_count, + } + for region, state in self._regions.items() + }, + "graph_nodes": self.G.number_of_nodes(), + "graph_edges": self.G.number_of_edges(), + "processed_items": len(self._seen_item_ids), + } \ No newline at end of file diff --git a/backend/analytics/historical_events.py b/backend/analytics/historical_events.py new file mode 100644 index 0000000..6239b11 --- 
/dev/null +++ b/backend/analytics/historical_events.py @@ -0,0 +1,649 @@ +"""Curated historical early-warning cases for GT backtesting. + +Each positive case bundles pre-crisis costly-signal snippets drawn from documented +precursors (financial, unrest, conflict). Negative cases are cheap-talk controls. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +CaseKind = Literal["positive", "negative"] + + +@dataclass(frozen=True) +class BacktestFeed: + text: str + source: str = "backtest" + domain: str = "financial" + days_before_event: int = 30 + + +@dataclass(frozen=True) +class HistoricalCase: + """Single labeled backtest scenario.""" + + case_id: str + name: str + region: str + domain: str + kind: CaseKind + event_date: str + description: str + feeds: tuple[BacktestFeed, ...] = field(default_factory=tuple) + tags: tuple[str, ...] = field(default_factory=tuple) + + def to_feed_dicts(self) -> list[dict[str, Any]]: + items: list[dict[str, Any]] = [] + for idx, feed in enumerate(self.feeds): + items.append( + { + "id": f"{self.case_id}-{idx}", + "text": feed.text, + "source": feed.source, + "region": self.region, + "domain": feed.domain or self.domain, + "published": feed.days_before_event, + } + ) + return items + + +def _variant_case(case: HistoricalCase, suffix: str, feeds: tuple[BacktestFeed, ...]) -> HistoricalCase: + return HistoricalCase( + case_id=f"{case.case_id}__{suffix}", + name=f"{case.name} ({suffix})", + region=case.region, + domain=case.domain, + kind=case.kind, + event_date=case.event_date, + description=case.description, + feeds=feeds, + tags=case.tags + (f"variant:{suffix}",), + ) + + +def expanded_historical_cases() -> tuple[HistoricalCase, ...]: + """Base suite plus paraphrase variants for statistical confidence.""" + base = list(default_historical_cases()) + extras: list[HistoricalCase] = [] + + variant_feeds: dict[str, tuple[tuple[BacktestFeed, ...], ...]] = { + "fin_2008_us": 
( + ( + BacktestFeed( + "Small businesses turn to payroll loan products as credit lines freeze.", + domain="financial", + days_before_event=100, + ), + BacktestFeed( + "FDIC monitors liquidity crunch; interbank spreads widen sharply.", + domain="financial", + days_before_event=60, + ), + ), + ( + BacktestFeed( + "Merchant cash advance volumes spike; payroll loan demand at record highs.", + domain="financial", + days_before_event=80, + ), + BacktestFeed( + "Money market funds see inflows as deposit flight from regional banks continues.", + domain="financial", + days_before_event=40, + ), + ), + ), + "fin_2020_supply": ( + ( + BacktestFeed( + "Electronics firms report shipping delay and port congestion across Pearl River Delta.", + domain="financial", + days_before_event=45, + ), + BacktestFeed( + "Supply chain delay widens; logistics backlog hits automotive suppliers.", + domain="financial", + days_before_event=20, + ), + ), + ( + BacktestFeed( + "Container shortage fuels shipping delay; supply chain delay indices jump.", + domain="financial", + days_before_event=35, + ), + BacktestFeed( + "Electronics assemblers warn of logistics backlog as port congestion spreads.", + domain="financial", + days_before_event=20, + ), + BacktestFeed( + "Automotive suppliers flag supply chain delay after factory shutdowns in Hubei.", + domain="financial", + days_before_event=10, + ), + ), + ), + "fin_2022_sanctions": ( + ( + BacktestFeed( + "Treasury drafts new sanctions escalation package on energy and finance sectors.", + domain="financial", + days_before_event=30, + ), + BacktestFeed( + "Capital flight accelerates; elite relocation flights depart Moscow airports.", + domain="financial", + days_before_event=14, + ), + ), + ), + "unrest_arab_spring_egypt": ( + ( + BacktestFeed( + "Cairo activists schedule mass rally; protest mobilization leaflets distributed.", + domain="unrest", + days_before_event=18, + ), + BacktestFeed( + "Labor federations call general strike; strike posters 
cover downtown.", + domain="unrest", + days_before_event=8, + ), + ), + ), + "conflict_2022_ukraine": ( + ( + BacktestFeed( + "Convoy of armored vehicles confirms troop movement near Sumy Oblast.", + source="t.me/war_monitor", + domain="conflict", + days_before_event=20, + ), + BacktestFeed( + "GNSS interference warnings follow GPS jamming spike along Belarus border.", + source="t.me/osintdefender", + domain="conflict", + days_before_event=10, + ), + ), + ( + BacktestFeed( + "Military mobilization notices circulate; troop buildup confirmed by satellite firms.", + domain="conflict", + days_before_event=12, + ), + ), + ), + "neg_weather_us": ( + ( + BacktestFeed("Autumn foliage peaks in Vermont; pleasant hiking weather continues."), + BacktestFeed("County fair announces pie contest and livestock exhibitions."), + ), + ( + BacktestFeed("Meteorologists predict mild hurricane season remainder for Gulf Coast."), + ), + ), + "neg_sports_uk": ( + ( + BacktestFeed("Rugby Six Nations standings update after weekend fixtures."), + BacktestFeed("Local marathon registration opens for charity runners."), + ), + ), + "neg_tech_global": ( + ( + BacktestFeed("Chipmaker announces efficiency gains in next-generation processor."), + BacktestFeed("Cloud provider opens new green datacenter in Nordic region."), + ), + ), + } + + for case in base: + variants = variant_feeds.get(case.case_id, ()) + for idx, feeds in enumerate(variants): + extras.append(_variant_case(case, f"v{idx+1}", feeds)) + + # Additional cheap-talk controls to widen negative sample + cheap_talk_regions = ( + ("australia", "Museum opens contemporary art exhibit to strong attendance."), + ("spain", "Tomato harvest festival scheduled; regional trains add weekend service."), + ("south_korea", "K-pop group announces world tour dates for autumn."), + ("mexico", "Coastal cleanup volunteers restore beach habitats before holiday season."), + ("sweden", "City council approves bike lane expansion along waterfront."), + ("norway", 
"Salmon exports remain stable; fishing fleets report normal catch volumes."), + ("italy", "Truffle festival returns; restaurants publish seasonal tasting menus."), + ("poland", "University researchers release open-source astronomy software."), + ("thailand", "Monsoon rains ease; rice planting proceeds on normal schedule."), + ("vietnam", "Electronics assembly plants report steady export order books."), + ("south_africa", "Wildlife reserve reports rising ecotourism bookings."), + ("argentina", "Wine harvest festival opens; export cooperatives meet volume targets."), + ("netherlands", "Cycling championship draws international teams to canal district."), + ("belgium", "Chocolate exporters report stable holiday shipment schedules."), + ("portugal", "Offshore wind auction attracts multiple renewable bidders."), + ("greece", "Island ferry operators add routes ahead of summer travel season."), + ("turkey", "Cotton harvest forecast unchanged; textile orders stable."), + ("indonesia", "Volcano monitoring reports routine activity; tourism continues."), + ("philippines", "Coconut processors report normal logistics to export markets."), + ("malaysia", "Palm oil shipments on schedule; port throughput normal."), + ("new_zealand", "Sheep shearing competition draws rural crowds."), + ("ireland", "Tech conference highlights open-source database tooling."), + ("finland", "Sauna culture festival celebrates heritage with local artisans."), + ("denmark", "Wind turbine maintenance contracts renewed on prior terms."), + ("austria", "Ski resorts prepare slopes after early snowfall."), + ("switzerland", "Watchmakers unveil mechanical movement prototypes at trade fair."), + ("czech_republic", "Glassmakers export decorative pieces ahead of holiday season."), + ("romania", "Carpathian hiking trails reopen after spring maintenance."), + ("hungary", "Thermal bath tourism bookings rise for winter wellness season."), + ("peru", "Coffee cooperatives report stable harvest and export schedules."), + 
("colombia", "Flower exporters prepare Valentine's shipments on normal cadence."), + ("morocco", "Citrus harvest meets forecasts; agricultural credit unchanged."), + ("kenya", "Tea auction volumes steady; freight routes operate normally."), + ("nigeria", "Nollywood studio announces family comedy release dates."), + ("ethiopia", "Coffee ceremony festival highlights regional bean varieties."), + ("saudi_arabia", "Desert conservation project plants drought-resistant shrubs."), + ("uae", "Airport duty-free operators expand luxury retail concourse."), + ("qatar", "Stadium operators prepare hospitality packages for sporting events."), + ("singapore", "Port authority reports container throughput on seasonal trend."), + ("hong_kong", "Art auction previews draw collectors to harborfront gallery."), + ("chile", "Vineyard tours report strong bookings ahead of harvest festival weekend."), + ("uruguay", "Beef exporters maintain steady shipment schedules to European buyers."), + ("iceland", "Geothermal spa resorts report normal winter visitor volumes."), + ("luxembourg", "Fund administrators publish routine quarterly disclosure filings."), + ("slovakia", "Mountain lodges prepare ski season openings after early snowfall."), + ("croatia", "Adriatic ferry operators add summer routes on prior timetable."), + ("bulgaria", "Rose oil cooperatives report stable export volumes to fragrance buyers."), + ("serbia", "Danube barge traffic proceeds on normal freight schedules."), + ("latvia", "Timber mills export lumber on unchanged contract terms."), + ("lithuania", "Baltic wind farms complete scheduled turbine maintenance rotations."), + ("estonia", "Digital residency applications processed at routine monthly pace."), + ("panama", "Canal transit volumes remain on seasonal trend; shipping fees unchanged."), + ) + for idx, (region, text) in enumerate(cheap_talk_regions): + extras.append( + HistoricalCase( + case_id=f"neg_extra_{idx:02d}", + name=f"Benign regional news ({region})", + 
region=region, + domain="financial", + kind="negative", + event_date="2020-01-01", + description="Expanded cheap-talk control.", + feeds=(BacktestFeed(text),), + tags=("control", "expanded"), + ) + ) + + return tuple(base + extras) + + +def default_historical_cases() -> tuple[HistoricalCase, ...]: + """Benchmark suite — expand as new validated precursors are added.""" + return ( + # ── Financial distress ───────────────────────────────────────────── + HistoricalCase( + case_id="fin_2008_us", + name="2008 US financial crisis", + region="united_states", + domain="financial", + kind="positive", + event_date="2008-09-15", + description="Payroll-loan distress, liquidity crunch, and deposit flight precursors.", + tags=("2008", "financial", "lehman"), + feeds=( + BacktestFeed( + "Franchise operators increasingly rely on payroll loan facilities as working capital tightens.", + domain="financial", + days_before_event=120, + ), + BacktestFeed( + "Regional banks report liquidity crunch; CFOs warn of merchant cash advance reliance.", + domain="financial", + days_before_event=90, + ), + BacktestFeed( + "Deposit flight accelerates at mid-size lenders; analysts flag bank run risk.", + domain="financial", + days_before_event=45, + ), + ), + ), + HistoricalCase( + case_id="fin_2020_supply", + name="COVID supply-chain shock", + region="china", + domain="financial", + kind="positive", + event_date="2020-02-01", + description="Port congestion and logistics backlog ahead of global supply shock.", + tags=("covid", "supply_chain", "financial"), + feeds=( + BacktestFeed( + "Major port congestion reported; shipping delay spreads to electronics suppliers.", + domain="financial", + days_before_event=60, + ), + BacktestFeed( + "Automakers warn of supply chain delay and logistics backlog across Wuhan corridor.", + domain="financial", + days_before_event=30, + ), + BacktestFeed( + "Factory restarts slip as supply delay and port congestion persist into Q1.", + domain="financial", + 
days_before_event=14, + ), + ), + ), + HistoricalCase( + case_id="fin_2022_sanctions", + name="Russia sanctions escalation", + region="russia", + domain="financial", + kind="positive", + event_date="2022-02-24", + description="Sanctions escalation and capital flight ahead of invasion.", + tags=("sanctions", "ukraine", "financial"), + feeds=( + BacktestFeed( + "Western allies prepare new sanctions escalation on major Russian banks.", + domain="financial", + days_before_event=45, + ), + BacktestFeed( + "Oligarch jet movements suggest elite relocation and capital flight from Moscow.", + domain="financial", + days_before_event=21, + ), + BacktestFeed( + "Central bank intervenes as new sanctions tighten export controls on finance sector.", + domain="financial", + days_before_event=10, + ), + ), + ), + # ── Civil unrest ───────────────────────────────────────────────── + HistoricalCase( + case_id="unrest_arab_spring_tunisia", + name="Arab Spring — Tunisia", + region="tunisia", + domain="unrest", + kind="positive", + event_date="2010-12-17", + description="Protest mobilization and strike waves before Jasmine Revolution.", + tags=("arab_spring", "unrest"), + feeds=( + BacktestFeed( + "Student groups announce protest mobilization after vendor self-immolation.", + domain="unrest", + days_before_event=14, + ), + BacktestFeed( + "Mass rally planned in Tunis; general strike called by labor unions.", + domain="unrest", + days_before_event=7, + ), + ), + ), + HistoricalCase( + case_id="unrest_arab_spring_egypt", + name="Arab Spring — Egypt", + region="egypt", + domain="unrest", + kind="positive", + event_date="2011-01-25", + description="Mobilization spikes and security reshuffles before Tahrir.", + tags=("arab_spring", "unrest"), + feeds=( + BacktestFeed( + "Opposition calls protest mobilization in Cairo; strike notices circulate online.", + domain="unrest", + days_before_event=21, + ), + BacktestFeed( + "Reports of political purge within interior ministry security apparatus 
reshuffle.", + domain="unrest", + days_before_event=10, + ), + BacktestFeed( + "Mass rally and strike coordination spreads; rally posters appear in Alexandria.", + domain="unrest", + days_before_event=5, + ), + ), + ), + HistoricalCase( + case_id="unrest_2019_chile", + name="Chile 2019 metro protests", + region="chile", + domain="unrest", + kind="positive", + event_date="2019-10-18", + description="Transit fare protests escalate to general strike.", + tags=("unrest", "latam"), + feeds=( + BacktestFeed( + "Students organize mass rally after metro fare hike; protest mobilization trending.", + domain="unrest", + days_before_event=10, + ), + BacktestFeed( + "Unions announce general strike; rally and strike hashtags spike nationwide.", + domain="unrest", + days_before_event=3, + ), + ), + ), + # ── Conflict / war ─────────────────────────────────────────────── + HistoricalCase( + case_id="conflict_2022_ukraine", + name="2022 Ukraine invasion buildup", + region="ukraine", + domain="conflict", + kind="positive", + event_date="2022-02-24", + description="Troop movement and GPS jamming precursors on northern border.", + tags=("ukraine", "conflict"), + feeds=( + BacktestFeed( + "OSINT reports troop movement and armored convoy near Belarus border.", + source="t.me/war_monitor", + domain="conflict", + days_before_event=30, + ), + BacktestFeed( + "GPS jamming spike reported along northern corridor; GNSS interference warnings issued.", + source="t.me/osintdefender", + domain="conflict", + days_before_event=14, + ), + BacktestFeed( + "Satellite imagery shows troop buildup; military mobilization near Kharkiv axis.", + domain="conflict", + days_before_event=7, + ), + ), + ), + HistoricalCase( + case_id="conflict_2023_gaza", + name="2023 Gaza conflict escalation", + region="israel", + domain="conflict", + kind="positive", + event_date="2023-10-07", + description="Ceasefire breakdown and troop movement signals.", + tags=("gaza", "conflict"), + feeds=( + BacktestFeed( + "Border units 
report troop movement near Gaza envelope; ceasefire broken overnight.", + domain="conflict", + days_before_event=14, + ), + BacktestFeed( + "Truce end announced; armored convoy repositioning reported by local observers.", + domain="conflict", + days_before_event=5, + ), + ), + ), + HistoricalCase( + case_id="conflict_2020_nagorno", + name="2020 Nagorno-Karabakh renewal", + region="armenia", + domain="conflict", + kind="positive", + event_date="2020-09-27", + description="Artillery and troop buildup precursors.", + tags=("caucasus", "conflict"), + feeds=( + BacktestFeed( + "Drone strikes reported on line of contact; troop movement on Armenian-Azeri border.", + domain="conflict", + days_before_event=21, + ), + BacktestFeed( + "GPS jamming spike reported in conflict zone; military mobilization notices leaked.", + domain="conflict", + days_before_event=7, + ), + ), + ), + # ── Recent financial / corporate distress pattern ──────────────── + HistoricalCase( + case_id="fin_2023_banking", + name="2023 regional banking stress", + region="united_states", + domain="financial", + kind="positive", + event_date="2023-03-10", + description="Deposit flight and liquidity stress (SVB precursor pattern).", + tags=("svb", "financial", "2023"), + feeds=( + BacktestFeed( + "Tech lenders face deposit flight; VC portfolio companies move payroll to money market funds.", + domain="financial", + days_before_event=21, + ), + BacktestFeed( + "Analysts warn liquidity crunch at regional banks holding long-duration bonds.", + domain="financial", + days_before_event=7, + ), + ), + ), + # ── Negative controls (cheap talk / benign) ───────────────────── + HistoricalCase( + case_id="neg_weather_us", + name="Benign weather coverage", + region="united_states", + domain="financial", + kind="negative", + event_date="2019-06-01", + description="No costly signals — should remain near baseline.", + tags=("control",), + feeds=( + BacktestFeed("Sunny weekend expected across the Midwest with mild 
temperatures."), + BacktestFeed("Local festival draws crowds; farmers market expands summer hours."), + ), + ), + HistoricalCase( + case_id="neg_sports_uk", + name="Benign sports coverage", + region="uk", + domain="unrest", + kind="negative", + event_date="2018-07-01", + description="Sports chatter without mobilization costly signals.", + tags=("control",), + feeds=( + BacktestFeed("Premier league season review: top scorers and transfer rumors."), + BacktestFeed("Cricket test match ends early due to rain delay at Lord's."), + ), + ), + HistoricalCase( + case_id="neg_tech_global", + name="Benign tech product launch", + region="global", + domain="financial", + kind="negative", + event_date="2021-09-01", + description="Corporate product news without distress markers.", + tags=("control",), + feeds=( + BacktestFeed("Smartphone maker unveils new camera features at annual keynote."), + BacktestFeed("Quarterly earnings beat expectations; dividend unchanged."), + ), + ), + HistoricalCase( + case_id="neg_tourism_france", + name="Benign tourism recovery", + region="france", + domain="unrest", + kind="negative", + event_date="2022-08-01", + description="Travel sector recovery without unrest signals.", + tags=("control",), + feeds=( + BacktestFeed("Paris hotels report record summer bookings as tourism rebounds."), + BacktestFeed("Airline adds routes to Nice and Marseille for holiday travelers."), + ), + ), + HistoricalCase( + case_id="neg_science_japan", + name="Benign science news", + region="japan", + domain="conflict", + kind="negative", + event_date="2020-11-01", + description="Research coverage without conflict markers.", + tags=("control",), + feeds=( + BacktestFeed("Astronomy team publishes comet observations from Mount Fuji observatory."), + BacktestFeed("Robotics lab demonstrates warehouse automation prototype."), + ), + ), + HistoricalCase( + case_id="neg_agriculture_brazil", + name="Benign agriculture report", + region="brazil", + domain="financial", + 
kind="negative", + event_date="2017-03-01", + description="Commodity harvest update without supply distress.", + tags=("control",), + feeds=( + BacktestFeed("Soybean harvest forecast revised upward; export volumes steady."), + BacktestFeed("Coffee cooperative reports normal shipping schedules to European buyers."), + ), + ), + HistoricalCase( + case_id="neg_culture_india", + name="Benign culture coverage", + region="india", + domain="unrest", + kind="negative", + event_date="2016-11-01", + description="Festival coverage without mobilization.", + tags=("control",), + feeds=( + BacktestFeed("Diwali celebrations begin; cities decorate markets with lights."), + BacktestFeed("Film festival opens in Mumbai with premiere screenings."), + ), + ), + HistoricalCase( + case_id="neg_infrastructure_canada", + name="Benign infrastructure ribbon-cutting", + region="canada", + domain="financial", + kind="negative", + event_date="2015-05-01", + description="Municipal news without financial stress.", + tags=("control",), + feeds=( + BacktestFeed("New light-rail segment opens on schedule; commute times improve."), + BacktestFeed("Municipal bond issuance funds library renovation at prior rates."), + ), + ), + ) \ No newline at end of file diff --git a/backend/analytics/integration.py b/backend/analytics/integration.py new file mode 100644 index 0000000..82ac3cd --- /dev/null +++ b/backend/analytics/integration.py @@ -0,0 +1,198 @@ +"""Singleton GT engine and feed-batch integration hooks.""" + +from __future__ import annotations + +import logging +import threading +from datetime import datetime, timezone +from typing import Any + +from analytics.feed_adapter import iter_gdelt_features, iter_news_items, iter_telegram_posts +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.settings import gt_analytics_enabled, get_gt_settings, gt_engine_operational, gt_louvain_enabled, gt_scheduled_ingest_enabled +from services.fetchers._store import _data_lock, _mark_fresh, 
latest_data + +logger = logging.getLogger(__name__) + +_engine: GT_EarlyWarning | None = None +_engine_lock = threading.Lock() + + +def get_gt_engine() -> GT_EarlyWarning | None: + """Return the shared engine when analytics are enabled and runtime allows it.""" + global _engine + if not gt_engine_operational(): + return None + with _engine_lock: + if _engine is None: + _engine = GT_EarlyWarning(get_gt_settings()) + logger.info("Strategic Risk Analytics engine initialized") + return _engine + + +def reset_gt_engine() -> None: + """Reset singleton — intended for tests.""" + global _engine + get_gt_settings.cache_clear() + with _engine_lock: + _engine = None + + +def process_feed_item(item: dict[str, Any]) -> dict[str, Any] | None: + """Process a normalized feed item if analytics are enabled.""" + engine = get_gt_engine() + if engine is None: + return None + try: + return engine.process_feed_item(item) + except Exception: + logger.exception("GT process_feed_item failed") + return None + + +def _persist_gt_snapshot( + engine: GT_EarlyWarning, + *, + processed: int, + sample: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + timestamp = datetime.now(timezone.utc).isoformat() + heatmap = engine.get_risk_heatmap() + micro_summary: dict[str, Any] = {} + try: + from analytics.micro_rolling import capture_daily_readings, enrich_heatmap_features + + micro_summary = capture_daily_readings(engine) + heatmap = enrich_heatmap_features(heatmap) + except Exception: + logger.exception("GT micro rolling capture failed") + + clusters = engine.compute_herding_clusters() + from analytics.gt_alerts import parse_heatmap_alerts + + _, plotted_regions = parse_heatmap_alerts(heatmap) + with engine._lock: # noqa: SLF001 — snapshot meta + engine_regions = len(engine._regions) + settings = get_gt_settings() + payload = { + "enabled": True, + "timestamp": timestamp, + "processed": processed, + "heatmap": heatmap, + "clusters": clusters, + "sample": list(sample or [])[:5], + "regions": 
len(heatmap.get("features") or []), + "micro": micro_summary, + "meta": { + "tracked_regions": len(heatmap.get("features") or []), + "engine_regions": engine_regions, + "plotted_regions": plotted_regions, + "max_regions": settings.max_heatmap_features, + }, + } + with _data_lock: + latest_data["gt_risk"] = payload + _mark_fresh("gt_risk") + return payload + + +def refresh_from_latest_data( + data_snapshot: dict[str, Any], + *, + persist: bool = True, +) -> dict[str, Any]: + """ + Batch-ingest recent intel layers from the shared data store. + + Intended to run after telegram/news/gdelt fetch cycles (near-real-time). + """ + engine = get_gt_engine() + if engine is None: + return {"enabled": False, "processed": 0} + + processed = 0 + results: list[dict[str, Any]] = [] + + for item in iter_telegram_posts(data_snapshot.get("telegram_osint")): + result = engine.process_feed_item(item) + if result and not result.get("skipped"): + processed += 1 + results.append(result) + + for item in iter_news_items(data_snapshot.get("news")): + result = engine.process_feed_item(item) + if result and not result.get("skipped"): + processed += 1 + if len(results) < 5: + results.append(result) + + for item in iter_gdelt_features(data_snapshot.get("gdelt")): + result = engine.process_feed_item(item) + if result and not result.get("skipped"): + processed += 1 + + logger.info("GT refresh processed %d items", processed) + summary = { + "enabled": True, + "processed": processed, + "sample": results[:5], + "heatmap_features": len(engine.get_risk_heatmap().get("features") or []), + } + if persist: + snapshot = _persist_gt_snapshot(engine, processed=processed, sample=results) + summary["timestamp"] = snapshot.get("timestamp") + summary["clusters"] = len(snapshot.get("clusters") or []) + return summary + + +def recompute_gt_herding_clusters() -> dict[str, Any]: + """Louvain community pass — run on a schedule independent of feed ingest.""" + if not gt_louvain_enabled(): + return {"enabled": False, 
"clusters": 0, "reason": "louvain_disabled_on_lean_profile"} + + engine = get_gt_engine() + if engine is None: + return {"enabled": False, "clusters": 0} + + clusters = engine.compute_herding_clusters() + timestamp = datetime.now(timezone.utc).isoformat() + with _data_lock: + current = dict(latest_data.get("gt_risk") or {}) + current["clusters"] = clusters + current["clusters_updated"] = timestamp + current["enabled"] = True + latest_data["gt_risk"] = current + _mark_fresh("gt_risk") + logger.info("GT Louvain recompute: %d clusters", len(clusters)) + return {"enabled": True, "clusters": len(clusters), "timestamp": timestamp} + + +def maybe_refresh_gt_analytics() -> None: + """Hook for data_fetcher — no-op when analytics are disabled or lean-gated.""" + if not gt_scheduled_ingest_enabled(): + return + try: + with _data_lock: + snapshot = dict(latest_data) + refresh_from_latest_data(snapshot, persist=True) + except Exception: + logger.exception("GT analytics refresh failed") + + +def maybe_freeze_gt_weekly_snapshot() -> None: + """Hook for weekly scheduler — freeze operational backtest snapshot.""" + if not gt_engine_operational(): + return + try: + from analytics.rolling_backtest import freeze_weekly_snapshot + + result = freeze_weekly_snapshot(frozen_by="scheduler") + if result.get("created"): + logger.info( + "GT rolling freeze: week=%s regions=%s alerts=%s", + result.get("week_id"), + result.get("region_count"), + result.get("alert_count"), + ) + except Exception: + logger.exception("GT rolling weekly freeze failed") \ No newline at end of file diff --git a/backend/analytics/micro_rolling.py b/backend/analytics/micro_rolling.py new file mode 100644 index 0000000..a9da63b --- /dev/null +++ b/backend/analytics/micro_rolling.py @@ -0,0 +1,361 @@ +"""Micro rolling 3-day average — fast ignition signal alongside weekly macro.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from datetime import date, datetime, timedelta, 
timezone +from typing import Any + +from analytics.daily_store import ( + DailyRegionReading, + DailySnapshot, + date_id, + list_daily_ids, + load_daily, + save_daily, + utc_now_iso, + utc_today, +) +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.rolling_backtest import rolling_alert_threshold + +DEFAULT_WINDOW_DAYS = 3 +DEFAULT_IGNITION_DELTA = 0.10 + + +def _env_int(name: str, default: int) -> int: + raw = str(os.environ.get(name, "")).strip() + if not raw: + return default + try: + return max(1, int(raw)) + except ValueError: + return default + + +def _env_float(name: str, default: float) -> float: + raw = str(os.environ.get(name, "")).strip() + if not raw: + return default + try: + return float(raw) + except ValueError: + return default + + +def micro_window_days() -> int: + return _env_int("GT_MICRO_ROLLING_DAYS", DEFAULT_WINDOW_DAYS) + + +def ignition_delta() -> float: + return _env_float("GT_MICRO_IGNITION_DELTA", DEFAULT_IGNITION_DELTA) + + +def _peak_score( + *, + composite: float, + financial: float, + unrest: float, + conflict: float, +) -> float: + return max(composite, financial, unrest, conflict) + + +def _region_reading_from_feature( + feature: dict[str, Any], + *, + captured_at: str, +) -> DailyRegionReading | None: + props = feature.get("properties") or {} + region = str(props.get("region") or "").strip().lower() + if not region: + return None + composite = float(props.get("risk") or props.get("composite_risk") or 0.0) + financial = float(props.get("financial") or 0.0) + unrest = float(props.get("unrest") or 0.0) + conflict = float(props.get("conflict") or 0.0) + peak = _peak_score( + composite=composite, + financial=financial, + unrest=unrest, + conflict=conflict, + ) + return DailyRegionReading( + region=region, + composite_risk=composite, + financial=financial, + unrest=unrest, + conflict=conflict, + peak_score=peak, + readings=1, + last_captured_at=captured_at, + ) + + +def capture_daily_readings( + engine: 
GT_EarlyWarning, + *, + when: date | None = None, +) -> dict[str, Any]: + """ + Upsert today's regional readings from the live heatmap. + + Each GT refresh updates the current day's latest scores (rolling window + uses one value per calendar day). + """ + day = when or utc_today() + day_key = date_id(day) + captured_at = utc_now_iso() + heatmap = engine.get_risk_heatmap() + existing = load_daily(day) or DailySnapshot(date=day_key, regions={}) + + updated = 0 + for feature in heatmap.get("features") or []: + if not isinstance(feature, dict): + continue + reading = _region_reading_from_feature(feature, captured_at=captured_at) + if reading is None: + continue + prior = existing.regions.get(reading.region) + if prior is None: + existing.regions[reading.region] = reading + updated += 1 + continue + prior.composite_risk = reading.composite_risk + prior.financial = reading.financial + prior.unrest = reading.unrest + prior.conflict = reading.conflict + prior.peak_score = max(prior.peak_score, reading.peak_score) + prior.readings += 1 + prior.last_captured_at = captured_at + updated += 1 + + existing.last_updated_at = captured_at + save_daily(existing) + return { + "date": day_key, + "regions": len(existing.regions), + "updated": updated, + "captured_at": captured_at, + } + + +@dataclass(frozen=True) +class MicroRegionView: + region: str + spot_risk: float + risk_3d_avg: float + risk_delta: float + days_in_window: int + day_scores: tuple[float, ...] 
+ alerted_spot: bool + alerted_3d: bool + ignition: bool + financial: float + unrest: float + conflict: float + + def to_dict(self) -> dict[str, Any]: + return { + "region": self.region, + "spot_risk": round(self.spot_risk, 4), + "risk_3d_avg": round(self.risk_3d_avg, 4), + "risk_delta": round(self.risk_delta, 4), + "days_in_window": self.days_in_window, + "day_scores": [round(score, 4) for score in self.day_scores], + "alerted_spot": self.alerted_spot, + "alerted_3d": self.alerted_3d, + "ignition": self.ignition, + "financial": round(self.financial, 4), + "unrest": round(self.unrest, 4), + "conflict": round(self.conflict, 4), + } + + +def _day_offsets(window_days: int) -> list[int]: + # Today + prior (window_days - 1) days. + return list(range(window_days - 1, -1, -1)) + + +def _historical_dates(as_of: date, window_days: int) -> list[date]: + return [as_of - timedelta(days=offset) for offset in _day_offsets(window_days)] + + +def compute_micro_view( + region: str, + *, + as_of: date | None = None, + window_days: int | None = None, + alert_threshold: float | None = None, + spot_reading: DailyRegionReading | None = None, +) -> MicroRegionView | None: + """Compute rolling N-day average and ignition vs spot for one region.""" + region_key = str(region or "").strip().lower() + if not region_key: + return None + + today = as_of or utc_today() + window = window_days or micro_window_days() + threshold = float(alert_threshold if alert_threshold is not None else rolling_alert_threshold()) + delta_min = ignition_delta() + + day_scores: list[float] = [] + latest: DailyRegionReading | None = spot_reading + + for day in _historical_dates(today, window): + snap = load_daily(day) + if snap is None: + continue + row = snap.regions.get(region_key) + if row is None: + continue + day_scores.append(row.peak_score) + if day == today: + latest = row + + if latest is None and day_scores: + # Spot may come from yesterday if today not captured yet. 
+ snap = load_daily(today) + if snap: + latest = snap.regions.get(region_key) + + if latest is None and not day_scores: + return None + + spot = float(latest.peak_score if latest else (day_scores[-1] if day_scores else 0.0)) + avg = sum(day_scores) / len(day_scores) if day_scores else spot + risk_delta = spot - avg + ignition = risk_delta >= delta_min and spot >= threshold * 0.75 + + return MicroRegionView( + region=region_key, + spot_risk=spot, + risk_3d_avg=avg, + risk_delta=risk_delta, + days_in_window=len(day_scores), + day_scores=tuple(day_scores), + alerted_spot=spot >= threshold, + alerted_3d=avg >= threshold, + ignition=ignition, + financial=float(latest.financial if latest else 0.0), + unrest=float(latest.unrest if latest else 0.0), + conflict=float(latest.conflict if latest else 0.0), + ) + + +def compute_all_micro_views( + *, + as_of: date | None = None, + window_days: int | None = None, + alert_threshold: float | None = None, +) -> list[MicroRegionView]: + """Build micro views for all regions seen in the rolling window.""" + today = as_of or utc_today() + window = window_days or micro_window_days() + regions: set[str] = set() + + for day in _historical_dates(today, window): + snap = load_daily(day) + if snap is None: + continue + regions.update(snap.regions.keys()) + + views: list[MicroRegionView] = [] + for region in regions: + view = compute_micro_view( + region, + as_of=today, + window_days=window, + alert_threshold=alert_threshold, + ) + if view is not None: + views.append(view) + + views.sort(key=lambda row: (row.ignition, row.risk_delta, row.spot_risk), reverse=True) + return views + + +def enrich_heatmap_features( + heatmap: dict[str, Any], + *, + as_of: date | None = None, + window_days: int | None = None, + alert_threshold: float | None = None, +) -> dict[str, Any]: + """Attach micro rolling fields to heatmap GeoJSON features.""" + threshold = float(alert_threshold if alert_threshold is not None else rolling_alert_threshold()) + window = 
window_days or micro_window_days() + features = heatmap.get("features") or [] + enriched: list[dict[str, Any]] = [] + + for feature in features: + if not isinstance(feature, dict): + continue + props = dict(feature.get("properties") or {}) + region = str(props.get("region") or "").strip().lower() + view = compute_micro_view( + region, + as_of=as_of, + window_days=window, + alert_threshold=threshold, + ) if region else None + + if view is not None: + props["risk_spot"] = view.spot_risk + props["risk_3d_avg"] = view.risk_3d_avg + props["risk_delta"] = view.risk_delta + props["micro_days"] = view.days_in_window + props["micro_ignition"] = view.ignition + props["alerted_3d"] = view.alerted_3d + props["day_scores"] = list(view.day_scores) + + enriched.append({**feature, "properties": props}) + + return { + **heatmap, + "features": enriched, + "micro_window_days": window, + "micro_alert_threshold": threshold, + } + + +def micro_rolling_report( + *, + as_of: date | None = None, + window_days: int | None = None, + limit: int = 15, +) -> dict[str, Any]: + """API/OpenClaw payload for micro rolling 3-day context.""" + today = as_of or utc_today() + window = window_days or micro_window_days() + threshold = rolling_alert_threshold() + views = compute_all_micro_views( + as_of=today, + window_days=window, + alert_threshold=threshold, + ) + ignitions = [row for row in views if row.ignition] + alerted_3d = [row for row in views if row.alerted_3d] + top = views[: max(1, limit)] + + stored_days = list_daily_ids(newest_first=True, limit=window) + return { + "mode": "micro_rolling", + "window_days": window, + "alert_threshold": threshold, + "ignition_delta": ignition_delta(), + "as_of": date_id(today), + "days_stored": len(stored_days), + "stored_dates": stored_days, + "regions_tracked": len(views), + "ignition_count": len(ignitions), + "alerted_3d_count": len(alerted_3d), + "ignitions": [row.to_dict() for row in ignitions[:limit]], + "top_regions": [row.to_dict() for row in top], + 
"note": ( + f"Micro view: {window}-day rolling average vs spot risk. " + "Ignition = spot jumped above the rolling baseline (events that flare fast). " + "Macro week-over-week validation remains on /api/analytics/rolling." + ), + } \ No newline at end of file diff --git a/backend/analytics/rolling_backtest.py b/backend/analytics/rolling_backtest.py new file mode 100644 index 0000000..0d7e576 --- /dev/null +++ b/backend/analytics/rolling_backtest.py @@ -0,0 +1,382 @@ +"""Rolling weekly operational validation for Strategic Risk Analytics. + +Freezes live GT scores each ISO week, accepts delayed outcome labels, and +scores prior-week predictions with accuracy + Wilson 95% CI. Unlike the +static historical benchmark, this measures forward operational usefulness. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from datetime import date, datetime, timezone +from typing import Any, Literal + +from analytics.backtest import DEFAULT_BACKTEST_ALERT_THRESHOLD, wilson_interval +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.integration import get_gt_engine +from analytics.weekly_store import ( + VALID_LABELS, + LabelName, + RegionSnapshot, + WeeklySnapshot, + list_week_ids, + load_week, + save_week, + utc_now_iso, +) + +MIN_LABELED_FOR_TREND = 5 + + +def _env_float(name: str, default: float) -> float: + raw = str(os.environ.get(name, "")).strip() + if not raw: + return default + try: + return float(raw) + except ValueError: + return default + + +def rolling_alert_threshold() -> float: + """Fixed operational alert cutoff — not retroactively tuned per week.""" + return _env_float("GT_ROLLING_ALERT_THRESHOLD", DEFAULT_BACKTEST_ALERT_THRESHOLD) + + +def iso_week_id(when: datetime | date | None = None) -> str: + """Return ISO week id, e.g. 
``2026-W24``.""" + if when is None: + when = datetime.now(timezone.utc) + if isinstance(when, datetime): + when = when.date() + year, week, _ = when.isocalendar() + return f"{year}-W{week:02d}" + + +def _region_rows_from_engine( + engine: GT_EarlyWarning, + *, + alert_threshold: float, +) -> list[RegionSnapshot]: + heatmap = engine.get_risk_heatmap() + rows: list[RegionSnapshot] = [] + for feature in heatmap.get("features") or []: + if not isinstance(feature, dict): + continue + props = feature.get("properties") or {} + region = str(props.get("region") or "").strip().lower() + if not region: + continue + composite = float(props.get("risk") or 0.0) + financial = float(props.get("financial") or 0.0) + unrest = float(props.get("unrest") or 0.0) + conflict = float(props.get("conflict") or 0.0) + peak_score = max(composite, financial, unrest, conflict) + rows.append( + RegionSnapshot( + region=region, + composite_risk=composite, + financial=financial, + unrest=unrest, + conflict=conflict, + alerted=peak_score >= alert_threshold, + label="pending", + ) + ) + rows.sort(key=lambda row: row.composite_risk, reverse=True) + return rows + + +@dataclass(frozen=True) +class WeekScore: + week_id: str + frozen_at: str + alert_threshold: float + total_regions: int + labeled: int + pending: int + alerted: int + correct: int + accuracy: float + confidence_rate: float + wilson_lower_95: float + wilson_upper_95: float + true_positives: int + true_negatives: int + false_positives: int + false_negatives: int + sensitivity: float + specificity: float + scorable: bool + + def to_dict(self) -> dict[str, Any]: + return { + "week_id": self.week_id, + "frozen_at": self.frozen_at, + "alert_threshold": round(self.alert_threshold, 4), + "total_regions": self.total_regions, + "labeled": self.labeled, + "pending": self.pending, + "alerted": self.alerted, + "correct": self.correct, + "accuracy": round(self.accuracy, 4), + "confidence_rate": round(self.confidence_rate, 4), + "wilson_lower_95": 
round(self.wilson_lower_95, 4), + "wilson_upper_95": round(self.wilson_upper_95, 4), + "true_positives": self.true_positives, + "true_negatives": self.true_negatives, + "false_positives": self.false_positives, + "false_negatives": self.false_negatives, + "sensitivity": round(self.sensitivity, 4), + "specificity": round(self.specificity, 4), + "scorable": self.scorable, + } + + +def _predicted_positive(row: RegionSnapshot) -> bool: + return row.alerted + + +def _actual_positive(label: LabelName) -> bool: + return label == "true_escalation" + + +def _is_correct(row: RegionSnapshot) -> bool: + if row.label == "pending": + return False + predicted = _predicted_positive(row) + if row.label == "true_escalation": + return predicted + if row.label in ("false_alarm", "benign"): + return not predicted + return False + + +def score_week(snapshot: WeeklySnapshot) -> WeekScore: + """Score a frozen week against delayed labels (pending rows excluded).""" + labeled_rows = [row for row in snapshot.regions if row.label != "pending"] + pending = len(snapshot.regions) - len(labeled_rows) + + tp = sum( + 1 + for row in labeled_rows + if row.alerted and row.label == "true_escalation" + ) + tn = sum( + 1 + for row in labeled_rows + if not row.alerted and row.label in ("benign", "false_alarm") + ) + fp = sum( + 1 + for row in labeled_rows + if row.alerted and row.label in ("false_alarm", "benign") + ) + fn = sum( + 1 + for row in labeled_rows + if not row.alerted and row.label == "true_escalation" + ) + + correct = tp + tn + total = len(labeled_rows) + accuracy = correct / total if total else 0.0 + lower, upper = wilson_interval(correct, total) + + pos_total = sum(1 for row in labeled_rows if _actual_positive(row.label)) # type: ignore[arg-type] + neg_total = total - pos_total + pred_pos = sum(1 for row in labeled_rows if row.alerted) + pred_neg = total - pred_pos + + sensitivity = tp / pos_total if pos_total else 0.0 + specificity = tn / pred_neg if pred_neg else (1.0 if tn == total and 
total else 0.0) + + return WeekScore( + week_id=snapshot.week_id, + frozen_at=snapshot.frozen_at, + alert_threshold=snapshot.alert_threshold, + total_regions=len(snapshot.regions), + labeled=total, + pending=pending, + alerted=sum(1 for row in snapshot.regions if row.alerted), + correct=correct, + accuracy=accuracy, + confidence_rate=lower, + wilson_lower_95=lower, + wilson_upper_95=upper, + true_positives=tp, + true_negatives=tn, + false_positives=fp, + false_negatives=fn, + sensitivity=sensitivity, + specificity=specificity, + scorable=total >= MIN_LABELED_FOR_TREND, + ) + + +def freeze_weekly_snapshot( + *, + week_id: str | None = None, + alert_threshold: float | None = None, + force: bool = False, + frozen_by: str = "system", + engine: GT_EarlyWarning | None = None, +) -> dict[str, Any]: + """ + Capture current GT heatmap as an immutable weekly operational snapshot. + + Idempotent per week unless ``force=True``. + """ + resolved_engine = engine or get_gt_engine() + if resolved_engine is None: + return {"ok": False, "detail": "GT analytics engine unavailable"} + + resolved_week = week_id or iso_week_id() + threshold = float( + alert_threshold if alert_threshold is not None else rolling_alert_threshold() + ) + + existing = load_week(resolved_week) + if existing and existing.regions and not force: + score = score_week(existing) + return { + "ok": True, + "created": False, + "week_id": resolved_week, + "snapshot": existing.to_dict(), + "score": score.to_dict(), + } + + regions = _region_rows_from_engine(resolved_engine, alert_threshold=threshold) + snapshot = WeeklySnapshot( + week_id=resolved_week, + frozen_at=utc_now_iso(), + alert_threshold=threshold, + regions=regions, + frozen_by=frozen_by, + ) + save_week(snapshot) + score = score_week(snapshot) + return { + "ok": True, + "created": True, + "week_id": resolved_week, + "snapshot": snapshot.to_dict(), + "score": score.to_dict(), + "alert_count": sum(1 for row in regions if row.alerted), + "region_count": 
len(regions), + } + + +def label_regions( + week_id: str, + labels: list[dict[str, Any]], + *, + labeled_by: str = "operator", +) -> dict[str, Any]: + """Apply delayed outcome labels to a frozen week.""" + snapshot = load_week(week_id) + if snapshot is None: + return {"ok": False, "detail": f"Week {week_id} not found"} + + by_region = {row.region: row for row in snapshot.regions} + updated = 0 + skipped: list[str] = [] + now = utc_now_iso() + + for entry in labels: + if not isinstance(entry, dict): + continue + region = str(entry.get("region") or "").strip().lower() + label = str(entry.get("label") or "").strip().lower() + if not region or label not in VALID_LABELS or label == "pending": + if region: + skipped.append(region) + continue + row = by_region.get(region) + if row is None: + skipped.append(region) + continue + row.label = label # type: ignore[assignment] + row.labeled_at = now + notes = entry.get("notes") + if notes is not None: + row.notes = str(notes) + updated += 1 + + save_week(snapshot) + score = score_week(snapshot) + return { + "ok": True, + "week_id": week_id, + "updated": updated, + "skipped": skipped, + "labeled_by": labeled_by, + "score": score.to_dict(), + } + + +def label_region( + week_id: str, + region: str, + label: LabelName, + *, + notes: str = "", + labeled_by: str = "operator", +) -> dict[str, Any]: + return label_regions( + week_id, + [{"region": region, "label": label, "notes": notes}], + labeled_by=labeled_by, + ) + + +def rolling_trend(*, weeks: int = 8) -> list[WeekScore]: + """Return scored weeks newest-first (only weeks with stored snapshots).""" + ids = list_week_ids(newest_first=True)[: max(1, weeks)] + scores: list[WeekScore] = [] + for week_id in ids: + snapshot = load_week(week_id) + if snapshot is None: + continue + scores.append(score_week(snapshot)) + return scores + + +def rolling_report(*, weeks: int = 8, target_confidence: float = 0.80) -> dict[str, Any]: + """Aggregate operational validation trend for API / 
OpenClaw.""" + threshold = rolling_alert_threshold() + trend = rolling_trend(weeks=weeks) + scorable = [row for row in trend if row.scorable] + + latest = scorable[0] if scorable else (trend[0] if trend else None) + accuracy_series = [ + {"week_id": row.week_id, "accuracy": round(row.accuracy, 4), "labeled": row.labeled} + for row in reversed(scorable) + ] + + improving = False + if len(scorable) >= 2: + improving = scorable[0].accuracy >= scorable[1].accuracy + + return { + "mode": "rolling_operational", + "alert_threshold": threshold, + "target_confidence": target_confidence, + "weeks_requested": weeks, + "weeks_stored": len(trend), + "weeks_scorable": len(scorable), + "min_labeled_per_week": MIN_LABELED_FOR_TREND, + "latest": latest.to_dict() if latest else None, + "trend": [row.to_dict() for row in trend], + "accuracy_series": accuracy_series, + "improving_vs_prior": improving, + "meets_target": bool( + latest and latest.scorable and latest.confidence_rate >= target_confidence + ), + "note": ( + "Operational metric: scores frozen weekly predictions against delayed " + "labels. Unlike the static benchmark, this measures live forward utility." 
+ ), + } \ No newline at end of file diff --git a/backend/analytics/settings.py b/backend/analytics/settings.py new file mode 100644 index 0000000..5794329 --- /dev/null +++ b/backend/analytics/settings.py @@ -0,0 +1,158 @@ +"""Configuration for Strategic Risk Analytics (feature-flagged).""" + +from __future__ import annotations + +import json +import os +from dataclasses import dataclass, field +from functools import lru_cache +from typing import Any + + +def _env_bool(name: str, default: bool = False) -> bool: + raw = str(os.environ.get(name, "")).strip().lower() + if not raw: + return default + return raw not in {"0", "false", "no", "off"} + + +def _env_float(name: str, default: float) -> float: + raw = str(os.environ.get(name, "")).strip() + if not raw: + return default + try: + return float(raw) + except ValueError: + return default + + +def _env_int(name: str, default: int) -> int: + raw = str(os.environ.get(name, "")).strip() + if not raw: + return default + try: + return int(raw) + except ValueError: + return default + + +def _parse_signal_weights(raw: str) -> dict[str, float]: + if not raw.strip(): + return {} + try: + parsed = json.loads(raw) + if isinstance(parsed, dict): + return {str(k): float(v) for k, v in parsed.items()} + except (json.JSONDecodeError, TypeError, ValueError): + pass + weights: dict[str, float] = {} + for part in raw.split(","): + piece = part.strip() + if not piece or "=" not in piece: + continue + key, value = piece.split("=", 1) + try: + weights[key.strip()] = float(value.strip()) + except ValueError: + continue + return weights + + +def resolve_gt_profile() -> str: + from services.runtime_profile import resolve_profile_name + + return resolve_profile_name() + + +def gt_analytics_ack_low_cpu() -> bool: + return _env_bool("GT_ANALYTICS_ACK_LOW_CPU", default=False) + + +def gt_engine_operational() -> bool: + """Full GT engine (scheduled ingest, heatmap, Louvain) — not watchdog-only.""" + if not get_gt_settings().enabled: + return 
False + if resolve_gt_profile() == "lean" and not gt_analytics_ack_low_cpu(): + return False + return True + + +def gt_scheduled_ingest_enabled() -> bool: + return gt_engine_operational() + + +def gt_louvain_enabled() -> bool: + return gt_engine_operational() + + +@dataclass(frozen=True) +class GTAnalyticsSettings: + enabled: bool = False + profile: str = "standard" + base_prior: float = 0.15 + evidence_cap: float = 3.0 + evidence_scale: float = 5.0 + min_prob: float = 0.01 + max_prob: float = 0.99 + high_risk_threshold: float = 0.6 + max_history_per_region: int = 200 + max_heatmap_features: int = 500 + louvain_min_weight: float = 0.5 + louvain_interval_minutes: int = 30 + signal_weight_overrides: dict[str, float] = field(default_factory=dict) + watched_channels: tuple[str, ...] = () + + +@lru_cache(maxsize=1) +def get_gt_settings() -> GTAnalyticsSettings: + channels_raw = str(os.environ.get("GT_ANALYTICS_WATCHED_CHANNELS", "")).strip() + channels = tuple( + part.strip().lstrip("@") + for part in channels_raw.split(",") + if part.strip() + ) + profile = resolve_gt_profile() + lean = profile == "lean" + return GTAnalyticsSettings( + enabled=_env_bool("GT_ANALYTICS_ENABLED", default=False), + profile=profile, + base_prior=_env_float("GT_ANALYTICS_BASE_PRIOR", 0.15), + evidence_cap=_env_float("GT_ANALYTICS_EVIDENCE_CAP", 3.0), + evidence_scale=_env_float("GT_ANALYTICS_EVIDENCE_SCALE", 5.0), + min_prob=_env_float("GT_ANALYTICS_MIN_PROB", 0.01), + max_prob=_env_float("GT_ANALYTICS_MAX_PROB", 0.99), + high_risk_threshold=_env_float("GT_ANALYTICS_HIGH_RISK_THRESHOLD", 0.6), + max_history_per_region=_env_int("GT_ANALYTICS_MAX_HISTORY", 200), + max_heatmap_features=_env_int( + "GT_ANALYTICS_MAX_HEATMAP_FEATURES", + 50 if lean else 500, + ), + louvain_min_weight=_env_float("GT_ANALYTICS_LOUVAIN_MIN_WEIGHT", 0.5), + louvain_interval_minutes=max(5, _env_int("GT_ANALYTICS_LOUVAIN_INTERVAL_MINUTES", 30)), + signal_weight_overrides=_parse_signal_weights( + 
logger = logging.getLogger(__name__)

# Outcome labels that may be attached to a frozen region score.
LabelName = Literal["pending", "true_escalation", "false_alarm", "benign"]
VALID_LABELS: frozenset[str] = frozenset(
    {"pending", "true_escalation", "false_alarm", "benign"}
)

_STORE_DIR = Path(__file__).parent.parent / "data" / "gt_rolling"
# Serialises writes and deletes issued from threads of this process.
_store_lock = threading.Lock()


def rolling_store_dir() -> Path:
    """Return the rolling-backtest data directory (override via env in tests)."""
    override = str(os.environ.get("GT_ROLLING_STORE_DIR", "")).strip()
    if override:
        return Path(override)
    return _STORE_DIR


@dataclass
class RegionSnapshot:
    """Frozen per-region scores for one week plus the delayed outcome label."""

    region: str
    composite_risk: float
    financial: float
    unrest: float
    conflict: float
    alerted: bool
    label: LabelName = "pending"
    labeled_at: "str | None" = None
    notes: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serialisable dict of all fields."""
        return asdict(self)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "RegionSnapshot":
        """Build a snapshot from a decoded JSON object.

        Missing or falsy numeric fields become ``0.0``; an unknown label
        falls back to ``"pending"``; the region key is stripped and
        lower-cased.

        Raises:
            TypeError, ValueError: if a numeric field cannot be converted
                by ``float`` (``load_week`` catches both).
        """
        label = str(raw.get("label") or "pending")
        if label not in VALID_LABELS:
            label = "pending"
        labeled_at = raw.get("labeled_at")
        return cls(
            region=str(raw.get("region") or "").strip().lower(),
            composite_risk=float(raw.get("composite_risk") or 0.0),
            financial=float(raw.get("financial") or 0.0),
            unrest=float(raw.get("unrest") or 0.0),
            conflict=float(raw.get("conflict") or 0.0),
            alerted=bool(raw.get("alerted")),
            label=label,  # type: ignore[arg-type]
            # Keep the declared "str | None" type even if the file was hand-edited.
            labeled_at=None if labeled_at is None else str(labeled_at),
            notes=str(raw.get("notes") or ""),
        )


@dataclass
class WeeklySnapshot:
    """All region snapshots frozen for one week id."""

    week_id: str
    frozen_at: str
    alert_threshold: float
    regions: list[RegionSnapshot] = field(default_factory=list)
    frozen_by: str = "system"

    def to_dict(self) -> dict[str, Any]:
        """Return the JSON-serialisable form written by ``save_week``."""
        return {
            "week_id": self.week_id,
            "frozen_at": self.frozen_at,
            "alert_threshold": self.alert_threshold,
            "frozen_by": self.frozen_by,
            "regions": [row.to_dict() for row in self.regions],
        }

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "WeeklySnapshot":
        """Build a weekly snapshot from a decoded JSON object.

        Entries of ``regions`` that are not dicts are ignored.
        """
        regions = [
            RegionSnapshot.from_dict(row)
            for row in (raw.get("regions") or [])
            if isinstance(row, dict)
        ]
        return cls(
            week_id=str(raw.get("week_id") or ""),
            frozen_at=str(raw.get("frozen_at") or ""),
            alert_threshold=float(raw.get("alert_threshold") or 0.0),
            regions=regions,
            frozen_by=str(raw.get("frozen_by") or "system"),
        )


def _week_path(week_id: str) -> Path:
    """Map a week id to its JSON file inside the store directory.

    The id can come from request input, so anything that could make the
    filename point outside the store is neutralised: both ``/`` and ``\\``
    become ``-`` (a backslash is a separator on Windows), NUL bytes are
    dropped, and ``..`` sequences are removed.
    """
    safe = (
        str(week_id)
        .replace("/", "-")
        .replace("\\", "-")
        .replace("\x00", "")
        .replace("..", "")
    )
    return rolling_store_dir() / f"{safe}.json"


def _ensure_dir() -> None:
    """Create the store directory (and parents) if it does not exist."""
    rolling_store_dir().mkdir(parents=True, exist_ok=True)


def list_week_ids(*, newest_first: bool = True) -> list[str]:
    """Return stored ISO week ids."""
    _ensure_dir()
    ids = [
        path.stem
        for path in rolling_store_dir().glob("*.json")
        if path.stem and path.stem != "index"
    ]
    return sorted(ids, reverse=newest_first)


def load_week(week_id: str) -> "WeeklySnapshot | None":
    """Load one stored week, or return ``None`` if it is missing or unreadable.

    Read, decode and conversion failures are logged with a traceback and
    reported as ``None`` rather than raised.
    """
    path = _week_path(week_id)
    if not path.is_file():
        return None
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            return None
        return WeeklySnapshot.from_dict(raw)
    except (OSError, json.JSONDecodeError, TypeError, ValueError):
        logger.exception("Failed to load GT rolling week %s", week_id)
        return None


def save_week(snapshot: WeeklySnapshot) -> None:
    """Write *snapshot* to its week file via a temp file and rename.

    Raises:
        OSError: if the temp file cannot be written or renamed. The temp
            file is removed (best effort) before the error propagates.
    """
    _ensure_dir()
    path = _week_path(snapshot.week_id)
    tmp = path.with_suffix(".json.tmp")
    payload = json.dumps(snapshot.to_dict(), indent=2, sort_keys=True)
    with _store_lock:
        try:
            tmp.write_text(payload, encoding="utf-8")
            tmp.replace(path)
        except OSError:
            # Do not leave a half-written temp file behind; the original
            # error is what the caller needs to see, so cleanup is quiet.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise


def delete_week(week_id: str) -> bool:
    """Delete a stored week; return ``True`` only if this call removed it."""
    path = _week_path(week_id)
    with _store_lock:
        # Check under the lock so two concurrent deletes cannot both pass it.
        if not path.is_file():
            return False
        try:
            path.unlink()
        except FileNotFoundError:
            # Removed by another process between the check and the unlink.
            return False
    return True


def utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with offset."""
    return datetime.now(timezone.utc).isoformat()
7081915..a91f012 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -29,6 +29,8 @@ dependencies = [ "reverse-geocoder==1.5.1", "sgp4==2.25", "meshtastic>=2.5.0", + "networkx>=3.4.0", + "numpy>=2.2.0", "orjson>=3.10.0", "paho-mqtt>=1.6.0,<2.0.0", "PyNaCl>=1.5.0", diff --git a/backend/routers/ai_intel.py b/backend/routers/ai_intel.py index d0d8227..0c3c851 100644 --- a/backend/routers/ai_intel.py +++ b/backend/routers/ai_intel.py @@ -2051,7 +2051,7 @@ async def agent_tool_manifest(request: Request): "description": "Set up a watchdog alert. When triggered, alerts push instantly via SSE stream. Debounced: same watch won't re-fire within 60 seconds.", "parameters": { "type": {"type": "string", "required": True, "description": "Watch type", - "enum": ["track_aircraft", "track_callsign", "track_registration", "track_ship", "track_entity", "geofence", "keyword", "prediction_market"]}, + "enum": ["track_aircraft", "track_callsign", "track_registration", "track_ship", "track_entity", "geofence", "keyword", "telegram_rhetoric", "prediction_market"]}, "params": {"type": "object", "required": True, "description": "Type-specific parameters (see subtypes)"}, }, "subtypes": { @@ -2061,7 +2061,8 @@ async def agent_tool_manifest(request: Request): "track_ship": {"params": {"mmsi": "string (optional)", "imo": "string (optional)", "name": "string (optional)", "owner": "string (optional)", "callsign": "string (optional)"}, "description": "Alert when ship appears by MMSI, IMO, name, owner, or callsign"}, "track_entity": {"params": {"query": "string", "entity_type": "string (optional)", "layers": "list (optional)"}, "description": "Generic exact-first entity tracker when aircraft/ship fields are not known yet"}, "geofence": {"params": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list (default ['flights','ships'])"}, "description": "Alert when any entity enters a geographic zone"}, - "keyword": {"params": {"keyword": "string"}, 
"description": "Alert when keyword appears in news/GDELT headlines"}, + "keyword": {"params": {"keyword": "string", "include_telegram": "boolean (default true)"}, "description": "Alert when keyword appears in news, GDELT, or Telegram OSINT (searches translated + original text)"}, + "telegram_rhetoric": {"params": {"min_risk_score": "int 1-10 (default 7)", "keywords": "list or comma-separated string (optional)", "channels": "list or comma-separated string (optional)"}, "description": "Alert on new high-risk Telegram OSINT posts — rhetoric/escalation monitor"}, "prediction_market": {"params": {"query": "string", "threshold": "float 0-1 (optional)"}, "description": "Alert on prediction market movements matching query"}, }, "example": {"cmd": "add_watch", "args": {"type": "track_registration", "params": {"registration": "N3880"}}}, @@ -2564,7 +2565,8 @@ async def api_capabilities(request: Request): "track_ship": {"params": {"mmsi": "str (optional)", "imo": "str (optional)", "name": "str (optional)", "owner": "str (optional)", "callsign": "str (optional)"}, "description": "Alert when ship appears by MMSI, IMO, name, owner, or callsign"}, "track_entity": {"params": {"query": "str", "entity_type": "str (optional)", "layers": "list[str] (optional)"}, "description": "Generic exact-first entity watch"}, "geofence": {"params": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list (default ['flights','ships'])"}, "description": "Alert when any entity enters a geographic zone"}, - "keyword": {"params": {"keyword": "str"}, "description": "Alert when keyword appears in news/GDELT"}, + "keyword": {"params": {"keyword": "str", "include_telegram": "bool (default true)"}, "description": "Alert when keyword appears in news, GDELT, or Telegram OSINT"}, + "telegram_rhetoric": {"params": {"min_risk_score": "int 1-10 (default 7)", "keywords": "list[str] or comma string (optional)", "channels": "list[str] or comma string (optional)"}, "description": 
"Alert on new high-risk Telegram OSINT posts"}, "prediction_market": {"params": {"query": "str", "threshold": "float 0-1 (optional)"}, "description": "Alert on prediction market movements"}, }, }, diff --git a/backend/routers/analytics.py b/backend/routers/analytics.py new file mode 100644 index 0000000..0417766 --- /dev/null +++ b/backend/routers/analytics.py @@ -0,0 +1,339 @@ +"""Strategic Risk Analytics API — game-theoretic early warning overlays.""" + +from __future__ import annotations + +import logging +from typing import Any + +from fastapi import APIRouter, Depends, HTTPException, Request +from pydantic import BaseModel, Field + +from auth import require_local_operator +from limiter import limiter +from analytics.backtest import ( + DEFAULT_BACKTEST_ALERT_THRESHOLD, + run_historical_backtest, + tune_alert_threshold, +) +from analytics.feed_adapter import normalize_feed_item +from analytics.integration import get_gt_engine, refresh_from_latest_data +from analytics.gt_alerts import top_gt_alerts +from analytics.micro_rolling import micro_rolling_report +from analytics.rolling_backtest import ( + freeze_weekly_snapshot, + label_region, + label_regions, + rolling_alert_threshold, + rolling_report, + score_week, +) +from analytics.weekly_store import load_week +from analytics.settings import gt_analytics_enabled +from services.fetchers._store import _data_lock, get_latest_data_subset_refs, latest_data + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +class RiskHeatmapRequest(BaseModel): + """Optional batch ingest + refresh controls for POST /api/analytics/risk_heatmap.""" + + refresh: bool = True + items: list[dict[str, Any]] = Field(default_factory=list) + + +class RollingFreezeRequest(BaseModel): + week_id: str | None = None + force: bool = False + + +class RollingLabelEntry(BaseModel): + region: str + label: str + notes: str = "" + + +class RollingLabelRequest(BaseModel): + week_id: str + labels: list[RollingLabelEntry] = 
@router.post("/api/analytics/risk_heatmap")
@limiter.limit("12/minute")
async def risk_heatmap_post(
    request: Request,
    body: RiskHeatmapRequest,
    _: None = Depends(require_local_operator),
) -> dict[str, Any]:
    """
    Ingest optional feed items and/or refresh beliefs from latest intel layers.

    Requires local operator auth — intended for OpenClaw agents and admin tooling.

    Returns the cached risk payload (same shape as the GET endpoint) with two
    extra keys: ``ingested`` (items the engine accepted) and ``refresh``
    (whether a refresh was requested).

    Raises:
        HTTPException: 503 when analytics is disabled or the engine is
            unavailable.
    """
    if not gt_analytics_enabled():
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics is disabled")

    engine = get_gt_engine()
    if engine is None:
        raise HTTPException(status_code=503, detail="Strategic Risk Analytics engine unavailable")

    ingested = 0
    for raw in body.items:
        if not isinstance(raw, dict):
            continue
        source_type = str(raw.get("source_type") or "manual")
        item = normalize_feed_item(raw, source_type=source_type)
        result = engine.process_feed_item(item)
        # Count only items the engine did not mark as skipped.
        if result and not result.get("skipped"):
            ingested += 1

    if body.refresh:
        # Copy the top-level mapping under the lock, then refresh outside it
        # so the lock is not held for the duration of the recompute.
        with _data_lock:
            snapshot = dict(latest_data)
        # The response is rebuilt from the cached gt_risk payload below, so
        # the refresh return value is not needed here. It used to be merged
        # into a local dict that was never read.
        refresh_from_latest_data(snapshot, persist=True)

    payload = _gt_risk_payload()
    payload["ingested"] = ingested
    payload["refresh"] = bool(body.refresh)
    return payload
@router.get("/api/analytics/rolling")
@limiter.limit("12/minute")
async def analytics_rolling(
    request: Request,
    weeks: int = 8,
    target_confidence: float = 0.80,
) -> dict[str, Any]:
    """Rolling weekly operational validation — accuracy trend with delayed labels.

    ``weeks`` is clamped to the range 1..52 before the report is built.
    When analytics is disabled a stub with ``enabled: False`` is returned
    instead of an error.
    """
    if gt_analytics_enabled():
        upper_bounded = min(weeks, 52)
        window = max(1, upper_bounded)
        result = rolling_report(weeks=window, target_confidence=target_confidence)
        result["enabled"] = True
        return result

    disabled: dict[str, Any] = {
        "enabled": False,
        "message": "Strategic Risk Analytics is disabled.",
    }
    return disabled
@router.get("/api/analytics/rolling/{week_id}")
@limiter.limit("12/minute")
async def analytics_rolling_week(request: Request, week_id: str) -> dict[str, Any]:
    """Return a single frozen week snapshot and its score.

    Raises:
        HTTPException: 404 when no stored snapshot exists for ``week_id``.
    """
    enabled = gt_analytics_enabled()
    if not enabled:
        return {"enabled": False, "message": "Strategic Risk Analytics is disabled."}

    lookup_id = str(week_id).strip()
    frozen = load_week(lookup_id)
    if frozen is None:
        # The message echoes the id exactly as the caller sent it.
        raise HTTPException(status_code=404, detail=f"Week {week_id} not found")

    week_score = score_week(frozen)
    response: dict[str, Any] = {"enabled": True}
    response["week_id"] = frozen.week_id
    response["snapshot"] = frozen.to_dict()
    response["score"] = week_score.to_dict()
    response["alert_threshold"] = rolling_alert_threshold()
    return response
gt_analytics_enabled(): + raise HTTPException(status_code=503, detail="Strategic Risk Analytics is disabled") + + week_id = str(body.week_id or "").strip() + if not week_id: + raise HTTPException(status_code=400, detail="week_id required") + + if len(body.labels) == 1: + entry = body.labels[0] + result = label_region( + week_id, + entry.region, + entry.label, # type: ignore[arg-type] + notes=entry.notes, + labeled_by="api", + ) + else: + result = label_regions( + week_id, + [row.model_dump() for row in body.labels], + labeled_by="api", + ) + + if not result.get("ok"): + raise HTTPException(status_code=404, detail=result.get("detail", "Label failed")) + result["enabled"] = True + return result \ No newline at end of file diff --git a/backend/routers/data.py b/backend/routers/data.py index 6593f5d..e35df27 100644 --- a/backend/routers/data.py +++ b/backend/routers/data.py @@ -773,7 +773,7 @@ async def live_data_slow( "scanners", "weather_alerts", "ukraine_alerts", "air_quality", "volcanoes", "fishing_activity", "psk_reporter", "correlations", "uap_sightings", "wastewater", "crowdthreat", "threat_level", "trending_markets", "road_corridor_trends", - "malware_threats", "cyber_threats", "scm_suppliers", "telegram_osint", + "malware_threats", "cyber_threats", "scm_suppliers", "telegram_osint", "gt_risk", ) freshness = get_source_timestamps_snapshot() payload = { @@ -839,6 +839,12 @@ async def live_data_slow( ) if active_layers.get("telegram_osint", True) else {"posts": [], "total": 0, "geolocated": 0}, + "gt_risk": ( + d.get("gt_risk") + or {"enabled": False, "heatmap": {"type": "FeatureCollection", "features": []}, "clusters": []} + ) + if active_layers.get("gt_risk", False) + else {"enabled": False, "heatmap": {"type": "FeatureCollection", "features": []}, "clusters": []}, "freshness": freshness, } # Issue #288: bbox filter heavy/dense layers only when all four bounds diff --git a/backend/routers/health.py b/backend/routers/health.py index c45e3e5..2d9036d 100644 --- 
@router.get("/api/telegram-feed")
@limiter.limit("30/minute")
async def telegram_feed(request: Request, lang: str | None = Query(default=None)) -> dict:
    """Return the cached Telegram OSINT feed, optionally localised.

    With no usable cached payload an empty feed is returned. When ``lang``
    is given, a shallow copy of the payload is returned with its posts run
    through ``apply_posts_translations`` and the normalised target recorded
    under ``translate_locale``. Otherwise the cached payload is returned
    as-is.
    """
    cached = get_latest_data_subset_refs("telegram_osint").get("telegram_osint")
    usable = isinstance(cached, dict) and cached.get("posts") is not None
    if not usable:
        return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}

    if not lang:
        return cached

    locale = normalize_translate_target(lang)
    # Copy so the shared cached payload is not modified by the localisation.
    response = dict(cached)
    source_posts = list(cached.get("posts") or [])
    response["posts"] = apply_posts_translations(source_posts, locale)
    response["translate_locale"] = locale
    return response
failed: %s", exc) + _scheduler.add_job( - lambda: _run_task_with_health_on_executor(_SLOW_EXECUTOR, fetch_gdelt, "fetch_gdelt"), + lambda: _run_task_with_health_on_executor(_SLOW_EXECUTOR, _fetch_gdelt_with_gt, "fetch_gdelt"), "interval", minutes=30, id="gdelt", max_instances=1, misfire_grace_time=120, ) + + # GT analytics — Louvain herding/coordination clusters (feature-flagged). + def _recompute_gt_clusters(): + try: + from analytics.integration import recompute_gt_herding_clusters + + recompute_gt_herding_clusters() + except Exception as exc: + logger.error("GT Louvain recompute failed: %s", exc) + + def _freeze_gt_weekly_snapshot(): + try: + from analytics.integration import maybe_freeze_gt_weekly_snapshot + + maybe_freeze_gt_weekly_snapshot() + except Exception as exc: + logger.error("GT rolling weekly freeze failed: %s", exc) + + try: + from analytics.settings import get_gt_settings, gt_engine_operational + + _gt_settings = get_gt_settings() + if gt_engine_operational(): + _scheduler.add_job( + _recompute_gt_clusters, + "interval", + minutes=_gt_settings.louvain_interval_minutes, + id="gt_analytics_louvain", + max_instances=1, + misfire_grace_time=300, + next_run_time=datetime.utcnow() + timedelta(minutes=3), + ) + _scheduler.add_job( + _freeze_gt_weekly_snapshot, + "cron", + day_of_week="mon", + hour=0, + minute=5, + id="gt_rolling_weekly_freeze", + max_instances=1, + misfire_grace_time=3600, + ) + except Exception as exc: + logger.warning("GT Louvain scheduler not registered: %s", exc) _scheduler.add_job( lambda: _run_task_with_health_on_executor( _SLOW_EXECUTOR, update_liveuamap, "update_liveuamap" diff --git a/backend/services/fetchers/_store.py b/backend/services/fetchers/_store.py index 1eeba77..ca7ab19 100644 --- a/backend/services/fetchers/_store.py +++ b/backend/services/fetchers/_store.py @@ -74,6 +74,7 @@ class DashboardData(TypedDict, total=False): cyber_threats: Dict[str, Any] scm_suppliers: Dict[str, Any] telegram_osint: Dict[str, Any] + 
gt_risk: Dict[str, Any] # In-memory store @@ -129,6 +130,13 @@ latest_data: DashboardData = { "cyber_threats": {"threats": [], "stats": {}}, "scm_suppliers": {"suppliers": [], "total": 0, "critical_count": 0}, "telegram_osint": {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}, + "gt_risk": { + "enabled": False, + "heatmap": {"type": "FeatureCollection", "features": []}, + "clusters": [], + "processed": 0, + "timestamp": None, + }, } # Per-source freshness timestamps @@ -361,6 +369,7 @@ active_layers: dict[str, bool] = { "scm_suppliers": False, "cyber_threats": False, "telegram_osint": True, + "gt_risk": False, } diff --git a/backend/services/fetchers/telegram_osint.py b/backend/services/fetchers/telegram_osint.py index 5ac4668..d1bc5c9 100644 --- a/backend/services/fetchers/telegram_osint.py +++ b/backend/services/fetchers/telegram_osint.py @@ -2,6 +2,7 @@ from __future__ import annotations import hashlib +import html import logging import os import re @@ -11,6 +12,7 @@ from typing import Any from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data from services.fetchers.news import resolve_coords_match from services.network_utils import fetch_with_curl, outbound_user_agent +from services.telegram_translate import apply_post_translation, apply_posts_translations logger = logging.getLogger(__name__) @@ -174,13 +176,7 @@ def _extract_media(block: str, link: str) -> dict[str, Any]: def _strip_html(text: str) -> str: cleaned = re.sub(r"", "\n", text, flags=re.IGNORECASE) cleaned = re.sub(r"<[^>]+>", "", cleaned) - return ( - cleaned.replace(""", '"') - .replace("&", "&") - .replace("<", "<") - .replace(">", ">") - .strip() - ) + return html.unescape(cleaned).strip() def _score_risk(text: str) -> int: @@ -293,20 +289,19 @@ def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]] post_id = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16] media = _extract_media(block, link) - 
posts.append( - { - "id": post_id, - "title": title, - "description": text[:1200], - "link": link, - "published": published, - "source": f"t.me/{channel}", - "channel": channel, - "risk_score": risk_score, - "coords": [coords[0], coords[1]] if coords else None, - **media, - } - ) + post = { + "id": post_id, + "title": title, + "description": text[:1200], + "link": link, + "published": published, + "source": f"t.me/{channel}", + "channel": channel, + "risk_score": risk_score, + "coords": [coords[0], coords[1]] if coords else None, + **media, + } + posts.append(apply_post_translation(post)) return posts @@ -358,6 +353,7 @@ def fetch_telegram_osint() -> dict[str, Any]: merged_posts, added = _merge_telegram_posts(existing_posts, incoming) merged_posts = [_refresh_post_coords(post) for post in merged_posts] + merged_posts = apply_posts_translations(merged_posts) geolocated = sum(1 for p in merged_posts if p.get("coords")) payload = { diff --git a/backend/services/openclaw_channel.py b/backend/services/openclaw_channel.py index 4de9ed3..2be6387 100644 --- a/backend/services/openclaw_channel.py +++ b/backend/services/openclaw_channel.py @@ -90,6 +90,15 @@ READ_COMMANDS = frozenset({ # Agent routing helpers "route_query", "run_playbook", + "gt_risk_heatmap", + "gt_dossier", + "gt_analyze", + "gt_backtest", + "gt_rolling_freeze", + "gt_rolling_label", + "gt_rolling_backtest", + "gt_micro_rolling", + "gt_top_alerts", # Private Infonet reads (operator-delegated) "infonet_status", "list_gates", @@ -857,6 +866,284 @@ def _dispatch_command(cmd: str, args: dict[str, Any]) -> dict[str, Any]: return {"ok": True, "data": _compact_query_result(result), "format": "compressed_v1"} return {"ok": True, "data": result} + if cmd == "gt_risk_heatmap": + from analytics.settings import gt_analytics_enabled + from analytics.integration import get_gt_engine + from services.fetchers._store import get_latest_data_subset_refs + + if not gt_analytics_enabled(): + return {"ok": True, "data": 
{"enabled": False, "features": [], "clusters": []}} + snap = get_latest_data_subset_refs("gt_risk") + payload = dict(snap.get("gt_risk") or {}) + engine = get_gt_engine() + if engine is not None and not payload.get("heatmap"): + payload["heatmap"] = engine.get_risk_heatmap() + return {"ok": True, "data": payload} + + if cmd == "gt_dossier": + from analytics.settings import gt_analytics_enabled + from analytics.integration import get_gt_engine + + region = str(args.get("region", "") or args.get("area", "") or "").strip().lower() + if not region: + return {"ok": False, "detail": "region required (e.g. ukraine, uk, europe)"} + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "region": region, + "interpretation": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + engine = get_gt_engine() + if engine is None: + return {"ok": False, "detail": "GT analytics engine unavailable"} + return {"ok": True, "data": engine.get_dossier(region)} + + if cmd == "gt_analyze": + from analytics.settings import gt_analytics_enabled + from analytics.integration import get_gt_engine, refresh_from_latest_data + from services.fetchers._store import _data_lock, latest_data + + if not gt_analytics_enabled(): + return {"ok": False, "detail": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED)"} + engine = get_gt_engine() + if engine is None: + return {"ok": False, "detail": "GT analytics engine unavailable"} + + feeds = args.get("feeds") if isinstance(args.get("feeds"), (list, tuple)) else None + if feeds: + from analytics.feed_adapter import normalize_feed_item + + ingested = 0 + for raw in feeds: + if not isinstance(raw, dict): + continue + item = normalize_feed_item(raw, source_type=str(raw.get("source_type") or "openclaw")) + result = engine.process_feed_item(item) + if result and not result.get("skipped"): + ingested += 1 + summary = {"ingested": ingested, "enabled": True} + else: + with _data_lock: + snapshot = 
dict(latest_data) + summary = refresh_from_latest_data(snapshot, persist=True) + + region = str(args.get("region", "") or "").strip().lower() + data = { + "refresh": summary, + "heatmap_features": len((summary.get("sample") or [])), + } + if region: + data["dossier"] = engine.get_dossier(region) + else: + data["heatmap"] = engine.get_risk_heatmap() + data["clusters"] = engine.compute_herding_clusters()[:5] + return {"ok": True, "data": data} + + if cmd == "gt_backtest": + from analytics.backtest import ( + DEFAULT_BACKTEST_ALERT_THRESHOLD, + run_historical_backtest, + tune_alert_threshold, + ) + from analytics.historical_events import default_historical_cases, expanded_historical_cases + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + expanded = bool(args.get("expanded", True)) + tune = bool(args.get("tune", False)) + include_cases = bool(args.get("include_cases", False)) + try: + target_confidence = float(args.get("target_confidence", 0.95)) + except (TypeError, ValueError): + target_confidence = 0.95 + + if tune: + suite = expanded_historical_cases() if expanded else default_historical_cases() + threshold, report = tune_alert_threshold( + suite, + target_confidence=target_confidence, + ) + else: + raw_threshold = args.get("alert_threshold") + threshold = ( + float(raw_threshold) + if raw_threshold is not None + else DEFAULT_BACKTEST_ALERT_THRESHOLD + ) + report = run_historical_backtest( + use_expanded_suite=expanded, + alert_threshold=threshold, + target_confidence=target_confidence, + ) + + data = report.to_dict() + data["enabled"] = True + data["expanded_suite"] = expanded + data["tuned"] = tune + data["recommended_alert_threshold"] = threshold + if _wants_compact(args) or not include_cases: + data.pop("cases", None) + return {"ok": True, "data": data} + + if cmd == 
"gt_rolling_freeze": + from analytics.rolling_backtest import freeze_weekly_snapshot + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + week_id = str(args.get("week_id", "") or "").strip() or None + force = bool(args.get("force", False)) + result = freeze_weekly_snapshot( + week_id=week_id, + force=force, + frozen_by="openclaw", + ) + if not result.get("ok"): + return {"ok": False, "detail": result.get("detail", "Freeze failed")} + data = dict(result) + data["enabled"] = True + if _wants_compact(args): + data.pop("snapshot", None) + return {"ok": True, "data": data} + + if cmd == "gt_rolling_label": + from analytics.rolling_backtest import label_region, label_regions + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + week_id = str(args.get("week_id", "") or "").strip() + if not week_id: + return {"ok": False, "detail": "week_id required"} + + labels = args.get("labels") + if isinstance(labels, list) and labels: + result = label_regions(week_id, labels, labeled_by="openclaw") + else: + region = str(args.get("region", "") or "").strip().lower() + label = str(args.get("label", "") or "").strip().lower() + if not region or not label: + return {"ok": False, "detail": "region and label required (or labels batch)"} + result = label_region( + week_id, + region, + label, # type: ignore[arg-type] + notes=str(args.get("notes", "") or ""), + labeled_by="openclaw", + ) + + if not result.get("ok"): + return {"ok": False, "detail": result.get("detail", "Label failed")} + data = dict(result) + data["enabled"] = True + return {"ok": True, "data": data} + + if cmd == "gt_rolling_backtest": + from 
analytics.rolling_backtest import rolling_report + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + try: + weeks = int(args.get("weeks", 8)) + except (TypeError, ValueError): + weeks = 8 + try: + target_confidence = float(args.get("target_confidence", 0.80)) + except (TypeError, ValueError): + target_confidence = 0.80 + + data = rolling_report(weeks=weeks, target_confidence=target_confidence) + data["enabled"] = True + if _wants_compact(args): + for row in data.get("trend") or []: + if isinstance(row, dict): + row.pop("frozen_at", None) + return {"ok": True, "data": data} + + if cmd == "gt_top_alerts": + from analytics.gt_alerts import top_gt_alerts + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + try: + limit = int(args.get("limit", 8)) + except (TypeError, ValueError): + limit = 8 + + data = top_gt_alerts(limit=limit) + data["enabled"] = True + return {"ok": True, "data": data} + + if cmd == "gt_micro_rolling": + from analytics.micro_rolling import micro_rolling_report + from analytics.settings import gt_analytics_enabled + + if not gt_analytics_enabled(): + return { + "ok": True, + "data": { + "enabled": False, + "message": "Strategic Risk Analytics is disabled (GT_ANALYTICS_ENABLED).", + }, + } + + try: + window_days = int(args.get("window_days", 3)) + except (TypeError, ValueError): + window_days = 3 + try: + limit = int(args.get("limit", 15)) + except (TypeError, ValueError): + limit = 15 + + data = micro_rolling_report(window_days=window_days, limit=limit) + data["enabled"] = True + if _wants_compact(args): + data.pop("top_regions", None) + data["ignitions"] = (data.get("ignitions") or [])[:5] 
def _gt_region_hint(text: str) -> str:
    """Extract a coarse region name from a free-text GT analytics query.

    Known region names are tried in priority order, so the more specific
    "eastern europe" wins over "europe". "united kingdom" is reported as the
    short form "uk". When no known name is present, whatever follows a
    standalone "on" (e.g. "gt analysis on france") is returned instead.

    Args:
        text: Raw user query; ``None``/empty is tolerated.

    Returns:
        The lowercase region hint, or "" when nothing usable was found.
    """
    lowered = str(text or "").lower()
    hints = (
        "ukraine",
        "middle east",
        "eastern europe",
        "baltics",
        "israel",
        "iran",
        "russia",
        "china",
        "europe",
        "united kingdom",
        "uk",
        "usa",
        "united states",
    )
    for hint in hints:
        # Always anchor at a word start so "tirana" does not yield "iran".
        # Short abbreviations must also end at a word boundary, otherwise
        # "ukrainian" resolves to "uk" and "usage" to "usa". Longer names keep
        # prefix matching so demonyms ("russian", "european") still resolve.
        tail = r"\b" if len(hint) <= 3 else ""
        if re.search(rf"\b{re.escape(hint)}{tail}", lowered):
            return "uk" if hint == "united kingdom" else hint
    match = re.search(r"\bon\s+([a-z][a-z\s]{2,30})\b", lowered)
    if match:
        return match.group(1).strip()
    return ""
"gt_rolling_backtest", + "args": _compact_args({"weeks": 8, "target_confidence": 0.80}, compact=compact), + } + intent = "gt_rolling_backtest" + alternates.append({"cmd": "gt_micro_rolling", "args": {"window_days": 3}}) + alternates.append({"cmd": "gt_backtest", "args": {"expanded": True, "compact": True}}) + return _route_result(intent, recommended, avoid, alternates) + + if any( + k in lowered + for k in ( + "3 day average", + "3-day average", + "rolling 3 day", + "micro risk", + "risk ignition", + ) + ): + recommended = { + "cmd": "gt_micro_rolling", + "args": _compact_args({"window_days": 3}, compact=compact), + } + alternates.append({"cmd": "gt_rolling_backtest", "args": {"weeks": 8}}) + return _route_result("gt_micro_rolling", recommended, avoid, alternates) + + if any( + k in lowered + for k in ( + "gt analysis", + "game theoretic", + "game-theoretic", + "strategic risk", + "early warning", + "risk heatmap", + "costly signal", + "gt rationale", + ) + ): + region_hint = _gt_region_hint(raw) + if region_hint and any(k in lowered for k in ("dossier", "rationale", "scenario")): + recommended = { + "cmd": "gt_dossier", + "args": _compact_args({"region": region_hint}, compact=compact), + } + alternates.append({"cmd": "gt_risk_heatmap", "args": {}}) + return _route_result("gt_dossier", recommended, avoid, alternates) + recommended = { + "cmd": "gt_analyze", + "args": _compact_args( + {"refresh": True, "region": region_hint} if region_hint else {"refresh": True}, + compact=compact, + ), + } + alternates.append({"cmd": "gt_risk_heatmap", "args": {}}) + return _route_result("gt_analyze", recommended, avoid, alternates) + if lat is not None and lng is not None and any( k in lowered for k in ("near", "around", "within", "radius", "brief", "aoi") ): diff --git a/backend/services/openclaw_watchdog.py b/backend/services/openclaw_watchdog.py index 66de911..dc8270e 100644 --- a/backend/services/openclaw_watchdog.py +++ b/backend/services/openclaw_watchdog.py @@ -22,9 +22,12 
def _mark_seen_posts(watch_id: str, post_ids: list[str]) -> None:
    """Record Telegram post ids/links as already alerted for ``watch_id``.

    Falsy ids are ignored; if none remain, nothing is recorded. The per-watch
    set is capped at ``_TELEGRAM_SEEN_MAX`` entries. On overflow the ids from
    the current call are always retained and only older ids are dropped.

    Args:
        watch_id: Identifier of the watch the posts were matched for.
        post_ids: Post ids/links that have just been reported.
    """
    clean = [pid for pid in post_ids if pid]
    if not clean:
        return
    with _lock:
        seen = _seen_posts.setdefault(watch_id, set())
        seen.update(clean)
        if len(seen) <= _TELEGRAM_SEEN_MAX:
            return
        # A set has no insertion order, so slicing list(seen) would evict
        # arbitrary ids, possibly the ones just added, and the same post
        # would alert again on the next poll. Keep the newest batch
        # explicitly and fill the remaining room with older ids.
        newest = set(clean[-_TELEGRAM_SEEN_MAX:])
        room = _TELEGRAM_SEEN_MAX - len(newest)
        older = [pid for pid in seen if pid not in newest]
        _seen_posts[watch_id] = newest.union(older[:room])
def _check_telegram_rhetoric(watch_id: str, params: dict, slow: dict) -> dict | None:
    """Alert on new high-risk Telegram OSINT posts (optionally keyword/channel filtered).

    Args:
        watch_id: Watch identifier, used to de-duplicate already reported posts.
        params: Watch parameters. ``min_risk_score`` (default 7, clamped to
            1..10), ``keywords``/``keyword`` and ``channels``/``channel`` are
            read; the latter two accept a list or a comma-separated string.
        slow: Slow-tier telemetry snapshot; only ``telegram_osint`` is read.

    Returns:
        An alert dict with at most 10 matches, or ``None`` when no unseen post
        passes the filters.
    """

    def _normalized_terms(raw: object, *, strip_at: bool = False) -> list[str]:
        # Accept "a,b", ["a", "b"] or a single scalar; drop blanks so an entry
        # such as "@" cannot turn into an empty term that matches everything.
        if isinstance(raw, str):
            raw = raw.split(",")
        elif not isinstance(raw, (list, tuple, set, frozenset)):
            raw = [raw] if raw else []
        terms: list[str] = []
        for item in raw:
            if item is None:
                continue
            term = str(item).lower().strip()
            if strip_at:
                term = term.lstrip("@").strip()
            if term:
                terms.append(term)
        return terms

    # A malformed threshold (e.g. "high") falls back to the default instead of
    # raising, mirroring how per-post risk scores are parsed below.
    try:
        min_risk = int(params.get("min_risk_score") or 7)
    except (TypeError, ValueError):
        min_risk = 7
    min_risk = max(1, min(min_risk, 10))

    keywords = _normalized_terms(params.get("keywords") or params.get("keyword") or [])
    channels = _normalized_terms(
        params.get("channels") or params.get("channel") or [],
        strip_at=True,
    )

    from services.telegram_osint_text import (
        iter_telegram_posts,
        keyword_matches_telegram_post,
        telegram_post_match_entry,
    )

    matches = []
    new_post_ids: list[str] = []
    top = 0

    for post in iter_telegram_posts(slow.get("telegram_osint")):
        try:
            risk = int(post.get("risk_score") or 0)
        except (TypeError, ValueError):
            risk = 0
        if risk < min_risk:
            continue

        channel = str(post.get("channel") or "").lower().strip()
        source = str(post.get("source") or "").lower().strip()
        if channels and channel not in channels and not any(ch in source for ch in channels):
            continue

        if keywords and not any(keyword_matches_telegram_post(post, kw) for kw in keywords):
            continue

        post_id = _telegram_post_id(post)
        if _is_seen_post(watch_id, post_id):
            continue

        matches.append(telegram_post_match_entry(post))
        top = max(top, risk)
        if post_id:
            new_post_ids.append(post_id)

    if not matches:
        return None

    _mark_seen_posts(watch_id, new_post_ids)
    return {
        "alert": (
            f"Telegram rhetoric alert: {len(matches)} new post(s) at LVL {top}/10"
            + (f" (min {min_risk})" if min_risk > 1 else "")
        ),
        "data": {
            "min_risk_score": min_risk,
            "keywords": keywords,
            "channels": channels,
            "matches": matches[:10],
        },
    }
None and period and quota > 0: + return round(quota / period, 3) + + try: + import os as _os + + count = _os.cpu_count() + return float(count) if count else None + except Exception: + return None + + +def detect_memory_limit_mb() -> int | None: + cgroup_v2 = Path("/sys/fs/cgroup/memory.max") + if cgroup_v2.is_file(): + try: + raw = cgroup_v2.read_text(encoding="utf-8").strip() + if raw and raw != "max": + return int(int(raw) / (1024 * 1024)) + except (OSError, ValueError): + pass + + cgroup_v1 = Path("/sys/fs/cgroup/memory/memory.limit_in_bytes") + if cgroup_v1.is_file(): + try: + raw = _read_first_int(cgroup_v1) + if raw is not None and raw < (1 << 62): + return int(raw / (1024 * 1024)) + except (OSError, ValueError): + pass + return None + + +def resolve_profile_name() -> str: + explicit = str(os.environ.get("GT_ANALYTICS_PROFILE", "")).strip().lower() + if explicit in {"lean", "standard"}: + return explicit + cpu = detect_cpu_limit() + if cpu is not None and cpu <= 1.0: + return "lean" + return "standard" + + +@lru_cache(maxsize=1) +def get_runtime_profile() -> dict[str, Any]: + cpu_limit = detect_cpu_limit() + memory_mb = detect_memory_limit_mb() + profile = resolve_profile_name() + lean = profile == "lean" + return { + "profile": profile, + "cpu_limit": cpu_limit, + "memory_limit_mb": memory_mb, + "gt_analytics": { + "recommended": not lean, + "lean_node": lean, + "warning": ( + "This node is capped at 1 vCPU. Enabling Strategic Risk (Derived OSINT) " + "may slow Telegram, GDELT, and other OSINT fetches. Set " + "GT_ANALYTICS_ACK_LOW_CPU=true after enabling GT_ANALYTICS_ENABLED to run " + "the full engine on lean hardware." 
def telegram_post_display_title(post: dict[str, Any]) -> str:
    """Return a single-line headline for alerts and agent-facing summaries.

    The translated title/description is preferred; when neither is present
    (or both are blank) the original title/description is used. In both cases
    only the first line is kept, so a multi-line description used as a
    fallback cannot leak into the title.

    Args:
        post: Telegram OSINT post dict.

    Returns:
        The headline, truncated to 200 characters; "" when the post has no text.
    """
    translated = str(post.get("title_translated") or post.get("description_translated") or "").strip()
    text = translated or str(post.get("title") or post.get("description") or "").strip()
    # Apply the first-line rule to both branches so the result is a real
    # one-line title regardless of which field supplied it.
    return text.split("\n", 1)[0].rstrip()[:200]
re.compile(r"[\u4e00-\u9fff]") + +# Common war-reporting shorthand that machine translation often transliterates. +_POST_TRANSLATION_GLOSSARY: tuple[tuple[re.Pattern[str], str], ...] = ( + (re.compile(r"\bBpLa\b", re.IGNORECASE), "UAV"), + (re.compile(r"\bБпЛА\b", re.IGNORECASE), "UAV"), + (re.compile(r"\bбпла\b"), "UAV"), + (re.compile(r"\bБПЛА\b"), "UAV"), + (re.compile(r"\bрсзв\b", re.IGNORECASE), "MLRS"), + (re.compile(r"\bРСЗВ\b"), "MLRS"), +) + +_SOURCE_LANG_LABELS = { + "uk": "Ukrainian", + "ru": "Russian", + "en": "English", + "ar": "Arabic", + "he": "Hebrew", + "zh-cn": "Chinese", + "fr": "French", + "de": "German", + "pl": "Polish", +} + +_CACHE: dict[str, tuple[str, str]] = {} +_CACHE_LOCK = Lock() +_CACHE_MAX = 512 + +_LOCALE_TO_GOOGLE = { + "en": "en", + "fr": "fr", + "zh-cn": "zh-CN", + "zh": "zh-CN", +} + + +def telegram_translate_enabled() -> bool: + return str(os.environ.get("TELEGRAM_OSINT_TRANSLATE", "true")).strip().lower() not in { + "0", + "false", + "no", + "off", + "", + } + + +def telegram_translate_target() -> str: + raw = str(os.environ.get("TELEGRAM_OSINT_TRANSLATE_TO", "en")).strip().lower() + return _LOCALE_TO_GOOGLE.get(raw, raw or "en") + + +def normalize_translate_target(locale: str | None) -> str: + raw = str(locale or telegram_translate_target()).strip().lower().replace("_", "-") + return _LOCALE_TO_GOOGLE.get(raw, raw or "en") + + +def _looks_english(text: str) -> bool: + letters = [char for char in text if char.isalpha()] + if not letters: + return True + ascii_letters = sum(1 for char in letters if ord(char) < 128) + return ascii_letters / len(letters) > 0.9 + + +def contains_cyrillic(text: str) -> bool: + return bool(_CYRILLIC_RE.search(str(text or ""))) + + +def source_lang_label(code: str | None) -> str: + raw = str(code or "").strip().lower().replace("_", "-") + return _SOURCE_LANG_LABELS.get(raw, raw.upper() if raw else "Unknown") + + +def polish_translation(text: str) -> str: + polished = str(text or "") + for pattern, 
replacement in _POST_TRANSLATION_GLOSSARY: + polished = pattern.sub(replacement, polished) + return polished.strip() + + +def guess_source_lang(text: str) -> str: + if _UKRAINIAN_MARKERS_RE.search(text): + return "uk" + if _CYRILLIC_RE.search(text): + return "ru" + if _ARABIC_RE.search(text): + return "ar" + if _HEBREW_RE.search(text): + return "he" + if _CJK_RE.search(text): + return "zh-CN" + if _looks_english(text): + return "en" + return "auto" + + +def _cache_key(text: str, target_lang: str) -> str: + digest = hashlib.sha1(f"{target_lang}|{text}".encode("utf-8")).hexdigest() + return digest + + +def _cache_get(text: str, target_lang: str) -> tuple[str, str] | None: + key = _cache_key(text, target_lang) + with _CACHE_LOCK: + return _CACHE.get(key) + + +def _cache_put(text: str, target_lang: str, translated: str, source_lang: str) -> None: + key = _cache_key(text, target_lang) + with _CACHE_LOCK: + if len(_CACHE) >= _CACHE_MAX: + _CACHE.pop(next(iter(_CACHE))) + _CACHE[key] = (translated, source_lang) + + +def _google_translate(clean: str, target: str, source: str | None = None) -> tuple[str, str]: + params = { + "client": "gtx", + "sl": source or "auto", + "tl": target, + "dt": "t", + "q": clean[:4500], + } + url = "https://translate.googleapis.com/translate_a/single?" 
+ urllib.parse.urlencode(params) + resp = requests.get( + url, + timeout=8, + headers={"User-Agent": "Mozilla/5.0 (compatible; Shadowbroker-Telegram-Translate/1.0)"}, + ) + resp.raise_for_status() + data = resp.json() + detected = str(data[2] or guess_source_lang(clean)).strip().lower() + if detected in {"zh-cn", "zh-tw"}: + detected = "zh-CN" + parts: list[str] = [] + for chunk in data[0] or []: + if chunk and chunk[0]: + parts.append(str(chunk[0])) + translated = polish_translation("".join(parts).strip() or clean) + return translated, detected + + +def translate_text(text: str, target_lang: str | None = None) -> tuple[str, str]: + """Translate text via Google Translate (unofficial client endpoint). + + Returns ``(translated_text, detected_source_lang)``. + """ + clean = str(text or "").strip() + if not clean: + return "", "en" + + target = normalize_translate_target(target_lang) + if _looks_english(clean) and target == "en": + return clean, "en" + + cached = _cache_get(clean, target) + if cached: + return cached + + try: + translated, detected = _google_translate(clean, target) + if detected == target or (detected == "en" and target == "en"): + result = (clean, detected) + _cache_put(clean, target, clean, detected) + return result + if contains_cyrillic(translated) and contains_cyrillic(clean): + hinted = guess_source_lang(clean) + if hinted not in {"auto", target}: + retry_translated, retry_detected = _google_translate(clean, target, hinted) + if not contains_cyrillic(retry_translated) or len(retry_translated) > len(translated): + translated, detected = retry_translated, retry_detected + result = (translated, detected) + _cache_put(clean, target, translated, detected) + return result + except Exception as exc: + logger.warning("Telegram translation failed: %s", exc) + fallback_lang = guess_source_lang(clean) + return clean, fallback_lang + + +def apply_post_translation(post: dict[str, Any], target_lang: str | None = None) -> dict[str, Any]: + """Add translation 
fields to a Telegram OSINT post dict.""" + if not telegram_translate_enabled(): + return post + + target = normalize_translate_target(target_lang) + description = str(post.get("description") or "").strip() + title = str(post.get("title") or "").strip() + full_text = description or title + if not full_text: + return post + + existing_translated = str(post.get("description_translated") or post.get("title_translated") or "").strip() + if post.get("translate_to") == target and existing_translated: + updated = dict(post) + polished = polish_translation(existing_translated) + if polished != existing_translated: + lines = polished.split("\n", 1) + updated["title_translated"] = lines[0][:160] + updated["description_translated"] = polished[:1200] + updated["source_lang_label"] = source_lang_label(str(post.get("source_lang") or "")) + return updated + + translated_full, source_lang = translate_text(full_text, target) + updated = dict(post) + updated["source_lang"] = source_lang + updated["translate_to"] = target + updated["source_lang_label"] = source_lang_label(source_lang) + + if translated_full != full_text and source_lang != target: + lines = translated_full.split("\n", 1) + updated["title_translated"] = lines[0][:160] + updated["description_translated"] = translated_full[:1200] + + return updated + + +def apply_posts_translations( + posts: list[dict[str, Any]], + target_lang: str | None = None, +) -> list[dict[str, Any]]: + if not telegram_translate_enabled(): + return posts + return [apply_post_translation(post, target_lang) for post in posts] \ No newline at end of file diff --git a/backend/services/telemetry.py b/backend/services/telemetry.py index 03f37a6..60f970a 100644 --- a/backend/services/telemetry.py +++ b/backend/services/telemetry.py @@ -97,6 +97,7 @@ _SLOW_KEYS = ( "cyber_threats", "scm_suppliers", "telegram_osint", + "gt_risk", ) @@ -210,6 +211,9 @@ _LAYER_ALIASES = { "telegram": "telegram_osint", "telegram_osint": "telegram_osint", "osint_feed": 
"telegram_osint", + "gt_risk": "gt_risk", + "strategic_risk": "gt_risk", + "gt_analytics": "gt_risk", "malware": "malware_threats", "malware_threats": "malware_threats", "malware_c2": "malware_threats", @@ -710,10 +714,10 @@ _UNIVERSAL_SEARCH_SPECS: dict[str, dict[str, Any]] = { "time_fields": ("updated_at", "timestamp"), }, "telegram_osint": { - "fields": ("title", "description", "source", "channel", "link"), - "primary_fields": ("title", "description", "channel"), - "label_fields": ("title", "channel"), - "summary_fields": ("description", "source"), + "fields": ("title", "description", "title_translated", "description_translated", "source", "channel", "link"), + "primary_fields": ("title_translated", "title", "description_translated", "description", "channel"), + "label_fields": ("title_translated", "title", "channel"), + "summary_fields": ("description_translated", "description", "source"), "type_fields": ("channel", "source"), "id_fields": ("id", "link"), "time_fields": ("published", "timestamp"), @@ -2089,30 +2093,27 @@ def search_news( return {"results": out, "version": get_data_version(), "truncated": True} if include_telegram: + from services.telegram_osint_text import telegram_post_display_title, telegram_post_search_text + for post in _unwrap_layer_items(snap.get("telegram_osint"), "telegram_osint"): if not isinstance(post, dict): continue - text = " ".join( - ( - _norm_text(post.get("title")), - _norm_text(post.get("description")), - _norm_text(post.get("source")), - _norm_text(post.get("channel")), - ) - ) + text = telegram_post_search_text(post) if not _text_matches_query(query_norm, text): continue lat, lng = _extract_coords(post) out.append( { "source_layer": "telegram_osint", - "title": post.get("title") or "", - "summary": post.get("description") or "", + "title": telegram_post_display_title(post), + "summary": post.get("description_translated") or post.get("description") or "", "source": post.get("source") or post.get("channel") or "Telegram", 
"link": post.get("link") or "", "lat": lat, "lng": lng, "risk_score": post.get("risk_score"), + "source_lang": post.get("source_lang"), + "source_lang_label": post.get("source_lang_label"), } ) if len(out) >= limit: diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index f10d805..3ced65e 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -1,9 +1,23 @@ +import os + import asyncio import pytest from unittest.mock import patch, MagicMock +@pytest.fixture(autouse=True) +def _gt_analytics_standard_profile(monkeypatch: pytest.MonkeyPatch) -> None: + """Tests assume a standard (non-lean) runtime unless they override profile.""" + monkeypatch.setenv("GT_ANALYTICS_PROFILE", os.environ.get("GT_ANALYTICS_PROFILE", "standard")) + try: + from analytics.integration import reset_gt_engine + + reset_gt_engine() + except Exception: + pass + + @pytest.fixture(autouse=True) def _suppress_background_services(): """Prevent real scheduler/stream/tracker from starting during tests.""" diff --git a/backend/tests/test_analytics_api.py b/backend/tests/test_analytics_api.py new file mode 100644 index 0000000..a750f74 --- /dev/null +++ b/backend/tests/test_analytics_api.py @@ -0,0 +1,141 @@ +"""API tests for Strategic Risk Analytics routes.""" + +from __future__ import annotations + +import pytest + +from analytics.integration import reset_gt_engine +from services.fetchers import _store + + +@pytest.fixture(autouse=True) +def _reset_gt(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("GT_ANALYTICS_ENABLED", raising=False) + reset_gt_engine() + + +def test_risk_heatmap_disabled(client) -> None: + response = client.get("/api/analytics/risk_heatmap") + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is False + assert payload["type"] == "FeatureCollection" + assert payload["features"] == [] + + +def test_dossier_disabled(client) -> None: + response = client.get("/api/analytics/dossier/ukraine") + 
assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is False + assert payload["region"] == "ukraine" + + +def test_risk_heatmap_enabled_after_refresh(client, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + + _store.latest_data["telegram_osint"] = { + "posts": [ + { + "id": "api-tg-1", + "title": "Troop buildup", + "description": "Troop movement and armored convoy reported near border.", + "source": "t.me/war_monitor", + "channel": "war_monitor", + "coords": [48.5, 37.5], + } + ], + "total": 1, + "geolocated": 1, + } + _store.latest_data["news"] = [] + _store.latest_data["gdelt"] = [] + + from analytics.integration import refresh_from_latest_data + + refresh_from_latest_data(dict(_store.latest_data), persist=True) + + response = client.get("/api/analytics/risk_heatmap") + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is True + assert len(payload["features"]) >= 1 + assert payload["timestamp"] is not None + + +def test_dossier_enabled(client, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + + _store.latest_data["telegram_osint"] = { + "posts": [ + { + "id": "api-tg-2", + "title": "Strike", + "description": "General strike and protest mobilization in capital.", + "source": "t.me/nexta_live", + "channel": "nexta_live", + "coords": [50.45, 30.52], + } + ] + } + _store.latest_data["news"] = [] + _store.latest_data["gdelt"] = [] + + from analytics.integration import refresh_from_latest_data + + refresh_from_latest_data(dict(_store.latest_data), persist=True) + + response = client.get("/api/analytics/dossier/50.45,30.52") + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is True + assert payload["recent_signals"] + assert "interpretation" in payload + + +def test_post_risk_heatmap_ingest(client, monkeypatch: 
pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + + response = client.post( + "/api/analytics/risk_heatmap", + json={ + "refresh": False, + "items": [ + { + "title": "GPS interference", + "description": "GPS jamming spike along northern corridor.", + "source": "manual", + "region": "baltics", + "domain": "conflict", + } + ], + }, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is True + assert payload["ingested"] == 1 + + +def test_backtest_disabled(client) -> None: + response = client.get("/api/analytics/backtest") + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is False + + +def test_backtest_enabled(client, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + + response = client.get("/api/analytics/backtest?expanded=true&tune=false") + assert response.status_code == 200 + payload = response.json() + assert payload["enabled"] is True + assert payload["accuracy"] == 1.0 + assert payload["confidence_rate"] >= 0.95 + assert payload["meets_target"] is True + assert payload["total_cases"] >= 80 \ No newline at end of file diff --git a/backend/tests/test_gt_alerts.py b/backend/tests/test_gt_alerts.py new file mode 100644 index 0000000..d3f4ab5 --- /dev/null +++ b/backend/tests/test_gt_alerts.py @@ -0,0 +1,56 @@ +"""Top GT alerts ranking and coordinate filtering.""" + +from __future__ import annotations + +from analytics.gt_alerts import parse_heatmap_alerts, top_gt_alerts + + +def test_parse_heatmap_filters_invalid_coords() -> None: + heatmap = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "region": "ukraine", + "risk": 0.55, + "conflict": 0.62, + "financial": 0.15, + "unrest": 0.2, + }, + "geometry": {"type": "Point", "coordinates": [31.0, 48.0]}, + }, + { + "type": "Feature", + "properties": {"region": 
"no_coords", "risk": 0.9}, + "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, + }, + { + "type": "Feature", + "properties": {"region": "global", "risk": 0.99}, + "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, + }, + ], + } + alerts, plotted = parse_heatmap_alerts(heatmap, limit=5) + assert plotted == 1 + assert len(alerts) == 1 + assert alerts[0]["region"] == "ukraine" + assert alerts[0]["lat"] == 48.0 + assert alerts[0]["lng"] == 31.0 + + +def test_region_label_formats_coordinates() -> None: + from analytics.gt_alerts import _region_label + + assert "48.00" in _region_label("48.00,31.17") + assert _region_label("ukraine") == "ukraine" + + +def test_top_gt_alerts_disabled(monkeypatch) -> None: + monkeypatch.delenv("GT_ANALYTICS_ENABLED", raising=False) + from analytics.integration import reset_gt_engine + + reset_gt_engine() + report = top_gt_alerts(limit=3) + assert report["alerts"] == [] \ No newline at end of file diff --git a/backend/tests/test_gt_backtest.py b/backend/tests/test_gt_backtest.py new file mode 100644 index 0000000..833842d --- /dev/null +++ b/backend/tests/test_gt_backtest.py @@ -0,0 +1,52 @@ +"""Historical backtest validation for Strategic Risk Analytics.""" + +from __future__ import annotations + +from analytics.backtest import ( + DEFAULT_BACKTEST_ALERT_THRESHOLD, + MAX_BACKTEST_ALERT_THRESHOLD, + run_historical_backtest, + tune_alert_threshold, + wilson_interval, +) +from analytics.historical_events import default_historical_cases, expanded_historical_cases + + +def test_wilson_interval_perfect_run() -> None: + lower, upper = wilson_interval(18, 18) + assert lower >= 0.80 + assert upper == 1.0 + + +def test_base_suite_meets_eighty_percent_confidence() -> None: + report = run_historical_backtest( + default_historical_cases(), + use_expanded_suite=False, + target_confidence=0.80, + ) + assert report.accuracy >= 0.95 + assert report.confidence_rate >= 0.80 + assert report.meets_target + assert report.false_positives == 0 + 
assert report.false_negatives == 0 + + +def test_expanded_suite_meets_ninety_five_percent_confidence() -> None: + threshold, report = tune_alert_threshold(target_confidence=0.95) + assert len(expanded_historical_cases()) >= 80 + assert report.accuracy == 1.0 + assert report.confidence_rate >= 0.95 + assert report.meets_target + assert report.false_positives == 0 + assert report.false_negatives == 0 + assert DEFAULT_BACKTEST_ALERT_THRESHOLD <= threshold <= MAX_BACKTEST_ALERT_THRESHOLD + + +def test_default_backtest_threshold_on_expanded_suite() -> None: + report = run_historical_backtest( + use_expanded_suite=True, + target_confidence=0.95, + ) + assert report.alert_threshold == DEFAULT_BACKTEST_ALERT_THRESHOLD + assert report.accuracy == 1.0 + assert report.confidence_rate >= 0.95 \ No newline at end of file diff --git a/backend/tests/test_gt_early_warning.py b/backend/tests/test_gt_early_warning.py new file mode 100644 index 0000000..a094c37 --- /dev/null +++ b/backend/tests/test_gt_early_warning.py @@ -0,0 +1,150 @@ +"""Tests for Strategic Risk Analytics core scoring.""" + +from __future__ import annotations + +import pytest + +from analytics.feed_adapter import normalize_feed_item +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.integration import process_feed_item, refresh_from_latest_data, reset_gt_engine +from analytics.settings import GTAnalyticsSettings + + +@pytest.fixture +def engine() -> GT_EarlyWarning: + return GT_EarlyWarning( + GTAnalyticsSettings( + enabled=True, + base_prior=0.15, + evidence_cap=3.0, + evidence_scale=5.0, + high_risk_threshold=0.6, + ) + ) + + +def test_classify_payroll_loan_signal(engine: GT_EarlyWarning) -> None: + signals = engine.classify_signals("Franchise owners increasingly rely on payroll loan facilities.") + assert "payroll_loan" in signals + assert signals["payroll_loan"] >= 3.0 + + +def test_classify_no_signal_on_generic_text(engine: GT_EarlyWarning) -> None: + signals = 
engine.classify_signals("Sunny weather expected across the region this weekend.") + assert signals == {} + + +def test_bayesian_update_increases_risk(engine: GT_EarlyWarning) -> None: + prior = engine.get_prior("uk", "financial") + posterior = engine.bayesian_update("uk", "financial", evidence_strength=2.0) + assert posterior > prior + + +def test_process_feed_item_updates_region(engine: GT_EarlyWarning) -> None: + item = { + "id": "test-1", + "text": "Mass rally and general strike announced; protest mobilization spreads.", + "source": "t.me/osintdefender", + "region": "ukraine", + "domain": "unrest", + "entities": ["channel:osintdefender"], + "coords": [50.45, 30.52], + } + result = engine.process_feed_item(item) + assert result["signals"] + assert result["risk_score"] > engine.settings.base_prior + assert result["contagion_potential"] >= 0.0 + + +def test_duplicate_items_are_skipped(engine: GT_EarlyWarning) -> None: + item = { + "id": "dup-1", + "text": "GPS jamming spike reported near border corridor.", + "source": "gdelt", + "region": "baltics", + "domain": "conflict", + } + first = engine.process_feed_item(item) + second = engine.process_feed_item(item) + assert not first.get("skipped") + assert second.get("skipped") is True + + +def test_heatmap_returns_geojson_features(engine: GT_EarlyWarning) -> None: + engine.process_feed_item( + { + "id": "heat-1", + "text": "Troop movement and armored convoy observed overnight.", + "source": "news", + "region": "eastern_europe", + "coords": [48.0, 37.0], + } + ) + heatmap = engine.get_risk_heatmap() + assert heatmap["type"] == "FeatureCollection" + assert len(heatmap["features"]) >= 1 + feature = heatmap["features"][0] + assert "risk" in feature["properties"] + assert feature["geometry"]["type"] == "Point" + + +def test_dossier_includes_recent_signals(engine: GT_EarlyWarning) -> None: + engine.process_feed_item( + { + "id": "dos-1", + "text": "Supply chain delay at major port; logistics backlog worsens.", + "source": 
"news", + "region": "china", + "domain": "financial", + } + ) + dossier = engine.get_dossier("china") + assert dossier["region"] == "china" + assert dossier["recent_signals"] + assert "interpretation" in dossier + + +def test_feed_adapter_normalizes_telegram_post() -> None: + normalized = normalize_feed_item( + { + "title": "Strike expands", + "description": "General strike and rally planned in capital.", + "source": "t.me/nexta_live", + "channel": "nexta_live", + "coords": [53.9, 27.56], + }, + source_type="telegram_osint", + ) + assert normalized["region"] != "global" + assert normalized["domain"] in {"unrest", "financial", "conflict"} + assert normalized["text"] + + +def test_integration_disabled_by_default(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("GT_ANALYTICS_ENABLED", raising=False) + reset_gt_engine() + assert process_feed_item({"text": "test", "region": "global"}) is None + + +def test_refresh_from_latest_data_processes_telegram(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + latest = { + "telegram_osint": { + "posts": [ + { + "id": "tg-1", + "title": "GPS jamming", + "description": "GPS jamming spike reported along northern border.", + "source": "t.me/osintdefender", + "channel": "osintdefender", + "coords": [59.93, 30.33], + } + ] + }, + "news": [], + "gdelt": [], + } + summary = refresh_from_latest_data(latest, persist=False) + assert summary["enabled"] is True + assert summary["processed"] >= 1 \ No newline at end of file diff --git a/backend/tests/test_gt_feed_adapter_translate.py b/backend/tests/test_gt_feed_adapter_translate.py new file mode 100644 index 0000000..f50176d --- /dev/null +++ b/backend/tests/test_gt_feed_adapter_translate.py @@ -0,0 +1,29 @@ +"""GT feed adapter uses Telegram English translations for costly-signal matching.""" + +from __future__ import annotations + +from analytics.feed_adapter import normalize_feed_item + + +def 
test_telegram_prefers_translated_text_for_gt() -> None: + post = { + "title": "Київ 1х БпЛА", + "description": "Обстріл біля Харкова", + "title_translated": "Kyiv 1x UAV", + "description_translated": "Shelling near Kharkiv with troop movement reported", + "source": "t.me/osintdefender", + "coords": [49.99, 36.23], + } + item = normalize_feed_item(post, source_type="telegram_osint") + assert "troop movement" in item["text"].lower() + assert item["domain"] == "conflict" + + +def test_hashtag_region_maps_ukraine_dossier_key() -> None: + post = { + "title": "Update", + "description_translated": "#Ukraine #USA aircraft spotted on runway", + "source": "t.me/osintdefender", + } + item = normalize_feed_item(post, source_type="telegram_osint") + assert item["region"] == "ukraine" \ No newline at end of file diff --git a/backend/tests/test_gt_lean_profile.py b/backend/tests/test_gt_lean_profile.py new file mode 100644 index 0000000..7e627a2 --- /dev/null +++ b/backend/tests/test_gt_lean_profile.py @@ -0,0 +1,35 @@ +"""Lean-profile gating for Strategic Risk Analytics.""" + +from __future__ import annotations + +import pytest + +from analytics.integration import get_gt_engine, maybe_refresh_gt_analytics, reset_gt_engine +from analytics.settings import gt_engine_operational, gt_scheduled_ingest_enabled + + +def test_gt_engine_blocked_on_lean_without_ack(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + monkeypatch.setenv("GT_ANALYTICS_PROFILE", "lean") + monkeypatch.delenv("GT_ANALYTICS_ACK_LOW_CPU", raising=False) + reset_gt_engine() + assert gt_engine_operational() is False + assert get_gt_engine() is None + + +def test_gt_engine_allowed_on_lean_with_ack(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + monkeypatch.setenv("GT_ANALYTICS_PROFILE", "lean") + monkeypatch.setenv("GT_ANALYTICS_ACK_LOW_CPU", "true") + reset_gt_engine() + assert gt_engine_operational() is True + assert 
get_gt_engine() is not None + + +def test_scheduled_ingest_skipped_on_lean(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + monkeypatch.setenv("GT_ANALYTICS_PROFILE", "lean") + monkeypatch.delenv("GT_ANALYTICS_ACK_LOW_CPU", raising=False) + reset_gt_engine() + assert gt_scheduled_ingest_enabled() is False + maybe_refresh_gt_analytics() diff --git a/backend/tests/test_gt_micro_rolling.py b/backend/tests/test_gt_micro_rolling.py new file mode 100644 index 0000000..26e874f --- /dev/null +++ b/backend/tests/test_gt_micro_rolling.py @@ -0,0 +1,121 @@ +"""Micro rolling 3-day average for Strategic Risk Analytics.""" + +from __future__ import annotations + +from datetime import date, timedelta +from pathlib import Path + +import pytest + +from analytics.daily_store import DailyRegionReading, DailySnapshot, date_id, save_daily +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.micro_rolling import ( + capture_daily_readings, + compute_micro_view, + enrich_heatmap_features, + micro_rolling_report, +) +from analytics.settings import GTAnalyticsSettings + + +@pytest.fixture() +def daily_store(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + store = tmp_path / "daily" + monkeypatch.setenv("GT_DAILY_STORE_DIR", str(store)) + return store + + +def _seed_engine() -> GT_EarlyWarning: + engine = GT_EarlyWarning(GTAnalyticsSettings(enabled=True, base_prior=0.15)) + engine.process_feed_item( + { + "text": "Troop movement and military mobilization near border", + "region": "ukraine", + "source": "test", + "source_type": "manual", + } + ) + return engine + + +def _save_day(day: date, region: str, peak: float) -> None: + day_key = date_id(day) + snap = DailySnapshot(date=day_key, regions={}) + snap.regions[region] = DailyRegionReading( + region=region, + composite_risk=peak * 0.9, + financial=0.15, + unrest=0.15, + conflict=peak, + peak_score=peak, + readings=1, + last_captured_at=f"{day_key}T12:00:00+00:00", 
+ ) + save_daily(snap) + + +def test_capture_daily_readings(daily_store: Path) -> None: + engine = _seed_engine() + result = capture_daily_readings(engine, when=date(2026, 6, 16)) + assert result["regions"] >= 1 + again = capture_daily_readings(engine, when=date(2026, 6, 16)) + assert again["regions"] >= 1 + + +def test_3day_rolling_average_and_ignition(daily_store: Path) -> None: + region = "ukraine" + today = date(2026, 6, 16) + _save_day(today - timedelta(days=2), region, 0.20) + _save_day(today - timedelta(days=1), region, 0.22) + _save_day(today, region, 0.45) + + view = compute_micro_view(region, as_of=today, window_days=3) + assert view is not None + assert view.days_in_window == 3 + assert view.risk_3d_avg == pytest.approx(0.29, abs=0.01) + assert view.spot_risk == 0.45 + assert view.risk_delta == pytest.approx(0.16, abs=0.01) + assert view.ignition is True + + +def test_enrich_heatmap_features(daily_store: Path) -> None: + engine = _seed_engine() + today = date(2026, 6, 16) + capture_daily_readings(engine, when=today) + heatmap = engine.get_risk_heatmap() + enriched = enrich_heatmap_features(heatmap, as_of=today, window_days=3) + feature = enriched["features"][0] + props = feature["properties"] + assert "risk_3d_avg" in props + assert "risk_spot" in props + assert "micro_ignition" in props + + +def test_micro_rolling_report(daily_store: Path) -> None: + region = "ukraine" + today = date(2026, 6, 16) + _save_day(today - timedelta(days=1), region, 0.21) + _save_day(today, region, 0.40) + + report = micro_rolling_report(as_of=today, window_days=3, limit=5) + assert report["mode"] == "micro_rolling" + assert report["window_days"] == 3 + assert report["regions_tracked"] >= 1 + + +def test_openclaw_micro_command(daily_store: Path, monkeypatch: pytest.MonkeyPatch) -> None: + from analytics.integration import reset_gt_engine + from services.openclaw_channel import _dispatch_command + + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + result 
= _dispatch_command("gt_micro_rolling", {"window_days": 3, "compact": True}) + assert result["ok"] is True + assert result["data"]["mode"] == "micro_rolling" + + +def test_route_query_micro_intent() -> None: + from services.openclaw_routing import route_query + + plan = route_query("Show GT rolling 3 day average and ignition regions") + assert plan["recommended"]["cmd"] == "gt_micro_rolling" \ No newline at end of file diff --git a/backend/tests/test_gt_rolling_backtest.py b/backend/tests/test_gt_rolling_backtest.py new file mode 100644 index 0000000..27f23d6 --- /dev/null +++ b/backend/tests/test_gt_rolling_backtest.py @@ -0,0 +1,170 @@ +"""Rolling weekly operational validation for Strategic Risk Analytics.""" + +from __future__ import annotations + +import json +from datetime import date +from pathlib import Path + +import pytest + +from analytics.backtest import DEFAULT_BACKTEST_ALERT_THRESHOLD +from analytics.gt_early_warning import GT_EarlyWarning +from analytics.integration import reset_gt_engine +from analytics.rolling_backtest import ( + freeze_weekly_snapshot, + iso_week_id, + label_regions, + rolling_report, + score_week, +) +from analytics.settings import GTAnalyticsSettings +from analytics.weekly_store import RegionSnapshot, WeeklySnapshot, load_week + + +@pytest.fixture() +def rolling_store(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + store = tmp_path / "gt_rolling" + monkeypatch.setenv("GT_ROLLING_STORE_DIR", str(store)) + return store + + +def _seed_engine() -> GT_EarlyWarning: + engine = GT_EarlyWarning(GTAnalyticsSettings(enabled=True, base_prior=0.15)) + engine.process_feed_item( + { + "text": "Troop movement and military mobilization near border", + "region": "ukraine", + "source": "test", + "source_type": "manual", + } + ) + engine.process_feed_item( + { + "text": "Routine diplomatic statement about trade", + "region": "canada", + "source": "test", + "source_type": "manual", + } + ) + return engine + + +def 
test_iso_week_id_format() -> None: + assert iso_week_id(date(2026, 6, 16)) == "2026-W25" + + +def test_freeze_and_score_week(rolling_store: Path) -> None: + engine = _seed_engine() + result = freeze_weekly_snapshot( + week_id="2026-W10", + engine=engine, + frozen_by="test", + ) + assert result["ok"] is True + assert result["created"] is True + assert result["region_count"] >= 2 + + snapshot = load_week("2026-W10") + assert snapshot is not None + ukraine = next(row for row in snapshot.regions if row.region == "ukraine") + assert ukraine.alerted is True + + pending_score = score_week(snapshot) + assert pending_score.labeled == 0 + assert pending_score.scorable is False + + label_regions( + "2026-W10", + [ + {"region": "ukraine", "label": "true_escalation"}, + {"region": "canada", "label": "benign"}, + ], + ) + labeled = load_week("2026-W10") + assert labeled is not None + scored = score_week(labeled) + assert scored.labeled == 2 + assert scored.true_positives == 1 + assert scored.true_negatives == 1 + assert scored.accuracy == 1.0 + assert scored.confidence_rate >= 0.0 + + +def test_freeze_is_idempotent(rolling_store: Path) -> None: + engine = _seed_engine() + first = freeze_weekly_snapshot(week_id="2026-W11", engine=engine) + second = freeze_weekly_snapshot(week_id="2026-W11", engine=engine) + assert first["created"] is True + assert second["created"] is False + + +def test_rolling_report_trend(rolling_store: Path) -> None: + engine = _seed_engine() + freeze_weekly_snapshot(week_id="2026-W20", engine=engine) + freeze_weekly_snapshot(week_id="2026-W21", engine=engine) + + label_regions("2026-W20", [{"region": "ukraine", "label": "true_escalation"}]) + label_regions( + "2026-W21", + [ + {"region": "ukraine", "label": "true_escalation"}, + {"region": "canada", "label": "benign"}, + ], + ) + + report = rolling_report(weeks=4) + assert report["mode"] == "rolling_operational" + assert report["alert_threshold"] == DEFAULT_BACKTEST_ALERT_THRESHOLD + assert 
len(report["trend"]) == 2 + assert report["latest"] is not None + + +def test_openclaw_rolling_commands( + rolling_store: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from analytics.integration import get_gt_engine + from services.openclaw_channel import _dispatch_command + + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + engine = get_gt_engine() + assert engine is not None + engine.process_feed_item( + { + "text": "Troop movement and military mobilization near border", + "region": "ukraine", + "source": "test", + "source_type": "manual", + } + ) + + freeze = _dispatch_command("gt_rolling_freeze", {"week_id": "2026-W30", "compact": True}) + assert freeze["ok"] is True + assert freeze["data"]["enabled"] is True + + label = _dispatch_command( + "gt_rolling_label", + { + "week_id": "2026-W30", + "region": "ukraine", + "label": "false_alarm", + }, + ) + assert label["ok"] is True + assert label["data"]["updated"] == 1 + + trend = _dispatch_command("gt_rolling_backtest", {"weeks": 4, "compact": True}) + assert trend["ok"] is True + assert trend["data"]["mode"] == "rolling_operational" + + +def test_route_query_rolling_intent() -> None: + from services.openclaw_routing import route_query + + plan = route_query("Show GT rolling operational backtest week over week") + assert plan["recommended"]["cmd"] == "gt_rolling_backtest" + + freeze_plan = route_query("Freeze weekly GT snapshot for operational validation") + assert freeze_plan["recommended"]["cmd"] == "gt_rolling_freeze" \ No newline at end of file diff --git a/backend/tests/test_openclaw_gt_analytics.py b/backend/tests/test_openclaw_gt_analytics.py new file mode 100644 index 0000000..2d8eb2a --- /dev/null +++ b/backend/tests/test_openclaw_gt_analytics.py @@ -0,0 +1,60 @@ +"""OpenClaw routing and commands for Strategic Risk Analytics.""" + +from __future__ import annotations + +import pytest + +from analytics.integration import reset_gt_engine +from services.openclaw_routing import 
route_query + + +def test_route_query_gt_analyze_intent() -> None: + plan = route_query("Run GT analysis on UK and Europe feeds") + assert plan["intent"] == "gt_analyze" + assert plan["recommended"]["cmd"] == "gt_analyze" + + +def test_route_query_gt_dossier_intent() -> None: + plan = route_query("GT rationale dossier for ukraine strategic risk") + assert plan["recommended"]["cmd"] in {"gt_dossier", "gt_analyze"} + + +def test_gt_analyze_command_disabled(monkeypatch: pytest.MonkeyPatch) -> None: + from services.openclaw_channel import _dispatch_command + + monkeypatch.delenv("GT_ANALYTICS_ENABLED", raising=False) + reset_gt_engine() + result = _dispatch_command("gt_analyze", {}) + assert result["ok"] is False + + +def test_route_query_gt_backtest_intent() -> None: + plan = route_query("Run GT historical backtest with Wilson confidence") + assert plan["intent"] == "gt_backtest" + assert plan["recommended"]["cmd"] == "gt_backtest" + assert plan["recommended"]["args"]["expanded"] is True + + +def test_gt_backtest_command_enabled(monkeypatch: pytest.MonkeyPatch) -> None: + from services.openclaw_channel import _dispatch_command + + monkeypatch.setenv("GT_ANALYTICS_ENABLED", "true") + reset_gt_engine() + result = _dispatch_command("gt_backtest", {"expanded": True, "compact": True}) + assert result["ok"] is True + data = result["data"] + assert data["enabled"] is True + assert data["accuracy"] == 1.0 + assert data["confidence_rate"] >= 0.95 + assert data["meets_target"] is True + assert "cases" not in data + + +def test_gt_backtest_command_disabled(monkeypatch: pytest.MonkeyPatch) -> None: + from services.openclaw_channel import _dispatch_command + + monkeypatch.delenv("GT_ANALYTICS_ENABLED", raising=False) + reset_gt_engine() + result = _dispatch_command("gt_backtest", {}) + assert result["ok"] is True + assert result["data"]["enabled"] is False \ No newline at end of file diff --git a/backend/tests/test_runtime_profile.py b/backend/tests/test_runtime_profile.py new 
file mode 100644 index 0000000..2ae6079 --- /dev/null +++ b/backend/tests/test_runtime_profile.py @@ -0,0 +1,28 @@ +"""Runtime profile detection for lean fleet nodes.""" + +from services import runtime_profile + + +def test_resolve_profile_name_env_override(monkeypatch): + monkeypatch.setenv("GT_ANALYTICS_PROFILE", "standard") + monkeypatch.setattr(runtime_profile, "detect_cpu_limit", lambda: 1.0) + assert runtime_profile.resolve_profile_name() == "standard" + + +def test_resolve_profile_name_auto_lean_on_one_cpu(monkeypatch): + monkeypatch.delenv("GT_ANALYTICS_PROFILE", raising=False) + monkeypatch.setattr(runtime_profile, "detect_cpu_limit", lambda: 1.0) + assert runtime_profile.resolve_profile_name() == "lean" + + +def test_runtime_profile_payload(monkeypatch): + monkeypatch.delenv("GT_ANALYTICS_PROFILE", raising=False) + monkeypatch.setattr(runtime_profile, "detect_cpu_limit", lambda: 1.0) + monkeypatch.setattr(runtime_profile, "detect_memory_limit_mb", lambda: 4096) + runtime_profile.clear_runtime_profile_cache() + payload = runtime_profile.get_runtime_profile() + assert payload["profile"] == "lean" + assert payload["cpu_limit"] == 1.0 + assert payload["gt_analytics"]["recommended"] is False + assert payload["gt_analytics"]["lean_node"] is True + assert "1 vCPU" in (payload["gt_analytics"]["warning"] or "") diff --git a/backend/tests/test_telegram_translate.py b/backend/tests/test_telegram_translate.py new file mode 100644 index 0000000..6960a6e --- /dev/null +++ b/backend/tests/test_telegram_translate.py @@ -0,0 +1,56 @@ +"""Telegram OSINT auto-translation.""" + +from services import telegram_translate + + +def test_guess_source_lang_detects_cyrillic(): + assert telegram_translate.guess_source_lang("В Крым поедем несмотря ни на что") == "ru" + + +def test_apply_post_translation_skips_english(monkeypatch): + monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True) + post = { + "title": "Missile strike reported near Kyiv overnight.", 
+ "description": "Missile strike reported near Kyiv overnight.", + } + enriched = telegram_translate.apply_post_translation(post, "en") + assert enriched["source_lang"] == "en" + assert "title_translated" not in enriched + + +def test_apply_post_translation_adds_fields(monkeypatch): + monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True) + monkeypatch.setattr( + telegram_translate, + "translate_text", + lambda text, target_lang=None: ( + "We will go to Crimea no matter what. This is our homeland!", + "ru", + ), + ) + post = { + "title": "«В Крым поедем несмотря ни на что. Это наша родина!»", + "description": "«В Крым поедем несмотря ни на что. Это наша родина!»", + } + enriched = telegram_translate.apply_post_translation(post, "en") + assert enriched["source_lang"] == "ru" + assert enriched["translate_to"] == "en" + assert "Crimea" in enriched["title_translated"] + + +def test_normalize_translate_target_maps_ui_locales(): + assert telegram_translate.normalize_translate_target("zh-CN") == "zh-CN" + assert telegram_translate.normalize_translate_target("fr") == "fr" + + +def test_source_lang_label_avoids_uk_country_confusion(): + assert telegram_translate.source_lang_label("uk") == "Ukrainian" + assert telegram_translate.source_lang_label("ru") == "Russian" + + +def test_polish_translation_expands_bpla_shorthand(): + assert "UAV" in telegram_translate.polish_translation("Kyiv 1x BpLa on Rembazu.") + + +def test_guess_source_lang_prefers_ukrainian_markers(): + assert telegram_translate.guess_source_lang("Київ 1х БпЛА") == "uk" \ No newline at end of file diff --git a/backend/tests/test_telegram_watchdog.py b/backend/tests/test_telegram_watchdog.py new file mode 100644 index 0000000..5364d0e --- /dev/null +++ b/backend/tests/test_telegram_watchdog.py @@ -0,0 +1,100 @@ +"""Telegram OSINT watchdog and search helpers.""" + +from services import openclaw_watchdog +from services.telegram_osint_text import keyword_matches_telegram_post, 
telegram_post_search_text + + +def _telegram_slow_fixture() -> dict: + return { + "telegram_osint": { + "posts": [ + { + "id": "tg-uk-1", + "title": "Київ 1х БпЛА на Рембазу.", + "description": "Київ 1х БпЛА на Рембазу.", + "title_translated": "Kyiv 1x UAV on Rembazu.", + "description_translated": "Kyiv 1x UAV on Rembazu.", + "channel": "war_monitor", + "source": "t.me/war_monitor", + "link": "https://t.me/war_monitor/101", + "risk_score": 3, + "source_lang": "uk", + }, + { + "id": "tg-ru-1", + "title": "«В Крым поедем несмотря ни на что. Это наша родина!»", + "description": "«В Крым поедем несмотря ни на что. Это наша родина!»", + "title_translated": "We will go to Crimea no matter what. This is our homeland!", + "description_translated": "We will go to Crimea no matter what. This is our homeland!", + "channel": "nexta_live", + "source": "t.me/nexta_live", + "link": "https://t.me/nexta_live/202", + "risk_score": 9, + "source_lang": "ru", + }, + ], + "total": 2, + } + } + + +def test_telegram_post_search_text_includes_translated_fields(): + post = _telegram_slow_fixture()["telegram_osint"]["posts"][0] + haystack = telegram_post_search_text(post) + assert "kyiv 1x uav on rembazu" in haystack + assert "бпла" in haystack + + +def test_keyword_matches_telegram_post_searches_translated_and_original(): + post = _telegram_slow_fixture()["telegram_osint"]["posts"][1] + assert keyword_matches_telegram_post(post, "crimea") + assert keyword_matches_telegram_post(post, "крым") + + +def test_watchdog_keyword_matches_telegram_translation(monkeypatch): + monkeypatch.setattr(openclaw_watchdog, "_ensure_running", lambda: None) + openclaw_watchdog.clear_watches() + try: + watch = openclaw_watchdog.add_watch("keyword", {"keyword": "crimea"}) + alert = openclaw_watchdog._check_keyword(watch["id"], {"keyword": "crimea"}, {}, _telegram_slow_fixture()) + assert alert is not None + assert any(match["source"] == "telegram_osint" for match in alert["data"]["matches"]) + assert 
alert["data"]["matches"][0]["title"].startswith("We will go to Crimea") + # Same Telegram post should not re-alert once seen. + assert openclaw_watchdog._check_keyword(watch["id"], {"keyword": "crimea"}, {}, _telegram_slow_fixture()) is None + finally: + openclaw_watchdog.clear_watches() + + +def test_watchdog_telegram_rhetoric_alerts_on_high_risk_posts(monkeypatch): + monkeypatch.setattr(openclaw_watchdog, "_ensure_running", lambda: None) + openclaw_watchdog.clear_watches() + try: + watch = openclaw_watchdog.add_watch("telegram_rhetoric", {"min_risk_score": 8}) + alert = openclaw_watchdog._check_telegram_rhetoric(watch["id"], {"min_risk_score": 8}, _telegram_slow_fixture()) + assert alert is not None + assert "Telegram rhetoric alert" in alert["alert"] + assert len(alert["data"]["matches"]) == 1 + assert alert["data"]["matches"][0]["channel"] == "nexta_live" + assert alert["data"]["matches"][0]["risk_score"] == 9 + assert openclaw_watchdog._check_telegram_rhetoric(watch["id"], {"min_risk_score": 8}, _telegram_slow_fixture()) is None + finally: + openclaw_watchdog.clear_watches() + + +def test_watchdog_telegram_rhetoric_supports_channel_filter(monkeypatch): + monkeypatch.setattr(openclaw_watchdog, "_ensure_running", lambda: None) + openclaw_watchdog.clear_watches() + try: + watch = openclaw_watchdog.add_watch( + "telegram_rhetoric", + {"min_risk_score": 7, "channels": ["war_monitor"]}, + ) + alert = openclaw_watchdog._check_telegram_rhetoric( + watch["id"], + {"min_risk_score": 7, "channels": ["war_monitor"]}, + _telegram_slow_fixture(), + ) + assert alert is None # war_monitor post is only risk 3 + finally: + openclaw_watchdog.clear_watches() \ No newline at end of file diff --git a/docker-compose.participant.yml b/docker-compose.participant.yml index 9e2dfde..44fbb4d 100644 --- a/docker-compose.participant.yml +++ b/docker-compose.participant.yml @@ -15,6 +15,8 @@ services: MESH_DM_PENDING_PER_SENDER_LIMIT: "8" MESH_DM_PERSIST_SPOOL: "true" 
WORMHOLE_STARTUP_DEADLINE_S: "90" + GT_ANALYTICS_ENABLED: "false" + GT_ANALYTICS_PROFILE: "lean" deploy: resources: limits: diff --git a/docker-compose.yml b/docker-compose.yml index 6cd7b99..3225087 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -93,6 +93,13 @@ services: - TELEGRAM_OSINT_ENABLED=${TELEGRAM_OSINT_ENABLED:-true} - TELEGRAM_OSINT_CHANNELS=${TELEGRAM_OSINT_CHANNELS:-} - TELEGRAM_OSINT_INTERVAL_MINUTES=${TELEGRAM_OSINT_INTERVAL_MINUTES:-60} + - TELEGRAM_OSINT_TRANSLATE=${TELEGRAM_OSINT_TRANSLATE:-true} + - TELEGRAM_OSINT_TRANSLATE_TO=${TELEGRAM_OSINT_TRANSLATE_TO:-en} + - GT_ANALYTICS_ENABLED=${GT_ANALYTICS_ENABLED:-false} + - GT_ANALYTICS_HIGH_RISK_THRESHOLD=${GT_ANALYTICS_HIGH_RISK_THRESHOLD:-0.6} + - GT_ANALYTICS_BASE_PRIOR=${GT_ANALYTICS_BASE_PRIOR:-0.15} + - GT_ANALYTICS_WATCHED_CHANNELS=${GT_ANALYTICS_WATCHED_CHANNELS:-} + - OPENCLAW_HMAC_SECRET=${OPENCLAW_HMAC_SECRET:-} volumes: - backend_data:/app/data restart: unless-stopped diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx index 771f790..4abbcb5 100644 --- a/frontend/src/app/page.tsx +++ b/frontend/src/app/page.tsx @@ -37,6 +37,7 @@ import { useDataPolling, LAYER_TOGGLE_EVENT } from '@/hooks/useDataPolling'; import { useBackendStatus, useDataKey, useDataKeys } from '@/hooks/useDataStore'; import { useReverseGeocode } from '@/hooks/useReverseGeocode'; import { useRegionDossier } from '@/hooks/useRegionDossier'; +import { useGtDossier } from '@/hooks/useGtDossier'; import { useAgentActions } from '@/hooks/useAgentActions'; import { useFeedHealth } from '@/hooks/useFeedHealth'; import { useKeyboardShortcuts } from '@/hooks/useKeyboardShortcuts'; @@ -237,6 +238,7 @@ export default function Dashboard() { wastewater: true, // CrowdThreat is operator opt-in only. 
crowdthreat: false, + gt_risk: false, // Shodan shodan_overlay: false, // AI Intel @@ -244,6 +246,16 @@ export default function Dashboard() { // SAR (Synthetic Aperture Radar) sar: true, }); + const regionLat = + selectedEntity?.type === 'region_dossier' ? selectedEntity.extra?.lat : undefined; + const regionLng = + selectedEntity?.type === 'region_dossier' ? selectedEntity.extra?.lng : undefined; + const { gtDossier, gtDossierLoading } = useGtDossier( + typeof regionLat === 'number' ? regionLat : undefined, + typeof regionLng === 'number' ? regionLng : undefined, + regionDossier?.country?.name, + activeLayers.gt_risk, + ); const [shodanResults, setShodanResults] = useState([]); const [, setShodanQueryLabel] = useState(''); const [shodanStyle, setShodanStyle] = useState({ shape: 'circle', color: '#16a34a', size: 'md' }); @@ -776,6 +788,8 @@ export default function Dashboard() { selectedEntity={selectedEntity} regionDossier={regionDossier} regionDossierLoading={regionDossierLoading} + gtDossier={gtDossier} + gtDossierLoading={gtDossierLoading} onExpandEntityGraph={() => { if (isEntityGraphEligible(selectedEntity)) setShowEntityGraph(true); }} diff --git a/frontend/src/components/GtAnalyticsHud.tsx b/frontend/src/components/GtAnalyticsHud.tsx new file mode 100644 index 0000000..2ffd3ae --- /dev/null +++ b/frontend/src/components/GtAnalyticsHud.tsx @@ -0,0 +1,77 @@ +'use client'; + +import React from 'react'; +import { GripVertical, Minus, Plus } from 'lucide-react'; +import { useTranslation } from '@/i18n'; +import { useFloatingPanel } from '@/hooks/useFloatingPanel'; +import GtBacktestPanel from '@/components/GtBacktestPanel'; +import GtTopAlertsStrip from '@/components/GtTopAlertsStrip'; +import type { SelectedEntity } from '@/types/dashboard'; + +interface Props { + layerEnabled?: boolean; + onFlyTo?: (lat: number, lng: number) => void; + onSelectEntity?: (entity: SelectedEntity | null) => void; +} + +export default function GtAnalyticsHud({ + layerEnabled = 
false, + onFlyTo, + onSelectEntity, +}: Props) { + const { t } = useTranslation(); + const { position, isMinimized, setIsMinimized, isDragging, onDragStart } = useFloatingPanel( + 'sb-gt-analytics-hud-v1', + { defaultPosition: { x: 24, y: 380 } }, + ); + + if (!layerEnabled) return null; + + return ( +
+
+ + + {t('gtHud.title')} + + {!isMinimized && ( + + {t('gtHud.dragHint')} + + )} + +
+ + {!isMinimized && ( +
+ + +
+ )} +
+ ); +} \ No newline at end of file diff --git a/frontend/src/components/GtBacktestPanel.tsx b/frontend/src/components/GtBacktestPanel.tsx new file mode 100644 index 0000000..2e57ff3 --- /dev/null +++ b/frontend/src/components/GtBacktestPanel.tsx @@ -0,0 +1,453 @@ +'use client'; + +import React, { useCallback, useEffect, useState } from 'react'; +import { CheckCircle2, Minus, Plus, Radar, RefreshCw, XCircle } from 'lucide-react'; +import { API_BASE } from '@/lib/api'; +import { useTranslation } from '@/i18n'; +import type { GtBacktestReport, GtMicroRollingReport, GtRollingReport } from '@/types/dashboard'; + +interface Props { + layerEnabled?: boolean; + embedded?: boolean; +} + +type TabId = 'benchmark' | 'operational'; + +function pct(value: number | undefined): string { + if (value == null || Number.isNaN(value)) return '—'; + return `${(value * 100).toFixed(1)}%`; +} + +export default function GtBacktestPanel({ layerEnabled = false, embedded = false }: Props) { + const { t } = useTranslation(); + const [isMinimized, setIsMinimized] = useState(false); + const [activeTab, setActiveTab] = useState('operational'); + const [benchmark, setBenchmark] = useState(null); + const [rolling, setRolling] = useState(null); + const [micro, setMicro] = useState(null); + const [loadingBenchmark, setLoadingBenchmark] = useState(false); + const [loadingRolling, setLoadingRolling] = useState(false); + const [loadingMicro, setLoadingMicro] = useState(false); + const [showFailures, setShowFailures] = useState(false); + + const refreshBenchmark = useCallback(async () => { + if (!layerEnabled) { + setBenchmark(null); + return; + } + setLoadingBenchmark(true); + try { + const res = await fetch(`${API_BASE}/api/analytics/backtest?expanded=true&tune=false`); + if (res.ok) setBenchmark(await res.json()); + } catch { + /* non-fatal */ + } finally { + setLoadingBenchmark(false); + } + }, [layerEnabled]); + + const refreshRolling = useCallback(async () => { + if (!layerEnabled) { + 
setRolling(null); + return; + } + setLoadingRolling(true); + try { + const res = await fetch(`${API_BASE}/api/analytics/rolling?weeks=8`); + if (res.ok) setRolling(await res.json()); + } catch { + /* non-fatal */ + } finally { + setLoadingRolling(false); + } + }, [layerEnabled]); + + const refreshMicro = useCallback(async () => { + if (!layerEnabled) { + setMicro(null); + return; + } + setLoadingMicro(true); + try { + const res = await fetch(`${API_BASE}/api/analytics/rolling/micro?window_days=3&limit=6`); + if (res.ok) setMicro(await res.json()); + } catch { + /* non-fatal */ + } finally { + setLoadingMicro(false); + } + }, [layerEnabled]); + + const refresh = useCallback(async () => { + await Promise.all([refreshBenchmark(), refreshRolling(), refreshMicro()]); + }, [refreshBenchmark, refreshRolling, refreshMicro]); + + useEffect(() => { + refresh(); + if (!layerEnabled) return undefined; + const id = setInterval(refresh, 15 * 60_000); + return () => clearInterval(id); + }, [refresh, layerEnabled]); + + const failures = (benchmark?.cases || []).filter((row) => !row.correct); + const operationalScorable = Boolean( + rolling && ((rolling.weeks_scorable ?? 0) > 0 || rolling.latest?.scorable), + ); + const benchmarkPass = benchmark?.meets_target; + const rollingPass = rolling?.meets_target; + const passBadge = + activeTab === 'benchmark' + ? benchmarkPass + : operationalScorable + ? rollingPass + : undefined; + const showCollectingBadge = + activeTab === 'operational' && layerEnabled && rolling?.enabled && !operationalScorable; + const loading = + activeTab === 'benchmark' + ? loadingBenchmark + : loadingRolling || loadingMicro; + const latest = rolling?.latest; + const microRegions = micro?.ignitions?.length + ? micro.ignitions + : (micro?.top_regions || []).slice(0, 4); + + const shellClass = embedded + ? 
'pointer-events-auto flex-shrink-0 border-b border-amber-800/30 bg-black/70' + : 'pointer-events-auto flex-shrink-0 border border-amber-700/40 bg-black/75 backdrop-blur-sm shadow-[0_0_18px_rgba(245,158,11,0.10)]'; + + return ( +
+
setIsMinimized((prev) => !prev)} + > +
+ + + {t('gtBacktest.title').toUpperCase()} + + {showCollectingBadge && ( + + {t('gtBacktest.collecting')} + + )} + {layerEnabled && passBadge != null && ( + + {passBadge ? t('gtBacktest.pass') : t('gtBacktest.fail')} + + )} +
+
+ + {isMinimized ? ( + + ) : ( + + )} +
+
+ + {!isMinimized && ( +
+ {!layerEnabled ? ( +
+ {t('gtBacktest.layerOff')} +
+ ) : ( + <> +
+ {(['operational', 'benchmark'] as TabId[]).map((tab) => ( + + ))} +
+ + {activeTab === 'benchmark' ? ( + !benchmark?.enabled ? ( +
+ {t('gtBacktest.disabled')} +
+ ) : loadingBenchmark && !benchmark.accuracy ? ( +
+ {t('gtBacktest.loading')} +
+ ) : ( + <> +
+ {t('gtBacktest.benchmarkNote')} +
+
+
+
+ {t('gtBacktest.accuracy')} +
+
+ {pct(benchmark.accuracy)} +
+
+
+
+ {t('gtBacktest.confidence')} +
+
+ {pct(benchmark.confidence_rate)} +
+
+
+ +
+ {t('gtBacktest.cases').replace('{count}', String(benchmark.total_cases))} ·{' '} + {t('gtBacktest.threshold').replace('{value}', benchmark.alert_threshold.toFixed(2))} ·{' '} + {t('gtBacktest.target').replace('{value}', pct(benchmark.target_confidence))} +
+ +
+ TP {benchmark.true_positives} + TN {benchmark.true_negatives} + FP {benchmark.false_positives} + FN {benchmark.false_negatives} +
+ +
+ {benchmark.meets_target ? ( + + ) : ( + + )} + + {benchmark.meets_target + ? t('gtBacktest.meetsTarget') + : t('gtBacktest.belowTarget')} + +
+ + {failures.length > 0 && ( +
+ + {showFailures && ( +
+ {failures.map((row) => ( +
+ {row.name} ({row.kind}) +
+ ))} +
+ )} +
+ )} + + ) + ) : !rolling?.enabled && !micro?.enabled ? ( +
+ {t('gtBacktest.disabled')} +
+ ) : (loadingRolling || loadingMicro) && !rolling?.latest && !micro?.regions_tracked ? ( +
+ {t('gtBacktest.operationalLoading')} +
+ ) : ( + <> +
+
+ {t('gtBacktest.microTitle').toUpperCase()} +
+ {micro?.enabled ? ( + <> +
+ {t('gtBacktest.microWindow') + .replace('{days}', String(micro.window_days)) + .replace('{delta}', micro.ignition_delta.toFixed(2))} +
+
+ + {t('gtBacktest.microIgnitions').replace( + '{count}', + String(micro.ignition_count) + )} + + + {t('gtBacktest.microAlerted3d').replace( + '{count}', + String(micro.alerted_3d_count) + )} + +
+ {microRegions.length > 0 ? ( +
+ {microRegions.map((row) => ( +
+ {row.ignition && ( + + {t('gtBacktest.microIgnitionBadge')} + + )} + + {t('gtBacktest.microRegionLine') + .replace('{region}', row.region) + .replace('{spot}', pct(row.spot_risk)) + .replace('{avg}', pct(row.risk_3d_avg)) + .replace('{delta}', pct(row.risk_delta))} + +
+ ))} +
+ ) : ( +
+ {t('gtBacktest.microEmpty')} +
+ )} + + ) : ( +
+ {t('gtBacktest.microEmpty')} +
+ )} +
+ +
+ {t('gtBacktest.tabOperational').toUpperCase()} — {t('gtBacktest.operationalTrend')} +
+ + {!rolling || rolling.weeks_stored === 0 ? ( +
+ {t('gtBacktest.operationalEmpty')} +
+ ) : ( + <> +
+
+
+ {t('gtBacktest.accuracy')} +
+
+ {latest?.scorable ? pct(latest.accuracy) : '—'} +
+
+
+
+ {t('gtBacktest.confidence')} +
+
+ {latest?.scorable ? pct(latest.confidence_rate) : '—'} +
+
+
+ +
+ {t('gtBacktest.operationalWeeks') + .replace('{stored}', String(rolling.weeks_stored)) + .replace('{scorable}', String(rolling.weeks_scorable))} + {latest + ? ` · ${t('gtBacktest.operationalLabeled') + .replace('{labeled}', String(latest.labeled)) + .replace('{pending}', String(latest.pending))}` + : ''} +
+ + {latest && !latest.scorable && ( +
+ {t('gtBacktest.operationalMinLabels').replace( + '{count}', + String(rolling.min_labeled_per_week) + )} +
+ )} + + {latest?.scorable && ( +
+ TP {latest.true_positives} + TN {latest.true_negatives} + FP {latest.false_positives} + FN {latest.false_negatives} +
+ )} + + {(rolling.accuracy_series?.length ?? 0) > 0 && ( +
+
+ {t('gtBacktest.operationalTrend')} +
+
+ {rolling.accuracy_series.map((point) => ( + + {point.week_id.replace('-W', 'w')}: {pct(point.accuracy)} + + ))} +
+
+ )} + + {latest?.scorable && ( +
+ {rolling.meets_target ? ( + + ) : ( + + )} + + {rolling.improving_vs_prior + ? t('gtBacktest.operationalImproving') + : t('gtBacktest.operationalFlat')} + {' · '} + {rolling.meets_target + ? t('gtBacktest.meetsTarget') + : t('gtBacktest.belowTarget')} + +
+ )} + + )} + + )} + + )} +
+ )} +
+ ); +} \ No newline at end of file diff --git a/frontend/src/components/GtTopAlertsStrip.tsx b/frontend/src/components/GtTopAlertsStrip.tsx new file mode 100644 index 0000000..79eab09 --- /dev/null +++ b/frontend/src/components/GtTopAlertsStrip.tsx @@ -0,0 +1,121 @@ +'use client'; + +import React, { useMemo } from 'react'; +import { ChevronRight, Radar } from 'lucide-react'; +import { useTranslation } from '@/i18n'; +import { useDataKey } from '@/hooks/useDataStore'; +import { extractGtAlerts } from '@/lib/gtAlerts'; +import type { SelectedEntity } from '@/types/dashboard'; + +interface Props { + layerEnabled?: boolean; + onFlyTo?: (lat: number, lng: number) => void; + onSelectEntity?: (entity: SelectedEntity | null) => void; + embedded?: boolean; +} + +function pct(value: number): string { + return `${(value * 100).toFixed(0)}%`; +} + +export default function GtTopAlertsStrip({ + layerEnabled = false, + onFlyTo, + onSelectEntity, + embedded = false, +}: Props) { + const { t } = useTranslation(); + const gtRisk = useDataKey('gt_risk'); + + const { alerts, trackedRegions, plottedRegions, maxRegions } = useMemo( + () => extractGtAlerts(gtRisk, 8), + [gtRisk], + ); + + if (!layerEnabled || !gtRisk?.enabled) return null; + + const handleSelect = (alert: (typeof alerts)[number]) => { + onFlyTo?.(alert.lat, alert.lng); + onSelectEntity?.({ + id: alert.region, + type: 'gt_risk', + name: alert.regionLabel, + extra: { + region: alert.region, + risk: alert.risk, + financial: alert.financial, + unrest: alert.unrest, + conflict: alert.conflict, + contagion: alert.contagion, + lat: alert.lat, + lng: alert.lng, + risk_spot: alert.risk, + risk_3d_avg: alert.risk3d, + risk_delta: alert.riskDelta, + micro_ignition: alert.ignition, + }, + }); + }; + + const shellClass = embedded + ? 
'pointer-events-auto border-t border-amber-800/30 bg-black/70' + : 'pointer-events-auto max-w-[min(92vw,52rem)] border border-amber-700/45 bg-black/80 backdrop-blur-sm shadow-[0_0_16px_rgba(245,158,11,0.12)]'; + + return ( +
+
+ + + {t('gtAlerts.title')} + + + {t('gtAlerts.counts') + .replace('{plotted}', String(plottedRegions)) + .replace('{tracked}', String(trackedRegions)) + .replace('{max}', String(maxRegions))} + +
+ + {alerts.length === 0 ? ( +
+ {t('gtAlerts.empty')} +
+ ) : ( +
+ {alerts.map((alert) => ( + + ))} +
+ )} + +
+ {t('gtAlerts.hint')} +
+
+ ); +} \ No newline at end of file diff --git a/frontend/src/components/MaplibreViewer.tsx b/frontend/src/components/MaplibreViewer.tsx index b36dc1f..c4da9a9 100644 --- a/frontend/src/components/MaplibreViewer.tsx +++ b/frontend/src/components/MaplibreViewer.tsx @@ -185,6 +185,7 @@ import { CorrelationPopup } from '@/components/MaplibreViewer/popups/Correlation import { WastewaterPopup } from '@/components/MaplibreViewer/popups/WastewaterPopup'; import { MilitaryBasePopup } from '@/components/MaplibreViewer/popups/MilitaryBasePopup'; import { RegionDossierPanel } from '@/components/MaplibreViewer/popups/RegionDossierPanel'; +import { GtRiskPopup } from '@/components/MaplibreViewer/popups/GtRiskPopup'; import { TelegramOsintPopup } from '@/components/MaplibreViewer/popups/TelegramOsintPopup'; import { buildSentinelTileUrl, @@ -196,6 +197,7 @@ import { buildEarthquakesGeoJSON, buildJammingGeoJSON, buildCorrelationsGeoJSON, + buildGtRiskGeoJSON, buildTinygsGeoJSON, buildShodanGeoJSON, buildAIIntelGeoJSON, @@ -306,6 +308,7 @@ const MAP_EXTRA_DATA_KEYS = [ 'crowdthreat', 'malware_threats', 'telegram_osint', + 'gt_risk', 'datacenters', 'firms_fires', 'fishing_activity', @@ -778,6 +781,11 @@ const MaplibreViewer = ({ [activeLayers.correlations, activeLayers.contradictions, data?.correlations], ); + const gtRiskGeoJSON = useMemo( + () => (activeLayers.gt_risk ? buildGtRiskGeoJSON(data?.gt_risk) : null), + [activeLayers.gt_risk, data?.gt_risk], + ); + const tinygsGeoJSON = useMemo( () => { void interpTick; @@ -1724,6 +1732,7 @@ const MaplibreViewer = ({ correlationsGeoJSON && 'corr-infra-fill', correlationsGeoJSON && 'corr-contra-fill', correlationsGeoJSON && 'corr-analysis-fill', + gtRiskGeoJSON && 'gt-risk-heatmap', ].filter(Boolean) as string[]; useEffect(() => { @@ -1820,7 +1829,7 @@ const MaplibreViewer = ({ return (
+ {/* Strategic Risk Heatmap — Bayesian posterior scores */} + + + + {/* Correlation Alerts — Emergent Intelligence grid squares */} {/* RF Anomaly — grey */} @@ -5712,6 +5770,28 @@ const MaplibreViewer = ({ return ; })()} + {(() => { + if (selectedEntity?.type !== 'gt_risk' || !selectedEntity.extra) return null; + const props = selectedEntity.extra as Record; + const lat = Number(props.lat); + const lng = Number(props.lng); + if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null; + return ( + onEntityClick?.(null)} + /> + ); + })()} + {(() => { if (selectedEntity?.type !== 'telegram_osint' || !data?.telegram_osint?.posts) return null; const allPosts = data.telegram_osint.posts; diff --git a/frontend/src/components/MaplibreViewer/popups/GtRiskPopup.tsx b/frontend/src/components/MaplibreViewer/popups/GtRiskPopup.tsx new file mode 100644 index 0000000..eb26867 --- /dev/null +++ b/frontend/src/components/MaplibreViewer/popups/GtRiskPopup.tsx @@ -0,0 +1,188 @@ +'use client'; + +import React, { useEffect, useState } from 'react'; +import { Popup } from 'react-map-gl/maplibre'; +import { Radar } from 'lucide-react'; +import { useTranslation } from '@/i18n'; +import { API_BASE } from '@/lib/api'; +import { formatGtRegionLabel } from '@/lib/gtAlerts'; +import type { GtDossier } from '@/types/dashboard'; + +export interface GtRiskPopupProps { + region: string; + risk: number; + financial?: number; + unrest?: number; + conflict?: number; + contagion?: number; + interpretation?: string; + lat: number; + lng: number; + onClose: () => void; +} + +function riskColor(score: number): string { + if (score >= 0.6) return '#ef4444'; + if (score >= 0.4) return '#f97316'; + if (score >= 0.25) return '#eab308'; + return '#22c55e'; +} + +function formatSignalName(name: string): string { + return name.replace(/_/g, ' '); +} + +async function fetchDossier(region: string, lat: number, lng: number): Promise { + const candidates = [ + region.trim().toLowerCase(), + 
`${lat.toFixed(2)},${lng.toFixed(2)}`, + ].filter((value, index, list) => value && list.indexOf(value) === index); + + let best: GtDossier | null = null; + for (const key of candidates) { + try { + const response = await fetch(`${API_BASE}/api/analytics/dossier/${encodeURIComponent(key)}`); + if (!response.ok) continue; + const payload = (await response.json()) as GtDossier; + if (!payload.enabled) continue; + if (!best || (payload.current_risk ?? 0) >= (best.current_risk ?? 0)) { + best = payload; + } + } catch { + /* optional analytics */ + } + } + return best; +} + +export function GtRiskPopup({ + region, + risk, + financial, + unrest, + conflict, + contagion, + interpretation, + lat, + lng, + onClose, +}: GtRiskPopupProps) { + const { t } = useTranslation(); + const color = riskColor(risk); + const [dossier, setDossier] = useState(null); + const [loadingSignals, setLoadingSignals] = useState(true); + + useEffect(() => { + let cancelled = false; + setLoadingSignals(true); + void fetchDossier(region, lat, lng).then((result) => { + if (!cancelled) { + setDossier(result); + setLoadingSignals(false); + } + }); + return () => { + cancelled = true; + }; + }, [region, lat, lng]); + + const resolvedInterpretation = interpretation || dossier?.interpretation || ''; + const signals = dossier?.recent_signals || []; + + return ( + +
+
+ + + {t('gtRisk.popupTitle')} + + +
+
+
+ {t('gtRisk.region')} + {formatGtRegionLabel(region)} +
+
+ {t('gtRisk.composite')} + + {(risk * 100).toFixed(1)}% + +
+
+
+
{t('gtRisk.financial')}
+
{((financial ?? 0) * 100).toFixed(0)}%
+
+
+
{t('gtRisk.unrest')}
+
{((unrest ?? 0) * 100).toFixed(0)}%
+
+
+
{t('gtRisk.conflict')}
+
{((conflict ?? 0) * 100).toFixed(0)}%
+
+
+ {contagion != null && contagion > 0 && ( +
+ {t('gtRisk.contagion')} + {(contagion * 100).toFixed(1)}% +
+ )} + {resolvedInterpretation && ( +

+ >_ + {resolvedInterpretation} +

+ )} + +
+
+ {t('gtRisk.costlySignals')} +
+ {loadingSignals ? ( +
{t('gtRisk.loadingSignals')}
+ ) : signals.length > 0 ? ( +
+ {signals.slice(-4).reverse().map((entry, idx) => ( +
+
+ {Object.keys(entry.signals || {}) + .map(formatSignalName) + .join(', ') || entry.domain} +
+
+ {entry.source || t('gtRisk.unknownSource')} +
+
+ ))} +
+ ) : ( +
+ {t('gtRisk.noSignals')} +
+ )} +
+
+
+
+ ); +} \ No newline at end of file diff --git a/frontend/src/components/MaplibreViewer/popups/TelegramOsintPopup.tsx b/frontend/src/components/MaplibreViewer/popups/TelegramOsintPopup.tsx index a52dfd5..a36f24a 100644 --- a/frontend/src/components/MaplibreViewer/popups/TelegramOsintPopup.tsx +++ b/frontend/src/components/MaplibreViewer/popups/TelegramOsintPopup.tsx @@ -1,6 +1,6 @@ 'use client'; -import React, { useMemo } from 'react'; +import React, { useEffect, useMemo, useState } from 'react'; import { Popup } from 'react-map-gl/maplibre'; import { Radio } from 'lucide-react'; import { useTranslation } from '@/i18n'; @@ -69,11 +69,58 @@ function riskTheme(rs: number) { }; } -function postHeadline(post: TelegramOsintPost): string { - return String(post.title || post.description || 'Telegram intercept').trim(); +const CYRILLIC_RE = /[\u0400-\u04FF]/; + +function containsCyrillic(text: string): boolean { + return CYRILLIC_RE.test(text); } -function postDetail(post: TelegramOsintPost): string | null { +function sourceLangLabel(post: TelegramOsintPost): string { + if (post.source_lang_label) return post.source_lang_label; + const code = String(post.source_lang || '').trim().toLowerCase(); + const labels: Record = { + uk: 'Ukrainian', + ru: 'Russian', + en: 'English', + ar: 'Arabic', + he: 'Hebrew', + 'zh-cn': 'Chinese', + fr: 'French', + de: 'German', + pl: 'Polish', + }; + return labels[code] || code.toUpperCase(); +} + +function hasTranslation(post: TelegramOsintPost): boolean { + const translated = String(post.title_translated || post.description_translated || '').trim(); + const original = String(post.title || post.description || '').trim(); + return Boolean(translated && translated !== original); +} + +function postHeadline(post: TelegramOsintPost, showOriginal: boolean): string { + const original = String(post.title || post.description || 'Telegram intercept').trim(); + const translated = String(post.title_translated || post.description_translated || 
'').trim(); + if (!showOriginal && translated) { + return translated.split('\n', 1)[0].trim(); + } + if (!showOriginal && containsCyrillic(original) && translated) { + return translated.split('\n', 1)[0].trim(); + } + return original; +} + +function postDetail(post: TelegramOsintPost, showOriginal: boolean): string | null { + if (!showOriginal && post.description_translated) { + const translatedTitle = String(post.title_translated || '').trim(); + const translatedBody = String(post.description_translated || '').trim(); + if (!translatedBody || translatedBody === translatedTitle) return null; + const extra = translatedBody.startsWith(translatedTitle) + ? translatedBody.slice(translatedTitle.length).trim() + : translatedBody; + return extra || null; + } + const title = String(post.title || '').trim(); const description = String(post.description || '').trim(); if (!description || description === title || description.startsWith(title)) return null; @@ -126,10 +173,12 @@ function TelegramPostMedia({ post }: { post: TelegramOsintPost }) { function TelegramPostCard({ post }: { post: TelegramOsintPost }) { const { t } = useTranslation(); + const [showOriginal, setShowOriginal] = useState(false); const rs = post.risk_score ?? 1; const theme = riskTheme(rs); - const headline = postHeadline(post); - const detail = postDetail(post); + const translated = hasTranslation(post); + const headline = postHeadline(post, showOriginal); + const detail = postDetail(post, showOriginal); const isHigh = rs >= 8; return ( @@ -150,12 +199,29 @@ function TelegramPostCard({ post }: { post: TelegramOsintPost }) {

{detail}

) : null} + {translated && !showOriginal && post.source_lang ? ( +

+ {t('telegram.translatedFrom').replace('{lang}', sourceLangLabel(post))} +

+ ) : null} +
{isHigh ? 'BREAKING' : `LVL: ${rs}/10`} + {translated ? ( + + ) : null} {post.link ? ( { + setLocalizedPosts(posts); + }, [posts]); + + useEffect(() => { + const needsLocalizedFeed = posts.some((post) => !hasTranslation(post)); + if (!needsLocalizedFeed) { + return; + } + + let cancelled = false; + const controller = new AbortController(); + + fetch(`/api/telegram-feed?lang=${encodeURIComponent(locale)}`, { signal: controller.signal }) + .then((response) => (response.ok ? response.json() : null)) + .then((payload) => { + if (cancelled || !payload?.posts) return; + const byId = new Map( + (payload.posts as TelegramOsintPost[]).map((post) => [post.id, post]), + ); + setLocalizedPosts(posts.map((post) => byId.get(post.id) || post)); + }) + .catch(() => { + /* keep feed posts when locale translation fetch fails */ + }); + + return () => { + cancelled = true; + controller.abort(); + }; + }, [locale, posts]); + const sortedPosts = useMemo( () => - [...posts].sort( + [...localizedPosts].sort( (a, b) => (b.risk_score ?? 0) - (a.risk_score ?? 0) || String(b.published || '').localeCompare(String(a.published || '')), ), - [posts], + [localizedPosts], ); const maxRisk = sortedPosts[0]?.risk_score ?? 1; @@ -252,4 +352,4 @@ export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPo
); -} +} \ No newline at end of file diff --git a/frontend/src/components/NewsFeed.tsx b/frontend/src/components/NewsFeed.tsx index fd66973..49d2800 100644 --- a/frontend/src/components/NewsFeed.tsx +++ b/frontend/src/components/NewsFeed.tsx @@ -321,7 +321,7 @@ function EmissionsEstimateBlock({ flight }: { flight: any }) { ); } -function NewsFeedInner({ selectedEntity, regionDossier, regionDossierLoading, onArticleClick, onExpandEntityGraph }: { selectedEntity?: SelectedEntity | null, regionDossier?: RegionDossier | null, regionDossierLoading?: boolean, onArticleClick?: (idx: number, lat?: number, lng?: number, title?: string) => void, onExpandEntityGraph?: () => void }) { +function NewsFeedInner({ selectedEntity, regionDossier, regionDossierLoading, gtDossier, gtDossierLoading, onArticleClick, onExpandEntityGraph }: { selectedEntity?: SelectedEntity | null, regionDossier?: RegionDossier | null, regionDossierLoading?: boolean, gtDossier?: import('@/types/dashboard').GtDossier | null, gtDossierLoading?: boolean, onArticleClick?: (idx: number, lat?: number, lng?: number, title?: string) => void, onExpandEntityGraph?: () => void }) { const data = useDataKeys([ 'news', 'fimi', 'commercial_flights', 'private_flights', 'private_jets', 'military_flights', 'tracked_flights', 'ships', 'gdelt', 'liveuamap', @@ -535,6 +535,84 @@ function NewsFeedInner({ selectedEntity, regionDossier, regionDossierLoading, on )} {/* Sentinel-2 imagery now shown as map popup — see MaplibreViewer */} + + {(gtDossierLoading || gtDossier?.enabled) && ( + <> +
+ STRATEGIC RISK (GT) +
+ {gtDossierLoading ? ( +
Running game-theoretic analysis...
+ ) : gtDossier ? ( +
+
+ POSTERIOR RISK + + {((gtDossier.current_risk ?? 0) * 100).toFixed(1)}% + +
+ {gtDossier.domain_risks && ( +
+
+
FIN
+
+ {((gtDossier.domain_risks.financial ?? 0) * 100).toFixed(0)}% +
+
+
+
UNREST
+
+ {((gtDossier.domain_risks.unrest ?? 0) * 100).toFixed(0)}% +
+
+
+
CONFLICT
+
+ {((gtDossier.domain_risks.conflict ?? 0) * 100).toFixed(0)}% +
+
+
+ )} + {gtDossier.interpretation && ( +
+ >_ GT: + {gtDossier.interpretation} +
+ )} + {gtDossier.recent_signals && gtDossier.recent_signals.length > 0 && ( +
+
+ COSTLY SIGNALS +
+ {gtDossier.recent_signals.slice(-3).map((entry, idx) => ( +
+ + {Object.keys(entry.signals || {}).join(', ') || entry.domain} + + {' · '} + {entry.source} +
+ ))} +
+ )} + {gtDossier.scenarios && gtDossier.scenarios.length > 0 && ( +
+
SCENARIOS
+ {gtDossier.scenarios.map((scenario) => ( +
+ {scenario.name}: + {scenario.summary} +
+ ))} +
+ )} +
+ ) : null} + + )}
) : d?.error ? (
{d.error}
diff --git a/frontend/src/components/WorldviewLeftPanel.tsx b/frontend/src/components/WorldviewLeftPanel.tsx index a6c4310..57682e3 100644 --- a/frontend/src/components/WorldviewLeftPanel.tsx +++ b/frontend/src/components/WorldviewLeftPanel.tsx @@ -55,6 +55,8 @@ import { useTheme } from '@/lib/ThemeContext'; import { useTranslation } from '@/i18n'; import SarModeChooserModal from './SarModeChooserModal'; import KiwiSdrConsentDialog from './ui/KiwiSdrConsentDialog'; +import { extractGtAlerts } from '@/lib/gtAlerts'; +import { gtLeanLayerWarning, useRuntimeProfile } from '@/hooks/useRuntimeProfile'; function relativeTime(iso: string | undefined): string { if (!iso) return ''; @@ -115,6 +117,7 @@ const FRESHNESS_MAP: Record = { scm_suppliers: 'scm_suppliers', cyber_threats: 'cyber_threats', telegram_osint: 'telegram_osint', + gt_risk: 'gt_risk', }; // POTUS fleet ICAO hex codes for client-side filtering @@ -726,7 +729,11 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ const [liveuamapModalOpen, setLiveuamapModalOpen] = useState(false); const [liveuamapPendingEnable, setLiveuamapPendingEnable] = useState<(() => void) | null>(null); + const [gtLeanModalOpen, setGtLeanModalOpen] = useState(false); + const [gtLeanPendingEnable, setGtLeanPendingEnable] = useState<(() => void) | null>(null); const { needsConsentBeforeEnable, confirmOptIn } = useLiveUamapScraperOptIn(); + const runtimeProfile = useRuntimeProfile(); + const gtLeanWarning = gtLeanLayerWarning(runtimeProfile); const withGlobalIncidentsConsent = useCallback( (layerId: string, turningOn: boolean, apply: () => void) => { @@ -740,6 +747,18 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ [needsConsentBeforeEnable], ); + const withGtRiskLeanWarning = useCallback( + (layerId: string, turningOn: boolean, apply: () => void) => { + if (layerId === 'gt_risk' && turningOn && gtLeanWarning) { + setGtLeanPendingEnable(() => apply); + setGtLeanModalOpen(true); + return; + } + 
apply(); + }, + [gtLeanWarning], + ); + const isAllToggleableLayersOn = useMemo( () => Object.entries(activeLayers) @@ -1371,6 +1390,16 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ count: data?.correlations?.filter((c: { type: string }) => c.type === 'contradiction').length || 0, icon: Zap, }, + { + id: 'gt_risk', + name: t('layers.derivedOsint'), + source: t('layers.derivedOsintSource'), + count: + extractGtAlerts(data?.gt_risk).plottedRegions || + data?.gt_risk?.meta?.plotted_regions || + 0, + icon: Radar, + }, { id: 'day_night', name: t('layers.dayNight'), @@ -1394,7 +1423,7 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ sections.forEach((s) => { // Keep high-traffic intel overlays visible on first paint (GDELT, Telegram, etc.) initial[s.label] = s.layers.some((l) => - ['global_incidents', 'telegram_osint', 'ukraine_frontline'].includes(l.id), + ['global_incidents', 'telegram_osint', 'ukraine_frontline', 'gt_risk'].includes(l.id), ); }); return initial; @@ -1746,10 +1775,12 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ return; } withGlobalIncidentsConsent(layer.id, !active, () => { - setActiveLayers((prev: ActiveLayers) => ({ - ...prev, - [layer.id]: !active, - })); + withGtRiskLeanWarning(layer.id, !active, () => { + setActiveLayers((prev: ActiveLayers) => ({ + ...prev, + [layer.id]: !active, + })); + }); }); }} > @@ -2081,6 +2112,23 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ })(); }} /> + { + setGtLeanModalOpen(false); + setGtLeanPendingEnable(null); + }} + onConfirm={() => { + gtLeanPendingEnable?.(); + setGtLeanModalOpen(false); + setGtLeanPendingEnable(null); + }} + /> ); }); diff --git a/frontend/src/components/map/geoJSONBuilders.ts b/frontend/src/components/map/geoJSONBuilders.ts index 84d1240..413dd6e 100644 --- a/frontend/src/components/map/geoJSONBuilders.ts +++ b/frontend/src/components/map/geoJSONBuilders.ts @@ -1956,3 +1956,64 @@ export function 
buildSarAoisGeoJSON(aois?: SarAoi[]): FC { if (features.length === 0) return null; return { type: 'FeatureCollection' as const, features }; } + +// ─── Strategic Risk Analytics (GT early warning) ──────────────────────────── + +export function buildGtRiskGeoJSON( + payload?: { + enabled?: boolean; + heatmap?: { features?: Array }; + } | null, +): FC { + const features = payload?.heatmap?.features; + if (!features?.length) return null; + + const normalized = features + .map((feature, index) => { + const coords = feature.geometry?.coordinates; + if (!coords || coords.length < 2) return null; + const [lng, lat] = coords; + if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null; + if (Math.abs(lat) < 0.001 && Math.abs(lng) < 0.001) return null; + const props = feature.properties || {}; + const region = String(props.region || `region-${index}`); + return { + type: 'Feature' as const, + properties: { + ...props, + type: 'gt_risk', + id: region, + name: region, + lat, + lng, + risk: Number(props.risk ?? 0), + financial: Number(props.financial ?? 0), + unrest: Number(props.unrest ?? 0), + conflict: Number(props.conflict ?? 0), + contagion: Number(props.contagion ?? 
0), + }, + geometry: { + type: 'Point' as const, + coordinates: [lng, lat] as [number, number], + }, + }; + }) + .filter(Boolean) as GeoJSON.Feature[]; + + if (!normalized.length) return null; + return { type: 'FeatureCollection' as const, features: normalized }; +} + +type GTRiskHeatmapFeatureLike = { + properties?: { + region?: string; + risk?: number; + financial?: number; + unrest?: number; + conflict?: number; + contagion?: number; + }; + geometry?: { + coordinates?: [number, number]; + }; +}; diff --git a/frontend/src/hooks/useFloatingPanel.ts b/frontend/src/hooks/useFloatingPanel.ts new file mode 100644 index 0000000..e1e8cdb --- /dev/null +++ b/frontend/src/hooks/useFloatingPanel.ts @@ -0,0 +1,121 @@ +'use client'; + +import { useCallback, useEffect, useRef, useState } from 'react'; + +export interface FloatingPanelPosition { + x: number; + y: number; +} + +interface StoredFloatingPanelState { + position?: FloatingPanelPosition; + isMinimized?: boolean; +} + +interface UseFloatingPanelOptions { + defaultPosition?: FloatingPanelPosition; + minVisible?: number; +} + +export function useFloatingPanel( + storageKey: string, + { defaultPosition = { x: 24, y: 380 }, minVisible = 48 }: UseFloatingPanelOptions = {}, +) { + const [position, setPosition] = useState(defaultPosition); + const [isMinimized, setIsMinimized] = useState(false); + const [isDragging, setIsDragging] = useState(false); + const dragStartRef = useRef({ x: 0, y: 0, posX: 0, posY: 0 }); + const hydratedRef = useRef(false); + + useEffect(() => { + try { + const raw = localStorage.getItem(storageKey); + if (!raw) return; + const parsed = JSON.parse(raw) as StoredFloatingPanelState; + if ( + parsed.position && + Number.isFinite(parsed.position.x) && + Number.isFinite(parsed.position.y) + ) { + setPosition(parsed.position); + } + if (typeof parsed.isMinimized === 'boolean') { + setIsMinimized(parsed.isMinimized); + } + } catch { + /* non-fatal */ + } finally { + hydratedRef.current = true; + } + }, 
[storageKey]); + + useEffect(() => { + if (!hydratedRef.current) return; + try { + localStorage.setItem( + storageKey, + JSON.stringify({ position, isMinimized } satisfies StoredFloatingPanelState), + ); + } catch { + /* non-fatal */ + } + }, [storageKey, position, isMinimized]); + + const clampPosition = useCallback( + (next: FloatingPanelPosition): FloatingPanelPosition => { + const maxX = Math.max(0, window.innerWidth - minVisible); + const maxY = Math.max(0, window.innerHeight - minVisible); + return { + x: Math.min(Math.max(0, next.x), maxX), + y: Math.min(Math.max(0, next.y), maxY), + }; + }, + [minVisible], + ); + + const onDragStart = useCallback( + (event: React.MouseEvent) => { + event.preventDefault(); + setIsDragging(true); + dragStartRef.current = { + x: event.clientX, + y: event.clientY, + posX: position.x, + posY: position.y, + }; + }, + [position.x, position.y], + ); + + useEffect(() => { + if (!isDragging) return undefined; + + const handleMove = (event: MouseEvent) => { + const dx = event.clientX - dragStartRef.current.x; + const dy = event.clientY - dragStartRef.current.y; + setPosition( + clampPosition({ + x: dragStartRef.current.posX + dx, + y: dragStartRef.current.posY + dy, + }), + ); + }; + + const handleUp = () => setIsDragging(false); + + window.addEventListener('mousemove', handleMove); + window.addEventListener('mouseup', handleUp); + return () => { + window.removeEventListener('mousemove', handleMove); + window.removeEventListener('mouseup', handleUp); + }; + }, [isDragging, clampPosition]); + + return { + position, + isMinimized, + setIsMinimized, + isDragging, + onDragStart, + }; +} \ No newline at end of file diff --git a/frontend/src/hooks/useGtDossier.ts b/frontend/src/hooks/useGtDossier.ts new file mode 100644 index 0000000..69eeaa6 --- /dev/null +++ b/frontend/src/hooks/useGtDossier.ts @@ -0,0 +1,58 @@ +import { useEffect, useState } from 'react'; +import type { GtDossier } from '@/types/dashboard'; +import { API_BASE } from 
'@/lib/api'; + +export function useGtDossier( + lat: number | undefined, + lng: number | undefined, + countryName?: string, + enabled = true, +) { + const [gtDossier, setGtDossier] = useState(null); + const [gtDossierLoading, setGtDossierLoading] = useState(false); + + useEffect(() => { + if (!enabled || lat == null || lng == null) { + setGtDossier(null); + setGtDossierLoading(false); + return; + } + + let cancelled = false; + const regions = [ + `${lat.toFixed(2)},${lng.toFixed(2)}`, + countryName?.trim().toLowerCase(), + ].filter((value): value is string => Boolean(value)); + + const load = async () => { + setGtDossierLoading(true); + let best: GtDossier | null = null; + for (const region of regions) { + try { + const response = await fetch( + `${API_BASE}/api/analytics/dossier/${encodeURIComponent(region)}`, + ); + if (!response.ok) continue; + const payload = (await response.json()) as GtDossier; + if (!payload.enabled) continue; + if (!best || (payload.current_risk ?? 0) > (best.current_risk ?? 
0)) { + best = { ...payload, region }; + } + } catch { + // GT analytics optional — ignore fetch errors + } + } + if (!cancelled) { + setGtDossier(best); + setGtDossierLoading(false); + } + }; + + void load(); + return () => { + cancelled = true; + }; + }, [lat, lng, countryName, enabled]); + + return { gtDossier, gtDossierLoading }; +} \ No newline at end of file diff --git a/frontend/src/hooks/useRuntimeProfile.ts b/frontend/src/hooks/useRuntimeProfile.ts new file mode 100644 index 0000000..7714b4b --- /dev/null +++ b/frontend/src/hooks/useRuntimeProfile.ts @@ -0,0 +1,60 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import { API_BASE } from '@/lib/api'; + +export interface RuntimeGtAnalytics { + enabled?: boolean; + operational?: boolean; + profile?: string; + lean_node?: boolean; + recommended?: boolean; + warning?: string | null; + experimental?: boolean; +} + +export interface RuntimeProfile { + profile?: string; + cpu_limit?: number | null; + memory_limit_mb?: number | null; + gt_analytics?: RuntimeGtAnalytics; +} + +export function useRuntimeProfile(): RuntimeProfile | null { + const [runtime, setRuntime] = useState(null); + + useEffect(() => { + let cancelled = false; + + const load = async () => { + try { + const res = await fetch(`${API_BASE}/api/health`, { cache: 'no-store' }); + if (!res.ok || cancelled) return; + const body = await res.json(); + if (!cancelled && body?.runtime) { + setRuntime(body.runtime as RuntimeProfile); + } + } catch { + /* health unavailable during boot */ + } + }; + + void load(); + const timer = window.setInterval(load, 60_000); + return () => { + cancelled = true; + window.clearInterval(timer); + }; + }, []); + + return runtime; +} + +export function gtLeanLayerWarning(runtime: RuntimeProfile | null): string | null { + const gt = runtime?.gt_analytics; + if (!gt?.lean_node) return null; + return ( + gt.warning || + 'This node is capped at 1 vCPU. 
Enabling Strategic Risk (Derived OSINT) may slow OSINT fetches.' + ); +} diff --git a/frontend/src/i18n/translations/en.json b/frontend/src/i18n/translations/en.json index e690d64..245c2a4 100644 --- a/frontend/src/i18n/translations/en.json +++ b/frontend/src/i18n/translations/en.json @@ -208,7 +208,79 @@ "malwareC2": "Malware C2", "scmSuppliers": "SCM Suppliers", "cyberThreats": "Cyber Threats", - "telegramOsint": "Telegram OSINT" + "telegramOsint": "Telegram OSINT", + "strategicRisk": "Strategic Risk Heatmap", + "derivedOsint": "Derived OSINT (Strategic Risk)", + "derivedOsintSource": "Experimental · off by default" + }, + "gtLean": { + "title": "Enable Derived OSINT on a lean node?", + "message": "Shadowbroker detected a 1 vCPU cap on this node. Turning on the Strategic Risk map layer is safe for display, but enabling the backend engine (GT_ANALYTICS_ENABLED) may slow Telegram, GDELT, and other OSINT fetches. Use OpenClaw watchdog alerts without the full engine on fleet nodes.", + "confirm": "Turn on layer anyway", + "cancel": "Cancel" + }, + "gtHud": { + "title": "GT ANALYTICS", + "dragHint": "drag to move", + "collapse": "Collapse panel", + "expand": "Expand panel" + }, + "gtAlerts": { + "title": "TOP ALERTS", + "counts": "{plotted} on map · {tracked} tracked (max {max})", + "empty": "No plottable regions yet — need geotagged intel (Telegram/GDELT/news).", + "ignition": "IGNITE", + "line": "risk {risk} · conflict {conflict}", + "hint": "500 = max tracked regions, not individual events. Click to fly there." + }, + "gtRisk": { + "popupTitle": "STRATEGIC RISK", + "region": "REGION", + "composite": "POSTERIOR RISK", + "financial": "FIN", + "unrest": "UNREST", + "conflict": "CONFLICT", + "contagion": "CONTAGION", + "costlySignals": "COSTLY SIGNALS", + "loadingSignals": "Loading feed matches…", + "noSignals": "No costly-signal text matched in recent Telegram/GDELT/news for this region. 
Scores can rise from domain priors or nearby contagion.", + "unknownSource": "unknown source" + }, + "gtBacktest": { + "title": "GT Backtest", + "layerOff": "Off — enable Strategic Risk Heatmap in Data Layers.", + "disabled": "GT analytics disabled (set GT_ANALYTICS_ENABLED=true).", + "loading": "Running historical validation…", + "refresh": "Refresh backtest", + "accuracy": "ACCURACY", + "confidence": "WILSON 95% LB", + "cases": "{count} labeled cases", + "threshold": "alert ≥ {value}", + "target": "target {value}", + "pass": "PASS", + "fail": "FAIL", + "collecting": "COLLECTING", + "meetsTarget": "Meets confidence target", + "belowTarget": "Below confidence target", + "misclassified": "{count} misclassified", + "tabBenchmark": "Benchmark", + "tabOperational": "Operational", + "benchmarkNote": "Static labeled corpus — regression test, not live forecasting.", + "operationalLoading": "Loading rolling operational trend…", + "operationalEmpty": "No weekly snapshots yet — freeze runs Mondays 00:05 UTC or via OpenClaw.", + "operationalWeeks": "{stored} weeks stored · {scorable} scorable", + "operationalLabeled": "{labeled} labeled · {pending} pending", + "operationalTrend": "Week-over-week accuracy", + "operationalImproving": "Improving vs prior scorable week", + "operationalFlat": "Flat or down vs prior scorable week", + "operationalMinLabels": "Need ≥{count} labels/week to score", + "microTitle": "3-day micro", + "microWindow": "{days}-day rolling avg · ignition Δ ≥ {delta}", + "microIgnitions": "{count} ignition(s)", + "microAlerted3d": "{count} above threshold on 3d avg", + "microEmpty": "Collecting daily readings — refreshes with GT ingest.", + "microRegionLine": "{region}: spot {spot} · 3d {avg} · Δ {delta}", + "microIgnitionBadge": "IGNITION" }, "roadCorridor": { "analyzeHere": "ANALYZE HERE", @@ -273,6 +345,9 @@ "loadMedia": "VIEW MEDIA (TELEGRAM)", "openOriginal": "OPEN ON TELEGRAM →", "embedTitle": "Telegram post embed", - "postsAtLocation": "{count} posts at 
this location — scroll for more" + "postsAtLocation": "{count} posts at this location — scroll for more", + "translatedFrom": "Translated from {lang}", + "showOriginal": "SHOW ORIGINAL ({lang})", + "showTranslation": "SHOW TRANSLATION" } } diff --git a/frontend/src/i18n/translations/fr.json b/frontend/src/i18n/translations/fr.json index b71cc72..db48880 100644 --- a/frontend/src/i18n/translations/fr.json +++ b/frontend/src/i18n/translations/fr.json @@ -208,7 +208,79 @@ "malwareC2": "Malware C2", "scmSuppliers": "Fournisseurs SCM", "cyberThreats": "Cybermenaces", - "telegramOsint": "OSINT Telegram" + "telegramOsint": "OSINT Telegram", + "strategicRisk": "Carte de risque stratégique", + "derivedOsint": "OSINT dérivé (risque stratégique)", + "derivedOsintSource": "Expérimental · désactivé par défaut" + }, + "gtLean": { + "title": "Activer l'OSINT dérivé sur un nœud limité ?", + "message": "Shadowbroker a détecté une limite de 1 vCPU. La couche carte peut s'afficher, mais activer le moteur backend peut ralentir les flux OSINT.", + "confirm": "Activer la couche", + "cancel": "Annuler" + }, + "gtHud": { + "title": "ANALYTIQUE GT", + "dragHint": "glisser pour déplacer", + "collapse": "Réduire le panneau", + "expand": "Développer le panneau" + }, + "gtAlerts": { + "title": "ALERTES TOP", + "counts": "{plotted} sur carte · {tracked} suivies (max {max})", + "empty": "Aucune région plottable — intel géolocalisée requise.", + "ignition": "IGNITE", + "line": "risque {risk} · conflit {conflict}", + "hint": "500 = régions max suivies, pas des événements. Cliquer pour voler." 
+ }, + "gtRisk": { + "popupTitle": "RISQUE STRATÉGIQUE", + "region": "RÉGION", + "composite": "RISQUE POSTÉRIEUR", + "financial": "FIN", + "unrest": "TROUBLES", + "conflict": "CONFLIT", + "contagion": "CONTAGION", + "costlySignals": "SIGNAUX COÛTEUX", + "loadingSignals": "Chargement des correspondances…", + "noSignals": "Aucun signal coûteux récent pour cette région dans Telegram/GDELT/news.", + "unknownSource": "source inconnue" + }, + "gtBacktest": { + "title": "Backtest GT", + "layerOff": "Désactivé — activez la carte de risque stratégique dans Couches.", + "disabled": "Analytique GT désactivée (GT_ANALYTICS_ENABLED=true).", + "loading": "Validation historique en cours…", + "refresh": "Actualiser le backtest", + "accuracy": "PRÉCISION", + "confidence": "BORNE INF. WILSON 95%", + "cases": "{count} cas étiquetés", + "threshold": "alerte ≥ {value}", + "target": "cible {value}", + "pass": "OK", + "fail": "ÉCHEC", + "collecting": "COLLECTE", + "meetsTarget": "Objectif de confiance atteint", + "belowTarget": "Sous l'objectif de confiance", + "misclassified": "{count} mal classés", + "tabBenchmark": "Référence", + "tabOperational": "Opérationnel", + "benchmarkNote": "Corpus historique étiqueté — test de régression, pas prévision live.", + "operationalLoading": "Chargement de la tendance opérationnelle…", + "operationalEmpty": "Aucun instantané hebdomadaire — gel chaque lundi 00:05 UTC ou via OpenClaw.", + "operationalWeeks": "{stored} semaines · {scorable} exploitables", + "operationalLabeled": "{labeled} étiquetés · {pending} en attente", + "operationalTrend": "Précision semaine après semaine", + "operationalImproving": "En hausse vs semaine précédente", + "operationalFlat": "Stable ou en baisse vs semaine précédente", + "operationalMinLabels": "≥{count} étiquettes/semaine requis", + "microTitle": "Micro 3 jours", + "microWindow": "Moy. 
glissante {days} j · ignition Δ ≥ {delta}", + "microIgnitions": "{count} ignition(s)", + "microAlerted3d": "{count} au-dessus du seuil (moy. 3j)", + "microEmpty": "Lecture quotidienne en cours — mis à jour à chaque ingest GT.", + "microRegionLine": "{region}: spot {spot} · 3j {avg} · Δ {delta}", + "microIgnitionBadge": "IGNITION" }, "roadCorridor": { "analyzeHere": "ANALYSER ICI", @@ -273,6 +345,9 @@ "loadMedia": "AFFICHER LE MÉDIA (TELEGRAM)", "openOriginal": "OUVRIR SUR TELEGRAM →", "embedTitle": "Intégration Telegram", - "postsAtLocation": "{count} posts à cet endroit — faites défiler" + "postsAtLocation": "{count} posts à cet endroit — faites défiler", + "translatedFrom": "Traduit depuis {lang}", + "showOriginal": "AFFICHER L'ORIGINAL ({lang})", + "showTranslation": "AFFICHER LA TRADUCTION" } } diff --git a/frontend/src/i18n/translations/zh-CN.json b/frontend/src/i18n/translations/zh-CN.json index 5947697..568febe 100644 --- a/frontend/src/i18n/translations/zh-CN.json +++ b/frontend/src/i18n/translations/zh-CN.json @@ -208,7 +208,79 @@ "malwareC2": "恶意软件 C2", "scmSuppliers": "供应链供应商", "cyberThreats": "网络威胁", - "telegramOsint": "Telegram OSINT" + "telegramOsint": "Telegram OSINT", + "strategicRisk": "战略风险热力图", + "derivedOsint": "衍生 OSINT(战略风险)", + "derivedOsintSource": "实验功能 · 默认关闭" + }, + "gtLean": { + "title": "在低配节点上启用衍生 OSINT?", + "message": "Shadowbroker 检测到该节点 CPU 上限为 1 vCPU。开启地图图层通常安全,但启用后端引擎可能会拖慢 Telegram、GDELT 等 OSINT 抓取。", + "confirm": "仍要开启图层", + "cancel": "取消" + }, + "gtHud": { + "title": "GT 分析", + "dragHint": "拖动移动", + "collapse": "收起面板", + "expand": "展开面板" + }, + "gtAlerts": { + "title": "重点警报", + "counts": "地图 {plotted} · 跟踪 {tracked}(上限 {max})", + "empty": "尚无可绘制区域 — 需要带地理标签的情报。", + "ignition": "点火", + "line": "风险 {risk} · 冲突 {conflict}", + "hint": "500 = 最大跟踪区域数,非事件数。点击飞往。" + }, + "gtRisk": { + "popupTitle": "战略风险", + "region": "区域", + "composite": "后验风险", + "financial": "金融", + "unrest": "动荡", + "conflict": "冲突", + "contagion": "传染", + 
"costlySignals": "成本信号", + "loadingSignals": "正在加载情报匹配…", + "noSignals": "该区域最近在 Telegram/GDELT/新闻中未匹配到成本信号文本。", + "unknownSource": "未知来源" + }, + "gtBacktest": { + "title": "GT 回测", + "layerOff": "已关闭 — 请在数据图层中启用战略风险热力图。", + "disabled": "GT 分析未启用(需设置 GT_ANALYTICS_ENABLED=true)。", + "loading": "正在运行历史验证…", + "refresh": "刷新回测", + "accuracy": "准确率", + "confidence": "Wilson 95% 下界", + "cases": "{count} 个标注案例", + "threshold": "警报阈值 ≥ {value}", + "target": "目标 {value}", + "pass": "通过", + "fail": "未通过", + "collecting": "采集中", + "meetsTarget": "达到置信目标", + "belowTarget": "低于置信目标", + "misclassified": "{count} 个误分类", + "tabBenchmark": "基准测试", + "tabOperational": "运营验证", + "benchmarkNote": "静态标注语料 — 回归测试,非实时预测。", + "operationalLoading": "正在加载滚动运营趋势…", + "operationalEmpty": "尚无周快照 — 每周一 00:05 UTC 自动冻结,或通过 OpenClaw。", + "operationalWeeks": "已存 {stored} 周 · {scorable} 周可评分", + "operationalLabeled": "已标注 {labeled} · 待标注 {pending}", + "operationalTrend": "逐周准确率", + "operationalImproving": "较上一可评分周有所提升", + "operationalFlat": "较上一可评分周持平或下降", + "operationalMinLabels": "每周需 ≥{count} 条标注才可评分", + "microTitle": "3日微观", + "microWindow": "{days}日滚动均值 · 点火 Δ ≥ {delta}", + "microIgnitions": "{count} 个点火", + "microAlerted3d": "{count} 个区域 3日均值超阈值", + "microEmpty": "正在采集日读数 — 随 GT 摄入更新。", + "microRegionLine": "{region}:即时 {spot} · 3日 {avg} · Δ {delta}", + "microIgnitionBadge": "点火" }, "roadCorridor": { "analyzeHere": "分析此处", @@ -273,6 +345,9 @@ "loadMedia": "查看媒体(Telegram)", "openOriginal": "在 Telegram 打开 →", "embedTitle": "Telegram 帖子嵌入", - "postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多" + "postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多", + "translatedFrom": "由 {lang} 翻译", + "showOriginal": "显示原文({lang})", + "showTranslation": "显示译文" } } diff --git a/frontend/src/lib/gtAlerts.ts b/frontend/src/lib/gtAlerts.ts new file mode 100644 index 0000000..38a8e07 --- /dev/null +++ b/frontend/src/lib/gtAlerts.ts @@ -0,0 +1,104 @@ +import type { GTRiskPayload } from '@/types/dashboard'; + +export interface 
GtAlertRow { + region: string; + regionLabel: string; + risk: number; + conflict: number; + unrest: number; + financial: number; + contagion: number; + lat: number; + lng: number; + score: number; + ignition: boolean; + risk3d?: number; + riskDelta?: number; +} + +export function formatGtRegionLabel(region: string): string { + const text = String(region || '').trim(); + if (!text) return 'unknown'; + const coord = text.match(/^(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)$/); + if (coord) { + return `${Number(coord[1]).toFixed(2)}°, ${Number(coord[2]).toFixed(2)}°`; + } + const parts = text.split(',').map((piece) => piece.trim()).filter(Boolean); + if (parts.length >= 2) { + const lat = Number(parts[0]); + const lng = Number(parts[parts.length - 1]); + if (Number.isFinite(lat) && Number.isFinite(lng)) { + return `${lat.toFixed(2)}°, ${lng.toFixed(2)}°`; + } + } + return text.replace(/_/g, ' '); +} + +function validCoords(coords: unknown): { lat: number; lng: number } | null { + if (!Array.isArray(coords) || coords.length < 2) return null; + const lng = Number(coords[0]); + const lat = Number(coords[1]); + if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null; + if (Math.abs(lat) < 0.001 && Math.abs(lng) < 0.001) return null; + return { lat, lng }; +} + +function peakScore(props: Record): number { + const composite = Number(props.risk ?? 0); + const financial = Number(props.financial ?? 0); + const unrest = Number(props.unrest ?? 0); + const conflict = Number(props.conflict ?? 
0); + return Math.max(composite, financial, unrest, conflict); +} + +export function extractGtAlerts( + payload?: GTRiskPayload | null, + limit = 8, +): { + alerts: GtAlertRow[]; + trackedRegions: number; + plottedRegions: number; + maxRegions: number; +} { + const features = payload?.heatmap?.features || []; + const meta = payload?.meta; + const rows: GtAlertRow[] = []; + + for (const feature of features) { + const coords = validCoords(feature.geometry?.coordinates); + if (!coords) continue; + const props = (feature.properties || {}) as Record; + const region = String(props.region || '').trim().toLowerCase(); + if (!region) continue; + rows.push({ + region, + regionLabel: formatGtRegionLabel(region), + risk: Number(props.risk ?? 0), + financial: Number(props.financial ?? 0), + unrest: Number(props.unrest ?? 0), + conflict: Number(props.conflict ?? 0), + contagion: Number(props.contagion ?? 0), + lat: coords.lat, + lng: coords.lng, + score: peakScore(props), + ignition: Boolean(props.micro_ignition), + risk3d: props.risk_3d_avg != null ? Number(props.risk_3d_avg) : undefined, + riskDelta: props.risk_delta != null ? Number(props.risk_delta) : undefined, + }); + } + + rows.sort((a, b) => { + if (a.ignition !== b.ignition) return a.ignition ? -1 : 1; + const deltaA = a.riskDelta ?? 0; + const deltaB = b.riskDelta ?? 0; + if (deltaA !== deltaB) return deltaB - deltaA; + return b.score - a.score; + }); + + return { + alerts: rows.slice(0, limit), + trackedRegions: meta?.tracked_regions ?? features.length, + plottedRegions: meta?.plotted_regions ?? rows.length, + maxRegions: meta?.max_regions ?? 
500, + }; +} \ No newline at end of file diff --git a/frontend/src/types/dashboard.ts b/frontend/src/types/dashboard.ts index 907cce0..f8d4d65 100644 --- a/frontend/src/types/dashboard.ts +++ b/frontend/src/types/dashboard.ts @@ -966,12 +966,193 @@ export interface DashboardData { timestamp?: string | null; channels?: string[]; }; + gt_risk?: GTRiskPayload; +} + +export interface GTRiskHeatmapFeature { + type: 'Feature'; + properties: { + region: string; + risk: number; + financial?: number; + unrest?: number; + conflict?: number; + contagion?: number; + updates?: number; + risk_spot?: number; + risk_3d_avg?: number; + risk_delta?: number; + micro_ignition?: boolean; + }; + geometry: { + type: 'Point'; + coordinates: [number, number]; + }; +} + +export interface GTRiskPayload { + enabled?: boolean; + timestamp?: string | null; + processed?: number; + meta?: { + tracked_regions?: number; + engine_regions?: number; + plotted_regions?: number; + max_regions?: number; + }; + heatmap?: { + type: 'FeatureCollection'; + features: GTRiskHeatmapFeature[]; + }; + clusters?: Array<{ + cluster_id: number; + size: number; + mean_risk: number; + regions?: string[]; + members?: string[]; + }>; +} + +export interface GtDossierSignalEntry { + timestamp: string; + domain: string; + signals: Record; + strength: number; + posterior: number; + source: string; + deviation_score?: number; +} + +export interface GtBacktestCaseResult { + case_id: string; + name: string; + kind: string; + correct: boolean; + alerted: boolean; + peak_domain_risk: number; + peak_composite_risk: number; + costly_signals: string[]; +} + +export interface GtBacktestReport { + enabled?: boolean; + total_cases: number; + correct: number; + accuracy: number; + confidence_rate: number; + wilson_lower_95: number; + wilson_upper_95: number; + true_positives: number; + true_negatives: number; + false_positives: number; + false_negatives: number; + sensitivity: number; + specificity: number; + alert_threshold: number; + 
target_confidence: number; + meets_target: boolean; + expanded_suite?: boolean; + tuned?: boolean; + recommended_alert_threshold?: number; + cases?: GtBacktestCaseResult[]; +} + +export interface GtRollingWeekScore { + week_id: string; + frozen_at?: string; + alert_threshold: number; + total_regions: number; + labeled: number; + pending: number; + alerted: number; + correct: number; + accuracy: number; + confidence_rate: number; + wilson_lower_95: number; + wilson_upper_95: number; + true_positives: number; + true_negatives: number; + false_positives: number; + false_negatives: number; + sensitivity: number; + specificity: number; + scorable: boolean; +} + +export interface GtMicroRegionView { + region: string; + spot_risk: number; + risk_3d_avg: number; + risk_delta: number; + days_in_window: number; + day_scores: number[]; + alerted_spot: boolean; + alerted_3d: boolean; + ignition: boolean; + financial: number; + unrest: number; + conflict: number; +} + +export interface GtMicroRollingReport { + enabled?: boolean; + mode?: string; + window_days: number; + alert_threshold: number; + ignition_delta: number; + as_of: string; + days_stored: number; + regions_tracked: number; + ignition_count: number; + alerted_3d_count: number; + ignitions: GtMicroRegionView[]; + top_regions: GtMicroRegionView[]; + note?: string; + message?: string; +} + +export interface GtRollingReport { + enabled?: boolean; + mode?: string; + alert_threshold: number; + target_confidence: number; + weeks_requested: number; + weeks_stored: number; + weeks_scorable: number; + min_labeled_per_week: number; + latest: GtRollingWeekScore | null; + trend: GtRollingWeekScore[]; + accuracy_series: { week_id: string; accuracy: number; labeled: number }[]; + improving_vs_prior: boolean; + meets_target: boolean; + note?: string; + message?: string; +} + +export interface GtDossier { + enabled?: boolean; + region: string; + current_risk: number; + domain_risks?: { + financial?: number; + unrest?: number; + 
conflict?: number; + }; + recent_signals?: GtDossierSignalEntry[]; + contagion_risk?: number; + interpretation?: string; + scenarios?: Array<{ name: string; summary: string }>; } export interface TelegramOsintPost { id: string; title?: string; description?: string; + title_translated?: string; + description_translated?: string; + source_lang?: string; + source_lang_label?: string; + translate_to?: string; link?: string; published?: string; source?: string; @@ -1120,6 +1301,7 @@ export interface ActiveLayers { scm_suppliers: boolean; cyber_threats: boolean; telegram_osint: boolean; + gt_risk: boolean; } export interface SelectedEntity { diff --git a/openclaw-skills/shadowbroker/SKILL.md b/openclaw-skills/shadowbroker/SKILL.md index 9ff549b..dff1158 100644 --- a/openclaw-skills/shadowbroker/SKILL.md +++ b/openclaw-skills/shadowbroker/SKILL.md @@ -170,6 +170,36 @@ The channel operates over HMAC-authenticated HTTP with body-integrity binding: | `await sb.get_slow_telemetry()` | Slow-tier: GDELT, news, earthquakes, markets, correlations, Telegram OSINT, malware/cyber threats, SCM suppliers | | `await sb.get_report()` | Full structured intelligence report | +### Strategic Risk Analytics (GT early warning) + +Requires `GT_ANALYTICS_ENABLED=true` on the ShadowBroker backend. 
+ +| Method / command | What It Returns | +|------------------|----------------| +| `await sb.ask("Run GT analysis on UK/Europe feeds")` | Routes to `gt_analyze` | +| `await sb.gt_analyze(region="ukraine")` | Refresh beliefs from Telegram/news/GDELT + dossier | +| `await sb.gt_risk_heatmap()` | GeoJSON posterior risk overlay + Louvain clusters | +| `await sb.gt_dossier("ukraine")` | Costly signals, domain risks, scenarios | +| `await sb.gt_backtest()` | **Static benchmark** — labeled historical cases (regression test) | +| `await sb.gt_backtest(tune=True)` | Grid-search alert threshold for target confidence | +| `await sb.gt_rolling_backtest()` | **Macro operational** — week-over-week accuracy on frozen weekly alerts | +| `await sb.gt_micro_rolling()` | **Micro 3-day rolling avg** — spot vs baseline, ignition detection | +| `await sb.gt_rolling_freeze()` | Freeze this ISO week's GT scores before outcomes are known | +| `await sb.gt_rolling_label(week_id, region=..., label=...)` | Label prior-week outcomes (`true_escalation`, `false_alarm`, `benign`) | +| `await sb.gt_top_alerts()` | Ranked top GT regions with map coordinates | +| `await sb.ask("Run GT historical backtest")` | Routes to `gt_backtest` (benchmark, not operational) | +| `await sb.ask("GT rolling operational backtest trend")` | Routes to `gt_rolling_backtest` | +| `python sb_gt_report.py` | Local helper — backtest + heatmap (+ optional `--region`) | +| `await sb.send_command("gt_analyze", {"region": "europe"})` | Same as `gt_analyze()` | + +**Benchmark vs rolling:** Static `gt_backtest` checks the classifier on known textbook +cases. `gt_rolling_backtest` scores **frozen weekly live predictions** against delayed +operator labels — that week-over-week trend (e.g. 54% → 62% → 71%) is the macro +real-world metric. `gt_micro_rolling` adds a **3-day rolling average** per region: +spot risk vs the trailing baseline catches fast ignitions the weekly roll can miss. 
+Threshold is fixed (`GT_ROLLING_ALERT_THRESHOLD`, default 0.26); ignition when +spot − 3d avg ≥ `GT_MICRO_IGNITION_DELTA` (default 0.10). + **When to use**: Use `get_summary()` first. Use `get_layer_slice()` for the layers you actually need. Reserve full `get_telemetry()` / `get_slow_telemetry()` for rare cases where you genuinely need every field across every layer. @@ -692,6 +722,34 @@ When the user asks a question, follow this decision tree: - YES if the user has configured alert channels - Use the `AlertDispatcher` with the correct signature +### Telegram rhetoric monitoring (watchdog) + +Use watchdog watches for push alerts over SSE — no polling required. Keyword +watches now scan Telegram OSINT too (translated **and** original text). + +```python +# Alert when "nuclear" appears in news, GDELT, or Telegram OSINT +await sb.send_command("add_watch", { + "type": "keyword", + "params": {"keyword": "nuclear", "include_telegram": True}, +}) + +# Alert on new high-risk Telegram posts (LVL >= 7) — rhetoric/escalation monitor +await sb.send_command("add_watch", { + "type": "telegram_rhetoric", + "params": {"min_risk_score": 7, "channels": ["nexta_live", "war_monitor"]}, +}) + +# Combine risk threshold + topic filter +await sb.send_command("add_watch", { + "type": "telegram_rhetoric", + "params": {"min_risk_score": 8, "keywords": ["crimea", "escalation", "missile"]}, +}) +``` + +When a watch fires, you receive an SSE `alert` event. Forward it with +`sb_alerts.send_intel()` if the user has Discord/Telegram notification channels. 
+ --- ## Important Rules diff --git a/openclaw-skills/shadowbroker/sb_get_summary.py b/openclaw-skills/shadowbroker/sb_get_summary.py new file mode 100755 index 0000000..c7443bd --- /dev/null +++ b/openclaw-skills/shadowbroker/sb_get_summary.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +"""One-shot get_summary for OpenClaw exec — loads .env.shadowbroker automatically.""" +from __future__ import annotations + +import asyncio +import json +import os +from pathlib import Path + + +def _load_env() -> None: + candidates = [ + Path.home() / ".openclaw" / "workspace" / ".env.shadowbroker", + Path(__file__).resolve().parent.parent.parent / ".env.shadowbroker", + ] + for path in candidates: + if not path.is_file(): + continue + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + os.environ.setdefault(key.strip(), value.strip()) + break + + +async def main() -> None: + _load_env() + from sb_query import ShadowBrokerClient + + sb = ShadowBrokerClient() + try: + resp = await sb.send_command("get_summary", {"compact": True}) + print(json.dumps(resp, indent=2)) + finally: + await sb.close() + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/openclaw-skills/shadowbroker/sb_gt_report.py b/openclaw-skills/shadowbroker/sb_gt_report.py new file mode 100755 index 0000000..61e9dac --- /dev/null +++ b/openclaw-skills/shadowbroker/sb_gt_report.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +"""GT Strategic Risk report — backtest + heatmap + optional region dossier. + +Backtest scores are benchmark validation on labeled historical snippets, not +forward-weeks prediction on live adversarial telemetry. See SKILL.md. 
+""" +from __future__ import annotations + +import argparse +import asyncio +import json +import os +from pathlib import Path + + +def _load_env() -> None: + for path in ( + Path.home() / ".openclaw" / "workspace" / ".env.shadowbroker", + Path(__file__).resolve().parent.parent.parent / ".env.shadowbroker", + ): + if not path.is_file(): + continue + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + os.environ.setdefault(key.strip(), value.strip()) + break + + +async def main() -> None: + parser = argparse.ArgumentParser(description="ShadowBroker GT analytics report") + parser.add_argument("--region", default="", help="Optional region for gt_analyze dossier") + parser.add_argument("--tune", action="store_true", help="Grid-search backtest threshold") + args = parser.parse_args() + + _load_env() + from sb_query import ShadowBrokerClient + + sb = ShadowBrokerClient() + report: dict[str, object] = { + "benchmark_note": ( + "Backtest accuracy is on curated pre-crisis snippets vs cheap-talk controls. " + "It does not claim multi-week forward prediction on live feeds." 
+ ), + } + try: + report["backtest"] = await sb.gt_backtest(expanded=True, tune=args.tune) + heatmap = await sb.gt_risk_heatmap() + report["heatmap"] = { + "feature_count": len(heatmap.get("features") or []), + "clusters": heatmap.get("clusters") or [], + } + if args.region: + report["analyze"] = await sb.gt_analyze(region=args.region, refresh=True) + finally: + await sb.close() + + print(json.dumps(report, indent=2)) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/openclaw-skills/shadowbroker/sb_query.py b/openclaw-skills/shadowbroker/sb_query.py index df66dca..0b12e14 100644 --- a/openclaw-skills/shadowbroker/sb_query.py +++ b/openclaw-skills/shadowbroker/sb_query.py @@ -267,6 +267,17 @@ class ShadowBrokerClient: Returns: {ok, tier, reason, transport, pending_commands, pending_tasks, stats} """ + # /api/ai/channel/status is local-operator only. HMAC-signed remote + # agents must probe via the command channel instead. + if self._hmac_secret: + resp = await self.send_command("get_summary", {"compact": True}) + return { + "ok": bool(resp.get("ok")), + "tier": resp.get("tier"), + "status": resp.get("status"), + "transport": "http+hmac", + "reason": "remote_hmac_probe", + } r = await self._get("/api/ai/channel/status") return r.json() @@ -551,6 +562,123 @@ class ShadowBrokerClient: r.raise_for_status() return r.json() + # ── Strategic Risk Analytics (game-theoretic early warning) ─────── + + async def gt_risk_heatmap(self) -> dict: + """Cached Bayesian risk heatmap (GeoJSON features + Louvain clusters).""" + return self.unwrap_channel_result(await self.send_command("gt_risk_heatmap", {})) + + async def gt_dossier(self, region: str) -> dict: + """GT rationale, costly signals, and scenarios for a region.""" + return self.unwrap_channel_result( + await self.send_command("gt_dossier", {"region": region}), + ) + + async def gt_analyze( + self, + *, + region: str = "", + refresh: bool = True, + feeds: list[dict] | None = None, 
+ ) -> dict: + """Refresh GT beliefs from intel feeds and return heatmap/dossier.""" + args: dict[str, Any] = {"refresh": refresh} + if region: + args["region"] = region + if feeds: + args["feeds"] = feeds + return self.unwrap_channel_result(await self.send_command("gt_analyze", args)) + + async def gt_backtest( + self, + *, + expanded: bool = True, + tune: bool = False, + target_confidence: float = 0.95, + alert_threshold: float | None = None, + include_cases: bool = False, + ) -> dict: + """Run labeled historical backtest; returns accuracy + Wilson 95% CI.""" + args: dict[str, Any] = { + "expanded": expanded, + "tune": tune, + "target_confidence": target_confidence, + "include_cases": include_cases, + "compact": True, + } + if alert_threshold is not None: + args["alert_threshold"] = alert_threshold + return self.unwrap_channel_result(await self.send_command("gt_backtest", args)) + + async def gt_rolling_freeze( + self, + *, + week_id: str | None = None, + force: bool = False, + ) -> dict: + """Freeze current GT scores for the ISO week (operational validation).""" + args: dict[str, Any] = {"compact": True, "force": force} + if week_id: + args["week_id"] = week_id + return self.unwrap_channel_result(await self.send_command("gt_rolling_freeze", args)) + + async def gt_rolling_label( + self, + week_id: str, + *, + region: str = "", + label: str = "", + notes: str = "", + labels: list[dict] | None = None, + ) -> dict: + """Apply delayed outcome labels to a frozen operational week.""" + args: dict[str, Any] = {"week_id": week_id} + if labels: + args["labels"] = labels + else: + args["region"] = region + args["label"] = label + args["notes"] = notes + return self.unwrap_channel_result(await self.send_command("gt_rolling_label", args)) + + async def gt_rolling_backtest( + self, + *, + weeks: int = 8, + target_confidence: float = 0.80, + ) -> dict: + """Rolling weekly operational accuracy trend (delayed labels).""" + return self.unwrap_channel_result( + await 
self.send_command( + "gt_rolling_backtest", + { + "weeks": weeks, + "target_confidence": target_confidence, + "compact": True, + }, + ) + ) + + async def gt_top_alerts(self, *, limit: int = 8) -> dict: + """Ranked top GT risk regions with map coordinates.""" + return self.unwrap_channel_result( + await self.send_command("gt_top_alerts", {"limit": limit, "compact": True}) + ) + + async def gt_micro_rolling( + self, + *, + window_days: int = 3, + limit: int = 15, + ) -> dict: + """3-day rolling micro average — spot vs baseline, ignition regions.""" + return self.unwrap_channel_result( + await self.send_command( + "gt_micro_rolling", + {"window_days": window_days, "limit": limit, "compact": True}, + ) + ) + # ── Geocoding ───────────────────────────────────────────────────── async def geocode(self, query: str) -> list[dict]: diff --git a/uv.lock b/uv.lock index 756a988..ffa9959 100644 --- a/uv.lock +++ b/uv.lock @@ -104,7 +104,7 @@ wheels = [ [[package]] name = "backend" -version = "0.9.82" +version = "0.9.83" source = { editable = "backend" } dependencies = [ { name = "apscheduler" }, @@ -116,6 +116,10 @@ dependencies = [ { name = "feedparser" }, { name = "httpx" }, { name = "meshtastic" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "orjson" }, { name = "paho-mqtt" }, { name = "playwright" }, @@ -171,6 +175,8 @@ requires-dist = [ { name = "httpx", specifier = "==0.28.1" }, { name = "imageio", marker = "extra == 'road-corridor'", specifier = ">=2.34.0" }, { name = 
"meshtastic", specifier = ">=2.5.0" }, + { name = "networkx", specifier = ">=3.4.0" }, + { name = "numpy", specifier = ">=2.2.0" }, { name = "orjson", specifier = ">=3.10.0" }, { name = "osmnx", marker = "extra == 'road-corridor'", specifier = ">=2.0.0" }, { name = "paho-mqtt", specifier = ">=1.6.0,<2.0.0" }, @@ -3052,7 +3058,7 @@ wheels = [ [[package]] name = "shadowbroker" -version = "0.9.82" +version = "0.9.83" source = { virtual = "." } [package.metadata]