feat: Telegram OSINT map layer, Osiris intel ports, and maritime settings

Add Telegram OSINT with hourly incremental t.me scraping, metro geocoding
separate from news centroids, threat-intercept popup UI with inline media,
and HTML markers above alert boxes so pins stay clickable. Expose GFW_API_TOKEN
in onboarding and Settings Maritime; harden GFW/CCTV/geo fetchers. Port Osiris-
derived recon, SCM, entity graph, malware/cyber feeds, sanctions, and submarine
cable layers with tests and documentation.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
BigBodyCobain
2026-06-08 21:04:08 -06:00
parent b64b9e0962
commit af9b3d08cc
76 changed files with 5769 additions and 218 deletions
+59
View File
@@ -77,3 +77,62 @@ def test_ingest_updates_existing_rows_in_persistent_data_dir(tmp_path, monkeypat
assert len(cameras) == 1
assert cameras[0]["media_url"] == "https://example.com/live.m3u8"
assert cameras[0]["media_type"] == "hls"
def test_scheduled_cctv_ingestors_include_asfinag_and_alpr():
    """The scheduled CCTV roster exposes the expected ingestor classes.

    Collects the class name of every ingestor returned by
    ``scheduled_cctv_ingestors()``, checks that a set of required names is
    present, and pins the number of distinct class names at 21.
    """
    required_names = (
        "AsfinagIngestor",
        "OSMALPRCameraIngestor",
        "OSMTrafficCameraIngestor",
        "Ontario511Ingestor",
        "Alberta511Ingestor",
        "Florida511Ingestor",
        "AustraliaLiveTrafficIngestor",
        "NetherlandsRWSIngestor",
    )

    names = set()
    for ingestor, _ in cctv_pipeline.scheduled_cctv_ingestors():
        names.add(ingestor.__class__.__name__)

    for class_name in required_names:
        assert class_name in names
    # Exact roster size: adding or removing an ingestor must update this.
    assert len(names) == 21
def test_fetch_traveliq_v2_cameras_parses_views(monkeypatch):
    """A camera record with one view produces exactly one camera entry.

    ``fetch_with_curl`` is stubbed to return a single camera (Id 9) carrying
    a single view (Id 42). The resulting entry must have the id
    ``ON511-9-42`` and a media URL made of ``base_url`` plus the view's
    relative ``Url``.
    """

    def build_payload():
        # Built fresh on every call so each json() returns new objects.
        enabled_view = {
            "Id": 42,
            "Url": "/map/Cctv/42",
            "Status": "Enabled",
            "Description": "Northbound",
        }
        return [
            {
                "Id": 9,
                "Latitude": 45.0,
                "Longitude": -75.0,
                "Location": "Test Highway",
                "Views": [enabled_view],
            }
        ]

    class FakeResp:
        status_code = 200
        json = staticmethod(build_payload)

    def fake_fetch(*args, **kwargs):
        return FakeResp()

    monkeypatch.setattr(cctv_pipeline, "fetch_with_curl", fake_fetch)

    cameras = cctv_pipeline._fetch_traveliq_v2_cameras(
        api_url="https://511on.ca/api/v2/get/cameras",
        base_url="https://511on.ca",
        id_prefix="ON511",
        source_agency="511 Ontario",
    )

    assert len(cameras) == 1
    camera = cameras[0]
    assert camera["id"] == "ON511-9-42"
    assert camera["media_url"] == "https://511on.ca/map/Cctv/42"
def test_ensure_https_upgrades_http_media_urls():
    """``_ensure_https_url`` upgrades http URLs and leaves https URLs as-is."""
    cases = (
        # (input URL, expected result)
        ("http://example.com/camera.jpg", "https://example.com/camera.jpg"),
        (
            "https://secure.example.com/live.m3u8",
            "https://secure.example.com/live.m3u8",
        ),
    )
    for given, expected in cases:
        assert cctv_pipeline._ensure_https_url(given) == expected
+10
View File
@@ -0,0 +1,10 @@
"""Datacenters load from static JSON regardless of layer toggle."""
from services.fetchers import _store
from services.fetchers.infrastructure import fetch_datacenters
def test_fetch_datacenters_populates_store_when_layer_disabled(monkeypatch):
    """``fetch_datacenters`` fills the store even when the layer flag is off.

    The ``datacenters`` layer is marked inactive and its store slot is
    emptied before the fetch; afterwards the slot must hold at least one
    entry.
    """
    monkeypatch.setitem(_store.active_layers, "datacenters", False)
    # Reset through monkeypatch (not a bare assignment) so the shared store
    # is put back to its previous value on teardown and this test cannot
    # leak state into tests that run after it.
    monkeypatch.setitem(_store.latest_data, "datacenters", [])

    fetch_datacenters()

    assert len(_store.latest_data.get("datacenters") or []) > 0
+49
View File
@@ -113,3 +113,52 @@ def test_fetch_fishing_activity_dedupes_to_latest_event_per_vessel(monkeypatch):
assert latest_data["fishing_activity"][0]["vessel_ssvid"] == "ssvid-1"
finally:
latest_data["fishing_activity"] = original
def test_fetch_fishing_activity_respects_max_pages(monkeypatch):
    """Paging stops at ``GFW_EVENTS_MAX_PAGES`` even when more data remains.

    The stubbed endpoint always reports 5000 total events and a further
    ``nextOffset``, serving 500 events per request. With the page size set
    to 500 and the page cap set to 2, exactly two requests (offsets 0 and
    500) and 1000 stored events are expected.
    """
    from services.fetchers import geo
    from services.fetchers._store import latest_data

    saved_activity = list(latest_data.get("fishing_activity") or [])
    seen_urls: list[str] = []

    def build_event(index, base):
        # Identifiers are unique across pages; coordinates depend only on
        # the position within the page.
        serial = base + index
        return {
            "id": f"evt-{serial}",
            "position": {"lat": 10.0 + index, "lon": 20.0 + index},
            "event": {"duration": 3600},
            "vessel": {
                "id": f"v-{serial}",
                "ssvid": f"ssvid-{serial}",
                "name": f"Vessel-{serial}",
                "flag": "US",
            },
        }

    def fake_fetch(url, timeout=30, headers=None):
        seen_urls.append(url)
        base = 500 if "offset=500" in url else 0
        page = {
            "total": 5000,
            "entries": [build_event(index, base) for index in range(500)],
            "nextOffset": base + 500,
        }
        return SimpleNamespace(status_code=200, json=lambda: page)

    monkeypatch.setenv("GFW_API_TOKEN", "test-token")
    monkeypatch.setenv("GFW_EVENTS_PAGE_SIZE", "500")
    monkeypatch.setenv("GFW_EVENTS_MAX_PAGES", "2")
    monkeypatch.setattr("services.fetchers._store.is_any_active", lambda *args: True)
    monkeypatch.setattr(geo, "fetch_with_curl", fake_fetch)
    monkeypatch.setattr(geo, "_mark_fresh", lambda *args, **kwargs: None)
    monkeypatch.setattr(geo, "_last_fishing_fetch_ts", 0.0)

    try:
        geo.fetch_fishing_activity()
        assert len(latest_data["fishing_activity"]) == 1000
        assert len(seen_urls) == 2
        for url in seen_urls:
            assert "offset=0" in url or "offset=500" in url
    finally:
        # Put the shared store back regardless of the outcome.
        latest_data["fishing_activity"] = saved_activity
+43
View File
@@ -0,0 +1,43 @@
"""Tests for Osiris-ported security and sanctions modules."""
from __future__ import annotations
import pytest
from services.ssrf_guard import validate_host, validate_domain
from services.sanctions.ofac import norm_name, search_sanctions
def test_ssrf_blocks_localhost():
    """``validate_host`` reports ``ok`` as ``False`` for ``localhost``."""
    assert validate_host("localhost")["ok"] is False
def test_ssrf_blocks_private_ip():
    """``validate_host`` reports ``ok`` as ``False`` for ``192.168.1.1``."""
    assert validate_host("192.168.1.1")["ok"] is False
def test_ssrf_blocks_metadata_endpoint():
    """``validate_host`` reports ``ok`` as ``False`` for the metadata host."""
    assert validate_host("metadata.google.internal")["ok"] is False
def test_validate_domain_rejects_garbage():
    """``validate_domain`` returns exactly ``False`` / ``True`` as appropriate."""
    rejected = validate_domain("not a domain")
    assert rejected is False

    accepted = validate_domain("example.com")
    assert accepted is True
def test_norm_name_strips_punctuation():
    """Differently punctuated and cased spellings normalize to the same value."""
    punctuated = norm_name("ACME, Inc.")
    plain = norm_name("acme inc")
    assert punctuated == plain
def test_search_sanctions_requires_min_length():
    """A two-character query yields an empty result list."""
    hits = search_sanctions("ab")
    assert hits == []
@pytest.mark.parametrize("query", ["127.0.0.1", "10.0.0.1"])
def test_sweep_init_rejects_private(query: str):
    """``sweep_init`` raises ``ValueError`` for loopback and private targets.

    The error message must match one of ``Private``, ``reserved`` or
    ``Invalid``.
    """
    from services.osint.lookups import sweep_init

    expected_message = "Private|reserved|Invalid"
    # NOTE(review): 24 is presumably a CIDR prefix length; confirm against
    # the sweep_init signature.
    with pytest.raises(ValueError, match=expected_message):
        sweep_init(query, 24)
+13
View File
@@ -0,0 +1,13 @@
from services.scm.suppliers import _seismic_risk_level
def test_micro_quakes_ignored():
    """Small second-argument values produce no risk level (``None``).

    NOTE(review): the arguments are presumably (distance, magnitude);
    confirm against ``_seismic_risk_level``.
    """
    for second_arg in (3.9, 4.4):
        assert _seismic_risk_level(10.0, second_arg) is None
def test_meaningful_quake_thresholds():
    """Pins the ``HIGH`` and ``CRITICAL`` outcomes for four sample inputs.

    NOTE(review): the arguments are presumably (distance, magnitude);
    confirm against ``_seismic_risk_level``.
    """
    cases = (
        # (first argument, second argument, expected level)
        (30.0, 4.6, "HIGH"),
        (80.0, 5.2, "HIGH"),
        (50.0, 5.6, "CRITICAL"),
        (150.0, 6.1, "CRITICAL"),
    )
    for first_arg, second_arg, expected_level in cases:
        assert _seismic_risk_level(first_arg, second_arg) == expected_level
+103
View File
@@ -0,0 +1,103 @@
"""Telegram OSINT HTML parsing and geoparsing."""
from services.fetchers import telegram_osint
# HTML fixture with a single text-only message: the text mentions Kyiv, the
# date link points at https://t.me/osintdefender/12345 and the <time> element
# carries 2026-06-02T12:00:00+00:00. Used by the parsing and known-link tests
# in this module.
# NOTE(review): there are more closing </div> tags than opening ones here,
# presumably because the markup was trimmed from a larger page; confirm the
# parser is expected to tolerate that.
SAMPLE_HTML = """
<div class="tgme_widget_message_wrap js-widget_message_wrap">
<div class="tgme_widget_message_text">Missile strike reported near Kyiv overnight.</div>
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/12345">
<time datetime="2026-06-02T12:00:00+00:00"></time>
</a>
</div>
</div>
</div>
"""
# HTML fixture with a single message that carries a <video> element: the text
# mentions Kharkiv, the video src is on cdn4.telesco.pe and the date link
# points at https://t.me/osintdefender/99999. Used by the video-media parsing
# test in this module.
# NOTE(review): there are more closing </div> tags than opening ones here,
# presumably because the markup was trimmed from a larger page; confirm the
# parser is expected to tolerate that.
SAMPLE_VIDEO_HTML = """
<div class="tgme_widget_message_wrap js-widget_message_wrap">
<div class="tgme_widget_message_text">Drone footage from Kharkiv.</div>
<video src="https://cdn4.telesco.pe/file/sample.mp4?token=abc" class="tgme_widget_message_video js-message_video"></video>
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/99999">
<time datetime="2026-06-02T13:00:00+00:00"></time>
</a>
</div>
</div>
</div>
"""
def test_parse_telegram_channel_html_extracts_geolocated_post():
    """Parsing ``SAMPLE_HTML`` yields one post with coordinates and a link.

    The single parsed post must mention Kyiv in its title, carry the
    coordinates ``[50.45, 30.523]``, have a risk score of at least 3 and a
    link under ``https://t.me/``.
    """
    parsed = telegram_osint.parse_telegram_channel_html(
        SAMPLE_HTML, "osintdefender"
    )
    assert len(parsed) == 1

    entry = parsed[0]
    assert "Kyiv" in entry["title"]
    assert entry["coords"] == [50.45, 30.523]
    assert entry["risk_score"] >= 3
    assert entry["link"].startswith("https://t.me/")
def test_resolve_telegram_coords_handles_cyrillic():
    """A Cyrillic-script message resolves to the pinned coordinate pair."""
    expected = (49.993, 36.231)
    resolved = telegram_osint._resolve_telegram_coords("Обстріл біля Харкова")
    assert resolved == expected
def test_resolve_telegram_coords_uses_metro_anchors_for_country_tags():
    """Country-level mentions resolve to the fixed anchor coordinates below."""
    cases = (
        # (message text, expected coordinates)
        ("#Israel #Iran", (32.085, 34.781)),
        ("China announces policy", (39.904, 116.407)),
        ("#USA response", (40.712, -74.006)),
    )
    for text, anchor in cases:
        assert telegram_osint._resolve_telegram_coords(text) == anchor
def test_resolve_telegram_coords_keeps_specific_cities_over_country_anchor():
    """Messages naming a specific place resolve to that place's coordinates."""
    cases = (
        # (message text, expected coordinates)
        ("Strike near Gaza", (31.416, 34.333)),
        ("Missile strike reported near Kyiv overnight", (50.45, 30.523)),
    )
    for text, expected in cases:
        assert telegram_osint._resolve_telegram_coords(text) == expected
def test_parse_telegram_channel_html_extracts_video_media():
    """Parsing ``SAMPLE_VIDEO_HTML`` yields one post with video media fields.

    The post must be typed as ``video``, point its media URL at the
    ``cdn4.telesco.pe`` host and expose an embed URL built from the post
    link with ``?embed=1`` appended.
    """
    parsed = telegram_osint.parse_telegram_channel_html(
        SAMPLE_VIDEO_HTML, "osintdefender"
    )
    assert len(parsed) == 1

    entry = parsed[0]
    assert entry["media_type"] == "video"
    assert entry["media_url"].startswith("https://cdn4.telesco.pe/")
    assert entry["embed_url"] == "https://t.me/osintdefender/99999?embed=1"
def test_telegram_media_host_allowed():
    """Two Telegram CDN hosts are accepted; an unrelated host is rejected."""
    for host in ("cdn4.telesco.pe", "cdn4.telegram-cdn.org"):
        assert telegram_osint.telegram_media_host_allowed(host)
    assert not telegram_osint.telegram_media_host_allowed("evil.example.com")
def test_extract_new_channel_posts_stops_at_known_links():
    """No posts are returned when the only post's link is already known."""
    already_seen = {"https://t.me/osintdefender/12345"}
    extracted = telegram_osint._extract_new_channel_posts(
        SAMPLE_HTML, "osintdefender", already_seen
    )
    assert extracted == []
def test_merge_telegram_posts_keeps_existing_and_adds_only_new():
    """Merging adds only posts whose link is not already present.

    One existing post (link ``/111``) is merged with two incoming posts: one
    reusing link ``/111`` and one with the new link ``/222``. Exactly one
    post must be reported as added, the merged list must hold two posts, and
    its first element must be the ``/222`` post.
    """
    stored_post = dict(
        id="old",
        link="https://t.me/osintdefender/111",
        published="2026-06-01T12:00:00+00:00",
    )
    duplicate_post = dict(
        id="dup",
        link="https://t.me/osintdefender/111",
        published="2026-06-02T12:00:00+00:00",
    )
    fresh_post = dict(
        id="new",
        link="https://t.me/osintdefender/222",
        published="2026-06-03T12:00:00+00:00",
    )

    combined, new_count = telegram_osint._merge_telegram_posts(
        [stored_post], [duplicate_post, fresh_post]
    )

    assert new_count == 1
    assert len(combined) == 2
    assert combined[0]["link"] == "https://t.me/osintdefender/222"