mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-09 15:53:56 +02:00
feat: Telegram OSINT map layer, Osiris intel ports, and maritime settings
Add Telegram OSINT with hourly incremental t.me scraping, metro geocoding separate from news centroids, threat-intercept popup UI with inline media, and HTML markers above alert boxes so pins stay clickable. Expose GFW_API_TOKEN in onboarding and Settings Maritime; harden GFW/CCTV/geo fetchers. Port Osiris-derived recon, SCM, entity graph, malware/cyber feeds, sanctions, and submarine cable layers with tests and documentation. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -77,3 +77,62 @@ def test_ingest_updates_existing_rows_in_persistent_data_dir(tmp_path, monkeypat
|
||||
assert len(cameras) == 1
|
||||
assert cameras[0]["media_url"] == "https://example.com/live.m3u8"
|
||||
assert cameras[0]["media_type"] == "hls"
|
||||
|
||||
|
||||
def test_scheduled_cctv_ingestors_include_asfinag_and_alpr():
    """The scheduled CCTV ingestors include the expected classes, 21 distinct in total."""
    class_names = set()
    for ingestor, _ in cctv_pipeline.scheduled_cctv_ingestors():
        class_names.add(ingestor.__class__.__name__)

    expected_classes = (
        "AsfinagIngestor",
        "OSMALPRCameraIngestor",
        "OSMTrafficCameraIngestor",
        "Ontario511Ingestor",
        "Alberta511Ingestor",
        "Florida511Ingestor",
        "AustraliaLiveTrafficIngestor",
        "NetherlandsRWSIngestor",
    )
    for expected in expected_classes:
        assert expected in class_names

    # Pins the number of distinct ingestor class names returned by the schedule.
    assert len(class_names) == 21
|
||||
|
||||
|
||||
def test_fetch_traveliq_v2_cameras_parses_views(monkeypatch):
    """One camera with one enabled view yields one record with a composite id and absolute URL."""

    class FakeResp:
        # Stand-in for the HTTP response object returned by fetch_with_curl.
        status_code = 200

        @staticmethod
        def json():
            northbound_view = {
                "Id": 42,
                "Url": "/map/Cctv/42",
                "Status": "Enabled",
                "Description": "Northbound",
            }
            camera = {
                "Id": 9,
                "Latitude": 45.0,
                "Longitude": -75.0,
                "Location": "Test Highway",
                "Views": [northbound_view],
            }
            return [camera]

    def fake_fetch(*args, **kwargs):
        return FakeResp()

    monkeypatch.setattr(cctv_pipeline, "fetch_with_curl", fake_fetch)

    parsed = cctv_pipeline._fetch_traveliq_v2_cameras(
        api_url="https://511on.ca/api/v2/get/cameras",
        base_url="https://511on.ca",
        id_prefix="ON511",
        source_agency="511 Ontario",
    )

    assert len(parsed) == 1
    record = parsed[0]
    # Expected id is prefix, camera Id and view Id joined by dashes.
    assert record["id"] == "ON511-9-42"
    # The relative view URL is expected to be resolved against base_url.
    assert record["media_url"] == "https://511on.ca/map/Cctv/42"
|
||||
|
||||
|
||||
def test_ensure_https_upgrades_http_media_urls():
    """http:// media URLs come back as https://; https:// URLs come back unchanged."""
    cases = (
        ("http://example.com/camera.jpg", "https://example.com/camera.jpg"),
        ("https://secure.example.com/live.m3u8", "https://secure.example.com/live.m3u8"),
    )
    for raw_url, expected_url in cases:
        assert cctv_pipeline._ensure_https_url(raw_url) == expected_url
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
"""Datacenters load from static JSON regardless of layer toggle."""
|
||||
from services.fetchers import _store
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
|
||||
def test_fetch_datacenters_populates_store_when_layer_disabled(monkeypatch):
    """fetch_datacenters fills the store even when the "datacenters" layer is off.

    Both pieces of shared state touched here (the layer toggle and the cached
    data) are patched through ``monkeypatch.setitem`` so pytest restores them
    at teardown and later tests do not see this test's data.
    """
    monkeypatch.setitem(_store.active_layers, "datacenters", False)
    # Start from an empty cache so a non-empty result must come from the fetch.
    monkeypatch.setitem(_store.latest_data, "datacenters", [])

    fetch_datacenters()

    datacenters = _store.latest_data.get("datacenters") or []
    assert len(datacenters) > 0
|
||||
@@ -113,3 +113,52 @@ def test_fetch_fishing_activity_dedupes_to_latest_event_per_vessel(monkeypatch):
|
||||
assert latest_data["fishing_activity"][0]["vessel_ssvid"] == "ssvid-1"
|
||||
finally:
|
||||
latest_data["fishing_activity"] = original
|
||||
|
||||
|
||||
def test_fetch_fishing_activity_respects_max_pages(monkeypatch):
    """With page size 500 and max pages 2, exactly two pages are fetched and 1000 events stored."""
    from services.fetchers import geo
    from services.fetchers._store import latest_data

    saved_activity = list(latest_data.get("fishing_activity") or [])
    seen_urls: list[str] = []

    def build_entry(page_offset, i):
        # Ids are unique across pages; the position depends only on the index within the page.
        seq = page_offset + i
        return {
            "id": f"evt-{seq}",
            "position": {"lat": 10.0 + i, "lon": 20.0 + i},
            "event": {"duration": 3600},
            "vessel": {
                "id": f"v-{seq}",
                "ssvid": f"ssvid-{seq}",
                "name": f"Vessel-{seq}",
                "flag": "US",
            },
        }

    def fake_fetch(url, timeout=30, headers=None):
        seen_urls.append(url)
        page_offset = 500 if "offset=500" in url else 0
        body = {
            # The advertised total is larger than two pages, so only the page cap can stop paging.
            "total": 5000,
            "entries": [build_entry(page_offset, i) for i in range(500)],
            "nextOffset": page_offset + 500,
        }
        return SimpleNamespace(status_code=200, json=lambda p=body: p)

    monkeypatch.setenv("GFW_API_TOKEN", "test-token")
    monkeypatch.setenv("GFW_EVENTS_PAGE_SIZE", "500")
    monkeypatch.setenv("GFW_EVENTS_MAX_PAGES", "2")
    monkeypatch.setattr("services.fetchers._store.is_any_active", lambda *args: True)
    monkeypatch.setattr(geo, "fetch_with_curl", fake_fetch)
    monkeypatch.setattr(geo, "_mark_fresh", lambda *args, **kwargs: None)
    monkeypatch.setattr(geo, "_last_fishing_fetch_ts", 0.0)

    try:
        geo.fetch_fishing_activity()
        assert len(latest_data["fishing_activity"]) == 1000
        assert len(seen_urls) == 2
        for requested in seen_urls:
            assert "offset=0" in requested or "offset=500" in requested
    finally:
        # Put the shared store back the way it was found.
        latest_data["fishing_activity"] = saved_activity
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Tests for Osiris-ported security and sanctions modules."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.ssrf_guard import validate_host, validate_domain
|
||||
from services.sanctions.ofac import norm_name, search_sanctions
|
||||
|
||||
|
||||
def test_ssrf_blocks_localhost():
    """validate_host reports ok=False for the hostname "localhost"."""
    verdict = validate_host("localhost")["ok"]
    assert verdict is False
|
||||
|
||||
|
||||
def test_ssrf_blocks_private_ip():
    """validate_host reports ok=False for the address 192.168.1.1."""
    verdict = validate_host("192.168.1.1")["ok"]
    assert verdict is False
|
||||
|
||||
|
||||
def test_ssrf_blocks_metadata_endpoint():
    """validate_host reports ok=False for metadata.google.internal."""
    verdict = validate_host("metadata.google.internal")["ok"]
    assert verdict is False
|
||||
|
||||
|
||||
def test_validate_domain_rejects_garbage():
    """validate_domain is False for a string containing spaces and True for example.com."""
    cases = (
        ("not a domain", False),
        ("example.com", True),
    )
    for candidate, verdict in cases:
        assert validate_domain(candidate) is verdict
|
||||
|
||||
|
||||
def test_norm_name_strips_punctuation():
    """Punctuated mixed-case and plain lower-case spellings normalise to the same value."""
    punctuated = norm_name("ACME, Inc.")
    plain = norm_name("acme inc")
    assert punctuated == plain
|
||||
|
||||
|
||||
def test_search_sanctions_requires_min_length():
    """A two-character query returns an empty result list."""
    matches = search_sanctions("ab")
    assert matches == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("query", ["127.0.0.1", "10.0.0.1"])
def test_sweep_init_rejects_private(query: str):
    """sweep_init raises ValueError for the loopback and 10.x addresses used here."""
    # Imported inside the test, as in the original, so the module loads only when this test runs.
    from services.osint.lookups import sweep_init

    rejection_pattern = "Private|reserved|Invalid"
    with pytest.raises(ValueError, match=rejection_pattern):
        sweep_init(query, 24)
|
||||
@@ -0,0 +1,13 @@
|
||||
from services.scm.suppliers import _seismic_risk_level
|
||||
|
||||
|
||||
def test_micro_quakes_ignored():
    """Small second-argument values (presumably magnitudes; verify against the helper) give None."""
    for args in ((10.0, 3.9), (10.0, 4.4)):
        assert _seismic_risk_level(*args) is None
|
||||
|
||||
|
||||
def test_meaningful_quake_thresholds():
    """Pins the HIGH and CRITICAL labels returned for four sample argument pairs."""
    cases = (
        ((30.0, 4.6), "HIGH"),
        ((80.0, 5.2), "HIGH"),
        ((50.0, 5.6), "CRITICAL"),
        ((150.0, 6.1), "CRITICAL"),
    )
    for args, expected_level in cases:
        assert _seismic_risk_level(*args) == expected_level
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Telegram OSINT HTML parsing and geoparsing."""
|
||||
|
||||
from services.fetchers import telegram_osint
|
||||
|
||||
|
||||
# Channel-page fragment with a single text post: message text, permalink and timestamp.
# NOTE(review): the fragment lines are kept flush-left exactly as visible; any original
# leading whitespace inside the literal could not be recovered — confirm against the repo.
SAMPLE_HTML = """
<div class="tgme_widget_message_wrap js-widget_message_wrap">
<div class="tgme_widget_message_text">Missile strike reported near Kyiv overnight.</div>
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/12345">
<time datetime="2026-06-02T12:00:00+00:00"></time>
</a>
</div>
</div>
</div>
"""

# Same shape as SAMPLE_HTML, with an inline <video> element carrying a media URL.
SAMPLE_VIDEO_HTML = """
<div class="tgme_widget_message_wrap js-widget_message_wrap">
<div class="tgme_widget_message_text">Drone footage from Kharkiv.</div>
<video src="https://cdn4.telesco.pe/file/sample.mp4?token=abc" class="tgme_widget_message_video js-message_video"></video>
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/99999">
<time datetime="2026-06-02T13:00:00+00:00"></time>
</a>
</div>
</div>
</div>
"""
|
||||
|
||||
|
||||
def test_parse_telegram_channel_html_extracts_geolocated_post():
    """The text-only sample parses into one post with a Kyiv title, coordinates, risk and link."""
    parsed = telegram_osint.parse_telegram_channel_html(SAMPLE_HTML, "osintdefender")
    assert len(parsed) == 1

    (entry,) = parsed
    assert "Kyiv" in entry["title"]
    assert entry["coords"] == [50.45, 30.523]
    assert entry["risk_score"] >= 3
    assert entry["link"].startswith("https://t.me/")
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_handles_cyrillic():
    """A Cyrillic sentence mentioning Kharkiv resolves to the expected coordinate pair."""
    resolved = telegram_osint._resolve_telegram_coords("Обстріл біля Харкова")
    expected = (49.993, 36.231)
    assert resolved == expected
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_uses_metro_anchors_for_country_tags():
    """Country-level mentions resolve to the fixed anchor coordinates pinned below."""
    anchors = {
        "#Israel #Iran": (32.085, 34.781),
        "China announces policy": (39.904, 116.407),
        "#USA response": (40.712, -74.006),
    }
    for text, anchor in anchors.items():
        assert telegram_osint._resolve_telegram_coords(text) == anchor
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_keeps_specific_cities_over_country_anchor():
    """Texts naming Gaza or Kyiv resolve to those cities' pinned coordinates."""
    resolve = telegram_osint._resolve_telegram_coords

    assert resolve("Strike near Gaza") == (31.416, 34.333)

    kyiv = resolve("Missile strike reported near Kyiv overnight")
    assert kyiv == (50.45, 30.523)
|
||||
|
||||
|
||||
def test_parse_telegram_channel_html_extracts_video_media():
    """The video sample parses into one post with video media and an embed URL."""
    parsed = telegram_osint.parse_telegram_channel_html(SAMPLE_VIDEO_HTML, "osintdefender")
    assert len(parsed) == 1

    (entry,) = parsed
    assert entry["media_type"] == "video"
    assert entry["media_url"].startswith("https://cdn4.telesco.pe/")
    # The embed URL is the post permalink with "?embed=1" appended.
    assert entry["embed_url"] == "https://t.me/osintdefender/99999?embed=1"
|
||||
|
||||
|
||||
def test_telegram_media_host_allowed():
    """The two CDN hosts used here are accepted; an unrelated host is rejected."""
    is_allowed = telegram_osint.telegram_media_host_allowed
    for host in ("cdn4.telesco.pe", "cdn4.telegram-cdn.org"):
        assert is_allowed(host)
    assert not is_allowed("evil.example.com")
|
||||
|
||||
|
||||
def test_extract_new_channel_posts_stops_at_known_links():
    """When the sample's only post link is already known, no new posts are returned."""
    already_seen = {"https://t.me/osintdefender/12345"}
    new_posts = telegram_osint._extract_new_channel_posts(
        SAMPLE_HTML,
        "osintdefender",
        already_seen,
    )
    assert new_posts == []
|
||||
|
||||
|
||||
def test_merge_telegram_posts_keeps_existing_and_adds_only_new():
    """Merging skips an incoming post whose link already exists and adds the unseen one."""

    def make_post(post_id, link, published):
        return {"id": post_id, "link": link, "published": published}

    stored = [
        make_post("old", "https://t.me/osintdefender/111", "2026-06-01T12:00:00+00:00"),
    ]
    fetched = [
        # Same link as the stored post, so it must not be counted as new.
        make_post("dup", "https://t.me/osintdefender/111", "2026-06-02T12:00:00+00:00"),
        make_post("new", "https://t.me/osintdefender/222", "2026-06-03T12:00:00+00:00"),
    ]

    combined, added_count = telegram_osint._merge_telegram_posts(stored, fetched)

    assert added_count == 1
    assert len(combined) == 2
    # The newly added post is expected at the front of the merged list.
    assert combined[0]["link"] == "https://t.me/osintdefender/222"
|
||||
Reference in New Issue
Block a user