mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-07-04 19:37:54 +02:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c9c9a5262c |
@@ -26,6 +26,8 @@ AIS_API_KEY=
|
|||||||
# Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token).
|
# Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token).
|
||||||
# TELEGRAM_OSINT_ENABLED=true
|
# TELEGRAM_OSINT_ENABLED=true
|
||||||
# TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor
|
# TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor
|
||||||
|
# TELEGRAM_OSINT_TRANSLATE=true
|
||||||
|
# TELEGRAM_OSINT_TRANSLATE_TO=en
|
||||||
|
|
||||||
# Admin key to protect sensitive endpoints (settings, updates).
|
# Admin key to protect sensitive endpoints (settings, updates).
|
||||||
# If blank, loopback/localhost requests still work for local single-host dev.
|
# If blank, loopback/localhost requests still work for local single-host dev.
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from services.fetchers._store import get_latest_data_subset_refs
|
|||||||
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
||||||
from services.intel_feeds.country_risk import build_country_risk_payload
|
from services.intel_feeds.country_risk import build_country_risk_payload
|
||||||
from services.network_utils import outbound_user_agent
|
from services.network_utils import outbound_user_agent
|
||||||
|
from services.telegram_translate import apply_posts_translations, normalize_translate_target
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -45,12 +46,19 @@ async def country_risk(request: Request) -> dict:
|
|||||||
|
|
||||||
@router.get("/api/telegram-feed")
|
@router.get("/api/telegram-feed")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def telegram_feed(request: Request) -> dict:
|
async def telegram_feed(request: Request, lang: str | None = Query(default=None)) -> dict:
|
||||||
snap = get_latest_data_subset_refs("telegram_osint")
|
snap = get_latest_data_subset_refs("telegram_osint")
|
||||||
payload = snap.get("telegram_osint")
|
payload = snap.get("telegram_osint")
|
||||||
if isinstance(payload, dict) and payload.get("posts") is not None:
|
if not isinstance(payload, dict) or payload.get("posts") is None:
|
||||||
return payload
|
return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
|
||||||
return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
|
|
||||||
|
if lang:
|
||||||
|
target = normalize_translate_target(lang)
|
||||||
|
localized = dict(payload)
|
||||||
|
localized["posts"] = apply_posts_translations(list(payload.get("posts") or []), target)
|
||||||
|
localized["translate_locale"] = target
|
||||||
|
return localized
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
|
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import html
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -11,6 +12,7 @@ from typing import Any
|
|||||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||||
from services.fetchers.news import resolve_coords_match
|
from services.fetchers.news import resolve_coords_match
|
||||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||||
|
from services.telegram_translate import apply_post_translation, apply_posts_translations
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -174,13 +176,7 @@ def _extract_media(block: str, link: str) -> dict[str, Any]:
|
|||||||
def _strip_html(text: str) -> str:
|
def _strip_html(text: str) -> str:
|
||||||
cleaned = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
cleaned = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
||||||
cleaned = re.sub(r"<[^>]+>", "", cleaned)
|
cleaned = re.sub(r"<[^>]+>", "", cleaned)
|
||||||
return (
|
return html.unescape(cleaned).strip()
|
||||||
cleaned.replace(""", '"')
|
|
||||||
.replace("&", "&")
|
|
||||||
.replace("<", "<")
|
|
||||||
.replace(">", ">")
|
|
||||||
.strip()
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _score_risk(text: str) -> int:
|
def _score_risk(text: str) -> int:
|
||||||
@@ -293,20 +289,19 @@ def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]]
|
|||||||
post_id = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16]
|
post_id = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
media = _extract_media(block, link)
|
media = _extract_media(block, link)
|
||||||
posts.append(
|
post = {
|
||||||
{
|
"id": post_id,
|
||||||
"id": post_id,
|
"title": title,
|
||||||
"title": title,
|
"description": text[:1200],
|
||||||
"description": text[:1200],
|
"link": link,
|
||||||
"link": link,
|
"published": published,
|
||||||
"published": published,
|
"source": f"t.me/{channel}",
|
||||||
"source": f"t.me/{channel}",
|
"channel": channel,
|
||||||
"channel": channel,
|
"risk_score": risk_score,
|
||||||
"risk_score": risk_score,
|
"coords": [coords[0], coords[1]] if coords else None,
|
||||||
"coords": [coords[0], coords[1]] if coords else None,
|
**media,
|
||||||
**media,
|
}
|
||||||
}
|
posts.append(apply_post_translation(post))
|
||||||
)
|
|
||||||
return posts
|
return posts
|
||||||
|
|
||||||
|
|
||||||
@@ -358,6 +353,7 @@ def fetch_telegram_osint() -> dict[str, Any]:
|
|||||||
|
|
||||||
merged_posts, added = _merge_telegram_posts(existing_posts, incoming)
|
merged_posts, added = _merge_telegram_posts(existing_posts, incoming)
|
||||||
merged_posts = [_refresh_post_coords(post) for post in merged_posts]
|
merged_posts = [_refresh_post_coords(post) for post in merged_posts]
|
||||||
|
merged_posts = apply_posts_translations(merged_posts)
|
||||||
geolocated = sum(1 for p in merged_posts if p.get("coords"))
|
geolocated = sum(1 for p in merged_posts if p.get("coords"))
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
"""Shared Telegram OSINT post text helpers for search and watchdog matching."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from services.telegram_translate import source_lang_label
|
||||||
|
|
||||||
|
|
||||||
|
def iter_telegram_posts(layer_payload: Any) -> list[dict[str, Any]]:
    """Return the post dicts contained in a ``telegram_osint`` layer payload.

    The payload may be a bare list of posts or a dict carrying that list
    under ``"posts"``. Entries that are not dicts are dropped, and any other
    payload shape produces an empty list.
    """
    candidates = layer_payload.get("posts") if isinstance(layer_payload, dict) else layer_payload
    if not isinstance(candidates, list):
        return []
    return [entry for entry in candidates if isinstance(entry, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_post_search_text(post: dict[str, Any]) -> str:
    """Build the lowercase haystack used for keyword matching on a post.

    Translated title/description come first, then the original title and
    description, then source and channel. Missing or blank fields are
    skipped; the remaining values are stripped and joined with single spaces.
    """
    field_names = (
        "title_translated",
        "description_translated",
        "title",
        "description",
        "source",
        "channel",
    )
    pieces: list[str] = []
    for name in field_names:
        value = post.get(name)
        if not str(value or "").strip():
            continue
        pieces.append(str(value).strip())
    return " ".join(pieces).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_post_display_title(post: dict[str, Any]) -> str:
    """Pick the headline shown in alerts and agent-facing summaries.

    The translated title (or translated description) wins when present and
    is reduced to its first line; otherwise the original title/description
    is used. The result is capped at 200 characters either way.
    """
    preferred = str(post.get("title_translated") or post.get("description_translated") or "").strip()
    if not preferred:
        fallback = post.get("title") or post.get("description") or ""
        return str(fallback).strip()[:200]
    first_line, _, _ = preferred.partition("\n")
    return first_line[:200]
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_post_match_entry(post: dict[str, Any]) -> dict[str, Any]:
    """Condense a post into the compact record used by watchdog alerts and search.

    ``lat``/``lng`` are taken from the first two items of ``coords`` when it
    is a list or tuple of at least two elements, otherwise both are ``None``.
    The language label falls back to one derived from ``source_lang`` when the
    post does not carry its own.
    """
    coords = post.get("coords")
    has_point = isinstance(coords, (list, tuple)) and len(coords) >= 2
    lat, lng = (coords[0], coords[1]) if has_point else (None, None)

    lang_code = post.get("source_lang")
    lang_label = post.get("source_lang_label") or source_lang_label(lang_code)

    return {
        "source": "telegram_osint",
        "title": telegram_post_display_title(post),
        "original_title": str(post.get("title") or "").strip(),
        "url": post.get("link") or "",
        "channel": post.get("channel") or post.get("source") or "",
        "risk_score": post.get("risk_score"),
        "source_lang": lang_code,
        "source_lang_label": lang_label,
        "lat": lat,
        "lng": lng,
        "id": post.get("id") or post.get("link") or "",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def keyword_matches_telegram_post(post: dict[str, Any], keyword: str) -> bool:
    """Tell whether ``keyword`` occurs in the post's searchable text.

    The keyword is stripped and lowercased first; a blank keyword never
    matches.
    """
    needle = str(keyword or "").strip().lower()
    return bool(needle) and needle in telegram_post_search_text(post)
|
||||||
@@ -0,0 +1,243 @@
|
|||||||
|
"""Auto-translation for Telegram OSINT post text (server-side, cached)."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
from threading import Lock
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Script-detection patterns: each matches a single character from the given
# Unicode block and is used by the language-guessing helpers in this module.
_CYRILLIC_RE = re.compile(r"[\u0400-\u04FF]")
# Letters listed here are treated as a signal for Ukrainian rather than Russian.
_UKRAINIAN_MARKERS_RE = re.compile(r"[іїєґІЇЄҐ]")
_ARABIC_RE = re.compile(r"[\u0600-\u06FF]")
_HEBREW_RE = re.compile(r"[\u0590-\u05FF]")
_CJK_RE = re.compile(r"[\u4e00-\u9fff]")

# Common war-reporting shorthand that machine translation often transliterates.
# Each entry is (pattern, replacement); polish_translation applies them in order.
# NOTE(review): the case-sensitive "бпла"/"БПЛА"/"РСЗВ" entries look redundant
# with the IGNORECASE patterns before them — confirm before pruning.
_POST_TRANSLATION_GLOSSARY: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"\bBpLa\b", re.IGNORECASE), "UAV"),
    (re.compile(r"\bБпЛА\b", re.IGNORECASE), "UAV"),
    (re.compile(r"\bбпла\b"), "UAV"),
    (re.compile(r"\bБПЛА\b"), "UAV"),
    (re.compile(r"\bрсзв\b", re.IGNORECASE), "MLRS"),
    (re.compile(r"\bРСЗВ\b"), "MLRS"),
)

# Human-readable names for language codes; keys are lowercase with "-" separators
# (source_lang_label normalizes its input the same way before looking up).
_SOURCE_LANG_LABELS = {
    "uk": "Ukrainian",
    "ru": "Russian",
    "en": "English",
    "ar": "Arabic",
    "he": "Hebrew",
    "zh-cn": "Chinese",
    "fr": "French",
    "de": "German",
    "pl": "Polish",
}

# In-process translation cache: key from _cache_key -> (translated, source_lang).
# All access goes through _cache_get/_cache_put, which hold _CACHE_LOCK.
_CACHE: dict[str, tuple[str, str]] = {}
_CACHE_LOCK = Lock()
# Entry count at which _cache_put starts evicting.
_CACHE_MAX = 512

# Lowercased locale -> language code sent as the translate request's target.
_LOCALE_TO_GOOGLE = {
    "en": "en",
    "fr": "fr",
    "zh-cn": "zh-CN",
    "zh": "zh-CN",
}
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_translate_enabled() -> bool:
    """Report whether translation is switched on via ``TELEGRAM_OSINT_TRANSLATE``.

    An unset variable counts as enabled. The values ``0``, ``false``, ``no``,
    ``off`` and the empty string (ignoring case and surrounding whitespace)
    disable translation; anything else enables it.
    """
    setting = str(os.environ.get("TELEGRAM_OSINT_TRANSLATE", "true")).strip().lower()
    disabled_values = {"0", "false", "no", "off", ""}
    return setting not in disabled_values
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_translate_target() -> str:
    """Return the default translation target from ``TELEGRAM_OSINT_TRANSLATE_TO``.

    The value is stripped, lowercased and has ``_`` replaced by ``-`` (the
    same normalization ``normalize_translate_target`` applies) so that a
    setting such as ``zh_CN`` resolves through ``_LOCALE_TO_GOOGLE`` instead
    of being passed through as ``zh_cn``. Unmapped values are returned as
    normalized; an unset or blank variable yields ``"en"``.
    """
    raw = str(os.environ.get("TELEGRAM_OSINT_TRANSLATE_TO", "en")).strip().lower().replace("_", "-")
    return _LOCALE_TO_GOOGLE.get(raw, raw or "en")
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_translate_target(locale: str | None) -> str:
    """Turn a UI/config locale into the target code used for translation.

    A falsy ``locale`` falls back to ``telegram_translate_target()``. The
    value is stripped, lowercased and ``_`` becomes ``-``; known locales are
    mapped through ``_LOCALE_TO_GOOGLE``, unknown ones are returned as
    normalized, and a blank result becomes ``"en"``.
    """
    requested = locale or telegram_translate_target()
    code = str(requested).strip().lower().replace("_", "-")
    if code in _LOCALE_TO_GOOGLE:
        return _LOCALE_TO_GOOGLE[code]
    return code or "en"
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_english(text: str) -> bool:
    """Heuristic: treat text as English when over 90% of its letters are ASCII.

    Text with no alphabetic characters at all is also reported as English.
    """
    total_letters = 0
    ascii_letters = 0
    for char in text:
        if not char.isalpha():
            continue
        total_letters += 1
        if ord(char) < 128:
            ascii_letters += 1
    if total_letters == 0:
        return True
    return ascii_letters / total_letters > 0.9
|
||||||
|
|
||||||
|
|
||||||
|
def contains_cyrillic(text: str) -> bool:
    """Return ``True`` when ``text`` holds at least one ``_CYRILLIC_RE`` match.

    Falsy input is treated as the empty string.
    """
    candidate = str(text or "")
    return _CYRILLIC_RE.search(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def source_lang_label(code: str | None) -> str:
    """Map a language code to a display name.

    The code is stripped, lowercased and ``_`` becomes ``-`` before lookup in
    ``_SOURCE_LANG_LABELS``. Unknown codes are shown uppercased; a missing or
    blank code yields ``"Unknown"``.
    """
    normalized = str(code or "").strip().lower().replace("_", "-")
    if not normalized:
        return "Unknown"
    return _SOURCE_LANG_LABELS.get(normalized, normalized.upper())
|
||||||
|
|
||||||
|
|
||||||
|
def polish_translation(text: str) -> str:
    """Apply every ``_POST_TRANSLATION_GLOSSARY`` substitution, then strip.

    Falsy input is treated as the empty string.
    """
    result = str(text or "")
    for matcher, canonical in _POST_TRANSLATION_GLOSSARY:
        result = matcher.sub(canonical, result)
    return result.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def guess_source_lang(text: str) -> str:
    """Guess a language code from the scripts that appear in ``text``.

    Checks run in a fixed order and the first hit wins: Ukrainian marker
    letters, any Cyrillic, Arabic, Hebrew, CJK. If none match, text that
    passes ``_looks_english`` is ``"en"``; everything else is ``"auto"``.
    """
    script_checks = (
        (_UKRAINIAN_MARKERS_RE, "uk"),
        (_CYRILLIC_RE, "ru"),
        (_ARABIC_RE, "ar"),
        (_HEBREW_RE, "he"),
        (_CJK_RE, "zh-CN"),
    )
    for pattern, lang in script_checks:
        if pattern.search(text):
            return lang
    return "en" if _looks_english(text) else "auto"
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_key(text: str, target_lang: str) -> str:
    """Return the SHA-1 hex digest of ``"<target_lang>|<text>"`` (UTF-8)."""
    payload = f"{target_lang}|{text}".encode("utf-8")
    return hashlib.sha1(payload).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_get(text: str, target_lang: str) -> tuple[str, str] | None:
    """Look up a cached ``(translated, source_lang)`` pair, or ``None`` on a miss.

    The lookup itself happens while holding ``_CACHE_LOCK``.
    """
    lookup = _cache_key(text, target_lang)
    with _CACHE_LOCK:
        hit = _CACHE.get(lookup)
    return hit
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_put(text: str, target_lang: str, translated: str, source_lang: str) -> None:
    """Store a ``(translated, source_lang)`` pair for ``text``/``target_lang``.

    When the cache already holds ``_CACHE_MAX`` entries and the key is new,
    the entry that was inserted first is dropped to make room. Overwriting an
    existing key does not grow the cache, so nothing is evicted in that case.
    The whole update happens while holding ``_CACHE_LOCK``.
    """
    key = _cache_key(text, target_lang)
    with _CACHE_LOCK:
        # Evict only when this write would actually add an entry; dicts keep
        # insertion order, so the first key is the oldest one.
        if key not in _CACHE and len(_CACHE) >= _CACHE_MAX:
            _CACHE.pop(next(iter(_CACHE)))
        _CACHE[key] = (translated, source_lang)
|
||||||
|
|
||||||
|
|
||||||
|
def _google_translate(clean: str, target: str, source: str | None = None) -> tuple[str, str]:
    """Call the unofficial Google Translate endpoint for one piece of text.

    Args:
        clean: Text to translate; only the first 4500 characters are sent.
        target: Target language code (request parameter ``tl``).
        source: Optional source language hint (``sl``); ``"auto"`` if omitted.

    Returns:
        ``(translated, detected)``. ``translated`` is the concatenated result
        chunks run through ``polish_translation``, or ``clean`` itself when
        the response has no usable chunks. ``detected`` is the lowercased
        language reported in the response (or a local guess when absent),
        with ``zh-cn``/``zh-tw`` collapsed to ``"zh-CN"``.

    Raises:
        requests.RequestException: On transport errors or a non-2xx status.
        ValueError: When the body is not the expected non-empty JSON list.
    """
    params = {
        "client": "gtx",
        "sl": source or "auto",
        "tl": target,
        "dt": "t",
        "q": clean[:4500],
    }
    url = "https://translate.googleapis.com/translate_a/single?" + urllib.parse.urlencode(params)
    resp = requests.get(
        url,
        timeout=8,
        headers={"User-Agent": "Mozilla/5.0 (compatible; Shadowbroker-Telegram-Translate/1.0)"},
    )
    resp.raise_for_status()
    data = resp.json()
    if not isinstance(data, list) or not data:
        raise ValueError("Unexpected translate response shape")

    # A short response must not throw away an otherwise usable translation:
    # fall back to the local script-based guess when no language is reported.
    reported = data[2] if len(data) > 2 else None
    detected = str(reported or guess_source_lang(clean)).strip().lower()
    if detected in {"zh-cn", "zh-tw"}:
        detected = "zh-CN"

    parts: list[str] = [str(chunk[0]) for chunk in data[0] or [] if chunk and chunk[0]]
    translated = polish_translation("".join(parts).strip() or clean)
    return translated, detected
|
||||||
|
|
||||||
|
|
||||||
|
def translate_text(text: str, target_lang: str | None = None) -> tuple[str, str]:
    """Translate text via Google Translate (unofficial client endpoint).

    Returns ``(translated_text, detected_source_lang)``.

    Blank input yields ``("", "en")``. English-looking text with an ``"en"``
    target is returned untouched without a network call. Results are cached
    per (text, target). If the detected language equals the target, the
    original text is returned. When both the input and the first result
    contain Cyrillic, the request is retried once with a locally guessed
    source language and the retry is kept if it has no Cyrillic or is longer.
    Any exception is logged and the original text is returned together with
    a locally guessed language.
    """
    source_text = str(text or "").strip()
    if not source_text:
        return "", "en"

    target = normalize_translate_target(target_lang)
    if target == "en" and _looks_english(source_text):
        return source_text, "en"

    hit = _cache_get(source_text, target)
    if hit:
        return hit

    try:
        translated, detected = _google_translate(source_text, target)
        if detected == target:
            # Already in the target language: remember that and hand back the input.
            _cache_put(source_text, target, source_text, detected)
            return source_text, detected

        if contains_cyrillic(translated) and contains_cyrillic(source_text):
            # Output still contains Cyrillic; retry with an explicit source hint.
            hint = guess_source_lang(source_text)
            if hint not in {"auto", target}:
                second_text, second_lang = _google_translate(source_text, target, hint)
                if not contains_cyrillic(second_text) or len(second_text) > len(translated):
                    translated, detected = second_text, second_lang

        _cache_put(source_text, target, translated, detected)
        return translated, detected
    except Exception as exc:
        logger.warning("Telegram translation failed: %s", exc)
        return source_text, guess_source_lang(source_text)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_post_translation(post: dict[str, Any], target_lang: str | None = None) -> dict[str, Any]:
    """Add translation fields to a Telegram OSINT post dict.

    Args:
        post: Post dict; ``description`` (or ``title`` when the description is
            blank) is the text that gets translated.
        target_lang: Desired target locale; ``None`` uses the configured default.

    Returns:
        The same ``post`` object when translation is disabled or the post has
        no text; otherwise a shallow copy carrying ``source_lang``,
        ``translate_to`` and ``source_lang_label``, plus ``title_translated``
        (first line, max 160 chars) and ``description_translated`` (max 1200
        chars) when a translation differing from the original was produced.
    """
    if not telegram_translate_enabled():
        return post

    target = normalize_translate_target(target_lang)
    description = str(post.get("description") or "").strip()
    title = str(post.get("title") or "").strip()
    full_text = description or title
    if not full_text:
        return post

    existing_translated = str(post.get("description_translated") or post.get("title_translated") or "").strip()
    if post.get("translate_to") == target and existing_translated:
        # Already translated for this target: only re-run the glossary pass.
        updated = dict(post)
        polished = polish_translation(existing_translated)
        if polished != existing_translated:
            lines = polished.split("\n", 1)
            updated["title_translated"] = lines[0][:160]
            updated["description_translated"] = polished[:1200]
        updated["source_lang_label"] = source_lang_label(str(post.get("source_lang") or ""))
        return updated

    translated_full, source_lang = translate_text(full_text, target)
    updated = dict(post)
    updated["source_lang"] = source_lang
    updated["translate_to"] = target
    updated["source_lang_label"] = source_lang_label(source_lang)

    if translated_full != full_text and source_lang != target:
        lines = translated_full.split("\n", 1)
        updated["title_translated"] = lines[0][:160]
        updated["description_translated"] = translated_full[:1200]
    else:
        # No translation for this target (text already in the target language,
        # or the lookup fell back to the original). Drop any fields left over
        # from a previous, different target so they are not presented as a
        # translation into ``target``.
        updated.pop("title_translated", None)
        updated.pop("description_translated", None)

    return updated
|
||||||
|
|
||||||
|
|
||||||
|
def apply_posts_translations(
    posts: list[dict[str, Any]],
    target_lang: str | None = None,
) -> list[dict[str, Any]]:
    """Run ``apply_post_translation`` over every post.

    Returns the input list object unchanged when translation is disabled,
    otherwise a new list of the per-post results.
    """
    if telegram_translate_enabled():
        return [apply_post_translation(item, target_lang) for item in posts]
    return posts
|
||||||
@@ -710,10 +710,10 @@ _UNIVERSAL_SEARCH_SPECS: dict[str, dict[str, Any]] = {
|
|||||||
"time_fields": ("updated_at", "timestamp"),
|
"time_fields": ("updated_at", "timestamp"),
|
||||||
},
|
},
|
||||||
"telegram_osint": {
|
"telegram_osint": {
|
||||||
"fields": ("title", "description", "source", "channel", "link"),
|
"fields": ("title", "description", "title_translated", "description_translated", "source", "channel", "link"),
|
||||||
"primary_fields": ("title", "description", "channel"),
|
"primary_fields": ("title_translated", "title", "description_translated", "description", "channel"),
|
||||||
"label_fields": ("title", "channel"),
|
"label_fields": ("title_translated", "title", "channel"),
|
||||||
"summary_fields": ("description", "source"),
|
"summary_fields": ("description_translated", "description", "source"),
|
||||||
"type_fields": ("channel", "source"),
|
"type_fields": ("channel", "source"),
|
||||||
"id_fields": ("id", "link"),
|
"id_fields": ("id", "link"),
|
||||||
"time_fields": ("published", "timestamp"),
|
"time_fields": ("published", "timestamp"),
|
||||||
@@ -2089,30 +2089,27 @@ def search_news(
|
|||||||
return {"results": out, "version": get_data_version(), "truncated": True}
|
return {"results": out, "version": get_data_version(), "truncated": True}
|
||||||
|
|
||||||
if include_telegram:
|
if include_telegram:
|
||||||
|
from services.telegram_osint_text import telegram_post_display_title, telegram_post_search_text
|
||||||
|
|
||||||
for post in _unwrap_layer_items(snap.get("telegram_osint"), "telegram_osint"):
|
for post in _unwrap_layer_items(snap.get("telegram_osint"), "telegram_osint"):
|
||||||
if not isinstance(post, dict):
|
if not isinstance(post, dict):
|
||||||
continue
|
continue
|
||||||
text = " ".join(
|
text = telegram_post_search_text(post)
|
||||||
(
|
|
||||||
_norm_text(post.get("title")),
|
|
||||||
_norm_text(post.get("description")),
|
|
||||||
_norm_text(post.get("source")),
|
|
||||||
_norm_text(post.get("channel")),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if not _text_matches_query(query_norm, text):
|
if not _text_matches_query(query_norm, text):
|
||||||
continue
|
continue
|
||||||
lat, lng = _extract_coords(post)
|
lat, lng = _extract_coords(post)
|
||||||
out.append(
|
out.append(
|
||||||
{
|
{
|
||||||
"source_layer": "telegram_osint",
|
"source_layer": "telegram_osint",
|
||||||
"title": post.get("title") or "",
|
"title": telegram_post_display_title(post),
|
||||||
"summary": post.get("description") or "",
|
"summary": post.get("description_translated") or post.get("description") or "",
|
||||||
"source": post.get("source") or post.get("channel") or "Telegram",
|
"source": post.get("source") or post.get("channel") or "Telegram",
|
||||||
"link": post.get("link") or "",
|
"link": post.get("link") or "",
|
||||||
"lat": lat,
|
"lat": lat,
|
||||||
"lng": lng,
|
"lng": lng,
|
||||||
"risk_score": post.get("risk_score"),
|
"risk_score": post.get("risk_score"),
|
||||||
|
"source_lang": post.get("source_lang"),
|
||||||
|
"source_lang_label": post.get("source_lang_label"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if len(out) >= limit:
|
if len(out) >= limit:
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
"""Telegram OSINT auto-translation."""
|
||||||
|
|
||||||
|
from services import telegram_translate
|
||||||
|
|
||||||
|
|
||||||
|
def test_guess_source_lang_detects_cyrillic():
    """Cyrillic text without Ukrainian marker letters is guessed as Russian."""
    guessed = telegram_translate.guess_source_lang("В Крым поедем несмотря ни на что")
    assert guessed == "ru"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_post_translation_skips_english(monkeypatch):
    """English posts are tagged as ``en`` and receive no translated fields."""
    monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True)
    headline = "Missile strike reported near Kyiv overnight."
    post = {"title": headline, "description": headline}

    enriched = telegram_translate.apply_post_translation(post, "en")

    assert enriched["source_lang"] == "en"
    assert "title_translated" not in enriched
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_post_translation_adds_fields(monkeypatch):
    """A translated post carries source language, target and translated title."""

    def fake_translate(text, target_lang=None):
        # Stand-in for the network call: fixed English text, Russian source.
        return (
            "We will go to Crimea no matter what. This is our homeland!",
            "ru",
        )

    monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True)
    monkeypatch.setattr(telegram_translate, "translate_text", fake_translate)
    original = "«В Крым поедем несмотря ни на что. Это наша родина!»"
    post = {"title": original, "description": original}

    enriched = telegram_translate.apply_post_translation(post, "en")

    assert enriched["source_lang"] == "ru"
    assert enriched["translate_to"] == "en"
    assert "Crimea" in enriched["title_translated"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_translate_target_maps_ui_locales():
    """UI locale strings resolve to the codes used for translation requests."""
    normalize = telegram_translate.normalize_translate_target
    assert normalize("zh-CN") == "zh-CN"
    assert normalize("fr") == "fr"
|
||||||
|
|
||||||
|
|
||||||
|
def test_source_lang_label_avoids_uk_country_confusion():
    """``uk`` is labelled as the Ukrainian language, not shown as a bare code."""
    label_for = telegram_translate.source_lang_label
    assert label_for("uk") == "Ukrainian"
    assert label_for("ru") == "Russian"
|
||||||
|
|
||||||
|
|
||||||
|
def test_polish_translation_expands_bpla_shorthand():
    """The transliterated ``BpLa`` shorthand is rewritten to ``UAV``."""
    polished = telegram_translate.polish_translation("Kyiv 1x BpLa on Rembazu.")
    assert "UAV" in polished
|
||||||
|
|
||||||
|
|
||||||
|
def test_guess_source_lang_prefers_ukrainian_markers():
    """Ukrainian-specific letters win over the generic Cyrillic guess."""
    guessed = telegram_translate.guess_source_lang("Київ 1х БпЛА")
    assert guessed == "uk"
|
||||||
@@ -93,6 +93,8 @@ services:
|
|||||||
- TELEGRAM_OSINT_ENABLED=${TELEGRAM_OSINT_ENABLED:-true}
|
- TELEGRAM_OSINT_ENABLED=${TELEGRAM_OSINT_ENABLED:-true}
|
||||||
- TELEGRAM_OSINT_CHANNELS=${TELEGRAM_OSINT_CHANNELS:-}
|
- TELEGRAM_OSINT_CHANNELS=${TELEGRAM_OSINT_CHANNELS:-}
|
||||||
- TELEGRAM_OSINT_INTERVAL_MINUTES=${TELEGRAM_OSINT_INTERVAL_MINUTES:-60}
|
- TELEGRAM_OSINT_INTERVAL_MINUTES=${TELEGRAM_OSINT_INTERVAL_MINUTES:-60}
|
||||||
|
- TELEGRAM_OSINT_TRANSLATE=${TELEGRAM_OSINT_TRANSLATE:-true}
|
||||||
|
- TELEGRAM_OSINT_TRANSLATE_TO=${TELEGRAM_OSINT_TRANSLATE_TO:-en}
|
||||||
volumes:
|
volumes:
|
||||||
- backend_data:/app/data
|
- backend_data:/app/data
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import React, { useMemo } from 'react';
|
import React, { useEffect, useMemo, useState } from 'react';
|
||||||
import { Popup } from 'react-map-gl/maplibre';
|
import { Popup } from 'react-map-gl/maplibre';
|
||||||
import { Radio } from 'lucide-react';
|
import { Radio } from 'lucide-react';
|
||||||
import { useTranslation } from '@/i18n';
|
import { useTranslation } from '@/i18n';
|
||||||
@@ -69,11 +69,58 @@ function riskTheme(rs: number) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function postHeadline(post: TelegramOsintPost): string {
|
const CYRILLIC_RE = /[\u0400-\u04FF]/;
|
||||||
return String(post.title || post.description || 'Telegram intercept').trim();
|
|
||||||
|
function containsCyrillic(text: string): boolean {
|
||||||
|
return CYRILLIC_RE.test(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function postDetail(post: TelegramOsintPost): string | null {
|
/**
 * Display name for a post's source language.
 * Uses the server-provided label when present; otherwise maps the lowercased
 * `source_lang` code, falling back to the uppercased code itself.
 */
function sourceLangLabel(post: TelegramOsintPost): string {
  if (post.source_lang_label) {
    return post.source_lang_label;
  }
  const knownLabels: Record<string, string> = {
    uk: 'Ukrainian',
    ru: 'Russian',
    en: 'English',
    ar: 'Arabic',
    he: 'Hebrew',
    'zh-cn': 'Chinese',
    fr: 'French',
    de: 'German',
    pl: 'Polish',
  };
  const langCode = String(post.source_lang || '').trim().toLowerCase();
  const label = knownLabels[langCode];
  return label ? label : langCode.toUpperCase();
}
|
||||||
|
|
||||||
|
/**
 * True when the post carries translated text that differs from its original
 * title/description (both compared after trimming).
 */
function hasTranslation(post: TelegramOsintPost): boolean {
  const translatedText = String(post.title_translated || post.description_translated || '').trim();
  if (!translatedText) {
    return false;
  }
  const originalText = String(post.title || post.description || '').trim();
  return translatedText !== originalText;
}
|
||||||
|
|
||||||
|
/**
 * Headline text for a post card.
 * Returns the first line of the translated title/description unless the
 * caller asked for the original or no translation exists; otherwise returns
 * the original title/description (or the 'Telegram intercept' placeholder).
 */
function postHeadline(post: TelegramOsintPost, showOriginal: boolean): string {
  const original = String(post.title || post.description || 'Telegram intercept').trim();
  const translated = String(post.title_translated || post.description_translated || '').trim();
  if (!showOriginal && translated) {
    return translated.split('\n', 1)[0].trim();
  }
  // NOTE(review): this branch is unreachable — its condition is a stricter
  // version of the one above, which has already returned. Removing it would
  // leave containsCyrillic unused here; confirm and clean both up together.
  if (!showOriginal && containsCyrillic(original) && translated) {
    return translated.split('\n', 1)[0].trim();
  }
  return original;
}
|
||||||
|
|
||||||
|
function postDetail(post: TelegramOsintPost, showOriginal: boolean): string | null {
|
||||||
|
if (!showOriginal && post.description_translated) {
|
||||||
|
const translatedTitle = String(post.title_translated || '').trim();
|
||||||
|
const translatedBody = String(post.description_translated || '').trim();
|
||||||
|
if (!translatedBody || translatedBody === translatedTitle) return null;
|
||||||
|
const extra = translatedBody.startsWith(translatedTitle)
|
||||||
|
? translatedBody.slice(translatedTitle.length).trim()
|
||||||
|
: translatedBody;
|
||||||
|
return extra || null;
|
||||||
|
}
|
||||||
|
|
||||||
const title = String(post.title || '').trim();
|
const title = String(post.title || '').trim();
|
||||||
const description = String(post.description || '').trim();
|
const description = String(post.description || '').trim();
|
||||||
if (!description || description === title || description.startsWith(title)) return null;
|
if (!description || description === title || description.startsWith(title)) return null;
|
||||||
@@ -126,10 +173,12 @@ function TelegramPostMedia({ post }: { post: TelegramOsintPost }) {
|
|||||||
|
|
||||||
function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
const [showOriginal, setShowOriginal] = useState(false);
|
||||||
const rs = post.risk_score ?? 1;
|
const rs = post.risk_score ?? 1;
|
||||||
const theme = riskTheme(rs);
|
const theme = riskTheme(rs);
|
||||||
const headline = postHeadline(post);
|
const translated = hasTranslation(post);
|
||||||
const detail = postDetail(post);
|
const headline = postHeadline(post, showOriginal);
|
||||||
|
const detail = postDetail(post, showOriginal);
|
||||||
const isHigh = rs >= 8;
|
const isHigh = rs >= 8;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -150,12 +199,29 @@ function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
|||||||
<p className="text-[11px] text-[var(--text-muted)] leading-relaxed whitespace-pre-wrap">{detail}</p>
|
<p className="text-[11px] text-[var(--text-muted)] leading-relaxed whitespace-pre-wrap">{detail}</p>
|
||||||
) : null}
|
) : null}
|
||||||
|
|
||||||
|
{translated && !showOriginal && post.source_lang ? (
|
||||||
|
<p className="text-[10px] text-cyan-700/80 uppercase tracking-wider">
|
||||||
|
{t('telegram.translatedFrom').replace('{lang}', sourceLangLabel(post))}
|
||||||
|
</p>
|
||||||
|
) : null}
|
||||||
|
|
||||||
<TelegramPostMedia post={post} />
|
<TelegramPostMedia post={post} />
|
||||||
|
|
||||||
<div className="flex items-center gap-1.5 mt-1 flex-wrap">
|
<div className="flex items-center gap-1.5 mt-1 flex-wrap">
|
||||||
<span className={`text-[11px] font-bold font-mono px-1.5 py-0.5 rounded-sm border ${theme.badgeClass}`}>
|
<span className={`text-[11px] font-bold font-mono px-1.5 py-0.5 rounded-sm border ${theme.badgeClass}`}>
|
||||||
{isHigh ? 'BREAKING' : `LVL: ${rs}/10`}
|
{isHigh ? 'BREAKING' : `LVL: ${rs}/10`}
|
||||||
</span>
|
</span>
|
||||||
|
{translated ? (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => setShowOriginal((prev) => !prev)}
|
||||||
|
className="text-[11px] font-mono text-cyan-600 hover:text-cyan-300 transition-colors"
|
||||||
|
>
|
||||||
|
{showOriginal
|
||||||
|
? t('telegram.showTranslation')
|
||||||
|
: t('telegram.showOriginal').replace('{lang}', sourceLangLabel(post))}
|
||||||
|
</button>
|
||||||
|
) : null}
|
||||||
{post.link ? (
|
{post.link ? (
|
||||||
<a
|
<a
|
||||||
href={post.link}
|
href={post.link}
|
||||||
@@ -172,15 +238,49 @@ function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPopupProps) {
|
export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPopupProps) {
|
||||||
const { t } = useTranslation();
|
const { t, locale } = useTranslation();
|
||||||
|
const [localizedPosts, setLocalizedPosts] = useState(posts);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
setLocalizedPosts(posts);
|
||||||
|
}, [posts]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const needsLocalizedFeed = posts.some((post) => !hasTranslation(post));
|
||||||
|
if (!needsLocalizedFeed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let cancelled = false;
|
||||||
|
const controller = new AbortController();
|
||||||
|
|
||||||
|
fetch(`/api/telegram-feed?lang=${encodeURIComponent(locale)}`, { signal: controller.signal })
|
||||||
|
.then((response) => (response.ok ? response.json() : null))
|
||||||
|
.then((payload) => {
|
||||||
|
if (cancelled || !payload?.posts) return;
|
||||||
|
const byId = new Map(
|
||||||
|
(payload.posts as TelegramOsintPost[]).map((post) => [post.id, post]),
|
||||||
|
);
|
||||||
|
setLocalizedPosts(posts.map((post) => byId.get(post.id) || post));
|
||||||
|
})
|
||||||
|
.catch(() => {
|
||||||
|
/* keep feed posts when locale translation fetch fails */
|
||||||
|
});
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
cancelled = true;
|
||||||
|
controller.abort();
|
||||||
|
};
|
||||||
|
}, [locale, posts]);
|
||||||
|
|
||||||
const sortedPosts = useMemo(
|
const sortedPosts = useMemo(
|
||||||
() =>
|
() =>
|
||||||
[...posts].sort(
|
[...localizedPosts].sort(
|
||||||
(a, b) =>
|
(a, b) =>
|
||||||
(b.risk_score ?? 0) - (a.risk_score ?? 0) ||
|
(b.risk_score ?? 0) - (a.risk_score ?? 0) ||
|
||||||
String(b.published || '').localeCompare(String(a.published || '')),
|
String(b.published || '').localeCompare(String(a.published || '')),
|
||||||
),
|
),
|
||||||
[posts],
|
[localizedPosts],
|
||||||
);
|
);
|
||||||
|
|
||||||
const maxRisk = sortedPosts[0]?.risk_score ?? 1;
|
const maxRisk = sortedPosts[0]?.risk_score ?? 1;
|
||||||
@@ -252,4 +352,4 @@ export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPo
|
|||||||
</div>
|
</div>
|
||||||
</Popup>
|
</Popup>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -273,6 +273,9 @@
|
|||||||
"loadMedia": "VIEW MEDIA (TELEGRAM)",
|
"loadMedia": "VIEW MEDIA (TELEGRAM)",
|
||||||
"openOriginal": "OPEN ON TELEGRAM →",
|
"openOriginal": "OPEN ON TELEGRAM →",
|
||||||
"embedTitle": "Telegram post embed",
|
"embedTitle": "Telegram post embed",
|
||||||
"postsAtLocation": "{count} posts at this location — scroll for more"
|
"postsAtLocation": "{count} posts at this location — scroll for more",
|
||||||
|
"translatedFrom": "Translated from {lang}",
|
||||||
|
"showOriginal": "SHOW ORIGINAL ({lang})",
|
||||||
|
"showTranslation": "SHOW TRANSLATION"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -273,6 +273,9 @@
|
|||||||
"loadMedia": "AFFICHER LE MÉDIA (TELEGRAM)",
|
"loadMedia": "AFFICHER LE MÉDIA (TELEGRAM)",
|
||||||
"openOriginal": "OUVRIR SUR TELEGRAM →",
|
"openOriginal": "OUVRIR SUR TELEGRAM →",
|
||||||
"embedTitle": "Intégration Telegram",
|
"embedTitle": "Intégration Telegram",
|
||||||
"postsAtLocation": "{count} posts à cet endroit — faites défiler"
|
"postsAtLocation": "{count} posts à cet endroit — faites défiler",
|
||||||
|
"translatedFrom": "Traduit depuis le {lang}",
|
||||||
|
"showOriginal": "AFFICHER L'ORIGINAL ({lang})",
|
||||||
|
"showTranslation": "AFFICHER LA TRADUCTION"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -273,6 +273,9 @@
|
|||||||
"loadMedia": "查看媒体(Telegram)",
|
"loadMedia": "查看媒体(Telegram)",
|
||||||
"openOriginal": "在 Telegram 打开 →",
|
"openOriginal": "在 Telegram 打开 →",
|
||||||
"embedTitle": "Telegram 帖子嵌入",
|
"embedTitle": "Telegram 帖子嵌入",
|
||||||
"postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多"
|
"postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多",
|
||||||
|
"translatedFrom": "译自{lang}",
|
||||||
|
"showOriginal": "显示原文({lang})",
|
||||||
|
"showTranslation": "显示译文"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -972,6 +972,11 @@ export interface TelegramOsintPost {
|
|||||||
id: string;
|
id: string;
|
||||||
title?: string;
|
title?: string;
|
||||||
description?: string;
|
description?: string;
|
||||||
|
title_translated?: string;
|
||||||
|
description_translated?: string;
|
||||||
|
source_lang?: string;
|
||||||
|
source_lang_label?: string;
|
||||||
|
translate_to?: string;
|
||||||
link?: string;
|
link?: string;
|
||||||
published?: string;
|
published?: string;
|
||||||
source?: string;
|
source?: string;
|
||||||
|
|||||||
Reference in New Issue
Block a user