mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-17 11:30:13 +02:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c9c9a5262c |
@@ -26,6 +26,8 @@ AIS_API_KEY=
|
||||
# Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token).
|
||||
# TELEGRAM_OSINT_ENABLED=true
|
||||
# TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor
|
||||
# TELEGRAM_OSINT_TRANSLATE=true
|
||||
# TELEGRAM_OSINT_TRANSLATE_TO=en
|
||||
|
||||
# Admin key to protect sensitive endpoints (settings, updates).
|
||||
# If blank, loopback/localhost requests still work for local single-host dev.
|
||||
|
||||
@@ -14,6 +14,7 @@ from services.fetchers._store import get_latest_data_subset_refs
|
||||
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
||||
from services.intel_feeds.country_risk import build_country_risk_payload
|
||||
from services.network_utils import outbound_user_agent
|
||||
from services.telegram_translate import apply_posts_translations, normalize_translate_target
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -45,12 +46,19 @@ async def country_risk(request: Request) -> dict:
|
||||
|
||||
@router.get("/api/telegram-feed")
|
||||
@limiter.limit("30/minute")
|
||||
async def telegram_feed(request: Request) -> dict:
|
||||
async def telegram_feed(request: Request, lang: str | None = Query(default=None)) -> dict:
|
||||
snap = get_latest_data_subset_refs("telegram_osint")
|
||||
payload = snap.get("telegram_osint")
|
||||
if isinstance(payload, dict) and payload.get("posts") is not None:
|
||||
return payload
|
||||
return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
|
||||
if not isinstance(payload, dict) or payload.get("posts") is None:
|
||||
return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
|
||||
|
||||
if lang:
|
||||
target = normalize_translate_target(lang)
|
||||
localized = dict(payload)
|
||||
localized["posts"] = apply_posts_translations(list(payload.get("posts") or []), target)
|
||||
localized["translate_locale"] = target
|
||||
return localized
|
||||
return payload
|
||||
|
||||
|
||||
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import html
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
@@ -11,6 +12,7 @@ from typing import Any
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.fetchers.news import resolve_coords_match
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.telegram_translate import apply_post_translation, apply_posts_translations
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -174,13 +176,7 @@ def _extract_media(block: str, link: str) -> dict[str, Any]:
|
||||
def _strip_html(text: str) -> str:
|
||||
cleaned = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
||||
cleaned = re.sub(r"<[^>]+>", "", cleaned)
|
||||
return (
|
||||
cleaned.replace(""", '"')
|
||||
.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
.strip()
|
||||
)
|
||||
return html.unescape(cleaned).strip()
|
||||
|
||||
|
||||
def _score_risk(text: str) -> int:
|
||||
@@ -293,20 +289,19 @@ def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]]
|
||||
post_id = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
media = _extract_media(block, link)
|
||||
posts.append(
|
||||
{
|
||||
"id": post_id,
|
||||
"title": title,
|
||||
"description": text[:1200],
|
||||
"link": link,
|
||||
"published": published,
|
||||
"source": f"t.me/{channel}",
|
||||
"channel": channel,
|
||||
"risk_score": risk_score,
|
||||
"coords": [coords[0], coords[1]] if coords else None,
|
||||
**media,
|
||||
}
|
||||
)
|
||||
post = {
|
||||
"id": post_id,
|
||||
"title": title,
|
||||
"description": text[:1200],
|
||||
"link": link,
|
||||
"published": published,
|
||||
"source": f"t.me/{channel}",
|
||||
"channel": channel,
|
||||
"risk_score": risk_score,
|
||||
"coords": [coords[0], coords[1]] if coords else None,
|
||||
**media,
|
||||
}
|
||||
posts.append(apply_post_translation(post))
|
||||
return posts
|
||||
|
||||
|
||||
@@ -358,6 +353,7 @@ def fetch_telegram_osint() -> dict[str, Any]:
|
||||
|
||||
merged_posts, added = _merge_telegram_posts(existing_posts, incoming)
|
||||
merged_posts = [_refresh_post_coords(post) for post in merged_posts]
|
||||
merged_posts = apply_posts_translations(merged_posts)
|
||||
geolocated = sum(1 for p in merged_posts if p.get("coords"))
|
||||
|
||||
payload = {
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
"""Shared Telegram OSINT post text helpers for search and watchdog matching."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from services.telegram_translate import source_lang_label
|
||||
|
||||
|
||||
def iter_telegram_posts(layer_payload: Any) -> list[dict[str, Any]]:
    """Return the post dicts contained in a telegram_osint layer payload.

    The payload may be a bare list of posts or a mapping whose ``"posts"``
    key holds that list. Entries that are not dicts are dropped, and any
    other payload shape yields an empty list.
    """
    candidates = layer_payload.get("posts") if isinstance(layer_payload, dict) else layer_payload
    if not isinstance(candidates, list):
        return []
    return [entry for entry in candidates if isinstance(entry, dict)]
|
||||
|
||||
|
||||
def telegram_post_search_text(post: dict[str, Any]) -> str:
    """Return the lowercase text that keyword searches are matched against.

    Translated title/description come first, followed by the original
    title/description, the source and the channel. Falsy or whitespace-only
    fields are left out; the rest are stripped and joined with single spaces.
    """
    field_names = (
        "title_translated",
        "description_translated",
        "title",
        "description",
        "source",
        "channel",
    )
    pieces: list[str] = []
    for field_name in field_names:
        value = post.get(field_name)
        # Falsy values (None, "", 0) and whitespace-only strings are skipped.
        if not str(value or "").strip():
            continue
        pieces.append(str(value).strip())
    return " ".join(pieces).lower()
|
||||
|
||||
|
||||
def telegram_post_display_title(post: dict[str, Any]) -> str:
    """Pick the headline shown in alerts and agent-facing summaries.

    The first line of the translated title (or translated description) wins
    when one exists; otherwise the original title or description is used.
    The result is capped at 200 characters either way.
    """
    translated = post.get("title_translated") or post.get("description_translated") or ""
    headline = str(translated).strip()
    if headline:
        first_line, _, _ = headline.partition("\n")
        return first_line[:200]
    original = post.get("title") or post.get("description") or ""
    return str(original).strip()[:200]
|
||||
|
||||
|
||||
def telegram_post_match_entry(post: dict[str, Any]) -> dict[str, Any]:
    """Build the compact match record used by watchdog alerts and search results.

    ``lat``/``lng`` come from the first two items of ``post["coords"]`` when
    that value is a list or tuple with at least two entries, and are ``None``
    otherwise. The language label stored on the post is reused when present;
    if it is missing, it is derived from ``source_lang``.
    """
    coords = post.get("coords")
    has_pair = isinstance(coords, (list, tuple)) and len(coords) >= 2
    lat, lng = (coords[0], coords[1]) if has_pair else (None, None)

    lang_code = post.get("source_lang")
    entry: dict[str, Any] = {
        "source": "telegram_osint",
        "title": telegram_post_display_title(post),
        "original_title": str(post.get("title") or "").strip(),
        "url": post.get("link") or "",
        "channel": post.get("channel") or post.get("source") or "",
        "risk_score": post.get("risk_score"),
        "source_lang": lang_code,
        "source_lang_label": post.get("source_lang_label") or source_lang_label(lang_code),
        "lat": lat,
        "lng": lng,
        "id": post.get("id") or post.get("link") or "",
    }
    return entry
|
||||
|
||||
|
||||
def keyword_matches_telegram_post(post: dict[str, Any], keyword: str) -> bool:
    """Report whether ``keyword`` occurs in the post's searchable text.

    The keyword is stripped and lowercased before matching; an empty keyword
    never matches.
    """
    needle = str(keyword or "").strip().lower()
    return bool(needle) and needle in telegram_post_search_text(post)
|
||||
@@ -0,0 +1,243 @@
|
||||
"""Auto-translation for Telegram OSINT post text (server-side, cached)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
from threading import Lock
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CYRILLIC_RE = re.compile(r"[\u0400-\u04FF]")
|
||||
_UKRAINIAN_MARKERS_RE = re.compile(r"[іїєґІЇЄҐ]")
|
||||
_ARABIC_RE = re.compile(r"[\u0600-\u06FF]")
|
||||
_HEBREW_RE = re.compile(r"[\u0590-\u05FF]")
|
||||
_CJK_RE = re.compile(r"[\u4e00-\u9fff]")
|
||||
|
||||
# Common war-reporting shorthand that machine translation often transliterates.
|
||||
_POST_TRANSLATION_GLOSSARY: tuple[tuple[re.Pattern[str], str], ...] = (
|
||||
(re.compile(r"\bBpLa\b", re.IGNORECASE), "UAV"),
|
||||
(re.compile(r"\bБпЛА\b", re.IGNORECASE), "UAV"),
|
||||
(re.compile(r"\bбпла\b"), "UAV"),
|
||||
(re.compile(r"\bБПЛА\b"), "UAV"),
|
||||
(re.compile(r"\bрсзв\b", re.IGNORECASE), "MLRS"),
|
||||
(re.compile(r"\bРСЗВ\b"), "MLRS"),
|
||||
)
|
||||
|
||||
_SOURCE_LANG_LABELS = {
|
||||
"uk": "Ukrainian",
|
||||
"ru": "Russian",
|
||||
"en": "English",
|
||||
"ar": "Arabic",
|
||||
"he": "Hebrew",
|
||||
"zh-cn": "Chinese",
|
||||
"fr": "French",
|
||||
"de": "German",
|
||||
"pl": "Polish",
|
||||
}
|
||||
|
||||
_CACHE: dict[str, tuple[str, str]] = {}
|
||||
_CACHE_LOCK = Lock()
|
||||
_CACHE_MAX = 512
|
||||
|
||||
_LOCALE_TO_GOOGLE = {
|
||||
"en": "en",
|
||||
"fr": "fr",
|
||||
"zh-cn": "zh-CN",
|
||||
"zh": "zh-CN",
|
||||
}
|
||||
|
||||
|
||||
def telegram_translate_enabled() -> bool:
    """Report whether auto-translation is switched on.

    Reads ``TELEGRAM_OSINT_TRANSLATE`` (default ``"true"``). The feature is off
    when the trimmed, lowercased value is ``0``, ``false``, ``no``, ``off`` or
    empty; any other value leaves it on.
    """
    flag = str(os.environ.get("TELEGRAM_OSINT_TRANSLATE", "true")).strip().lower()
    disabled_values = ("0", "false", "no", "off", "")
    return flag not in disabled_values
|
||||
|
||||
|
||||
def telegram_translate_target() -> str:
    """Return the default translation target configured for the deployment.

    Reads ``TELEGRAM_OSINT_TRANSLATE_TO`` (default ``"en"``), lowercases it and
    maps it through ``_LOCALE_TO_GOOGLE``. Codes that are not in the mapping
    are returned as normalised; an empty value falls back to ``"en"``.
    """
    raw = str(os.environ.get("TELEGRAM_OSINT_TRANSLATE_TO", "en")).strip().lower()
    # Accept POSIX-style locales (``zh_CN``) the same way
    # normalize_translate_target does, so both entry points agree.
    raw = raw.replace("_", "-")
    return _LOCALE_TO_GOOGLE.get(raw, raw or "en")
|
||||
|
||||
|
||||
def normalize_translate_target(locale: str | None) -> str:
    """Turn a UI locale (or ``None``) into the language code sent to the translator.

    A falsy ``locale`` falls back to ``telegram_translate_target()``. The value
    is trimmed, lowercased and has ``_`` replaced with ``-`` before the
    ``_LOCALE_TO_GOOGLE`` lookup. Unmapped codes pass through in that
    normalised form, and an empty result becomes ``"en"``.
    """
    requested = locale or telegram_translate_target()
    code = str(requested).strip().lower().replace("_", "-")
    if code in _LOCALE_TO_GOOGLE:
        return _LOCALE_TO_GOOGLE[code]
    return code or "en"
|
||||
|
||||
|
||||
def _looks_english(text: str) -> bool:
    """Heuristic: true when more than 90% of the letters in ``text`` are ASCII.

    Text without any alphabetic character counts as English. The check only
    looks at the script, so ASCII-dominant text in any language passes it.
    """
    letter_total = 0
    ascii_total = 0
    for char in text:
        if not char.isalpha():
            continue
        letter_total += 1
        if ord(char) < 128:
            ascii_total += 1
    if letter_total == 0:
        return True
    return ascii_total / letter_total > 0.9
|
||||
|
||||
|
||||
def contains_cyrillic(text: str) -> bool:
    """Report whether ``text`` has at least one character matched by ``_CYRILLIC_RE``.

    ``None`` and other falsy inputs are treated as an empty string.
    """
    candidate = str(text or "")
    return _CYRILLIC_RE.search(candidate) is not None
|
||||
|
||||
|
||||
def source_lang_label(code: str | None) -> str:
    """Map a language code to a human-readable name.

    The code is trimmed, lowercased and has ``_`` replaced with ``-`` before
    the ``_SOURCE_LANG_LABELS`` lookup. Codes missing from the table are
    returned uppercased, and an empty code yields ``"Unknown"``.
    """
    normalized = str(code or "").strip().lower().replace("_", "-")
    if not normalized:
        return "Unknown"
    return _SOURCE_LANG_LABELS.get(normalized, normalized.upper())
|
||||
|
||||
|
||||
def polish_translation(text: str) -> str:
    """Apply the glossary substitutions to translated text and strip it.

    Each ``(pattern, replacement)`` pair in ``_POST_TRANSLATION_GLOSSARY`` is
    applied in order. Falsy input is treated as an empty string.
    """
    result = str(text or "")
    for shorthand, expansion in _POST_TRANSLATION_GLOSSARY:
        result = shorthand.sub(expansion, result)
    return result.strip()
|
||||
|
||||
|
||||
def guess_source_lang(text: str) -> str:
    """Guess a language code for ``text`` from the scripts it contains.

    The checks run in a fixed order and the first hit wins: Ukrainian-specific
    letters give ``"uk"``, any other Cyrillic ``"ru"``, then Arabic ``"ar"``,
    Hebrew ``"he"`` and CJK ideographs ``"zh-CN"``. If no script matches, the
    result is ``"en"`` when the text is ASCII-dominant and ``"auto"`` otherwise.
    """
    # Order matters: Ukrainian markers are themselves Cyrillic, so they must
    # be tested before the generic Cyrillic pattern.
    script_checks = (
        (_UKRAINIAN_MARKERS_RE, "uk"),
        (_CYRILLIC_RE, "ru"),
        (_ARABIC_RE, "ar"),
        (_HEBREW_RE, "he"),
        (_CJK_RE, "zh-CN"),
    )
    for pattern, lang_code in script_checks:
        if pattern.search(text):
            return lang_code
    return "en" if _looks_english(text) else "auto"
|
||||
|
||||
|
||||
def _cache_key(text: str, target_lang: str) -> str:
    """Return the SHA-1 hex digest of ``"<target_lang>|<text>"``, used as the cache key."""
    payload = f"{target_lang}|{text}".encode("utf-8")
    return hashlib.sha1(payload).hexdigest()
|
||||
|
||||
|
||||
def _cache_get(text: str, target_lang: str) -> tuple[str, str] | None:
    """Look up a cached ``(translated, source_lang)`` pair, or ``None`` on a miss.

    The lookup runs while holding ``_CACHE_LOCK``.
    """
    with _CACHE_LOCK:
        return _CACHE.get(_cache_key(text, target_lang))
|
||||
|
||||
|
||||
def _cache_put(text: str, target_lang: str, translated: str, source_lang: str) -> None:
    """Store a translation result under the key for ``(text, target_lang)``.

    When the cache already holds ``_CACHE_MAX`` entries, the first key in dict
    iteration order is dropped before the insert. All of this happens while
    holding ``_CACHE_LOCK``.
    """
    key = _cache_key(text, target_lang)
    entry = (translated, source_lang)
    with _CACHE_LOCK:
        if len(_CACHE) >= _CACHE_MAX:
            first_key = next(iter(_CACHE))
            del _CACHE[first_key]
        _CACHE[key] = entry
|
||||
|
||||
|
||||
def _google_translate(clean: str, target: str, source: str | None = None) -> tuple[str, str]:
    """Translate ``clean`` via the ``translate.googleapis.com`` ``gtx`` endpoint.

    Args:
        clean: Text to translate; only the first 4500 characters are sent.
        target: Language code passed as the ``tl`` query parameter.
        source: Optional source-language hint passed as ``sl``; ``"auto"``
            when omitted.

    Returns:
        ``(translated_text, detected_source_lang)``. The translated text has
        been run through ``polish_translation`` and falls back to ``clean``
        when the response carries no translated segments. The detected
        language is lowercased, except that ``zh-cn`` / ``zh-tw`` are both
        reported as ``"zh-CN"``.

    Raises:
        Whatever ``requests.get``, ``raise_for_status`` or ``json`` raise; the
        caller is expected to handle failures.
    """
    params = {
        "client": "gtx",
        "sl": source or "auto",
        "tl": target,
        "dt": "t",
        "q": clean[:4500],
    }
    url = "https://translate.googleapis.com/translate_a/single?" + urllib.parse.urlencode(params)
    resp = requests.get(
        url,
        timeout=8,
        headers={"User-Agent": "Mozilla/5.0 (compatible; Shadowbroker-Telegram-Translate/1.0)"},
    )
    resp.raise_for_status()
    data = resp.json()

    # The detected language is read from index 2. If the response is shorter
    # than that, fall back to the local guess instead of raising and throwing
    # away a usable translation.
    reported = data[2] if len(data) > 2 else None
    detected = str(reported or guess_source_lang(clean)).strip().lower()
    if detected in {"zh-cn", "zh-tw"}:
        detected = "zh-CN"

    # Each chunk's first item is a translated segment; empty chunks are skipped.
    parts = [str(chunk[0]) for chunk in data[0] or [] if chunk and chunk[0]]
    translated = polish_translation("".join(parts).strip() or clean)
    return translated, detected
|
||||
|
||||
|
||||
def translate_text(text: str, target_lang: str | None = None) -> tuple[str, str]:
    """Translate ``text`` with the unofficial Google Translate client endpoint.

    Returns ``(translated_text, detected_source_lang)``. Empty input returns
    ``("", "en")``, and ASCII-dominant text with an English target is returned
    untouched without a network call. Results are cached per
    ``(text, target)``. If anything fails, the original text is returned along
    with a locally guessed language.
    """
    source_text = str(text or "").strip()
    if not source_text:
        return "", "en"

    destination = normalize_translate_target(target_lang)
    if destination == "en" and _looks_english(source_text):
        return source_text, "en"

    hit = _cache_get(source_text, destination)
    if hit:
        return hit

    try:
        rendered, language = _google_translate(source_text, destination)
        if language == destination:
            # Already in the target language: remember the untouched text.
            _cache_put(source_text, destination, source_text, language)
            return (source_text, language)

        if contains_cyrillic(rendered) and contains_cyrillic(source_text):
            # Cyrillic survived the first pass; retry with an explicit
            # source-language hint and keep whichever result looks better.
            hint = guess_source_lang(source_text)
            if hint not in {"auto", destination}:
                second_text, second_language = _google_translate(source_text, destination, hint)
                if len(second_text) > len(rendered) or not contains_cyrillic(second_text):
                    rendered, language = second_text, second_language

        _cache_put(source_text, destination, rendered, language)
        return (rendered, language)
    except Exception as exc:
        logger.warning("Telegram translation failed: %s", exc)
        return source_text, guess_source_lang(source_text)
|
||||
|
||||
|
||||
def apply_post_translation(post: dict[str, Any], target_lang: str | None = None) -> dict[str, Any]:
    """Return ``post`` enriched with translation fields.

    The input dict is never mutated. It is returned as-is when translation is
    disabled or the post has neither a description nor a title; otherwise a
    shallow copy is returned. A post that already carries a translation for
    the requested target is only re-polished, with no new translation call.
    ``title_translated`` / ``description_translated`` are added only when the
    translated text differs from the original and the detected language is not
    the target.
    """
    if not telegram_translate_enabled():
        return post

    target = normalize_translate_target(target_lang)
    full_text = str(post.get("description") or "").strip() or str(post.get("title") or "").strip()
    if not full_text:
        return post

    def _store_translation(record: dict[str, Any], rendered: str) -> None:
        # Title is the first line (max 160 chars); body is capped at 1200.
        record["title_translated"] = rendered.split("\n", 1)[0][:160]
        record["description_translated"] = rendered[:1200]

    enriched = dict(post)
    previous = str(post.get("description_translated") or post.get("title_translated") or "").strip()
    if previous and post.get("translate_to") == target:
        repolished = polish_translation(previous)
        if repolished != previous:
            _store_translation(enriched, repolished)
        enriched["source_lang_label"] = source_lang_label(str(post.get("source_lang") or ""))
        return enriched

    translated_full, detected = translate_text(full_text, target)
    enriched["source_lang"] = detected
    enriched["translate_to"] = target
    enriched["source_lang_label"] = source_lang_label(detected)
    if detected != target and translated_full != full_text:
        _store_translation(enriched, translated_full)
    return enriched
|
||||
|
||||
|
||||
def apply_posts_translations(
    posts: list[dict[str, Any]],
    target_lang: str | None = None,
) -> list[dict[str, Any]]:
    """Run ``apply_post_translation`` over every post.

    Returns the input list object unchanged when translation is disabled;
    otherwise returns a new list.
    """
    if telegram_translate_enabled():
        return [apply_post_translation(item, target_lang) for item in posts]
    return posts
|
||||
@@ -710,10 +710,10 @@ _UNIVERSAL_SEARCH_SPECS: dict[str, dict[str, Any]] = {
|
||||
"time_fields": ("updated_at", "timestamp"),
|
||||
},
|
||||
"telegram_osint": {
|
||||
"fields": ("title", "description", "source", "channel", "link"),
|
||||
"primary_fields": ("title", "description", "channel"),
|
||||
"label_fields": ("title", "channel"),
|
||||
"summary_fields": ("description", "source"),
|
||||
"fields": ("title", "description", "title_translated", "description_translated", "source", "channel", "link"),
|
||||
"primary_fields": ("title_translated", "title", "description_translated", "description", "channel"),
|
||||
"label_fields": ("title_translated", "title", "channel"),
|
||||
"summary_fields": ("description_translated", "description", "source"),
|
||||
"type_fields": ("channel", "source"),
|
||||
"id_fields": ("id", "link"),
|
||||
"time_fields": ("published", "timestamp"),
|
||||
@@ -2089,30 +2089,27 @@ def search_news(
|
||||
return {"results": out, "version": get_data_version(), "truncated": True}
|
||||
|
||||
if include_telegram:
|
||||
from services.telegram_osint_text import telegram_post_display_title, telegram_post_search_text
|
||||
|
||||
for post in _unwrap_layer_items(snap.get("telegram_osint"), "telegram_osint"):
|
||||
if not isinstance(post, dict):
|
||||
continue
|
||||
text = " ".join(
|
||||
(
|
||||
_norm_text(post.get("title")),
|
||||
_norm_text(post.get("description")),
|
||||
_norm_text(post.get("source")),
|
||||
_norm_text(post.get("channel")),
|
||||
)
|
||||
)
|
||||
text = telegram_post_search_text(post)
|
||||
if not _text_matches_query(query_norm, text):
|
||||
continue
|
||||
lat, lng = _extract_coords(post)
|
||||
out.append(
|
||||
{
|
||||
"source_layer": "telegram_osint",
|
||||
"title": post.get("title") or "",
|
||||
"summary": post.get("description") or "",
|
||||
"title": telegram_post_display_title(post),
|
||||
"summary": post.get("description_translated") or post.get("description") or "",
|
||||
"source": post.get("source") or post.get("channel") or "Telegram",
|
||||
"link": post.get("link") or "",
|
||||
"lat": lat,
|
||||
"lng": lng,
|
||||
"risk_score": post.get("risk_score"),
|
||||
"source_lang": post.get("source_lang"),
|
||||
"source_lang_label": post.get("source_lang_label"),
|
||||
}
|
||||
)
|
||||
if len(out) >= limit:
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
"""Telegram OSINT auto-translation."""
|
||||
|
||||
from services import telegram_translate
|
||||
|
||||
|
||||
def test_guess_source_lang_detects_cyrillic():
    """Cyrillic text without Ukrainian-specific letters is classified as Russian."""
    sample = "В Крым поедем несмотря ни на что"
    guessed = telegram_translate.guess_source_lang(sample)
    assert guessed == "ru"
|
||||
|
||||
|
||||
def test_apply_post_translation_skips_english(monkeypatch):
    """English posts are tagged with ``source_lang`` but get no translated fields."""
    monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True)
    headline = "Missile strike reported near Kyiv overnight."
    post = {"title": headline, "description": headline}

    enriched = telegram_translate.apply_post_translation(post, "en")

    assert enriched["source_lang"] == "en"
    assert "title_translated" not in enriched
|
||||
|
||||
|
||||
def test_apply_post_translation_adds_fields(monkeypatch):
    """A translated Russian post gains source/target metadata and a translated title."""

    def fake_translate_text(text, target_lang=None):
        # Stand-in for the network-backed translator.
        return (
            "We will go to Crimea no matter what. This is our homeland!",
            "ru",
        )

    monkeypatch.setattr(telegram_translate, "telegram_translate_enabled", lambda: True)
    monkeypatch.setattr(telegram_translate, "translate_text", fake_translate_text)

    original = "«В Крым поедем несмотря ни на что. Это наша родина!»"
    post = {"title": original, "description": original}

    enriched = telegram_translate.apply_post_translation(post, "en")

    assert enriched["source_lang"] == "ru"
    assert enriched["translate_to"] == "en"
    assert "Crimea" in enriched["title_translated"]
|
||||
|
||||
|
||||
def test_normalize_translate_target_maps_ui_locales():
    """UI locale codes are mapped to the codes the translator expects."""
    normalize = telegram_translate.normalize_translate_target
    assert normalize("zh-CN") == "zh-CN"
    assert normalize("fr") == "fr"
|
||||
|
||||
|
||||
def test_source_lang_label_avoids_uk_country_confusion():
    """``uk`` is labelled as the Ukrainian language, not read as a country code."""
    label_for = telegram_translate.source_lang_label
    assert label_for("uk") == "Ukrainian"
    assert label_for("ru") == "Russian"
|
||||
|
||||
|
||||
def test_polish_translation_expands_bpla_shorthand():
    """The transliterated shorthand ``BpLa`` is rewritten to ``UAV``."""
    polished = telegram_translate.polish_translation("Kyiv 1x BpLa on Rembazu.")
    assert "UAV" in polished
|
||||
|
||||
|
||||
def test_guess_source_lang_prefers_ukrainian_markers():
    """Ukrainian-specific letters take precedence over the generic Cyrillic check."""
    guessed = telegram_translate.guess_source_lang("Київ 1х БпЛА")
    assert guessed == "uk"
|
||||
@@ -93,6 +93,8 @@ services:
|
||||
- TELEGRAM_OSINT_ENABLED=${TELEGRAM_OSINT_ENABLED:-true}
|
||||
- TELEGRAM_OSINT_CHANNELS=${TELEGRAM_OSINT_CHANNELS:-}
|
||||
- TELEGRAM_OSINT_INTERVAL_MINUTES=${TELEGRAM_OSINT_INTERVAL_MINUTES:-60}
|
||||
- TELEGRAM_OSINT_TRANSLATE=${TELEGRAM_OSINT_TRANSLATE:-true}
|
||||
- TELEGRAM_OSINT_TRANSLATE_TO=${TELEGRAM_OSINT_TRANSLATE_TO:-en}
|
||||
volumes:
|
||||
- backend_data:/app/data
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use client';
|
||||
|
||||
import React, { useMemo } from 'react';
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import { Popup } from 'react-map-gl/maplibre';
|
||||
import { Radio } from 'lucide-react';
|
||||
import { useTranslation } from '@/i18n';
|
||||
@@ -69,11 +69,58 @@ function riskTheme(rs: number) {
|
||||
};
|
||||
}
|
||||
|
||||
function postHeadline(post: TelegramOsintPost): string {
|
||||
return String(post.title || post.description || 'Telegram intercept').trim();
|
||||
const CYRILLIC_RE = /[\u0400-\u04FF]/;
|
||||
|
||||
/** Returns true when `text` contains at least one character matched by CYRILLIC_RE. */
function containsCyrillic(text: string): boolean {
  const firstMatchIndex = text.search(CYRILLIC_RE);
  return firstMatchIndex !== -1;
}
|
||||
|
||||
function postDetail(post: TelegramOsintPost): string | null {
|
||||
/**
 * Human-readable name of the post's source language.
 *
 * Prefers the label supplied on the post. Otherwise the `source_lang` code is
 * trimmed, lowercased and has `_` replaced with `-` before the lookup. Codes
 * missing from the table are shown uppercased, and a missing code yields
 * "Unknown" rather than an empty string, so the "{lang}" placeholder in UI
 * strings is never left blank.
 */
function sourceLangLabel(post: TelegramOsintPost): string {
  if (post.source_lang_label) return post.source_lang_label;

  const code = String(post.source_lang || '')
    .trim()
    .toLowerCase()
    .replace(/_/g, '-');
  const labels: Record<string, string> = {
    uk: 'Ukrainian',
    ru: 'Russian',
    en: 'English',
    ar: 'Arabic',
    he: 'Hebrew',
    'zh-cn': 'Chinese',
    fr: 'French',
    de: 'German',
    pl: 'Polish',
  };
  if (!code) return 'Unknown';
  return labels[code] || code.toUpperCase();
}
|
||||
|
||||
/**
 * True when the post carries translated text that differs from the original.
 * Compares the trimmed translated title (or description) with the trimmed
 * original title (or description).
 */
function hasTranslation(post: TelegramOsintPost): boolean {
  const translatedText = String(post.title_translated || post.description_translated || '').trim();
  if (!translatedText) {
    return false;
  }
  const originalText = String(post.title || post.description || '').trim();
  return translatedText !== originalText;
}
|
||||
|
||||
/**
 * Headline text for a post card.
 *
 * Shows the first line of the translated title (or translated description)
 * unless the user asked for the original or no translation exists. In those
 * cases it falls back to the original title or description, and to
 * "Telegram intercept" when both are empty.
 */
function postHeadline(post: TelegramOsintPost, showOriginal: boolean): string {
  const translated = String(post.title_translated || post.description_translated || '').trim();
  // One check is enough: any available translation is preferred regardless of
  // the script of the original text.
  if (!showOriginal && translated) {
    return translated.split('\n', 1)[0].trim();
  }
  return String(post.title || post.description || 'Telegram intercept').trim();
}
|
||||
|
||||
function postDetail(post: TelegramOsintPost, showOriginal: boolean): string | null {
|
||||
if (!showOriginal && post.description_translated) {
|
||||
const translatedTitle = String(post.title_translated || '').trim();
|
||||
const translatedBody = String(post.description_translated || '').trim();
|
||||
if (!translatedBody || translatedBody === translatedTitle) return null;
|
||||
const extra = translatedBody.startsWith(translatedTitle)
|
||||
? translatedBody.slice(translatedTitle.length).trim()
|
||||
: translatedBody;
|
||||
return extra || null;
|
||||
}
|
||||
|
||||
const title = String(post.title || '').trim();
|
||||
const description = String(post.description || '').trim();
|
||||
if (!description || description === title || description.startsWith(title)) return null;
|
||||
@@ -126,10 +173,12 @@ function TelegramPostMedia({ post }: { post: TelegramOsintPost }) {
|
||||
|
||||
function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
||||
const { t } = useTranslation();
|
||||
const [showOriginal, setShowOriginal] = useState(false);
|
||||
const rs = post.risk_score ?? 1;
|
||||
const theme = riskTheme(rs);
|
||||
const headline = postHeadline(post);
|
||||
const detail = postDetail(post);
|
||||
const translated = hasTranslation(post);
|
||||
const headline = postHeadline(post, showOriginal);
|
||||
const detail = postDetail(post, showOriginal);
|
||||
const isHigh = rs >= 8;
|
||||
|
||||
return (
|
||||
@@ -150,12 +199,29 @@ function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
||||
<p className="text-[11px] text-[var(--text-muted)] leading-relaxed whitespace-pre-wrap">{detail}</p>
|
||||
) : null}
|
||||
|
||||
{translated && !showOriginal && post.source_lang ? (
|
||||
<p className="text-[10px] text-cyan-700/80 uppercase tracking-wider">
|
||||
{t('telegram.translatedFrom').replace('{lang}', sourceLangLabel(post))}
|
||||
</p>
|
||||
) : null}
|
||||
|
||||
<TelegramPostMedia post={post} />
|
||||
|
||||
<div className="flex items-center gap-1.5 mt-1 flex-wrap">
|
||||
<span className={`text-[11px] font-bold font-mono px-1.5 py-0.5 rounded-sm border ${theme.badgeClass}`}>
|
||||
{isHigh ? 'BREAKING' : `LVL: ${rs}/10`}
|
||||
</span>
|
||||
{translated ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowOriginal((prev) => !prev)}
|
||||
className="text-[11px] font-mono text-cyan-600 hover:text-cyan-300 transition-colors"
|
||||
>
|
||||
{showOriginal
|
||||
? t('telegram.showTranslation')
|
||||
: t('telegram.showOriginal').replace('{lang}', sourceLangLabel(post))}
|
||||
</button>
|
||||
) : null}
|
||||
{post.link ? (
|
||||
<a
|
||||
href={post.link}
|
||||
@@ -172,15 +238,49 @@ function TelegramPostCard({ post }: { post: TelegramOsintPost }) {
|
||||
}
|
||||
|
||||
export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPopupProps) {
|
||||
const { t } = useTranslation();
|
||||
const { t, locale } = useTranslation();
|
||||
const [localizedPosts, setLocalizedPosts] = useState(posts);
|
||||
|
||||
useEffect(() => {
|
||||
setLocalizedPosts(posts);
|
||||
}, [posts]);
|
||||
|
||||
useEffect(() => {
|
||||
const needsLocalizedFeed = posts.some((post) => !hasTranslation(post));
|
||||
if (!needsLocalizedFeed) {
|
||||
return;
|
||||
}
|
||||
|
||||
let cancelled = false;
|
||||
const controller = new AbortController();
|
||||
|
||||
fetch(`/api/telegram-feed?lang=${encodeURIComponent(locale)}`, { signal: controller.signal })
|
||||
.then((response) => (response.ok ? response.json() : null))
|
||||
.then((payload) => {
|
||||
if (cancelled || !payload?.posts) return;
|
||||
const byId = new Map(
|
||||
(payload.posts as TelegramOsintPost[]).map((post) => [post.id, post]),
|
||||
);
|
||||
setLocalizedPosts(posts.map((post) => byId.get(post.id) || post));
|
||||
})
|
||||
.catch(() => {
|
||||
/* keep feed posts when locale translation fetch fails */
|
||||
});
|
||||
|
||||
return () => {
|
||||
cancelled = true;
|
||||
controller.abort();
|
||||
};
|
||||
}, [locale, posts]);
|
||||
|
||||
const sortedPosts = useMemo(
|
||||
() =>
|
||||
[...posts].sort(
|
||||
[...localizedPosts].sort(
|
||||
(a, b) =>
|
||||
(b.risk_score ?? 0) - (a.risk_score ?? 0) ||
|
||||
String(b.published || '').localeCompare(String(a.published || '')),
|
||||
),
|
||||
[posts],
|
||||
[localizedPosts],
|
||||
);
|
||||
|
||||
const maxRisk = sortedPosts[0]?.risk_score ?? 1;
|
||||
@@ -252,4 +352,4 @@ export function TelegramOsintPopup({ posts, lat, lng, onClose }: TelegramOsintPo
|
||||
</div>
|
||||
</Popup>
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -273,6 +273,9 @@
|
||||
"loadMedia": "VIEW MEDIA (TELEGRAM)",
|
||||
"openOriginal": "OPEN ON TELEGRAM →",
|
||||
"embedTitle": "Telegram post embed",
|
||||
"postsAtLocation": "{count} posts at this location — scroll for more"
|
||||
"postsAtLocation": "{count} posts at this location — scroll for more",
|
||||
"translatedFrom": "Translated from {lang}",
|
||||
"showOriginal": "SHOW ORIGINAL ({lang})",
|
||||
"showTranslation": "SHOW TRANSLATION"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -273,6 +273,9 @@
|
||||
"loadMedia": "AFFICHER LE MÉDIA (TELEGRAM)",
|
||||
"openOriginal": "OUVRIR SUR TELEGRAM →",
|
||||
"embedTitle": "Intégration Telegram",
|
||||
"postsAtLocation": "{count} posts à cet endroit — faites défiler"
|
||||
"postsAtLocation": "{count} posts à cet endroit — faites défiler",
|
||||
"translatedFrom": "Traduit depuis le {lang}",
|
||||
"showOriginal": "AFFICHER L'ORIGINAL ({lang})",
|
||||
"showTranslation": "AFFICHER LA TRADUCTION"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -273,6 +273,9 @@
|
||||
"loadMedia": "查看媒体(Telegram)",
|
||||
"openOriginal": "在 Telegram 打开 →",
|
||||
"embedTitle": "Telegram 帖子嵌入",
|
||||
"postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多"
|
||||
"postsAtLocation": "此位置 {count} 条帖子 — 向下滚动查看更多",
|
||||
"translatedFrom": "译自{lang}",
|
||||
"showOriginal": "显示原文({lang})",
|
||||
"showTranslation": "显示译文"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -972,6 +972,11 @@ export interface TelegramOsintPost {
|
||||
id: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
title_translated?: string;
|
||||
description_translated?: string;
|
||||
source_lang?: string;
|
||||
source_lang_label?: string;
|
||||
translate_to?: string;
|
||||
link?: string;
|
||||
published?: string;
|
||||
source?: string;
|
||||
|
||||
Reference in New Issue
Block a user