Files
Shadowbroker/backend/services/network_utils.py
T
anoracleofra-code fc9eff865e v0.9.0: in-app auto-updater, ship toggle split, stable entity IDs, performance fixes
New features:
- In-app auto-updater with confirmation dialog, manual download fallback,
  restart polling, and protected file safety net
- Ship layers split into 4 independent toggles (Military/Carriers, Cargo/Tankers,
  Civilian, Cruise/Passenger) with per-category counts
- Stable entity IDs using MMSI/callsign instead of volatile array indices
- Dismissible threat alert bubbles (session-scoped, survives data refresh)

Performance:
- GDELT title fetching is now non-blocking (background enrichment)
- Removed duplicate startup fetch jobs
- Docker healthcheck start_period 15s → 90s

Bug fixes:
- Removed fake intelligence assessment generator (OSINT-only policy)
- Fixed carrier tracker GDELT 429/TypeError crash
- Fixed ETag collision (full payload hash)
- Added concurrent /api/refresh guard

Contributors: @imqdcr (ship split + stable IDs), @csysp (dismissible alerts, PR #48)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Former-commit-id: a2c4c67da54345393f70a9b33b52e7e4fd6c049f
2026-03-13 11:32:16 -06:00

120 lines
5.3 KiB
Python

import logging
import json
import subprocess
import shutil
import time
import requests
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
logger = logging.getLogger(__name__)
# Reusable session with connection pooling and retry logic.
# Only retry once (total=1) to fail fast — the curl fallback is the real safety net.
_session = requests.Session()
_retry = Retry(total=1, backoff_factor=0.3, status_forcelist=[502, 503, 504])
_session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
# Find bash for curl fallback — Git bash's curl has the TLS features
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
_BASH_PATH = shutil.which("bash") or "bash"
# Cache domains where requests fails — skip straight to curl for 5 minutes
_domain_fail_cache: dict[str, float] = {}
_DOMAIN_FAIL_TTL = 300 # 5 minutes
# Circuit breaker: track domains where BOTH requests AND curl fail
# If a domain failed completely within the last 2 minutes, skip it entirely
_circuit_breaker: dict[str, float] = {}
_CIRCUIT_BREAKER_TTL = 120 # 2 minutes
class _DummyResponse:
"""Minimal response object matching requests.Response interface."""
def __init__(self, status_code, text):
self.status_code = status_code
self.text = text
self.content = text.encode('utf-8', errors='replace')
def json(self):
return json.loads(self.text)
def raise_for_status(self):
if self.status_code >= 400:
raise Exception(f"HTTP {self.status_code}: {self.text[:100]}")
def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None):
"""Wrapper to bypass aggressive local firewall that blocks Python but permits curl.
Falls back to running curl through Git bash, which has the TLS features
(brotli, zstd, libpsl) needed to pass CDN fingerprint checks that block
both Python requests and the barebones Windows system curl.
"""
default_headers = {
"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard)",
}
if headers:
default_headers.update(headers)
domain = urlparse(url).netloc
# Circuit breaker: if domain failed completely <2min ago, fail fast
if domain in _circuit_breaker and (time.time() - _circuit_breaker[domain]) < _CIRCUIT_BREAKER_TTL:
raise Exception(f"Circuit breaker open for {domain} (failed <{_CIRCUIT_BREAKER_TTL}s ago)")
# Check if this domain recently failed with requests — skip straight to curl
if domain in _domain_fail_cache and (time.time() - _domain_fail_cache[domain]) < _DOMAIN_FAIL_TTL:
pass # Fall through to curl below
else:
try:
# Use a short connect timeout (3s) so firewall blocks fail fast,
# but allow the full timeout for reading the response body.
req_timeout = (min(3, timeout), timeout)
if method == "POST":
res = _session.post(url, json=json_data, timeout=req_timeout, headers=default_headers)
else:
res = _session.get(url, timeout=req_timeout, headers=default_headers)
res.raise_for_status()
# Clear failure caches on success
_domain_fail_cache.pop(domain, None)
_circuit_breaker.pop(domain, None)
return res
except (requests.RequestException, ConnectionError, TimeoutError, OSError) as e:
logger.warning(f"Python requests failed for {url} ({e}), falling back to bash curl...")
_domain_fail_cache[domain] = time.time()
# Build curl as argument list — never pass through shell to prevent injection
_CURL_PATH = shutil.which("curl") or "curl"
cmd = [_CURL_PATH, "-s", "-w", "\n%{http_code}"]
for k, v in default_headers.items():
cmd += ["-H", f"{k}: {v}"]
if method == "POST" and json_data:
cmd += ["-X", "POST", "-H", "Content-Type: application/json",
"--data-binary", "@-"]
cmd.append(url)
try:
stdin_data = json.dumps(json_data) if (method == "POST" and json_data) else None
res = subprocess.run(
cmd, capture_output=True, text=True, timeout=timeout + 5,
input=stdin_data
)
if res.returncode == 0 and res.stdout.strip():
# Parse HTTP status code from -w output (last line)
lines = res.stdout.rstrip().rsplit("\n", 1)
body = lines[0] if len(lines) > 1 else res.stdout
http_code = int(lines[-1]) if len(lines) > 1 and lines[-1].strip().isdigit() else 200
if http_code < 400:
_circuit_breaker.pop(domain, None) # Clear circuit breaker on success
return _DummyResponse(http_code, body)
else:
logger.error(f"bash curl fallback failed: exit={res.returncode} stderr={res.stderr[:200]}")
_circuit_breaker[domain] = time.time()
return _DummyResponse(500, "")
except (subprocess.SubprocessError, ConnectionError, TimeoutError, OSError) as curl_e:
logger.error(f"bash curl fallback exception: {curl_e}")
_circuit_breaker[domain] = time.time()
return _DummyResponse(500, "")