diff --git a/.gitignore b/.gitignore index 852e30e..9dbd10d 100644 --- a/.gitignore +++ b/.gitignore @@ -98,6 +98,9 @@ backend/data/* # Issue #201: pinned SHA-256 digests for known Tor Expert Bundle URLs. # Used as a second verification source when upstream .sha256sum fails. !backend/data/tor_bundle_digests.json +# Issue #258: SPKI pins for stream.aisstream.io so we can survive upstream +# Let's Encrypt renewal failures without disabling TLS validation entirely. +!backend/data/aisstream_spki_pins.json # OS generated files .DS_Store diff --git a/backend/ais_proxy.js b/backend/ais_proxy.js index 1e52433..fd081bd 100644 --- a/backend/ais_proxy.js +++ b/backend/ais_proxy.js @@ -1,5 +1,37 @@ +// AIS Stream WebSocket proxy. +// +// Reads AIS_API_KEY from argv or env, opens a wss:// connection to +// stream.aisstream.io, subscribes for vessel position reports inside the +// active map bounding boxes, and pipes JSON messages to stdout for the +// Python backend to ingest. +// +// Issue #258 — SPKI pinning fallback for upstream cert outages +// ------------------------------------------------------------- +// AISStream uses Let's Encrypt and their renewal pipeline has been observed +// to fail (cert expired on 2026-05-20). The naive fix the issue reporter +// applied — passing { rejectUnauthorized: false } — turns off TLS validation +// entirely, which lets any network attacker MITM the WebSocket and inject +// fake ship positions onto the operator's map. Same class as the GDELT +// plaintext-HTTP MITM issue (#199). +// +// Instead, when the normal TLS handshake fails with CERT_HAS_EXPIRED, we +// do a custom TLS connection that ignores ONLY the expiry check, capture +// the leaf certificate, and compare its public-key SPKI hash against a +// pinned list (backend/data/aisstream_spki_pins.json). If the SPKI matches, +// the upstream is still the genuine AISStream — just with an expired cert — +// and we proceed in "degraded TLS" mode. 
If the SPKI does not match, we
+// refuse the connection and log loudly: an actual MITM is in progress.
+//
+// The pin only survives a certificate renewal if AISStream's ACME client
+// reuses its private key (certbot, for one, needs --reuse-key for that), so
+// the pin list MUST be updated before the pinned key is rotated upstream.
+
 const WebSocket = require('ws');
 const readline = require('readline');
+const fs = require('fs');
+const path = require('path');
+const tls = require('tls');
+const crypto = require('crypto');
 
 const args = process.argv.slice(2);
 const API_KEY = args[0] || process.env.AIS_API_KEY;
@@ -9,6 +41,197 @@ if (!API_KEY) {
     process.exit(1);
 }
 
+// ── SPKI pin support (issue #258) ─────────────────────────────────────────
+
+const AIS_HOST = 'stream.aisstream.io';
+const AIS_PORT = 443;
+const AIS_WS_URL = `wss://${AIS_HOST}/v0/stream`;
+
+// Canonical in-repo pin file (the Docker backend image layout).
+const DEFAULT_PIN_FILE = path.join(__dirname, 'data', 'aisstream_spki_pins.json');
+
+// Pin file lookup order — the first readable file with at least one pin wins:
+//   1. $SHADOWBROKER_AIS_PINS, when set (operator override / relocated layout)
+//   2. DEFAULT_PIN_FILE (Docker backend image)
+//   3. next to this script (Tauri desktop runtime)
+// An unset env var is filtered out, so indices shift by one when it is absent.
+const PIN_FILE_CANDIDATES = [
+    process.env.SHADOWBROKER_AIS_PINS || '',
+    DEFAULT_PIN_FILE,
+    path.join(__dirname, 'aisstream_spki_pins.json'),
+].filter(Boolean);
+
+// Embedded fallback. Used when no external pin file is reachable so the
+// SPKI fallback still works on minimal/portable installs. The external
+// file (when present) takes priority so operators can update pins without
+// needing a new build.
+const EMBEDDED_PINS = {
+    [AIS_HOST]: [
+        // Captured 2026-05-20 from AISStream's leaf cert (Let's Encrypt R12).
+        // Replace when AISStream rotates server keys.
+        'GJ10H0UPgLrO+2d3ZXROR/TXSVFXKUfRC3QEI2ibEg4=',
+    ],
+};
+
+let aisDegradedMode = false; // surfaced via stdout status_query marker
+
+/**
+ * Load the SPKI pin list for AIS_HOST.
+ *
+ * Candidates are tried in PIN_FILE_CANDIDATES order and the first file that
+ * yields at least one non-empty pin wins. A missing file is skipped
+ * silently; a file that exists but cannot be read or parsed is reported on
+ * stderr so a broken pin file is not mistaken for an absent one.
+ *
+ * @returns {string[]} Base64 SHA-256 SPKI pins, or the embedded fallback
+ *     list (possibly empty) when no external file provides any.
+ */
+function loadSpkiPins() {
+    for (const candidate of PIN_FILE_CANDIDATES) {
+        try {
+            const parsed = JSON.parse(fs.readFileSync(candidate, 'utf-8'));
+            const pins = parsed && Array.isArray(parsed[AIS_HOST]) ? parsed[AIS_HOST] : [];
+            // Trim BEFORE dropping empties so a whitespace-only entry cannot
+            // survive as an empty-string pin.
+            const cleaned = pins
+                .filter((p) => typeof p === 'string')
+                .map((p) => p.trim())
+                .filter((p) => p.length > 0);
+            if (cleaned.length > 0) {
+                return cleaned;
+            }
+        } catch (e) {
+            // ENOENT only means this layout is not in use; anything else
+            // (bad JSON, permissions) is worth telling the operator about.
+            if (!e || e.code !== 'ENOENT') {
+                console.error(
+                    `[AIS Proxy] Ignoring unusable SPKI pin file ${candidate}: ${e && e.message}`
+                );
+            }
+        }
+    }
+    const embedded = (EMBEDDED_PINS[AIS_HOST] || []).slice();
+    if (embedded.length > 0) {
+        console.error(
+            '[AIS Proxy] No external SPKI pin file found; using embedded fallback. ' +
+            `(Set SHADOWBROKER_AIS_PINS or drop ${DEFAULT_PIN_FILE} to override.)`
+        );
+    }
+    return embedded;
+}
+
+/**
+ * Compute the base64 SHA-256 pin of a peer certificate's public key.
+ *
+ * The pin format is the hash of the DER-encoded SubjectPublicKeyInfo, so the
+ * SPKI is re-derived from the raw certificate (peerCert.raw). The legacy
+ * peerCert.pubkey field is only a fallback: NOTE(review) for EC keys it
+ * appears to hold the bare curve point rather than SPKI DER, which would
+ * never match an openssl-generated pin — confirm against the Node.js docs.
+ *
+ * @param {object} peerCert Result of tls.TLSSocket#getPeerCertificate(true).
+ * @returns {string|null} Base64 digest, or null when no key is available.
+ */
+function spkiHashFromPeerCert(peerCert) {
+    if (!peerCert) return null;
+    let spkiDer = null;
+    if (peerCert.raw && typeof crypto.X509Certificate === 'function') {
+        try {
+            spkiDer = new crypto.X509Certificate(peerCert.raw)
+                .publicKey.export({ type: 'spki', format: 'der' });
+        } catch (e) {
+            spkiDer = null; // unparsable cert: fall back to the legacy field
+        }
+    }
+    if (!spkiDer) spkiDer = peerCert.pubkey || null;
+    if (!spkiDer) return null;
+    return crypto.createHash('sha256').update(spkiDer).digest('base64');
+}
+
+// Probe the upstream when normal TLS failed with CERT_HAS_EXPIRED. We open
+// a raw TLS connection with rejectUnauthorized=false ONLY to inspect the
+// leaf cert; we do NOT use this socket for the actual WebSocket traffic.
+// Resolves (never rejects) with { ok: true, hash } when expiry is the only
+// verification failure and the leaf SPKI is pinned, and with
+// { ok: false, reason } otherwise.
+function verifyExpiredCertAgainstPins() {
+    return new Promise((resolve) => {
+        const pins = loadSpkiPins();
+        if (pins.length === 0) {
+            resolve({ ok: false, reason: 'no SPKI pins configured' });
+            return;
+        }
+        let sock = null;
+        let settled = false;
+        // Single exit: report only the first verdict and always release the
+        // probe socket, whichever of connect / timeout / error fires first.
+        const finish = (verdict) => {
+            if (settled) return;
+            settled = true;
+            if (sock) sock.destroy();
+            resolve(verdict);
+        };
+        sock = tls.connect(
+            {
+                host: AIS_HOST,
+                port: AIS_PORT,
+                servername: AIS_HOST,
+                // Allow the handshake to complete despite the expired cert
+                // so we can inspect the leaf. We do NOT trust this connection
+                // for any application data.
+                rejectUnauthorized: false,
+            },
+            () => {
+                const peer = sock.getPeerCertificate(true);
+                if (!peer || Object.keys(peer).length === 0) {
+                    finish({ ok: false, reason: 'no peer certificate returned' });
+                    return;
+                }
+                // rejectUnauthorized=false waives EVERY verification failure,
+                // so insist that expiry is the one that was actually waived.
+                if (!sock.authorized && sock.authorizationError !== 'CERT_HAS_EXPIRED') {
+                    finish({
+                        ok: false,
+                        reason: `unexpected TLS verification error (${sock.authorizationError})`,
+                    });
+                    return;
+                }
+                // Check every name on the cert (SANs included) rather than the
+                // subject CN alone, which newer certificates may omit.
+                const identityError = tls.checkServerIdentity(AIS_HOST, peer);
+                if (identityError) {
+                    finish({
+                        ok: false,
+                        reason: `cert name mismatch (${identityError.message})`,
+                    });
+                    return;
+                }
+                const hash = spkiHashFromPeerCert(peer);
+                if (!hash) {
+                    finish({ ok: false, reason: 'could not compute SPKI hash from peer cert' });
+                    return;
+                }
+                if (pins.includes(hash)) {
+                    finish({ ok: true, hash });
+                } else {
+                    finish({
+                        ok: false,
+                        reason: `SPKI ${hash} not in pin list (possible MITM)`,
+                    });
+                }
+            },
+        );
+        sock.setTimeout(10000, () => {
+            finish({ ok: false, reason: 'TLS probe timeout' });
+        });
+        sock.on('error', (err) => {
+            finish({ ok: false, reason: `TLS probe error: ${err.message}` });
+        });
+    });
+}
+
+// ── Subscription state ───────────────────────────────────────────────────
+
 // Start with global coverage, until frontend updates it
 let currentBboxes = [[[-90, -180], [90, 180]]];
 let activeWs = null;
@@ -42,14 +265,34 @@ rl.on('line', (line) => {
             currentBboxes = cmd.bboxes;
             if (activeWs) sendSub(activeWs); // Resend subscription (swap and replace)
         }
+        if (cmd.type === "status_query") {
+            // Allow the Python side to probe degraded-mode state by sending
+            // {"type": "status_query"} on stdin. Reply on stdout as a marker.
+            process.stdout.write(JSON.stringify({
+                __ais_proxy_status: { degraded_tls: aisDegradedMode }
+            }) + '\n');
+        }
     } catch (e) {}
 });
 
-function connect() {
-    const ws = new WebSocket('wss://stream.aisstream.io/v0/stream');
+function attachWsHandlers(ws, { degraded = false, shouldReconnect = () => true } = {}) {
     activeWs = ws;
 
     ws.on('open', () => {
+        // Publish the TLS mode on stdout at every (re)connect: the backend
+        // can only report degraded TLS if it is actually told about it.
+        if (degraded) {
+            console.error(
+                '[AIS Proxy] Connected in DEGRADED TLS MODE — upstream cert is expired ' +
+                'but SPKI matches the pinned key, so identity is still verified. ' +
+                'AISStream needs to renew their cert; until then MITM protection ' +
+                'depends only on the SPKI match. Watch backend logs for resolution.'
+            );
+        } else if (aisDegradedMode) {
+            console.error('[AIS Proxy] Reconnected with full TLS validation — degraded mode cleared.');
+        }
+        aisDegradedMode = Boolean(degraded);
+        emitProxyStatus();
         sendSub(ws);
     });
 
@@ -61,14 +242,119 @@ function connect() {
     });
 
     ws.on('error', (err) => {
-        console.error("WebSocket Proxy Error:", err.message);
+        console.error('WebSocket Proxy Error:', err.message);
     });
 
     ws.on('close', () => {
-        activeWs = null;
-        console.error("WebSocket Proxy Closed. Reconnecting in 5s...");
+        // A stale socket closing late must not clear its successor.
+        if (activeWs === ws) activeWs = null;
+        // The expired-cert fallback schedules its own next attempt; a second
+        // timer here would open one more parallel connection per cycle.
+        if (!shouldReconnect()) return;
+        console.error('WebSocket Proxy Closed. Reconnecting in 5s...');
         setTimeout(connect, 5000);
     });
 }
 
+/**
+ * Write the current TLS mode to stdout as a status marker line, using the
+ * same marker shape as the reply to a "status_query" command on stdin.
+ */
+function emitProxyStatus() {
+    process.stdout.write(JSON.stringify({
+        __ais_proxy_status: { degraded_tls: aisDegradedMode }
+    }) + '\n');
+}
+
+/**
+ * Open the WebSocket in degraded TLS mode after a successful pin probe.
+ *
+ * The probe ran on a separate connection, so the pin is re-checked on the
+ * socket that actually carries the stream before anything is sent on it;
+ * otherwise an attacker could pass the probe through untouched and
+ * intercept only this second, unvalidated connection.
+ *
+ * @param {string} pinnedHash SPKI hash that the probe matched against the pins.
+ */
+function connectDegraded(pinnedHash) {
+    const insecureWs = new WebSocket(AIS_WS_URL, {
+        rejectUnauthorized: false,
+    });
+    // 'upgrade' fires when the handshake response arrives, before 'open'.
+    // NOTE(review): relies on ws honouring terminate() from this listener —
+    // confirm for the ws version this project pins.
+    insecureWs.on('upgrade', (res) => {
+        const tlsSock = (res && res.socket) || {};
+        const expiryOnly = tlsSock.authorized ||
+            tlsSock.authorizationError === 'CERT_HAS_EXPIRED';
+        const peer = typeof tlsSock.getPeerCertificate === 'function'
+            ? tlsSock.getPeerCertificate(true)
+            : null;
+        const hash = spkiHashFromPeerCert(peer);
+        if (expiryOnly && hash === pinnedHash) return;
+        console.error(
+            `[AIS Proxy] Degraded connection presented SPKI ${hash} ` +
+            `(TLS error: ${tlsSock.authorizationError}), expected pinned ` +
+            `${pinnedHash}. Dropping it — possible MITM.`
+        );
+        insecureWs.terminate();
+    });
+    attachWsHandlers(insecureWs, { degraded: true });
+}
+
+/**
+ * Connect with full TLS validation, falling back to the SPKI-pinned
+ * degraded mode only when the handshake fails with CERT_HAS_EXPIRED.
+ */
+function connect() {
+    // Path A: normal TLS validation (the 99.9% case). If this succeeds we
+    // never touch the SPKI fallback.
+    const ws = new WebSocket(AIS_WS_URL);
+
+    let openedOk = false;
+    let fallbackActive = false;
+    ws.on('open', () => { openedOk = true; });
+
+    // Registered before attachWsHandlers() so fallbackActive is already set
+    // when the generic 'close' handler asks whether it should reconnect.
+    ws.on('error', (err) => {
+        // Only the CERT_HAS_EXPIRED case triggers SPKI verification. Any
+        // other TLS or network error gets the standard reconnect path so we
+        // don't accidentally cover up legitimate problems.
+        if (openedOk || !err || err.code !== 'CERT_HAS_EXPIRED') return;
+        fallbackActive = true;
+        console.error(
+            '[AIS Proxy] Upstream certificate is expired. Verifying SPKI ' +
+            'against pinned keys before deciding whether to proceed in ' +
+            'degraded mode...'
+        );
+        verifyExpiredCertAgainstPins().then((verdict) => {
+            if (verdict.ok) {
+                console.error(
+                    `[AIS Proxy] SPKI ${verdict.hash} matches pinned key — ` +
+                    'identity is verified, proceeding in DEGRADED TLS mode.'
+                );
+                connectDegraded(verdict.hash);
+                return;
+            }
+            console.error(
+                `[AIS Proxy] SPKI verification FAILED (${verdict.reason}). ` +
+                'Refusing to connect — this would normally indicate an active ' +
+                'MITM attack. If AISStream rotated their server key, update ' +
+                'backend/data/aisstream_spki_pins.json with the new SPKI hash.'
+            );
+            // Schedule a retry — operator may have updated the pin file.
+            setTimeout(connect, 60000);
+        }).catch((probeErr) => {
+            // Never leave the proxy without a pending reconnect.
+            console.error('[AIS Proxy] SPKI fallback failed unexpectedly:', probeErr.message);
+            setTimeout(connect, 60000);
+        });
+    });
+
+    // Generic handlers log every error and reconnect on close, except while
+    // the expired-cert fallback above owns the next attempt.
+    attachWsHandlers(ws, { degraded: false, shouldReconnect: () => !fallbackActive });
+}
+
 connect();
diff --git a/backend/data/aisstream_spki_pins.json b/backend/data/aisstream_spki_pins.json
new file mode 100644
index 0000000..6d82740
--- /dev/null
+++ b/backend/data/aisstream_spki_pins.json
@@ -0,0 +1,31 @@
+{
+  "_comment": [
+    "SPKI (Subject Public Key Info) pin list for stream.aisstream.io.",
+    "",
+    "Issue #258: AISStream's Let's Encrypt cert expired on 2026-05-20 due to an",
+    "upstream renewal-pipeline failure. Disabling TLS verification entirely",
+    "would let any network attacker MITM the AIS WebSocket and inject fake",
+    "ship positions onto the operator's map (same class as #199 GDELT MITM).",
+    "Instead we pin the leaf certificate's public-key SPKI hash: if normal",
+    "TLS validation fails specifically with CERT_HAS_EXPIRED, ais_proxy.js",
+    "re-checks the leaf cert's SPKI against this list.
A match means the", + "key is still the genuine AISStream key (this holds across renewals only", + "if AISStream's ACME client reuses its private key), so we proceed in 'degraded TLS'", + "mode. A mismatch means either a MITM attempt or a key rotation, so we refuse the connection.", + "", + "Format: each entry is a SHA-256 hash of the DER-encoded SPKI bytes,", + "encoded as standard base64 (matches the format produced by:", + " openssl s_client -connect host:443 | \\", + " openssl x509 -pubkey -noout | openssl pkey -pubin -outform DER | \\", + " openssl dgst -sha256 -binary | openssl base64", + ").", + "", + "When AISStream rotates their server key (possible on any renewal unless", + "their ACME client reuses the key), capture the new SPKI and add it to", + "this list BEFORE removing the old one. That way operators on the old", + "code still validate against the previous key during the transition." + ], + "stream.aisstream.io": [ + "GJ10H0UPgLrO+2d3ZXROR/TXSVFXKUfRC3QEI2ibEg4=" + ] +} diff --git a/backend/routers/health.py b/backend/routers/health.py index edd48d9..3e1b78e 100644 --- a/backend/routers/health.py +++ b/backend/routers/health.py @@ -54,6 +54,22 @@ async def health_check(request: Request): top_status = "error" elif slo_summary.get("yellow", 0) > 0: top_status = "degraded" + + # Issue #258: surface AIS proxy degraded TLS state so operators can see + # when the SPKI-pinned fallback is in effect. The data plane keeps + # flowing (this is by design — see ais_proxy.js comments) but observers + # who care about MITM-protection posture deserve a visible signal. + ais_status: dict = {} + try: + from services.ais_stream import ais_proxy_status + ais_status = ais_proxy_status() or {} + except Exception: + ais_status = {} + if ais_status.get("degraded_tls") and top_status == "ok": + # Don't override a worse top-level status if SLOs already failed, + # but escalate ok -> degraded so the field surfaces in dashboards. 
+ top_status = "degraded" + return { "status": top_status, "version": _get_app_version(), @@ -76,6 +92,7 @@ async def health_check(request: Request): "uptime_seconds": round(_time_mod.time() - _get_start_time()), "slo": slo_statuses, "slo_summary": slo_summary, + "ais_proxy": ais_status, } diff --git a/backend/services/ais_stream.py b/backend/services/ais_stream.py index 5e1ce1b..b5f1973 100644 --- a/backend/services/ais_stream.py +++ b/backend/services/ais_stream.py @@ -344,9 +344,26 @@ _vessels_lock = threading.Lock() _ws_thread: threading.Thread | None = None _ws_running = False _proxy_process = None +# Issue #258: latest status snapshot emitted by ais_proxy.js. Populated when +# the proxy reports e.g. {"__ais_proxy_status": {"degraded_tls": true}} on +# stdout, which it does when it falls back to the SPKI-pinned insecure-date +# path during an upstream cert outage. Surfaced via ais_proxy_status() for +# /api/health. +_proxy_status: dict = {} _VESSEL_TRAIL_INTERVAL_S = 120 _VESSEL_TRAIL_MAX_POINTS = 240 + +def ais_proxy_status() -> dict: + """Return a copy of the latest ais_proxy.js status (issue #258). + + Currently surfaces ``degraded_tls`` (bool) which is true when the + proxy is using SPKI-pinned fallback because AISStream's cert expired. + Returns an empty dict when no status has been received yet. + """ + with _vessels_lock: + return dict(_proxy_status) + import os CACHE_FILE = os.path.join(os.path.dirname(__file__), "ais_cache.json") @@ -608,6 +625,18 @@ def _ais_stream_loop(): logger.error(f"AIS Stream error: {data['error']}") continue + # Issue #258: ais_proxy.js emits status markers (e.g. + # {"__ais_proxy_status": {"degraded_tls": true}}) when the + # SPKI-pinned fallback is in use. We snapshot the latest + # status so the backend can expose it on /api/health. 
+ if isinstance(data, dict) and "__ais_proxy_status" in data: + status = data.get("__ais_proxy_status") or {} + if isinstance(status, dict): + with _vessels_lock: + _proxy_status.clear() + _proxy_status.update(status) + continue + msg_type = data.get("MessageType", "") metadata = data.get("MetaData", {}) message = data.get("Message", {}) diff --git a/backend/services/schemas.py b/backend/services/schemas.py index 27011d8..30160bb 100644 --- a/backend/services/schemas.py +++ b/backend/services/schemas.py @@ -14,6 +14,11 @@ class HealthResponse(BaseModel): # ({status, age_s, row_count, slo, stale, empty, description}). slo: Optional[Dict[str, Any]] = None slo_summary: Optional[Dict[str, int]] = None + # Issue #258: AIS proxy status — currently exposes ``degraded_tls`` + # (bool), true when ais_proxy.js fell back to the SPKI-pinned + # insecure-date path because the upstream Let's Encrypt cert is + # expired. Empty dict / null means no status reported yet. + ais_proxy: Optional[Dict[str, Any]] = None class RefreshResponse(BaseModel): diff --git a/backend/tests/test_ais_spki_pinning.py b/backend/tests/test_ais_spki_pinning.py new file mode 100644 index 0000000..179cd7c --- /dev/null +++ b/backend/tests/test_ais_spki_pinning.py @@ -0,0 +1,118 @@ +"""Issue #258 — AIS proxy SPKI pinning. + +Most of the SPKI logic lives in ``backend/ais_proxy.js`` (Node) and can't +be unit-tested from Python directly. These tests cover the Python-side +glue: ``services.ais_stream.ais_proxy_status()`` (the snapshot the proxy +populates via stdout markers) and ``routers/health.py`` surfacing the +degraded TLS state. + +Additionally, the pin-file structure is validated: it must be parseable +JSON, must contain an entry for ``stream.aisstream.io``, and each pin +must look like a base64-encoded SHA-256 hash. 
+""" +import base64 +import json +import re +from pathlib import Path + +import pytest + +from services import ais_stream + +PIN_FILE = ( + Path(__file__).resolve().parent.parent / "data" / "aisstream_spki_pins.json" +) + + +def test_pin_file_exists_and_is_valid_json(): + assert PIN_FILE.exists(), f"Expected pin file at {PIN_FILE}" + data = json.loads(PIN_FILE.read_text(encoding="utf-8")) + assert isinstance(data, dict) + + +def test_pin_file_has_aisstream_entry(): + data = json.loads(PIN_FILE.read_text(encoding="utf-8")) + pins = data.get("stream.aisstream.io") + assert isinstance(pins, list) + assert len(pins) >= 1 + + +def test_each_pin_looks_like_a_base64_sha256(): + """SPKI pins must be 44-char base64-encoded SHA-256 digests.""" + data = json.loads(PIN_FILE.read_text(encoding="utf-8")) + pins = data["stream.aisstream.io"] + for pin in pins: + assert isinstance(pin, str), f"pin not a string: {pin!r}" + assert len(pin) == 44, f"pin {pin!r} not 44 chars (SHA-256 base64)" + # Must base64-decode to exactly 32 bytes (256 bits) + try: + raw = base64.b64decode(pin) + except Exception as exc: + pytest.fail(f"pin {pin!r} is not valid base64: {exc}") + assert len(raw) == 32, f"pin {pin!r} decodes to {len(raw)} bytes, expected 32" + # Should match the canonical base64 alphabet (no URL-safe variants) + assert re.match(r"^[A-Za-z0-9+/]+=*$", pin), f"pin {pin!r} contains non-base64 chars" + + +def test_ais_proxy_status_starts_empty(): + """Before the proxy emits any status marker, the snapshot is empty.""" + # Clear any stale state from other tests + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + status = ais_stream.ais_proxy_status() + assert status == {} + + +def test_ais_proxy_status_returns_copy_not_reference(): + """ais_proxy_status() must return a defensive copy. + + Otherwise a caller could mutate the live dict and confuse later reads. 
+ """ + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + ais_stream._proxy_status["degraded_tls"] = True + + snapshot = ais_stream.ais_proxy_status() + assert snapshot == {"degraded_tls": True} + snapshot["degraded_tls"] = False # mutate the returned copy + + # Original should be untouched + re_snapshot = ais_stream.ais_proxy_status() + assert re_snapshot == {"degraded_tls": True} + + # Cleanup so other tests start clean + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + + +def test_health_includes_ais_proxy_field(client): + """The /api/health response must include the ais_proxy block.""" + # Inject a known degraded state + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + ais_stream._proxy_status["degraded_tls"] = True + + response = client.get("/api/health") + assert response.status_code == 200 + payload = response.json() + + assert "ais_proxy" in payload + assert payload["ais_proxy"] == {"degraded_tls": True} + # Top-level status should escalate from ok to degraded when AIS is + # in degraded-TLS mode (unless SLOs already report worse). + assert payload["status"] in {"degraded", "error"} + + # Cleanup + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + + +def test_health_ais_proxy_field_when_no_status(client): + """When the proxy hasn't reported anything yet, ais_proxy is empty.""" + with ais_stream._vessels_lock: + ais_stream._proxy_status.clear() + + response = client.get("/api/health") + assert response.status_code == 200 + payload = response.json() + assert payload.get("ais_proxy") == {}