From 63043b32b595a2689c2561cbfcc587ac607e298c Mon Sep 17 00:00:00 2001
From: BigBodyCobain <43977454+BigBodyCobain@users.noreply.github.com>
Date: Mon, 4 May 2026 12:37:23 -0600
Subject: [PATCH] Stabilize Docker startup and runtime proxy

Reduce cold-start stalls by raising the default backend memory limit, bounding heavy feed concurrency, preserving non-empty startup caches, and refreshing working news feeds. Fix the Next API proxy for Docker control-plane writes by stripping unsupported hop/body headers and forwarding small request bodies safely. Keep the dashboard dynamic so production users do not get stuck on a cached startup shell.
---
 .env.example                            |   9 ++
 README.md                               |   5 ++
 backend/config/news_feeds.json          |  30 ++++---
 backend/services/data_fetcher.py        | 109 ++++++++++++++----------
 backend/services/news_feed_config.py    |  11 ++-
 backend/tests/test_news_keywords.py     |   8 +-
 docker-compose.yml                      |   2 +-
 frontend/src/app/api/[...path]/route.ts |  16 +++-
 frontend/src/app/layout.tsx             |   6 ++
 frontend/src/middleware.ts              |   9 +-
 10 files changed, 142 insertions(+), 63 deletions(-)

diff --git a/.env.example b/.env.example
index 658087b..d4c78ea 100644
--- a/.env.example
+++ b/.env.example
@@ -58,6 +58,15 @@ ADMIN_KEY=
 # FAST_STARTUP_CACHE_MAX_AGE_S=21600
 # INTEL_STARTUP_CACHE_MAX_AGE_S=21600
 
+# Docker resource tuning. The backend synthesizes large geospatial feeds; keep
+# this at 4G or higher on hosts that run AIS, OpenSky, CCTV, satellites, and
+# threat feeds together. Lower caps can cause Docker OOM restarts and empty
+# slow layers such as news, UAP sightings, military bases, and wastewater.
+# BACKEND_MEMORY_LIMIT=4G
+# SHADOWBROKER_FETCH_WORKERS=8
+# SHADOWBROKER_SLOW_FETCH_CONCURRENCY=4
+# SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY=2
+
 # Google Earth Engine for VIIRS night lights change detection (optional).
 # pip install earthengine-api
 # GEE_SERVICE_ACCOUNT_KEY=
diff --git a/README.md b/README.md
index cd6b5cd..deaa930 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,8 @@ Open `http://localhost:3000` to view the dashboard! *(Requires [Docker Desktop](
 
 > **Backend port already in use?** The browser only needs port `3000`, but the backend API is also published on host port `8000` for local diagnostics. If another app already uses `8000`, create or edit `.env` next to `docker-compose.yml` and set `BACKEND_PORT=8001`, then run `docker compose up -d`.
 
+> **Blank news/UAP/bases/wastewater after several minutes?** Check for backend OOM restarts with `docker events --since 30m --filter container=shadowbroker-backend --filter event=oom`. The default compose file gives the backend 4GB; if your host has less memory, reduce enabled feeds or set `BACKEND_MEMORY_LIMIT=3G` and expect slower/heavier layers to warm more gradually.
+
 > **Podman users:** Podman works, but `podman compose` is a wrapper and still needs a Compose provider installed. On Windows/WSL, if you see `looking up compose provider failed`, install `podman-compose` and run `podman-compose pull` followed by `podman-compose up -d` from inside the cloned `Shadowbroker` folder. On Linux/macOS/WSL shells you can also use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d`.
 
 ---
@@ -567,6 +569,9 @@ Open `http://localhost:3000` to view the dashboard.
 > Host port `8000` is only published for local API/debug access. If it conflicts
 > with another service, set `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL`
 > as `http://backend:8000` because that is the Docker-internal port.
+> The backend memory cap is controlled by `BACKEND_MEMORY_LIMIT` and defaults
+> to `4G`. If Docker reports OOM events, the backend will restart and slow
+> layers can look empty until they repopulate.
 >
 > If your backend runs on a **different host or port**, set `BACKEND_URL` at runtime — no rebuild required:
 >
diff --git a/backend/config/news_feeds.json b/backend/config/news_feeds.json
index f1330ac..ebc06fc 100644
--- a/backend/config/news_feeds.json
+++ b/backend/config/news_feeds.json
@@ -1,15 +1,5 @@
 {
   "feeds": [
-    {
-      "name": "Reuters",
-      "url": "https://www.reutersagency.com/feed/?best-topics=world",
-      "weight": 5
-    },
-    {
-      "name": "AP News",
-      "url": "https://rsshub.app/apnews/topics/world-news",
-      "weight": 5
-    },
     {
       "name": "NPR",
       "url": "https://feeds.npr.org/1004/rss.xml",
@@ -99,6 +89,26 @@
       "name": "Japan Times",
       "url": "https://www.japantimes.co.jp/feed/",
       "weight": 3
+    },
+    {
+      "name": "CSM",
+      "url": "https://www.csmonitor.com/rss/world",
+      "weight": 4
+    },
+    {
+      "name": "PBS NewsHour",
+      "url": "https://www.pbs.org/newshour/feeds/rss/world",
+      "weight": 4
+    },
+    {
+      "name": "France 24",
+      "url": "https://www.france24.com/en/rss",
+      "weight": 4
+    },
+    {
+      "name": "DW",
+      "url": "https://rss.dw.com/xml/rss-en-world",
+      "weight": 4
     }
   ]
 }
diff --git a/backend/services/data_fetcher.py b/backend/services/data_fetcher.py
index 8b62f0e..bdbefb0 100644
--- a/backend/services/data_fetcher.py
+++ b/backend/services/data_fetcher.py
@@ -134,15 +134,22 @@ _INTEL_STARTUP_CACHE_KEYS = (
     "trending_markets",
     "correlations",
     "fimi",
+    "crowdthreat",
+    "uap_sightings",
+    "military_bases",
+    "wastewater",
 )
 _STARTUP_PRIORITY_TIMEOUT_S = float(os.environ.get("SHADOWBROKER_STARTUP_PRIORITY_TIMEOUT_S", "18"))
 _STARTUP_HEAVY_REFRESH_DELAY_S = float(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_REFRESH_DELAY_S", "90"))
 _STARTUP_HEAVY_REFRESH_STARTED = False
 _STARTUP_HEAVY_REFRESH_LOCK = threading.Lock()
+_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_FETCH_WORKERS", "8"))
+_SLOW_FETCH_CONCURRENCY = int(os.environ.get("SHADOWBROKER_SLOW_FETCH_CONCURRENCY", "4"))
+_STARTUP_HEAVY_CONCURRENCY = int(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY", "2"))
 
 # Shared thread pool — reused across all fetch cycles instead of creating/destroying per tick
 _SHARED_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
-    max_workers=20, thread_name_prefix="fetch"
+    max_workers=max(2, _FETCH_WORKERS), thread_name_prefix="fetch"
 )
 
 
@@ -156,6 +163,14 @@ def _cache_json_safe(value):
     return value
 
 
+def _has_cache_value(value) -> bool:
+    if value is None:
+        return False
+    if isinstance(value, (list, tuple, dict, set)):
+        return bool(value)
+    return True
+
+
 def _load_fast_startup_cache_if_available() -> bool:
     """Seed moving layers from a recent disk cache while live fetches warm up."""
     if _FAST_STARTUP_CACHE_MAX_AGE_S <= 0 or not _FAST_STARTUP_CACHE_PATH.exists():
@@ -200,10 +215,15 @@ def _save_fast_startup_cache() -> None:
     """Persist recent moving layers for the next cold start."""
     try:
         with _data_lock:
+            layers = {
+                key: latest_data.get(key)
+                for key in _FAST_STARTUP_CACHE_KEYS
+                if _has_cache_value(latest_data.get(key))
+            }
             payload = {
                 "cached_at": time.time(),
                 "last_updated": latest_data.get("last_updated"),
-                "layers": {key: latest_data.get(key) for key in _FAST_STARTUP_CACHE_KEYS},
+                "layers": layers,
                 "freshness": {
                     key: source_timestamps.get(key)
                     for key in _FAST_STARTUP_CACHE_KEYS
@@ -264,10 +284,15 @@ def _save_intel_startup_cache() -> None:
     """Persist compact right-side intelligence data for the next cold start."""
     try:
         with _data_lock:
+            layers = {
+                key: latest_data.get(key)
+                for key in _INTEL_STARTUP_CACHE_KEYS
+                if _has_cache_value(latest_data.get(key))
+            }
             payload = {
                 "cached_at": time.time(),
                 "last_updated": latest_data.get("last_updated"),
-                "layers": {key: latest_data.get(key) for key in _INTEL_STARTUP_CACHE_KEYS},
+                "layers": layers,
                 "freshness": {
                     key: source_timestamps.get(key)
                     for key in _INTEL_STARTUP_CACHE_KEYS
@@ -294,11 +319,27 @@ def seed_startup_caches() -> None:
 # ---------------------------------------------------------------------------
 # Scheduler & Orchestration
 # ---------------------------------------------------------------------------
-def _run_tasks(label: str, funcs: list):
+def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
     """Run tasks concurrently and log any exceptions (do not fail silently)."""
     if not funcs:
         return
-    futures = {_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter()) for func in funcs}
+    if max_concurrency is None:
+        if label.startswith("slow-tier"):
+            max_concurrency = _SLOW_FETCH_CONCURRENCY
+        elif label.startswith("startup-heavy"):
+            max_concurrency = _STARTUP_HEAVY_CONCURRENCY
+        else:
+            max_concurrency = len(funcs)
+    max_concurrency = max(1, min(max_concurrency, len(funcs)))
+
+    remaining_funcs = list(funcs)
+    while remaining_funcs:
+        batch, remaining_funcs = remaining_funcs[:max_concurrency], remaining_funcs[max_concurrency:]
+        futures = {_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter()) for func in batch}
+        _drain_task_futures(label, futures)
+
+
+def _drain_task_futures(label: str, futures: dict):
     # Iterate directly so future.result(timeout=...) is the blocking call.
     # as_completed() blocks inside __next__() waiting for completion — the timeout
     # on result() would never be reached for a hanging task under that pattern.
@@ -486,6 +527,7 @@ def update_all_data(*, startup_mode: bool = False):
         priority_funcs = [
             fetch_airports,
             update_fast_data,
+            fetch_news,
             fetch_gdelt,
             fetch_crowdthreat,
             fetch_firms_fires,
@@ -520,55 +562,36 @@ def update_all_data(*, startup_mode: bool = False):
             except Exception as e:
                 _record_fetch_failure("startup-priority", name, start, e)
         logger.info("Startup preload: deferring Playwright Liveuamap scraper to scheduled cadence")
+        _save_intel_startup_cache()
         _schedule_delayed_startup_heavy_refresh()
         logger.info("Startup priority preload complete; slow synthesis is warming in background.")
         return
-    futures = {
-        _SHARED_EXECUTOR.submit(fetch_airports): ("fetch_airports", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(update_fast_data): ("update_fast_data", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(update_slow_data): ("update_slow_data", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_volcanoes): ("fetch_volcanoes", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_viirs_change_nodes): ("fetch_viirs_change_nodes", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_unusual_whales): ("fetch_unusual_whales", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_fimi): ("fetch_fimi", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_gdelt): ("fetch_gdelt", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_uap_sightings): ("fetch_uap_sightings", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_wastewater): ("fetch_wastewater", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_crowdthreat): ("fetch_crowdthreat", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_sar_catalog): ("fetch_sar_catalog", time.perf_counter()),
-        _SHARED_EXECUTOR.submit(fetch_sar_products): ("fetch_sar_products", time.perf_counter()),
-    }
+    refresh_funcs = [
+        fetch_airports,
+        update_fast_data,
+        update_slow_data,
+        fetch_volcanoes,
+        fetch_viirs_change_nodes,
+        fetch_unusual_whales,
+        fetch_fimi,
+        fetch_gdelt,
+        fetch_uap_sightings,
+        fetch_wastewater,
+        fetch_crowdthreat,
+        fetch_sar_catalog,
+        fetch_sar_products,
+    ]
     if not startup_mode or not meshtastic_seeded:
-        futures[_SHARED_EXECUTOR.submit(fetch_meshtastic_nodes)] = (
-            "fetch_meshtastic_nodes",
-            time.perf_counter(),
-        )
+        refresh_funcs.append(fetch_meshtastic_nodes)
     else:
         logger.info(
             "Startup preload: Meshtastic cache already loaded, deferring remote map refresh to scheduled cadence"
         )
     if not startup_mode:
-        futures[_SHARED_EXECUTOR.submit(update_liveuamap)] = (
-            "update_liveuamap",
-            time.perf_counter(),
-        )
+        refresh_funcs.append(update_liveuamap)
     else:
         logger.info("Startup preload: deferring Playwright Liveuamap scraper to scheduled cadence")
-    for future, (name, start) in futures.items():
-        try:
-            future.result(timeout=_TASK_HARD_TIMEOUT_S)
-            duration = time.perf_counter() - start
-            from services.fetch_health import record_success
-
-            record_success(name, duration_s=duration)
-            if duration > _SLOW_FETCH_S:
-                logger.warning(f"full-refresh task slow: {name} took {duration:.2f}s")
-        except Exception as e:
-            duration = time.perf_counter() - start
-            from services.fetch_health import record_failure
-
-            record_failure(name, error=e, duration_s=duration)
-            logger.exception(f"full-refresh task failed: {name}")
+    _run_tasks("full-refresh", refresh_funcs, max_concurrency=_STARTUP_HEAVY_CONCURRENCY)
     # Run CCTV ingest immediately so cameras are available on first request
     # (the scheduled job also runs every 10 min for ongoing refresh).
     if startup_mode:
diff --git a/backend/services/news_feed_config.py b/backend/services/news_feed_config.py
index 3e21d3e..7f80eb6 100644
--- a/backend/services/news_feed_config.py
+++ b/backend/services/news_feed_config.py
@@ -15,6 +15,8 @@ _FEED_URL_REPLACEMENTS = {
     "https://www.channelnewsasia.com/rssfeed/8395986": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml",
 }
 _DEAD_FEED_URLS = {
+    "https://www.reutersagency.com/feed/?best-topics=world",
+    "https://rsshub.app/apnews/topics/world-news",
     "https://www3.nhk.or.jp/nhkworld/rss/world.xml",
     "https://focustaiwan.tw/rss",
     "https://english.kyodonews.net/rss/news.xml",
@@ -29,6 +31,11 @@ DEFAULT_FEEDS = [
     {"name": "AlJazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml", "weight": 2},
     {"name": "NYT", "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml", "weight": 1},
     {"name": "GDACS", "url": "https://www.gdacs.org/xml/rss.xml", "weight": 5},
+    {"name": "The War Zone", "url": "https://www.twz.com/feed", "weight": 4},
+    {"name": "Bellingcat", "url": "https://www.bellingcat.com/feed/", "weight": 4},
+    {"name": "Guardian", "url": "https://www.theguardian.com/world/rss", "weight": 3},
+    {"name": "TASS", "url": "https://tass.com/rss/v2.xml", "weight": 2},
+    {"name": "Xinhua", "url": "http://www.news.cn/english/rss/worldrss.xml", "weight": 2},
     {"name": "CNA", "url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml", "weight": 3},
     {"name": "Mercopress", "url": "https://en.mercopress.com/rss/", "weight": 3},
     {"name": "SCMP", "url": "https://www.scmp.com/rss/91/feed", "weight": 4},
@@ -73,7 +80,9 @@ def get_feeds() -> list[dict]:
                 normalised = _normalise_feeds(feeds)
                 if normalised != feeds:
                     save_feeds(normalised)
-                return normalised
+                if normalised:
+                    return normalised
+                logger.warning("News feed configuration contained no usable feeds; falling back to defaults")
     except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
         logger.warning(f"Failed to read news feed config: {e}")
     return list(DEFAULT_FEEDS)
diff --git a/backend/tests/test_news_keywords.py b/backend/tests/test_news_keywords.py
index f53a495..393d48b 100644
--- a/backend/tests/test_news_keywords.py
+++ b/backend/tests/test_news_keywords.py
@@ -145,6 +145,10 @@ class TestFeedConfig:
 
     def test_new_east_asia_feeds_present(self):
         names = {f["name"] for f in DEFAULT_FEEDS}
-        expected = {"FocusTaiwan", "Kyodo", "SCMP", "The Diplomat", "Stars and Stripes",
-                    "Yonhap", "Nikkei Asia", "Taipei Times", "Asia Times", "Defense News", "Japan Times"}
+        expected = {"SCMP", "The Diplomat", "Yonhap", "Asia Times", "Defense News", "Japan Times"}
         assert expected.issubset(names)
+
+    def test_known_dead_feeds_are_not_defaulted(self):
+        urls = {f["url"] for f in DEFAULT_FEEDS}
+        assert "https://www.reutersagency.com/feed/?best-topics=world" not in urls
+        assert "https://rsshub.app/apnews/topics/world-news" not in urls
diff --git a/docker-compose.yml b/docker-compose.yml
index 2e9cefd..1c2bf46 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -54,7 +54,7 @@ services:
     deploy:
       resources:
         limits:
-          memory: 2G
+          memory: ${BACKEND_MEMORY_LIMIT:-4G}
           cpus: '2'
 
   frontend:
diff --git a/frontend/src/app/api/[...path]/route.ts b/frontend/src/app/api/[...path]/route.ts
index a9ba365..4d84290 100644
--- a/frontend/src/app/api/[...path]/route.ts
+++ b/frontend/src/app/api/[...path]/route.ts
@@ -26,6 +26,8 @@ const STRIP_REQUEST = new Set([
   'transfer-encoding',
   'upgrade',
   'host',
+  'content-length',
+  'expect',
 ]);
 
 // Headers that must not be forwarded back to the browser.
@@ -201,9 +203,10 @@ async function proxy(req: NextRequest, pathSegments: string[]): Promise<NextResp
       cache: 'no-store',
     };
     if (!isBodyless) {
-      requestInit.body = req.body;
-      // Required for streaming request bodies in Node.js fetch
-      requestInit.duplex = 'half';
+      const body = await req.text();
+      if (body.length > 0) {
+        requestInit.body = body;
+      }
     }
     const maxAttempts = isBodyless ? 18 : 1;
     let fetchError: unknown = null;
@@ -214,6 +217,13 @@ async function proxy(req: NextRequest, pathSegments: string[]): Promise<NextResp
         break;
       } catch (error) {
         fetchError = error;
+        if (attempt >= maxAttempts) {
+          console.error('api proxy upstream fetch failed', {
+            method: req.method,
+            target: targetUrl.toString(),
+            error,
+          });
+        }
         if (attempt >= maxAttempts) break;
         await sleep(250);
       }
diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx
index 9f2254a..e2a81d2 100644
--- a/frontend/src/app/layout.tsx
+++ b/frontend/src/app/layout.tsx
@@ -8,6 +8,12 @@ export const metadata: Metadata = {
   description: 'Advanced Geopolitical Risk Dashboard',
 };
 
+// The dashboard is a live local runtime, not a static landing page. If Next
+// prerenders and caches the initial shell, Docker users can get stuck on the
+// "prioritizing map feeds" markup before client polling ever hydrates.
+export const dynamic = 'force-dynamic';
+export const revalidate = 0;
+
 export default function RootLayout({
   children,
 }: Readonly<{
diff --git a/frontend/src/middleware.ts b/frontend/src/middleware.ts
index 9dd9fea..d10fa5c 100644
--- a/frontend/src/middleware.ts
+++ b/frontend/src/middleware.ts
@@ -13,8 +13,8 @@ function buildCsp(nonce: string): string {
   const directives = [
     "default-src 'self'",
     isDev
-      ? `script-src 'self' 'unsafe-inline' 'unsafe-eval' 'nonce-${nonce}' blob:`
-      : `script-src 'self' 'unsafe-inline' 'nonce-${nonce}' blob:`,
+      ? "script-src 'self' 'unsafe-inline' 'unsafe-eval' blob:"
+      : "script-src 'self' 'unsafe-inline' blob:",
     "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com",
     "img-src 'self' data: blob: https:",
     isDev
@@ -35,7 +35,10 @@ function buildCsp(nonce: string): string {
 export function middleware(request: NextRequest) {
   const nonce = Buffer.from(crypto.randomUUID()).toString('base64');
 
-  // Forward nonce to server components via request header.
+  // Forward a nonce for future fully-wired CSP support. Do not include it in
+  // script-src until every Next inline bootstrap script receives the nonce;
+  // otherwise production hydration can fail and leave the app on the static
+  // "prioritizing map feeds" shell.
   const requestHeaders = new Headers(request.headers);
   requestHeaders.set('x-nonce', nonce);