fix(uap): weekly live NUFORC refresh with 7-day cache for operators

Each install pulls a rolling ~60-day window of sightings from nuforc.org every
Monday; the disk cache TTL matches that weekly cadence, so users keep current
pins across restarts.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
BigBodyCobain
2026-06-03 18:41:28 -06:00
parent 5f322b0a79
commit 3ac8442e4b
6 changed files with 34 additions and 15 deletions
+2
View File
@@ -70,7 +70,9 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
# FINANCIAL_ENABLED=false
#
# NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days).
# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs.
# NUFORC_RECENT_DAYS=60
# NUFORC_CACHE_TTL_HOURS=168
# On Windows, live scrape uses Python requests by default; optional:
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true
# NUFORC enrichment index (HF dataset) is separate — opt-in only:
+5 -3
View File
@@ -993,17 +993,19 @@ def start_scheduler():
misfire_grace_time=600,
)
# UAP sightings (NUFORC) — daily at 12:00 UTC. Rolling ~60-day window from
# live nuforc.org; disk cache is re-validated on every read.
# UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window;
# each self-hosted install pulls live nuforc.org so operators see current
# reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d.
_scheduler.add_job(
lambda: _run_task_with_health(
lambda: fetch_uap_sightings(force_refresh=True),
"fetch_uap_sightings",
),
"cron",
day_of_week="mon",
hour=12,
minute=0,
id="uap_sightings_daily",
id="uap_sightings_weekly",
max_instances=1,
misfire_grace_time=3600,
)
+12 -5
View File
@@ -701,6 +701,12 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1
# practice, so a 0.3s spacing keeps us well under any soft throttle while
# still rebuilding a full 12-month window in ~10 minutes.
_NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3"))
# Disk cache TTL — match the weekly scheduler so restarts between fetches still
# serve the same rolling 60-day snapshot without hammering nuforc.org daily.
_NUFORC_CACHE_TTL_S = max(
3600,
int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600,
)
_NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
_NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json"
_NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json"
@@ -832,7 +838,7 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] |
built_dt = datetime.fromisoformat(built) if built else None
if built_dt is None:
return None
if (datetime.utcnow() - built_dt).total_seconds() > 86400:
if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S:
return None
if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS:
logger.info(
@@ -1646,11 +1652,12 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
@with_retry(max_retries=1, base_delay=5)
def fetch_uap_sightings(*, force_refresh: bool = False):
"""Fetch last-year UAP sightings from NUFORC.
"""Fetch rolling-window UAP sightings from live NUFORC.
Startup reads the cached daily snapshot when it is still fresh. The daily
scheduler forces a rebuild so this layer updates once per day instead of
churning continuously.
Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS
(default 168h / one week). The weekly scheduler forces a rebuild so every
install refreshes the same ~60-day layer without daily load on nuforc.org.
Operators can also POST /api/refresh (admin) to pull immediately.
"""
from services.fetchers._store import is_any_active
+1 -1
View File
@@ -58,7 +58,7 @@ SLO_REGISTRY: Dict[str, SLO] = {
"uap_sightings": SLO(
max_age_s=26 * _HOUR,
min_rows=50,
description="NUFORC rolling 60-day window (daily refresh)",
description="NUFORC rolling 60-day window (weekly refresh)",
),
"wastewater": SLO(
max_age_s=30 * _HOUR,
+6 -6
View File
@@ -226,17 +226,17 @@ def test_fetch_uap_sightings_succeeds_when_fallback_returns_data(monkeypatch):
assert canary_calls == [], "canary should not trip when fallback supplies data"
def test_uap_scheduler_runs_daily():
"""UAP layer refreshes daily so the rolling ~60-day window stays current."""
def test_uap_scheduler_runs_weekly():
"""UAP layer refreshes weekly so each install pulls live NUFORC on a steady cadence."""
from services import data_fetcher
with open(data_fetcher.__file__, "r", encoding="utf-8") as f:
text = f.read()
assert "uap_sightings_daily" in text
idx = text.index("uap_sightings_daily")
block = text[max(0, idx - 600) : idx + 80]
assert 'day_of_week="mon"' not in block
assert "uap_sightings_weekly" in text
idx = text.index("uap_sightings_weekly")
block = text[max(0, idx - 600) : idx + 120]
assert 'day_of_week="mon"' in block
def test_uap_cache_rejects_stale_rows_on_load(tmp_path, monkeypatch):
+8
View File
@@ -51,6 +51,14 @@ Shadowbroker is **self-hosted**: each install uses its own backend egress IP. Th
- **Env:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true|false` overrides UI on all platforms
- **Honesty:** Backend-only; no browser-direct LiveUAMap from end users. Stealth remains a functional tradeoff for Turnstile; disable layer or env if unacceptable
## UAP sightings (NUFORC map layer)
- **Window:** last **60 days** (`NUFORC_RECENT_DAYS`, ~2 months) from **live** nuforc.org
- **Cadence:** **Weekly** (Monday 12:00 UTC) per install; typical yield **~400–500** geocoded pins
- **Between weeks:** the disk cache `backend/data/nuforc_recent_sightings.json` is reused (7-day TTL, `NUFORC_CACHE_TTL_HOURS`, default 168) so restarts do not wipe the layer
- **Immediate pull:** admin `GET /api/refresh` on that install
- **Not used for map pins:** stale Hugging Face mirror (frozen ~2023) unless live is down and mirror happens to have in-window rows
---
## CCTV proxy Referer / Origin (#349)