diff --git a/backend/.env.example b/backend/.env.example index e63cb30..a95b2bf 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -70,7 +70,9 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key # FINANCIAL_ENABLED=false # # NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days). +# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs. # NUFORC_RECENT_DAYS=60 +# NUFORC_CACHE_TTL_HOURS=168 # On Windows, live scrape uses Python requests by default; optional: # SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true # NUFORC enrichment index (HF dataset) is separate — opt-in only: diff --git a/backend/services/data_fetcher.py b/backend/services/data_fetcher.py index 1c74c68..e7cd031 100644 --- a/backend/services/data_fetcher.py +++ b/backend/services/data_fetcher.py @@ -993,17 +993,19 @@ def start_scheduler(): misfire_grace_time=600, ) - # UAP sightings (NUFORC) — daily at 12:00 UTC. Rolling ~60-day window from - # live nuforc.org; disk cache is re-validated on every read. + # UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window; + # each self-hosted install pulls live nuforc.org so operators see current + # reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d. 
_scheduler.add_job( lambda: _run_task_with_health( lambda: fetch_uap_sightings(force_refresh=True), "fetch_uap_sightings", ), "cron", + day_of_week="mon", hour=12, minute=0, - id="uap_sightings_daily", + id="uap_sightings_weekly", max_instances=1, misfire_grace_time=3600, ) diff --git a/backend/services/fetchers/earth_observation.py b/backend/services/fetchers/earth_observation.py index 5495474..257ed2a 100644 --- a/backend/services/fetchers/earth_observation.py +++ b/backend/services/fetchers/earth_observation.py @@ -701,6 +701,12 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1 # practice, so a 0.3s spacing keeps us well under any soft throttle while # still rebuilding a full 12-month window in ~10 minutes. _NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3")) +# Disk cache TTL — match the weekly scheduler so restarts between fetches still +# serve the same rolling 60-day snapshot without hammering nuforc.org daily. +_NUFORC_CACHE_TTL_S = max( + 3600, + int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600, +) _NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data" _NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json" _NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json" @@ -832,7 +838,7 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] | built_dt = datetime.fromisoformat(built) if built else None if built_dt is None: return None - if (datetime.utcnow() - built_dt).total_seconds() > 86400: + if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S: return None if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS: logger.info( @@ -1646,11 +1652,12 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]: @with_retry(max_retries=1, base_delay=5) def fetch_uap_sightings(*, force_refresh: bool = False): - """Fetch last-year UAP sightings from NUFORC. 
+ """Fetch rolling-window UAP sightings from live NUFORC. - Startup reads the cached daily snapshot when it is still fresh. The daily - scheduler forces a rebuild so this layer updates once per day instead of - churning continuously. + Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS + (default 168h / one week). The weekly scheduler forces a rebuild so every + install refreshes the same ~60-day layer without daily load on nuforc.org. + Operators can also GET /api/refresh (admin) to pull immediately. """ from services.fetchers._store import is_any_active diff --git a/backend/services/slo.py b/backend/services/slo.py index eca1a4c..ee6db9a 100644 --- a/backend/services/slo.py +++ b/backend/services/slo.py @@ -58,7 +58,7 @@ SLO_REGISTRY: Dict[str, SLO] = { "uap_sightings": SLO( max_age_s=26 * _HOUR, min_rows=50, - description="NUFORC rolling 60-day window (daily refresh)", + description="NUFORC rolling 60-day window (weekly refresh)", ), "wastewater": SLO( max_age_s=30 * _HOUR, diff --git a/backend/tests/test_uap_hf_fallback_cutoff.py b/backend/tests/test_uap_hf_fallback_cutoff.py index 4d36ffa..54fa7cf 100644 --- a/backend/tests/test_uap_hf_fallback_cutoff.py +++ b/backend/tests/test_uap_hf_fallback_cutoff.py @@ -226,17 +226,17 @@ def test_fetch_uap_sightings_succeeds_when_fallback_returns_data(monkeypatch): assert canary_calls == [], "canary should not trip when fallback supplies data" -def test_uap_scheduler_runs_daily(): - """UAP layer refreshes daily so the rolling ~60-day window stays current.""" +def test_uap_scheduler_runs_weekly(): + """UAP layer refreshes weekly so each install pulls live NUFORC on a steady cadence.""" from services import data_fetcher with open(data_fetcher.__file__, "r", encoding="utf-8") as f: text = f.read() - assert "uap_sightings_daily" in text - idx = text.index("uap_sightings_daily") - block = text[max(0, idx - 600) : idx + 80] - assert 'day_of_week="mon"' not in block + assert "uap_sightings_weekly" in 
text + idx = text.index("uap_sightings_weekly") + block = text[max(0, idx - 600) : idx + 120] + assert 'day_of_week="mon"' in block def test_uap_cache_rejects_stale_rows_on_load(tmp_path, monkeypatch): diff --git a/docs/OUTBOUND_DATA.md b/docs/OUTBOUND_DATA.md index 10309d7..e07a8da 100644 --- a/docs/OUTBOUND_DATA.md +++ b/docs/OUTBOUND_DATA.md @@ -51,6 +51,14 @@ Shadowbroker is **self-hosted**: each install uses its own backend egress IP. Th - **Env:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true|false` overrides UI on all platforms - **Honesty:** Backend-only; no browser-direct LiveUAMap from end users. Stealth remains a functional tradeoff for Turnstile; disable layer or env if unacceptable +## UAP sightings (NUFORC map layer) + +- **Window:** last **60 days** (`NUFORC_RECENT_DAYS`, ~2 months) from **live** nuforc.org +- **Cadence:** **Weekly** (Monday 12:00 UTC) per install; typical yield **~400–500** geocoded pins +- **Between weeks:** the snapshot is cached in `backend/data/nuforc_recent_sightings.json` (default 7-day TTL, `NUFORC_CACHE_TTL_HOURS`) so restarts do not wipe the layer +- **Immediate pull:** admin `GET /api/refresh` on that install +- **Not used for map pins:** the stale Hugging Face mirror (frozen ~2023), unless live nuforc.org is down and the mirror happens to have in-window rows + --- ## CCTV proxy Referer / Origin (#349)