mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-05 13:58:15 +02:00
fix(uap): weekly live NUFORC refresh with 7-day cache for operators
Each install pulls ~60-day sightings from nuforc.org every Monday; disk cache matches weekly cadence so users keep current pins between restarts. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -70,7 +70,9 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days).
|
||||
# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs.
|
||||
# NUFORC_RECENT_DAYS=60
|
||||
# NUFORC_CACHE_TTL_HOURS=168
|
||||
# On Windows, live scrape uses Python requests by default; optional:
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true
|
||||
# NUFORC enrichment index (HF dataset) is separate — opt-in only:
|
||||
|
||||
@@ -993,17 +993,19 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# UAP sightings (NUFORC) — daily at 12:00 UTC. Rolling ~60-day window from
|
||||
# live nuforc.org; disk cache is re-validated on every read.
|
||||
# UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window;
|
||||
# each self-hosted install pulls live nuforc.org so operators see current
|
||||
# reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d.
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(
|
||||
lambda: fetch_uap_sightings(force_refresh=True),
|
||||
"fetch_uap_sightings",
|
||||
),
|
||||
"cron",
|
||||
day_of_week="mon",
|
||||
hour=12,
|
||||
minute=0,
|
||||
id="uap_sightings_daily",
|
||||
id="uap_sightings_weekly",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
|
||||
@@ -701,6 +701,12 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1
|
||||
# practice, so a 0.3s spacing keeps us well under any soft throttle while
|
||||
# still rebuilding a full 12-month window in ~10 minutes.
|
||||
_NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3"))
|
||||
# Disk cache TTL — match the weekly scheduler so restarts between fetches still
|
||||
# serve the same rolling 60-day snapshot without hammering nuforc.org daily.
|
||||
_NUFORC_CACHE_TTL_S = max(
|
||||
3600,
|
||||
int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600,
|
||||
)
|
||||
_NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
||||
_NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json"
|
||||
_NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json"
|
||||
@@ -832,7 +838,7 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] |
|
||||
built_dt = datetime.fromisoformat(built) if built else None
|
||||
if built_dt is None:
|
||||
return None
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > 86400:
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S:
|
||||
return None
|
||||
if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS:
|
||||
logger.info(
|
||||
@@ -1646,11 +1652,12 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
"""Fetch last-year UAP sightings from NUFORC.
|
||||
"""Fetch rolling-window UAP sightings from live NUFORC.
|
||||
|
||||
Startup reads the cached daily snapshot when it is still fresh. The daily
|
||||
scheduler forces a rebuild so this layer updates once per day instead of
|
||||
churning continuously.
|
||||
Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS
|
||||
(default 168h / one week). The weekly scheduler forces a rebuild so every
|
||||
install refreshes the same ~60-day layer without daily load on nuforc.org.
|
||||
Operators can also POST /api/refresh (admin) to pull immediately.
|
||||
"""
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ SLO_REGISTRY: Dict[str, SLO] = {
|
||||
"uap_sightings": SLO(
|
||||
max_age_s=26 * _HOUR,
|
||||
min_rows=50,
|
||||
description="NUFORC rolling 60-day window (daily refresh)",
|
||||
description="NUFORC rolling 60-day window (weekly refresh)",
|
||||
),
|
||||
"wastewater": SLO(
|
||||
max_age_s=30 * _HOUR,
|
||||
|
||||
@@ -226,17 +226,17 @@ def test_fetch_uap_sightings_succeeds_when_fallback_returns_data(monkeypatch):
|
||||
assert canary_calls == [], "canary should not trip when fallback supplies data"
|
||||
|
||||
|
||||
def test_uap_scheduler_runs_daily():
|
||||
"""UAP layer refreshes daily so the rolling ~60-day window stays current."""
|
||||
def test_uap_scheduler_runs_weekly():
|
||||
"""UAP layer refreshes weekly so each install pulls live NUFORC on a steady cadence."""
|
||||
from services import data_fetcher
|
||||
|
||||
with open(data_fetcher.__file__, "r", encoding="utf-8") as f:
|
||||
text = f.read()
|
||||
|
||||
assert "uap_sightings_daily" in text
|
||||
idx = text.index("uap_sightings_daily")
|
||||
block = text[max(0, idx - 600) : idx + 80]
|
||||
assert 'day_of_week="mon"' not in block
|
||||
assert "uap_sightings_weekly" in text
|
||||
idx = text.index("uap_sightings_weekly")
|
||||
block = text[max(0, idx - 600) : idx + 120]
|
||||
assert 'day_of_week="mon"' in block
|
||||
|
||||
|
||||
def test_uap_cache_rejects_stale_rows_on_load(tmp_path, monkeypatch):
|
||||
|
||||
@@ -51,6 +51,14 @@ Shadowbroker is **self-hosted**: each install uses its own backend egress IP. Th
|
||||
- **Env:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true|false` overrides UI on all platforms
|
||||
- **Honesty:** Backend-only; no browser-direct LiveUAMap from end users. Stealth remains a functional tradeoff for Turnstile; disable layer or env if unacceptable
|
||||
|
||||
## UAP sightings (NUFORC map layer)
|
||||
|
||||
- **Window:** last **60 days** (`NUFORC_RECENT_DAYS`, ~2 months) from **live** nuforc.org
|
||||
- **Cadence:** **Weekly** (Monday 12:00 UTC) per install; typical yield **~400–500** geocoded pins
|
||||
- **Between weeks:** the snapshot is cached in `backend/data/nuforc_recent_sightings.json` (7-day TTL, `NUFORC_CACHE_TTL_HOURS`) so restarts do not wipe the layer
|
||||
- **Immediate pull:** admin `POST /api/refresh` on that install
|
||||
- **Not used for map pins:** stale Hugging Face mirror (frozen ~2023) unless live is down and mirror happens to have in-window rows
|
||||
|
||||
---
|
||||
|
||||
## CCTV proxy Referer / Origin (#349)
|
||||
|
||||
Reference in New Issue
Block a user