mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-08 10:24:48 +02:00
350 lines
15 KiB
Python
350 lines
15 KiB
Python
"""Sprint 2A: Backend Reliability Core — regression tests.
|
||
|
||
Covers:
|
||
1. data_fetcher._run_tasks: future.result() now has a hard timeout; TimeoutError
|
||
is recorded as a failure, not an indefinite hang.
|
||
2. flights._fetch_supplemental_sources: cache read and write are both done under
|
||
_supplemental_cache_lock so the timestamp+data pair is atomic.
|
||
3. flights._enrich_with_opensky_and_supplemental (OpenSky path): cache check,
|
||
read, and write are all done under _opensky_cache_lock.
|
||
4. main._run_public_sync_cycle: reads _NODE_SYNC_STATE under _NODE_RUNTIME_LOCK.
|
||
5. main._public_infonet_sync_loop: reads _NODE_SYNC_STATE under _NODE_RUNTIME_LOCK.
|
||
6. main._record_public_push_result: reads _NODE_PUSH_STATE under _NODE_RUNTIME_LOCK
|
||
(build-snapshot-and-update is a single atomic block).
|
||
7. main._verify_loop: always passes verify_signatures=True regardless of any env var.
|
||
8. config.py: MESH_VERIFY_SIGNATURES field is no longer a recognised setting.
|
||
"""
|
||
|
||
import threading
|
||
import time
|
||
from unittest.mock import patch
|
||
|
||
import pytest
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 1. data_fetcher._run_tasks — TimeoutError propagates as failure
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestRunTasksTimeout:
    """_run_tasks must unblock within _TASK_HARD_TIMEOUT_S when a task hangs.

    The fix uses futures.items() iteration so future.result(timeout=...) IS the
    blocking call.  as_completed() is no longer used because it blocks inside
    __next__() waiting for completion — the timeout on result() would never be
    reached for a hanging task under that pattern.
    """

    def test_hanging_task_unblocks_run_tasks(self):
        """_run_tasks must return within timeout + epsilon even when a task hangs.

        A real threading.Event holds the task indefinitely.  _TASK_HARD_TIMEOUT_S
        is patched to 0.3s so the test is fast.  The wall-clock guard is 3× the
        timeout plus one second, giving generous CI headroom while still
        catching a true hang.
        """
        import services.data_fetcher as df

        hold = threading.Event()  # only set in the finally below

        def hanging_task():
            hold.wait()  # blocks until the test releases it

        failure_names = []

        def fake_record_failure(name, error, duration_s):  # noqa: ARG001
            failure_names.append(name)

        short_timeout = 0.3
        wall_limit = short_timeout * 3 + 1.0  # generous CI headroom

        try:
            with patch.object(df, "_TASK_HARD_TIMEOUT_S", short_timeout), \
                 patch("services.fetch_health.record_failure", fake_record_failure), \
                 patch("services.fetch_health.record_success", lambda *a, **kw: None):
                started = time.perf_counter()
                df._run_tasks("test", [hanging_task])
                elapsed = time.perf_counter() - started
        finally:
            # Release the background thread even when _run_tasks raises;
            # otherwise the worker stays parked on the Event for the rest of
            # the test session.
            hold.set()

        assert elapsed < wall_limit, (
            f"_run_tasks blocked for {elapsed:.2f}s — timeout not enforced "
            f"(limit was {wall_limit:.2f}s)"
        )
        assert "hanging_task" in failure_names, (
            "Timed-out task must be recorded via record_failure"
        )

    def test_as_completed_not_called_in_run_tasks(self):
        """_run_tasks must not call as_completed(futures) — that pattern makes
        timeout= unreachable for hanging tasks."""
        import inspect
        import services.data_fetcher as df

        source = inspect.getsource(df._run_tasks)
        # The call expression — not a comment mention — must be absent.
        assert "as_completed(futures)" not in source, (
            "_run_tasks must not call as_completed(futures): "
            "as_completed blocks in __next__() so result(timeout=) is never reached"
        )

    def test_as_completed_not_called_in_update_all_data(self):
        """update_all_data must not call as_completed(futures) for the same reason."""
        import inspect
        import services.data_fetcher as df

        source = inspect.getsource(df.update_all_data)
        assert "as_completed(futures)" not in source, (
            "update_all_data must not call as_completed(futures)"
        )

    def test_hard_timeout_constant_present(self):
        """_TASK_HARD_TIMEOUT_S must be defined and positive in data_fetcher."""
        import services.data_fetcher as df

        assert hasattr(df, "_TASK_HARD_TIMEOUT_S")
        assert df._TASK_HARD_TIMEOUT_S > 0

    def test_future_result_called_with_timeout(self):
        """_run_tasks must pass timeout= to its future.result() call.

        This is a source-text check: it only verifies that the substring
        ``future.result(timeout=`` appears in the function's source.
        """
        import inspect
        import services.data_fetcher as df

        source = inspect.getsource(df._run_tasks)
        assert "future.result(timeout=" in source, (
            "_run_tasks must call future.result(timeout=...) not future.result()"
        )

    def test_update_all_data_future_result_called_with_timeout(self):
        """update_all_data must also pass timeout= to future.result()."""
        import inspect
        import services.data_fetcher as df

        source = inspect.getsource(df.update_all_data)
        assert "future.result(timeout=" in source, (
            "update_all_data must call future.result(timeout=...) not future.result()"
        )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 2 & 3. flights.py — locked cache access for OpenSky and supplemental
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestFlightsCacheLocks:
    """Verify that both cache pairs are protected by their respective locks."""

    def test_supplemental_cache_lock_exists(self):
        """flights must expose _supplemental_cache_lock as a threading.Lock."""
        from services.fetchers import flights

        assert hasattr(flights, "_supplemental_cache_lock")
        # threading.Lock is a factory function, so compare against the type of
        # a freshly created lock.
        assert isinstance(flights._supplemental_cache_lock, type(threading.Lock()))

    def test_opensky_cache_lock_exists(self):
        """flights must expose _opensky_cache_lock as a threading.Lock."""
        from services.fetchers import flights

        assert hasattr(flights, "_opensky_cache_lock")
        assert isinstance(flights._opensky_cache_lock, type(threading.Lock()))

    def test_supplemental_read_uses_lock(self):
        """Cache-hit path in _fetch_supplemental_sources acquires the lock."""
        from services.fetchers import flights

        lock_acquired = []

        class TrackingLock:
            """Context-manager stand-in that records each acquisition."""

            def __enter__(self):
                lock_acquired.append(True)
                return self

            def __exit__(self, *args):
                pass

        with patch.object(flights, "_supplemental_cache_lock", TrackingLock()), \
             patch.object(flights, "last_supplemental_fetch", time.time()):
            # Cache is fresh — should hit the locked early-return path
            flights._fetch_supplemental_sources(set())

        assert len(lock_acquired) >= 1, "Lock must be acquired on cache-hit read"

    def test_supplemental_write_uses_lock(self):
        """Cache-miss path in _fetch_supplemental_sources acquires the lock for write.

        This is a source-text check: it confirms that the lock name and both
        cache assignments appear in the function, not that the assignments are
        lexically nested inside the ``with`` block.
        """
        from services.fetchers import flights
        import inspect

        source = inspect.getsource(flights._fetch_supplemental_sources)
        # Both cache writes must be inside a with _supplemental_cache_lock block
        assert "_supplemental_cache_lock" in source
        # The write of the pair (timestamp + data) must appear inside the context
        assert "cached_supplemental_flights = new_supplemental" in source
        assert "last_supplemental_fetch = now" in source

    def test_opensky_cache_lock_used_in_enrich(self):
        """_enrich_with_opensky_and_supplemental uses _opensky_cache_lock."""
        from services.fetchers import flights
        import inspect

        source = inspect.getsource(flights._enrich_with_opensky_and_supplemental)
        assert "_opensky_cache_lock" in source

    def test_opensky_snapshot_local_variable_used(self):
        """After locking, a local opensky_snapshot is used for merging, not the global."""
        from services.fetchers import flights
        import inspect

        source = inspect.getsource(flights._enrich_with_opensky_and_supplemental)
        assert "opensky_snapshot" in source
        # The merge loop must iterate over the local snapshot, not the global
        assert "for osf in opensky_snapshot" in source

    def test_concurrent_supplemental_reads_consistent(self):
        """Ten threads reading _fetch_supplemental_sources on a warm cache all
        see a consistent (non-empty) list without interleaving with a write."""
        from services.fetchers import flights

        seeded = [{"hex": "abc123", "lat": 1.0, "lon": 2.0}]
        results = []
        errors = []

        def reader():
            try:
                result = flights._fetch_supplemental_sources(set())
                results.append(result)
            except Exception as e:
                errors.append(e)

        # patch.object restores both module globals on exit even if a thread
        # fails to start or join raises, so the seeded cache cannot leak into
        # other tests.
        with patch.object(flights, "last_supplemental_fetch", time.time()), \
             patch.object(flights, "cached_supplemental_flights", seeded):
            threads = [threading.Thread(target=reader) for _ in range(10)]
            for t in threads:
                t.start()
            for t in threads:
                t.join()

        assert not errors, f"Concurrent reads raised exceptions: {errors}"
        # Guard against all() passing vacuously on an empty result list.
        assert len(results) == len(threads), "Every reader should have produced a result"
        assert all(len(r) == 1 for r in results), "All readers should see the seeded entry"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 4, 5 & 6. main.py — node-state reads are locked
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestNodeStateLockedReads:
    """Node sync/push state reads at the decision points must use _NODE_RUNTIME_LOCK.

    Each test inspects the function's source text and checks that the first
    mention of ``_NODE_RUNTIME_LOCK`` comes before the first occurrence of the
    state read.  This is an ordering check on text, not proof of lexical
    nesting inside the ``with`` block.
    """

    def test_run_public_sync_cycle_reads_sync_state_under_lock(self):
        """'current_state = get_sync_state()' in _run_public_sync_cycle must
        come after the _NODE_RUNTIME_LOCK acquisition."""
        import inspect
        import main

        lock_marker = "_NODE_RUNTIME_LOCK"
        read_marker = "current_state = get_sync_state()"
        body = inspect.getsource(main._run_public_sync_cycle)

        lock_at = body.find(lock_marker)
        read_at = body.find(read_marker)

        # Both markers have to be present before their order means anything.
        assert lock_at >= 0, "_NODE_RUNTIME_LOCK must appear in _run_public_sync_cycle"
        assert read_at >= 0, "current_state = get_sync_state() must appear in _run_public_sync_cycle"
        # The lock mention must come first (the read is expected inside the with block).
        assert read_at > lock_at, (
            "_NODE_RUNTIME_LOCK must be acquired before current_state = get_sync_state()"
        )

    def test_public_infonet_sync_loop_reads_sync_state_under_lock(self):
        """'state = get_sync_state()' in _public_infonet_sync_loop must come
        after the _NODE_RUNTIME_LOCK acquisition."""
        import inspect
        import main

        body = inspect.getsource(main._public_infonet_sync_loop)

        lock_at = body.find("_NODE_RUNTIME_LOCK")
        read_at = body.find("state = get_sync_state()")

        assert lock_at >= 0
        assert read_at >= 0
        assert read_at > lock_at

    def test_record_push_result_reads_push_state_under_lock(self):
        """_record_public_push_result must read _NODE_PUSH_STATE after taking
        the lock, not from a snapshot dict built before it."""
        import inspect
        import main

        body = inspect.getsource(main._record_public_push_result)

        lock_at = body.find("_NODE_RUNTIME_LOCK")
        push_read_at = body.find("_NODE_PUSH_STATE.get")

        assert lock_at >= 0
        assert push_read_at >= 0, "_NODE_PUSH_STATE.get must still be present"
        assert push_read_at > lock_at, (
            "The _NODE_PUSH_STATE.get read must be INSIDE _NODE_RUNTIME_LOCK"
        )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 7 & 8. MESH_VERIFY_SIGNATURES — hardcoded True in verify loop
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestVerifySignaturesHardcoded:
    """The background verify loop must always pass verify_signatures=True.

    MESH_VERIFY_SIGNATURES in config.py must no longer control the audit loop.
    """

    def test_verify_loop_does_not_read_mesh_verify_signatures(self):
        """_verify_loop in main.py must not call get_settings().MESH_VERIFY_SIGNATURES."""
        import inspect
        import main

        # _verify_loop is a nested function inside lifespan, so the whole
        # lifespan source is scanned.
        source = inspect.getsource(main.lifespan)
        assert "MESH_VERIFY_SIGNATURES" not in source, (
            "_verify_loop must no longer read MESH_VERIFY_SIGNATURES from settings"
        )

    def test_verify_loop_passes_verify_signatures_true(self):
        """The validate_chain_incremental call must use verify_signatures=True (literal)."""
        import inspect
        import main

        source = inspect.getsource(main.lifespan)
        assert "verify_signatures=True" in source, (
            "validate_chain_incremental must be called with verify_signatures=True"
        )

    def test_config_does_not_expose_mesh_verify_signatures(self):
        """Settings class must no longer have MESH_VERIFY_SIGNATURES as a field."""
        from services.config import Settings

        # Require BOTH conditions.  Joining them with `or` let the test pass
        # whenever either one held, so a field that is still declared could
        # slip through if the class-level hasattr check came back False.
        declared_fields = getattr(Settings, "model_fields", {})
        assert "MESH_VERIFY_SIGNATURES" not in declared_fields and \
            not hasattr(Settings, "MESH_VERIFY_SIGNATURES"), (
            "MESH_VERIFY_SIGNATURES must be removed from Settings — "
            "it can no longer silently weaken the audit loop"
        )

    def test_mesh_verify_signatures_env_var_ignored(self):
        """Setting MESH_VERIFY_SIGNATURES=false in env must have no effect on Settings."""
        import os
        import services.config as cfg

        # Force a fresh Settings parse with the flag set to false
        cfg.get_settings.cache_clear()
        try:
            # patch.dict restores the previous value (or absence) of the
            # variable on exit, including when the assertion fails.
            with patch.dict(os.environ, {"MESH_VERIFY_SIGNATURES": "false"}):
                settings = cfg.get_settings()
                # The field should simply not exist on the object
                assert not hasattr(settings, "MESH_VERIFY_SIGNATURES"), (
                    "MESH_VERIFY_SIGNATURES must not be a recognised settings field"
                )
        finally:
            # Drop the Settings instance parsed under the patched environment
            # so later callers re-parse from the restored one.
            cfg.get_settings.cache_clear()

    def test_append_time_enforcement_unchanged(self):
        """mesh_hashchain.Infonet.append must still enforce signatures unconditionally
        (no verify_signatures flag on the append path — this is a read-only check)."""
        import inspect
        from services.mesh.mesh_hashchain import Infonet

        source = inspect.getsource(Infonet.append)
        # append() must still require signature fields
        assert "Missing signature fields" in source, (
            "Infonet.append must still raise on missing signature — "
            "append-time enforcement must remain intact"
        )
        assert "verify_signature" in source, (
            "Infonet.append must still call verify_signature — "
            "append-time enforcement must remain intact"
        )
|