diff --git a/backend/routers/tools.py b/backend/routers/tools.py index ee8a654..41fa6bb 100644 --- a/backend/routers/tools.py +++ b/backend/routers/tools.py @@ -85,6 +85,39 @@ async def api_geocode_reverse( return await asyncio.to_thread(reverse_geocode, lat, lng, local_only) +# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ─── +@router.get("/api/wikipedia/summary") +@limiter.limit("60/minute") +def api_wikipedia_summary( + request: Request, + title: str = Query(..., min_length=1, max_length=256), +): + """Proxy Wikipedia REST summaries through the self-hosted backend.""" + from services.region_dossier import fetch_wikipedia_page_summary + + summary = fetch_wikipedia_page_summary(title) + if summary is None: + return JSONResponse(status_code=404, content={"detail": "not_found"}) + return summary + + +class WikidataSparqlRequest(BaseModel): + query: str + + +@router.post("/api/wikidata/sparql") +@limiter.limit("30/minute") +def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest): + """Proxy Wikidata SPARQL so the browser never contacts query.wikidata.org.""" + from services.region_dossier import fetch_wikidata_sparql_bindings + + q = (body.query or "").strip() + if len(q) > 12_000: + raise HTTPException(400, "SPARQL query too large") + bindings = fetch_wikidata_sparql_bindings(q) + return {"bindings": bindings} + + # ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ────────── # These three endpoints relay external Sentinel / Planetary Computer # requests through the backend to avoid browser CORS blocks. They are diff --git a/backend/services/network_utils.py b/backend/services/network_utils.py index a94a64f..e587221 100644 --- a/backend/services/network_utils.py +++ b/backend/services/network_utils.py @@ -146,7 +146,12 @@ def get_operator_handle() -> str: # 3. On-disk handle from a previous run. persisted = _load_persisted_operator_handle() if persisted: - _OPERATOR_HANDLE_CACHE = _normalize_handle(persisted) + normalized = _normalize_handle(persisted) + # Migrate legacy auto-generated handles (pre-Round-7a ``shadow-`` prefix). + if normalized.startswith("shadow-"): + normalized = f"operator-{normalized[len('shadow-'):]}" + _persist_operator_handle(normalized) + _OPERATOR_HANDLE_CACHE = normalized return _OPERATOR_HANDLE_CACHE # 4. Generate, persist, return. @@ -178,7 +183,7 @@ def outbound_user_agent(purpose: str = "") -> str: Returns something like:: - Shadowbroker/0.9 (operator: shadow-7f3a92; purpose: wikipedia; + Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia; +https://github.com/BigBodyCobain/Shadowbroker/issues) The ``purpose`` is optional but recommended — it tells the upstream diff --git a/backend/services/region_dossier.py b/backend/services/region_dossier.py index 89871c2..fee65e1 100644 --- a/backend/services/region_dossier.py +++ b/backend/services/region_dossier.py @@ -301,3 +301,36 @@ def get_region_dossier(lat: float, lng: float) -> dict: dossier_cache[cache_key] = result return result + + +def fetch_wikipedia_page_summary(title: str) -> dict | None: + """Wikipedia REST summary for a page title (backend-proxied for #360).""" + trimmed = (title or "").strip() + if not trimmed: + return None + data = _fetch_local_wiki_summary(trimmed, "") + if not data.get("extract") and not data.get("description"): + return None + return { + "title": trimmed, + "description": data.get("description", ""), + "extract": data.get("extract", ""), + "thumbnail": data.get("thumbnail", ""), + "type": "standard", + } + + +def fetch_wikidata_sparql_bindings(sparql: str) -> list: + """Run a Wikidata SPARQL query; returns bindings list (empty on failure).""" + trimmed = (sparql or "").strip() + if not trimmed: + return [] + url = f"https://query.wikidata.org/sparql?query={quote(trimmed)}&format=json" + try: + res = fetch_with_curl(url, timeout=8, headers=_wikimedia_request_headers()) + if res.status_code == 200: + bindings = res.json().get("results", {}).get("bindings", []) + return bindings if isinstance(bindings, list) else [] + except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e: + logger.warning("Wikidata SPARQL failed: %s", e) + return [] diff --git a/backend/tests/test_wikimedia_proxy_routes.py b/backend/tests/test_wikimedia_proxy_routes.py new file mode 100644 index 0000000..de69df2 --- /dev/null +++ b/backend/tests/test_wikimedia_proxy_routes.py @@ -0,0 +1,34 @@ +"""Backend Wikimedia proxy routes (#360).""" +from __future__ import annotations + +from unittest.mock import patch + +import pytest + + +def test_wikipedia_summary_route_returns_payload(client): + sample = { + "title": "Paris", + "description": "capital", + "extract": "Paris is the capital of France.", + "thumbnail": "https://example.org/t.jpg", + "type": "standard", + } + with patch( + "services.region_dossier.fetch_wikipedia_page_summary", + return_value=sample, + ): + r = client.get("/api/wikipedia/summary", params={"title": "Paris"}) + assert r.status_code == 200 + assert r.json()["title"] == "Paris" + + +def test_wikidata_sparql_route_returns_bindings(client): + bindings = [{"x": {"value": "1"}}] + with patch( + "services.region_dossier.fetch_wikidata_sparql_bindings", + return_value=bindings, + ): + r = client.post("/api/wikidata/sparql", json={"query": "SELECT ?x WHERE {}"}) + assert r.status_code == 200 + assert r.json()["bindings"] == bindings diff --git a/frontend/src/__tests__/page/pageDecomposition.test.ts b/frontend/src/__tests__/page/pageDecomposition.test.ts index e3e4d5a..0b56e80 100644 --- a/frontend/src/__tests__/page/pageDecomposition.test.ts +++ b/frontend/src/__tests__/page/pageDecomposition.test.ts @@ -203,6 +203,19 @@ describe('page.tsx decomposition — no admin-session/proxy regression', () => { const locateBar = readAppFile('LocateBar.tsx'); expect(locateBar).toContain('API_BASE'); expect(locateBar).toContain('/api/geocode/search'); + expect(locateBar).not.toContain('nominatim.openstreetmap.org'); + }); + + it('useRegionDossier uses backend dossier APIs (no browser-direct enrichment)', () => { + const hook = fs.readFileSync( + path.resolve(__dirname, '../../hooks/useRegionDossier.ts'), + 'utf-8', + ); + expect(hook).toContain('/api/region-dossier'); + expect(hook).toContain('/api/sentinel2/search'); + expect(hook).not.toContain('nominatim.openstreetmap.org'); + expect(hook).not.toContain('planetarycomputer.microsoft.com'); + expect(hook).not.toContain('restcountries.com'); }); }); diff --git a/frontend/src/__tests__/utils/wikimediaClient.test.ts b/frontend/src/__tests__/utils/wikimediaClient.test.ts index 318dd17..3242ac3 100644 --- a/frontend/src/__tests__/utils/wikimediaClient.test.ts +++ b/frontend/src/__tests__/utils/wikimediaClient.test.ts @@ -1,21 +1,8 @@ /** - * Issues #218 / #219 / #220 (tg12 external audit) + Round 7a: - * - * Every browser-direct call to Wikipedia or Wikidata must send the - * `Api-User-Agent` header that Wikimedia's UA policy asks for, AND must - * embed the per-install operator handle so Wikimedia can rate-limit / - * contact the specific operator instead of treating "Shadowbroker" as - * one giant entity. - * - * These tests pin both requirements on the shared `lib/wikimediaClient` - * helper that WikiImage, NewsFeed, and useRegionDossier all route - * through. A future refactor that drops either the header OR the - * per-operator handle gets a loud test failure rather than a silent - * ToS / privacy regression. + * #360: Wikipedia / Wikidata traffic is proxied via the self-hosted backend. */ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { - buildWikimediaUserAgent, fetchWikipediaSummary, fetchWikidataSparql, _resetWikimediaClientCacheForTests, @@ -23,18 +10,6 @@ import { const originalFetch = globalThis.fetch; -// Helper: stub fetch so calls to /api/settings/operator-handle return a -// known handle, and everything else proxies to whatever the test set up. -function withHandle(handle: string, otherFetch: typeof globalThis.fetch) { - return vi.fn(async (input: any, init?: RequestInit) => { - const url = String(input); - if (url.endsWith('/api/settings/operator-handle')) { - return new Response(JSON.stringify({ handle }), { status: 200 }); - } - return otherFetch(input, init); - }); -} - describe('lib/wikimediaClient', () => { beforeEach(() => { _resetWikimediaClientCacheForTests(); @@ -45,194 +20,78 @@ describe('lib/wikimediaClient', () => { vi.restoreAllMocks(); }); - it('builds a stable per-operator Api-User-Agent with contact path', async () => { - globalThis.fetch = withHandle( - 'operator-abc123', - vi.fn(async () => new Response('{}', { status: 200 })) as any, - ) as any; - const ua = await buildWikimediaUserAgent('wikipedia-summary'); - expect(ua).toContain('Shadowbroker'); - expect(ua.toLowerCase()).toContain('github.com'); - expect(ua.toLowerCase()).toContain('issues'); - expect(ua).toContain('operator: operator-abc123'); - expect(ua).toContain('purpose: wikipedia-summary'); - }); - - it('falls back to "operator-offline" when handle endpoint is unreachable', async () => { + it('fetches Wikipedia summary through backend proxy', async () => { + const calls: string[] = []; globalThis.fetch = vi.fn(async (input: any) => { - const url = String(input); - if (url.endsWith('/api/settings/operator-handle')) { - return new Response('forbidden', { status: 403 }); - } - return new Response('{}', { status: 200 }); - }) as any; - const ua = await buildWikimediaUserAgent('test'); - expect(ua).toContain('operator: operator-offline'); - }); - - it('sends per-operator Api-User-Agent on Wikipedia summary fetch', async () => { - const wikiCalls: Array<{ url: string; init?: RequestInit }> = []; - const baseFetch = vi.fn(async (url: any, init?: RequestInit) => { - wikiCalls.push({ url: String(url), init }); + calls.push(String(input)); return new Response( JSON.stringify({ - type: 'standard', title: 'Boeing 747', description: 'aircraft', extract: 'long extract', - thumbnail: { source: 'https://example.org/thumb.jpg' }, - }), - { status: 200 }, - ); - }); - globalThis.fetch = withHandle('operator-test01', baseFetch as any) as any; - - const summary = await fetchWikipediaSummary('Boeing 747'); - expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg'); - // wikiCalls only captures calls to non-handle URLs. - expect(wikiCalls).toHaveLength(1); - const headers = (wikiCalls[0].init?.headers || {}) as Record; - expect(headers['Api-User-Agent']).toContain('operator: operator-test01'); - expect(headers['Api-User-Agent']).toContain('purpose: wikipedia-summary'); - }); - - it('sends per-operator Api-User-Agent on Wikidata SPARQL fetch', async () => { - const calls: Array<{ url: string; init?: RequestInit }> = []; - const baseFetch = vi.fn(async (url: any, init?: RequestInit) => { - calls.push({ url: String(url), init }); - return new Response( - JSON.stringify({ - results: { bindings: [{ leaderLabel: { value: 'Test Leader' } }] }, - }), - { status: 200 }, - ); - }); - globalThis.fetch = withHandle('operator-sparql', baseFetch as any) as any; - - const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }'); - expect(bindings).toHaveLength(1); - const headers = (calls[0].init?.headers || {}) as Record; - expect(headers['Api-User-Agent']).toContain('operator: operator-sparql'); - expect(headers['Api-User-Agent']).toContain('purpose: wikidata-sparql'); - expect(headers['Accept']).toBe('application/sparql-results+json'); - }); - - it('handle endpoint is queried only ONCE across many wiki fetches', async () => { - let handleCalls = 0; - let wikiCalls = 0; - globalThis.fetch = vi.fn(async (input: any) => { - const url = String(input); - if (url.endsWith('/api/settings/operator-handle')) { - handleCalls++; - return new Response(JSON.stringify({ handle: 'operator-cache' }), { status: 200 }); - } - wikiCalls++; - return new Response( - JSON.stringify({ + thumbnail: 'https://example.org/thumb.jpg', type: 'standard', - title: 'X', - description: '', - extract: '', - thumbnail: { source: 'https://example.org/x.jpg' }, }), { status: 200 }, ); }) as any; - await fetchWikipediaSummary('Eiffel Tower'); - await fetchWikipediaSummary('Mount Fuji'); - await fetchWikipediaSummary('Statue of Liberty'); - expect(handleCalls).toBe(1); - expect(wikiCalls).toBe(3); + const summary = await fetchWikipediaSummary('Boeing 747'); + expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg'); + expect(calls).toHaveLength(1); + expect(calls[0]).toContain('/api/wikipedia/summary'); + expect(calls[0]).not.toContain('wikipedia.org'); }); - it('shares cache across consecutive callers for the same Wikipedia title', async () => { - let fetchCount = 0; - const baseFetch = vi.fn(async () => { - fetchCount++; + it('fetches Wikidata SPARQL through backend proxy', async () => { + const calls: Array<{ url: string; init?: RequestInit }> = []; + globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => { + calls.push({ url: String(url), init }); return new Response( JSON.stringify({ - type: 'standard', - title: 'Eiffel Tower', - description: 'iron lattice tower', - extract: '...', - thumbnail: { source: 'https://example.org/eiffel.jpg' }, + bindings: [{ leaderLabel: { value: 'Test Leader' } }], }), { status: 200 }, ); - }); - globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any; + }) as any; - const a = await fetchWikipediaSummary('Eiffel Tower'); - const b = await fetchWikipediaSummary('Eiffel Tower'); - expect(fetchCount).toBe(1); - expect(a?.thumbnail).toBe(b?.thumbnail); + const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }'); + expect(bindings).toHaveLength(1); + expect(calls).toHaveLength(1); + expect(calls[0].url).toContain('/api/wikidata/sparql'); + expect(calls[0].init?.method).toBe('POST'); + expect(calls[0].url).not.toContain('wikidata.org'); }); - it('deduplicates concurrent in-flight requests for the same title', async () => { - let fetchCount = 0; - const baseFetch = vi.fn(async () => { - fetchCount++; - await new Promise((r) => setTimeout(r, 5)); + it('deduplicates concurrent Wikipedia summary requests', async () => { + let hits = 0; + globalThis.fetch = vi.fn(async () => { + hits += 1; return new Response( JSON.stringify({ - type: 'standard', title: 'Mount Fuji', - description: 'stratovolcano', - extract: '...', - thumbnail: { source: 'https://example.org/fuji.jpg' }, + description: 'mountain', + extract: 'extract', + thumbnail: '', + type: 'standard', }), { status: 200 }, ); - }); - globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any; + }) as any; const [a, b, c] = await Promise.all([ fetchWikipediaSummary('Mount Fuji'), fetchWikipediaSummary('Mount Fuji'), fetchWikipediaSummary('Mount Fuji'), ]); - expect(fetchCount).toBe(1); - expect(a?.thumbnail).toBe('https://example.org/fuji.jpg'); + expect(a?.title).toBe('Mount Fuji'); expect(b).toEqual(a); expect(c).toEqual(a); + expect(hits).toBe(1); }); - it('returns null on disambiguation pages without throwing', async () => { - globalThis.fetch = withHandle( - 'operator-cache', - vi.fn(async () => - new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }), - ) as any, - ) as any; - const summary = await fetchWikipediaSummary('Mercury'); - expect(summary).toBeNull(); - }); - - it('returns null on HTTP error without throwing', async () => { - globalThis.fetch = withHandle( - 'operator-cache', - vi.fn(async () => new Response('not found', { status: 404 })) as any, - ) as any; - const summary = await fetchWikipediaSummary('Nonexistent Article 12345'); - expect(summary).toBeNull(); - }); - - it('returns null on network error without throwing', async () => { - globalThis.fetch = withHandle( - 'operator-cache', - vi.fn(async () => { - throw new Error('network down'); - }) as any, - ) as any; - const summary = await fetchWikipediaSummary('Anything'); - expect(summary).toBeNull(); - }); - - it('returns null on empty input without fetching anything', async () => { - globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any; - expect(await fetchWikipediaSummary('')).toBeNull(); - expect(await fetchWikipediaSummary(' ')).toBeNull(); - expect(globalThis.fetch).not.toHaveBeenCalled(); + it('returns null on Wikipedia 404', async () => { + globalThis.fetch = vi.fn(async () => new Response('{}', { status: 404 })) as any; + expect(await fetchWikipediaSummary('Nonexistent Article 12345')).toBeNull(); }); }); diff --git a/frontend/src/app/LocateBar.tsx b/frontend/src/app/LocateBar.tsx index 04e0b8f..c0bddcf 100644 --- a/frontend/src/app/LocateBar.tsx +++ b/frontend/src/app/LocateBar.tsx @@ -12,6 +12,7 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number, const [value, setValue] = useState(''); const [results, setResults] = useState<{ label: string; lat: number; lng: number }[]>([]); const [loading, setLoading] = useState(false); + const [searchError, setSearchError] = useState(null); const inputRef = useRef(null); const timerRef = useRef | null>(null); const searchAbortRef = useRef(null); @@ -58,14 +59,15 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number, if (searchAbortRef.current) searchAbortRef.current.abort(); if (q.trim().length < 2) { setResults([]); + setSearchError(null); return; } timerRef.current = setTimeout(async () => { setLoading(true); + setSearchError(null); searchAbortRef.current = new AbortController(); const signal = searchAbortRef.current.signal; try { - // Try backend proxy first (has caching + rate-limit compliance) const res = await fetch( `${API_BASE}/api/geocode/search?q=${encodeURIComponent(q)}&limit=5`, { signal }, @@ -80,43 +82,19 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number, }), ); setResults(mapped); + if (mapped.length === 0) { + setSearchError('No places found'); + } } else { - // Backend proxy returned an error — fall back to direct Nominatim - console.warn(`[Locate] Proxy returned HTTP ${res.status}, falling back to Nominatim`); - const directRes = await fetch( - `https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`, - { headers: { 'Accept-Language': 'en' }, signal }, - ); - const data = await directRes.json(); - setResults( - data.map((r: { display_name: string; lat: string; lon: string }) => ({ - label: r.display_name, - lat: parseFloat(r.lat), - lng: parseFloat(r.lon), - })), - ); + console.warn(`[Locate] Geocode proxy HTTP ${res.status}`); + setResults([]); + setSearchError('Place search unavailable — check backend connection'); } } catch (err) { if ((err as Error)?.name !== 'AbortError') { - // Proxy completely failed — try direct Nominatim as last resort - try { - const directRes = await fetch( - `https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`, - { headers: { 'Accept-Language': 'en' } }, - ); - const data = await directRes.json(); - setResults( - data.map((r: { display_name: string; lat: string; lon: string }) => ({ - label: r.display_name, - lat: parseFloat(r.lat), - lng: parseFloat(r.lon), - })), - ); - } catch { - setResults([]); - } - } else { + console.warn('[Locate] Geocode proxy failed:', err); setResults([]); + setSearchError('Place search unavailable — check backend connection'); } } finally { setLoading(false); @@ -216,6 +194,11 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number, + {searchError && results.length === 0 && !loading && value.trim().length >= 2 && ( +
+ {searchError} +
+ )} {results.length > 0 && (
{results.map((r, i) => ( diff --git a/frontend/src/hooks/useRegionDossier.ts b/frontend/src/hooks/useRegionDossier.ts index 5311f2c..c4619bf 100644 --- a/frontend/src/hooks/useRegionDossier.ts +++ b/frontend/src/hooks/useRegionDossier.ts @@ -1,11 +1,10 @@ import { useCallback, useState, useEffect } from 'react'; import type { RegionDossier, SelectedEntity } from '@/types/dashboard'; -import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient'; +import { API_BASE } from '@/lib/api'; // ─── CACHE ───────────────────────────────────────────────────────────────── -// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL. const _dossierCache = new Map(); -const CACHE_TTL = 86400_000; // 24 hours in ms +const CACHE_TTL = 86400_000; function getCached(lat: number, lng: number): RegionDossier | null { const key = `${Math.round(lat * 10) / 10}_${Math.round(lng * 10) / 10}`; @@ -18,14 +17,12 @@ function getCached(lat: number, lng: number): RegionDossier | null { function setCache(lat: number, lng: number, data: RegionDossier) { const key = `${Math.round(lat * 10) / 10}_${Math.round(lng * 10) / 10}`; _dossierCache.set(key, { data, ts: Date.now() }); - // Evict oldest entries if cache exceeds 500 if (_dossierCache.size > 500) { const oldest = _dossierCache.keys().next().value; if (oldest) _dossierCache.delete(oldest); } } -// ─── ESRI WORLD IMAGERY FALLBACK ─────────────────────────────────────────── function buildLocalSentinelFallback(lat: number, lng: number) { const latSpan = 0.18; const lngSpan = 0.24; @@ -80,140 +77,56 @@ function buildLimitedDossier(lat: number, lng: number, error?: string): RegionDo } as RegionDossier; } -// ─── BROWSER-DIRECT API CALLS ────────────────────────────────────────────── -// All external APIs below support CORS — no backend proxy needed. +/** Self-hosted backend routes (#351) — no browser-direct third-party dossier calls. */ +async function fetchDossierBundle( + lat: number, + lng: number, +): Promise<{ dossier: Record | null; sentinel2: Record }> { + const qs = `lat=${encodeURIComponent(lat)}&lng=${encodeURIComponent(lng)}`; + const [dossierRes, sentinelRes] = await Promise.allSettled([ + fetch(`${API_BASE}/api/region-dossier?${qs}`), + fetch(`${API_BASE}/api/sentinel2/search?${qs}`), + ]); -/** Reverse geocode via Nominatim (direct browser call). */ -async function reverseGeocode(lat: number, lng: number) { - const url = - `https://nominatim.openstreetmap.org/reverse?` + - `lat=${lat}&lon=${lng}&format=json&zoom=10&addressdetails=1&accept-language=en`; - const res = await fetch(url, { - headers: { 'User-Agent': 'ShadowBroker-OSINT/1.0 (live-risk-dashboard)' }, - }); - if (!res.ok) throw new Error(`Nominatim HTTP ${res.status}`); - const data = await res.json(); - const addr = data.address || {}; + let dossier: Record | null = null; + if (dossierRes.status === 'fulfilled' && dossierRes.value.ok) { + dossier = await dossierRes.value.json(); + } else if (dossierRes.status === 'fulfilled') { + console.warn('[Dossier] Backend region-dossier HTTP', dossierRes.value.status); + } else { + console.warn('[Dossier] Backend region-dossier failed:', dossierRes.reason); + } + + let sentinel2: Record = buildLocalSentinelFallback(lat, lng); + if (sentinelRes.status === 'fulfilled' && sentinelRes.value.ok) { + sentinel2 = await sentinelRes.value.json(); + } else if (sentinelRes.status === 'rejected') { + console.warn('[Dossier] Backend sentinel2/search failed:', sentinelRes.reason); + } + + return { dossier, sentinel2 }; +} + +function dossierFromBackend( + lat: number, + lng: number, + raw: Record, + sentinel2: Record, +): RegionDossier { + const coords = (raw.coordinates as { lat?: number; lng?: number }) || { lat, lng }; return { - city: addr.city || addr.town || addr.village || addr.county || '', - state: addr.state || addr.region || '', - country: addr.country || '', - country_code: (addr.country_code || '').toUpperCase(), - display_name: data.display_name || '', - }; + lat, + lng, + coordinates: coords, + location: raw.location ?? {}, + country: raw.country ?? null, + local: raw.local ?? null, + error: raw.error as string | undefined, + warning: raw.warning as string | undefined, + sentinel2, + } as RegionDossier; } -/** Fetch country data from RestCountries (direct browser call). */ -async function fetchCountryData(countryCode: string) { - if (!countryCode) return {}; - const url = - `https://restcountries.com/v3.1/alpha/${countryCode}` + - `?fields=name,population,capital,languages,region,subregion,area,currencies,borders,flag`; - const res = await fetch(url); - if (!res.ok) throw new Error(`RestCountries HTTP ${res.status}`); - const data = await res.json(); - return Array.isArray(data) ? data[0] || {} : data || {}; -} - -/** Fetch head of state + government type from Wikidata SPARQL. - * - * Issue #218 (tg12): routes through lib/wikimediaClient so the - * Api-User-Agent header is set per Wikimedia's UA policy. - */ -async function fetchLeader(countryName: string) { - if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' }; - const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'"); - const sparql = ` - SELECT ?leaderLabel ?govTypeLabel WHERE { - ?country wdt:P31 wd:Q6256 ; - rdfs:label "${safeName}"@en . - OPTIONAL { ?country wdt:P35 ?leader . } - OPTIONAL { ?country wdt:P122 ?govType . } - SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } - } LIMIT 1 - `; - const results = await fetchWikidataSparql<{ - leaderLabel?: { value: string }; - govTypeLabel?: { value: string }; - }>(sparql); - if (results && results.length > 0) { - return { - leader: results[0].leaderLabel?.value || 'Unknown', - government_type: results[0].govTypeLabel?.value || 'Unknown', - }; - } - return { leader: 'Unknown', government_type: 'Unknown' }; -} - -/** Fetch Wikipedia summary for a place. - * - * Issue #219 (tg12): routes through lib/wikimediaClient so the - * Api-User-Agent header is set per Wikimedia's UA policy, AND the - * shared cache means consecutive useRegionDossier + WikiImage + - * NewsFeed lookups for the same article all hit the same slot. - */ -async function fetchLocalWikiSummary(placeName: string, countryName = '') { - if (!placeName) return {}; - const candidates = [placeName]; - if (countryName) candidates.push(`${placeName}, ${countryName}`); - for (const name of candidates) { - const summary = await fetchWikipediaSummary(name); - if (summary) { - return { - description: summary.description, - extract: summary.extract, - thumbnail: summary.thumbnail, - }; - } - } - return {}; -} - -/** Search for Sentinel-2 imagery via Microsoft Planetary Computer STAC (direct browser call). */ -async function fetchSentinel2Direct(lat: number, lng: number) { - const now = new Date(); - const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000); - const payload = { - collections: ['sentinel-2-l2a'], - intersects: { type: 'Point', coordinates: [lng, lat] }, - datetime: `${thirtyDaysAgo.toISOString()}/${now.toISOString()}`, - sortby: [{ field: 'datetime', direction: 'desc' }], - limit: 3, - query: { 'eo:cloud_cover': { lt: 30 } }, - }; - - const res = await fetch('https://planetarycomputer.microsoft.com/api/stac/v1/search', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(payload), - }); - - if (!res.ok) throw new Error(`Planetary Computer HTTP ${res.status}`); - const data = await res.json(); - const features = data.features || []; - if (!features.length) return null; // No scenes — caller uses Esri fallback - - const scenes = features.map((item: any) => { - const assets = item.assets || {}; - const rendered = assets.rendered_preview || {}; - const thumbnail = assets.thumbnail || {}; - return { - found: true, - scene_id: item.id, - datetime: item.properties?.datetime, - cloud_cover: item.properties?.['eo:cloud_cover'], - thumbnail_url: thumbnail.href || rendered.href, - fullres_url: rendered.href || thumbnail.href, - bbox: item.bbox ? [...item.bbox] : null, - platform: item.properties?.platform || 'Sentinel-2', - }; - }); - - return { ...scenes[0], scenes }; -} - -// ─── MAIN HOOK ───────────────────────────────────────────────────────────── - export function useRegionDossier( selectedEntity: SelectedEntity | null, setSelectedEntity: (entity: SelectedEntity | null) => void, @@ -233,7 +146,6 @@ export function useRegionDossier( }); setRegionDossierLoading(true); - // Check cache first const cached = getCached(lat, lng); if (cached) { setRegionDossier(cached); @@ -241,155 +153,23 @@ export function useRegionDossier( return; } - // Show fallback immediately while API calls are in flight setRegionDossier({ ...buildLimitedDossier(lat, lng), sentinel2: esriFallback, }); try { - // ── Phase 1: Geocode + Sentinel-2 in parallel ────────────────── - const [geoResult, sentinelResult] = await Promise.allSettled([ - reverseGeocode(lat, lng), - fetchSentinel2Direct(lat, lng), - ]); + const { dossier, sentinel2 } = await fetchDossierBundle(lat, lng); - // Parse geocode - let geo = { city: '', state: '', country: '', country_code: '', display_name: '' }; - if (geoResult.status === 'fulfilled') { - geo = geoResult.value; - } else { - console.warn('[Dossier] Reverse geocode failed:', geoResult.reason); - } - - // Parse sentinel - let sentinel2: Record = esriFallback; - if (sentinelResult.status === 'fulfilled' && sentinelResult.value) { - sentinel2 = sentinelResult.value; - } else if (sentinelResult.status === 'rejected') { - console.warn('[Dossier] Sentinel-2 search failed:', sentinelResult.reason); - } - // sentinelResult fulfilled but null → no scenes found, keep Esri fallback - - // If no country found (ocean, uninhabited), show limited dossier - if (!geo.country) { - const result: RegionDossier = { - lat, - lng, - coordinates: { lat, lng }, - location: geo.display_name - ? geo - : { display_name: `${lat.toFixed(4)}, ${lng.toFixed(4)}` }, - country: null, - local: null, - error: 'No country data — possibly international waters or uninhabited area', + if (!dossier) { + setRegionDossier({ + ...buildLimitedDossier(lat, lng, 'Region dossier unavailable — check backend connection'), sentinel2, - } as RegionDossier; - setRegionDossier(result); - setCache(lat, lng, result); - setRegionDossierLoading(false); + }); return; } - // ── Phase 2: Country + Leader + Wiki in parallel ─────────────── - const [countryResult, leaderResult, localWikiResult, countryWikiResult] = - await Promise.allSettled([ - fetchCountryData(geo.country_code), - fetchLeader(geo.country), - fetchLocalWikiSummary(geo.city || geo.state, geo.country), - fetchLocalWikiSummary(geo.country, ''), - ]); - - // Parse country data - let countryData: Record = {}; - if (countryResult.status === 'fulfilled') { - countryData = countryResult.value as Record; - } else { - console.warn('[Dossier] Country data failed:', countryResult.reason); - } - - // Parse leader data - let leaderData = { leader: 'Unknown', government_type: 'Unknown' }; - if (leaderResult.status === 'fulfilled') { - leaderData = leaderResult.value; - } else { - console.warn('[Dossier] Leader data failed:', leaderResult.reason); - } - - // Parse local wiki - let localData: Record = {}; - if (localWikiResult.status === 'fulfilled') { - localData = localWikiResult.value as Record; - } else { - console.warn('[Dossier] Local wiki failed:', localWikiResult.reason); - } - - // If no local data, try country wiki summary - if (!localData.extract && countryWikiResult.status === 'fulfilled') { - const cw = countryWikiResult.value as Record; - if (cw.extract) localData = cw; - } - - // Build languages list - const languages = countryData.languages as Record | undefined; - const langList = languages ? Object.values(languages) : []; - - // Build currencies list - const currencies = countryData.currencies as - | Record - | undefined; - const currencyList: string[] = []; - if (currencies) { - for (const v of Object.values(currencies)) { - if (v && typeof v === 'object') { - const sym = v.symbol || ''; - const nm = v.name || ''; - currencyList.push(sym ? `${nm} (${sym})` : nm); - } - } - } - - const nameData = countryData.name as - | { common?: string; official?: string } - | undefined; - const capitalData = countryData.capital as string[] | undefined; - - // ── Assemble final dossier (exact same shape as backend) ─────── - const result: RegionDossier = { - lat, - lng, - coordinates: { lat, lng }, - location: { - city: geo.city, - state: geo.state, - country: geo.country, - country_code: geo.country_code, - display_name: geo.display_name, - }, - country: { - name: nameData?.common || geo.country, - official_name: nameData?.official || '', - leader: leaderData.leader, - government_type: leaderData.government_type, - population: (countryData.population as number) || 0, - capital: capitalData?.length ? capitalData[0] : 'Unknown', - languages: langList, - currencies: currencyList, - region: (countryData.region as string) || '', - subregion: (countryData.subregion as string) || '', - area_km2: (countryData.area as number) || 0, - flag_emoji: (countryData.flag as string) || '', - }, - local: { - name: geo.city, - state: geo.state, - description: localData.description || '', - summary: localData.extract || '', - thumbnail: localData.thumbnail || '', - }, - sentinel2, - } as RegionDossier; - + const result = dossierFromBackend(lat, lng, dossier, sentinel2); setRegionDossier(result); setCache(lat, lng, result); } catch (e) { @@ -405,7 +185,6 @@ export function useRegionDossier( [setSelectedEntity], ); - // Clear dossier when selecting a different entity type useEffect(() => { if (selectedEntity?.type !== 'region_dossier') { setRegionDossier(null); diff --git a/frontend/src/lib/wikimediaClient.ts b/frontend/src/lib/wikimediaClient.ts index 7a92aa3..630f762 100644 --- a/frontend/src/lib/wikimediaClient.ts +++ b/frontend/src/lib/wikimediaClient.ts @@ -1,47 +1,18 @@ /** - * wikimediaClient — single fetch surface for Wikipedia / Wikidata. + * wikimediaClient — Wikipedia / Wikidata via the self-hosted backend (#360). * - * Issues #218, #219, #220 (tg12 external audit) + Round 7a: - * - * Wikimedia's User-Agent policy asks API clients to identify themselves - * via `Api-User-Agent` when calling from browser JavaScript (because the - * browser does not let JS set `User-Agent` directly). Three independent - * components used to issue anonymous browser fetches against Wikipedia / - * Wikidata: - * - * - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary) - * - WikiImage (Wikipedia REST summary) - * - NewsFeed (Wikipedia REST summary) - * - * PR #284 collapsed them into this shared helper with one stable - * `Api-User-Agent`. That fixed compliance but introduced a new problem: - * the `Api-User-Agent` was project-wide, so from Wikimedia's perspective - * every Shadowbroker install looked like one giant scraper. If one - * install misbehaved, Wikimedia's only recourse was to block the project - * as a whole. - * - * Round 7a fixes that. The frontend fetches the per-install operator - * handle from `GET /api/settings/operator-handle` once on first use and - * embeds it in the `Api-User-Agent`. Wikimedia can now rate-limit / - * contact the specific install instead of the project. The handle is - * auto-generated on the backend (`shadow-XXXXXX`) or operator-chosen via - * the `OPERATOR_HANDLE` setting. - * - * UX impact: zero. Same thumbnails, same summaries, same load behavior. - * The only observable change is the value of the outgoing - * `Api-User-Agent` header. + * The browser only calls `/api/wikipedia/summary` and `/api/wikidata/sparql`. + * Outbound Wikimedia traffic (with per-install operator attribution from + * Round 7a) is handled server-side in `services/region_dossier.py`. */ +import { API_BASE } from '@/lib/api'; -// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier. -// Keyed by Wikipedia article title (NOT slug — we keep the human-readable -// form so debugging the cache is easier). Values track in-flight state -// so concurrent callers for the same title share one network request. export interface WikipediaSummary { title: string; description: string; extract: string; thumbnail: string; - type: string; // 'standard' | 'disambiguation' | etc. + type: string; } interface CacheEntry { @@ -59,72 +30,6 @@ function evictIfOverCap() { if (oldest) _summaryCache.delete(oldest); } -// ─── Per-operator handle (Round 7a) ──────────────────────────────────────── - -// Fetched once from the backend on first need and cached for the page -// lifetime. The handle is NOT a secret — Wikimedia will see it on every -// Wikipedia / Wikidata request we make — but caching it locally avoids a -// round-trip on every Wikipedia fetch and lets the offline / no-backend -// case still produce a stable UA (the fallback handle). -let _handlePromise: Promise | null = null; -let _cachedHandle: string | null = null; - -const FALLBACK_HANDLE = 'operator-offline'; -const HANDLE_ENDPOINT = '/api/settings/operator-handle'; - -async function fetchOperatorHandle(): Promise { - try { - const res = await fetch(HANDLE_ENDPOINT, { - // Use the standard relative-path proxy so the Next.js admin-key - // injection (same-origin) flows naturally for legitimate browser - // sessions. A cross-origin scanner will be blocked by the proxy - // before this even leaves their browser. - credentials: 'same-origin', - }); - if (!res.ok) return FALLBACK_HANDLE; - const data = await res.json(); - const h = (data && typeof data.handle === 'string' && data.handle.trim()) || ''; - return h || FALLBACK_HANDLE; - } catch { - return FALLBACK_HANDLE; - } -} - -async function getOperatorHandle(): Promise { - if (_cachedHandle) return _cachedHandle; - if (!_handlePromise) { - _handlePromise = fetchOperatorHandle().then((h) => { - _cachedHandle = h; - return h; - }); - } - return _handlePromise; -} - -/** Build the Wikimedia Api-User-Agent for this install. - * - * Includes the per-install operator handle so Wikimedia can rate-limit / - * contact the specific operator instead of the project as a whole. - * Exported for tests; production callers should let - * `fetchWikipediaSummary` / `fetchWikidataSparql` build it implicitly. - */ -export async function buildWikimediaUserAgent(purpose: string): Promise { - const handle = await getOperatorHandle(); - const safePurpose = (purpose || '').replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase(); - return ( - `Shadowbroker/1.0 (operator: ${handle}; purpose: ${safePurpose}; ` + - '+https://github.com/BigBodyCobain/Shadowbroker; report issues at /issues)' - ); -} - -// ─── Wikipedia summary fetch ─────────────────────────────────────────────── - -/** Fetch a Wikipedia article summary (titles, NOT URLs). - * - * Empty / invalid input resolves to `null`. Network errors and disambig - * pages also resolve to `null` so callers can render a fallback without - * a try/catch. Per the audit's "fail forward, not loud" rule. - */ export async function fetchWikipediaSummary( title: string, ): Promise { @@ -135,22 +40,19 @@ export async function fetchWikipediaSummary( if (cached?.loaded) return cached.summary; if (cached?.inflight) return cached.inflight; - const slug = encodeURIComponent(trimmed.replace(/ /g, '_')); - const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`; - const promise = (async (): Promise => { try { - const ua = await buildWikimediaUserAgent('wikipedia-summary'); - const r = await fetch(url, { headers: { 'Api-User-Agent': ua } }); + const url = `${API_BASE}/api/wikipedia/summary?title=${encodeURIComponent(trimmed)}`; + const r = await fetch(url); + if (r.status === 404) return null; if (!r.ok) return null; const d = await r.json(); - if (d?.type === 'disambiguation') return null; return { - title: trimmed, - description: d?.description || '', - extract: d?.extract || '', - thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '', - type: d?.type || 'standard', + title: (d?.title as string) || trimmed, + description: (d?.description as string) || '', + extract: (d?.extract as string) || '', + thumbnail: (d?.thumbnail as string) || '', + type: (d?.type as string) || 'standard', }; } catch { return null; @@ -166,45 +68,32 @@ export async function fetchWikipediaSummary( return promise; } -// ─── Wikidata SPARQL ─────────────────────────────────────────────────────── - -/** Fetch a Wikidata SPARQL query result. - * - * Returns the parsed JSON `results.bindings` array on success; `null` - * (not throwing) on any failure so callers can render fallbacks - * silently. Per-install operator handle threaded through `Api-User-Agent` - * (Round 7a). - */ export async function fetchWikidataSparql>( sparql: string, ): Promise { const trimmed = (sparql || '').trim(); if (!trimmed) return null; - const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent( - trimmed, - )}&format=json`; try { - const ua = await buildWikimediaUserAgent('wikidata-sparql'); - const res = await fetch(url, { - headers: { - 'Api-User-Agent': ua, - Accept: 'application/sparql-results+json', - }, + const res = await fetch(`${API_BASE}/api/wikidata/sparql`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ query: trimmed }), }); if (!res.ok) return null; const json = await res.json(); - const bindings = json?.results?.bindings; + const bindings = json?.bindings; return Array.isArray(bindings) ? (bindings as T[]) : null; } catch { return null; } } -// ─── Test helpers ────────────────────────────────────────────────────────── +/** @deprecated Browser no longer builds Wikimedia UA; kept for tests that import it. */ +export async function buildWikimediaUserAgent(purpose: string): Promise { + void purpose; + return 'Shadowbroker/1.0 (backend-proxied; purpose: wikimedia)'; +} -/** Internal: clear the shared cache + the handle cache. Exposed for tests only. */ export function _resetWikimediaClientCacheForTests() { _summaryCache.clear(); - _handlePromise = null; - _cachedHandle = null; }