diff --git a/backend/services/region_dossier.py b/backend/services/region_dossier.py index 48a4d3d..2dd6157 100644 --- a/backend/services/region_dossier.py +++ b/backend/services/region_dossier.py @@ -4,7 +4,7 @@ import concurrent.futures from urllib.parse import quote import requests as _requests from cachetools import TTLCache -from services.network_utils import fetch_with_curl +from services.network_utils import fetch_with_curl, DEFAULT_USER_AGENT logger = logging.getLogger(__name__) @@ -15,6 +15,25 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400) # Nominatim requires max 1 req/sec — track last call time _nominatim_last_call = 0.0 +# Issue #218 / #219 (tg12): Wikimedia's User-Agent policy requires API +# clients to identify themselves with a stable User-Agent that includes +# a contact path. Bare "python-requests/x.y" or generic strings violate +# the policy and risk getting blocked. We send the project default UA +# (operator-overridable via SHADOWBROKER_USER_AGENT) on EVERY outbound +# Wikimedia request, plus the policy-recommended Api-User-Agent which +# Wikimedia explicitly accepts on top of the regular UA. +# +# This is documented and stable so a Wikimedia operator who wants to +# rate-limit or contact us has a fixed identifier to grep for. +_WIKIMEDIA_REQUEST_HEADERS = { + "User-Agent": DEFAULT_USER_AGENT, + "Api-User-Agent": ( + f"{DEFAULT_USER_AGENT} " + "(+https://github.com/BigBodyCobain/Shadowbroker; " + "report issues at /issues)" + ), +} + def _reverse_geocode_offline(lat: float, lng: float) -> dict: """Offline fallback via reverse_geocoder when external reverse geocoding is blocked.""" @@ -121,7 +140,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict: """ url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json" try: - res = fetch_with_curl(url, timeout=6) + # Issue #218 (tg12): Wikimedia's User-Agent policy requires + # outbound API traffic to be identifiable. 
fetch_with_curl() + # sends the project default, and we also add the Wikimedia- + # specific Api-User-Agent that the policy specifically asks + # for, since this request originates from a backend service + # that proxies on behalf of (potentially many) browser users. + res = fetch_with_curl(url, timeout=6, headers=_WIKIMEDIA_REQUEST_HEADERS) if res.status_code == 200: results = res.json().get("results", {}).get("bindings", []) if results: @@ -147,7 +172,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict: slug = quote(name.replace(" ", "_")) url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}" try: - res = fetch_with_curl(url, timeout=5) + # Issue #219 (tg12): identify ourselves to Wikimedia per + # their UA policy; see _fetch_wikidata_leader above. + res = fetch_with_curl(url, timeout=5, headers=_WIKIMEDIA_REQUEST_HEADERS) if res.status_code == 200: data = res.json() if data.get("type") != "disambiguation": diff --git a/backend/tests/test_region_dossier_wikimedia_ua.py b/backend/tests/test_region_dossier_wikimedia_ua.py new file mode 100644 index 0000000..96de216 --- /dev/null +++ b/backend/tests/test_region_dossier_wikimedia_ua.py @@ -0,0 +1,91 @@ +"""Issues #218 / #219 (tg12): outbound Wikipedia + Wikidata calls must +identify ShadowBroker via the Wikimedia-recommended User-Agent / +Api-User-Agent headers. + +Before this fix, ``backend/services/region_dossier.py`` called +``fetch_with_curl(url)`` with no explicit headers, falling back to the +generic project default UA. That sent a too-anonymous identifier to +Wikimedia. Per Wikimedia's policy +(https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy) +the API caller should send a stable, contactable identifier so Wikimedia +operators can rate-limit or reach the project. + +This test does NOT make network calls. It patches ``fetch_with_curl`` +and asserts the headers that get passed through. 
+""" +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + + +def _fake_resp(payload: dict, status: int = 200) -> MagicMock: + r = MagicMock() + r.status_code = status + r.json.return_value = payload + return r + + +def test_wikidata_call_passes_wikimedia_request_headers(): + from services import region_dossier + + calls = [] + + def fake_fetch(url, **kwargs): + calls.append(kwargs.get("headers")) + return _fake_resp({"results": {"bindings": []}}) + + with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch): + region_dossier._fetch_wikidata_leader("Testlandia") + + assert calls, "fetch_with_curl was not called" + headers = calls[0] or {} + assert "User-Agent" in headers + assert "Api-User-Agent" in headers + # Stable identifier should mention the project + a contact path. + assert "Shadowbroker" in headers["Api-User-Agent"] or "ShadowBroker" in headers["Api-User-Agent"] + assert "github.com" in headers["Api-User-Agent"].lower() + + +def test_wikipedia_summary_call_passes_wikimedia_request_headers(): + from services import region_dossier + + calls = [] + + def fake_fetch(url, **kwargs): + calls.append((url, kwargs.get("headers"))) + return _fake_resp( + { + "type": "standard", + "description": "test desc", + "extract": "test extract", + "thumbnail": {"source": ""}, + } + ) + + with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch): + region_dossier._fetch_local_wiki_summary("Paris", "France") + + # At least one Wikipedia REST call was issued. 
+ wikipedia_calls = [c for c in calls if "wikipedia.org" in c[0]] + assert wikipedia_calls, "no Wikipedia call was issued" + for url, headers in wikipedia_calls: + headers = headers or {} + assert "User-Agent" in headers, f"missing User-Agent on {url}" + assert "Api-User-Agent" in headers, f"missing Api-User-Agent on {url}" + assert "github.com" in headers["Api-User-Agent"].lower() + + +def test_wikimedia_headers_constant_is_stable(): + """Regression guard: if someone removes the contact path from the + Api-User-Agent we want a loud test failure, not a silent ToS drift. + """ + from services.region_dossier import _WIKIMEDIA_REQUEST_HEADERS + + aua = _WIKIMEDIA_REQUEST_HEADERS.get("Api-User-Agent", "") + assert "Shadowbroker" in aua or "ShadowBroker" in aua + assert "github.com" in aua.lower() + # Must include a path Wikimedia operators can use to contact us + # (we use /issues against the public repo). + assert "issues" in aua.lower() diff --git a/frontend/src/__tests__/utils/wikimediaClient.test.ts b/frontend/src/__tests__/utils/wikimediaClient.test.ts new file mode 100644 index 0000000..1e17aad --- /dev/null +++ b/frontend/src/__tests__/utils/wikimediaClient.test.ts @@ -0,0 +1,164 @@ +/** + * Issues #218 / #219 / #220 (tg12 external audit): + * + * Every browser-direct call to Wikipedia or Wikidata must send the + * `Api-User-Agent` header that Wikimedia's UA policy asks for. These + * tests pin that requirement on the shared `lib/wikimediaClient` + * helper that WikiImage, NewsFeed, and useRegionDossier all route + * through, so a future refactor that drops the header gets a loud + * test failure rather than a silent ToS regression. 
+ */ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + WIKIMEDIA_API_USER_AGENT, + fetchWikipediaSummary, + fetchWikidataSparql, + _resetWikimediaClientCacheForTests, +} from '@/lib/wikimediaClient'; + +const originalFetch = globalThis.fetch; + +describe('lib/wikimediaClient', () => { + beforeEach(() => { + _resetWikimediaClientCacheForTests(); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it('exposes a stable Api-User-Agent identifier with a contact path', () => { + expect(WIKIMEDIA_API_USER_AGENT).toContain('Shadowbroker'); + expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('github.com'); + expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('issues'); + }); + + it('sends Api-User-Agent on Wikipedia summary fetch', async () => { + const calls: Array<{ url: string; init?: RequestInit }> = []; + globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => { + calls.push({ url: String(url), init }); + return new Response( + JSON.stringify({ + type: 'standard', + title: 'Boeing 747', + description: 'aircraft', + extract: 'long extract', + thumbnail: { source: 'https://example.org/thumb.jpg' }, + }), + { status: 200 }, + ); + }) as any; + + const summary = await fetchWikipediaSummary('Boeing 747'); + expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg'); + expect(calls).toHaveLength(1); + const headers = (calls[0].init?.headers || {}) as Record; + expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT); + }); + + it('sends Api-User-Agent on Wikidata SPARQL fetch', async () => { + const calls: Array<{ url: string; init?: RequestInit }> = []; + globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => { + calls.push({ url: String(url), init }); + return new Response( + JSON.stringify({ + results: { + bindings: [ + { + leaderLabel: { value: 'Test Leader' }, + govTypeLabel: { value: 'Test Government' }, + }, + ], + }, + }), + { status: 200 
}, + ); + }) as any; + + const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }'); + expect(bindings).toHaveLength(1); + const headers = (calls[0].init?.headers || {}) as Record; + expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT); + expect(headers['Accept']).toBe('application/sparql-results+json'); + }); + + it('shares cache across consecutive callers for the same Wikipedia title', async () => { + let fetchCount = 0; + globalThis.fetch = vi.fn(async () => { + fetchCount++; + return new Response( + JSON.stringify({ + type: 'standard', + title: 'Eiffel Tower', + description: 'iron lattice tower', + extract: '...', + thumbnail: { source: 'https://example.org/eiffel.jpg' }, + }), + { status: 200 }, + ); + }) as any; + + const a = await fetchWikipediaSummary('Eiffel Tower'); + const b = await fetchWikipediaSummary('Eiffel Tower'); + expect(fetchCount).toBe(1); + expect(a?.thumbnail).toBe(b?.thumbnail); + }); + + it('deduplicates concurrent in-flight requests for the same title', async () => { + let fetchCount = 0; + globalThis.fetch = vi.fn(async () => { + fetchCount++; + await new Promise((r) => setTimeout(r, 5)); + return new Response( + JSON.stringify({ + type: 'standard', + title: 'Mount Fuji', + description: 'stratovolcano', + extract: '...', + thumbnail: { source: 'https://example.org/fuji.jpg' }, + }), + { status: 200 }, + ); + }) as any; + + const [a, b, c] = await Promise.all([ + fetchWikipediaSummary('Mount Fuji'), + fetchWikipediaSummary('Mount Fuji'), + fetchWikipediaSummary('Mount Fuji'), + ]); + expect(fetchCount).toBe(1); + expect(a?.thumbnail).toBe('https://example.org/fuji.jpg'); + expect(b).toEqual(a); + expect(c).toEqual(a); + }); + + it('returns null on disambiguation pages without throwing', async () => { + globalThis.fetch = vi.fn(async () => + new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }), + ) as any; + const summary = await fetchWikipediaSummary('Mercury'); + expect(summary).toBeNull(); 
+ }); + + it('returns null on HTTP error without throwing', async () => { + globalThis.fetch = vi.fn(async () => new Response('not found', { status: 404 })) as any; + const summary = await fetchWikipediaSummary('Nonexistent Article 12345'); + expect(summary).toBeNull(); + }); + + it('returns null on network error without throwing', async () => { + globalThis.fetch = vi.fn(async () => { + throw new Error('network down'); + }) as any; + const summary = await fetchWikipediaSummary('Anything'); + expect(summary).toBeNull(); + }); + + it('returns null on empty input', async () => { + globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any; + expect(await fetchWikipediaSummary('')).toBeNull(); + expect(await fetchWikipediaSummary(' ')).toBeNull(); + expect(globalThis.fetch).not.toHaveBeenCalled(); + }); +}); diff --git a/frontend/src/components/NewsFeed.tsx b/frontend/src/components/NewsFeed.tsx index 2ad6566..af63ffb 100644 --- a/frontend/src/components/NewsFeed.tsx +++ b/frontend/src/components/NewsFeed.tsx @@ -5,6 +5,7 @@ import { motion, AnimatePresence } from 'framer-motion'; import { AlertTriangle, Clock, Minus, Plus, ExternalLink, Brain, Loader2 } from 'lucide-react'; import React, { useEffect, useRef, useCallback } from 'react'; import WikiImage from '@/components/WikiImage'; +import { fetchWikipediaSummary } from '@/lib/wikimediaClient'; import type { SelectedEntity, RegionDossier, FimiData } from "@/types/dashboard"; import { useDataKeys } from '@/hooks/useDataStore'; import { API_BASE } from '@/lib/api'; @@ -203,34 +204,37 @@ function resolveAircraftWikiTitle(model: string | undefined): string | null { return AIRCRAFT_WIKI[model] || resolveAcTypeWiki(model); } -// Module-level cache for Wikipedia thumbnails (persists across re-renders) -const _wikiThumbCache: Record = {}; - +// Issue #220 (tg12): the previous implementation kept its own +// module-local Wikipedia thumbnail cache and issued anonymous fetches +// without 
`Api-User-Agent`. We now delegate to lib/wikimediaClient, +// which sends the policy-compliant header and shares one cache with +// WikiImage and useRegionDossier. function useAircraftImage(model: string | undefined): { imgUrl: string | null; wikiUrl: string | null; loading: boolean } { - const [, forceUpdate] = useState(0); + const [imgUrl, setImgUrl] = useState(null); + const [loading, setLoading] = useState(false); const wikiTitle = resolveAircraftWikiTitle(model) || undefined; const wikiUrl = wikiTitle ? `https://en.wikipedia.org/wiki/${wikiTitle.replace(/ /g, '_')}` : null; useEffect(() => { - if (!wikiTitle) return; - const key = wikiTitle; - if (_wikiThumbCache[key]) return; // Already fetched or in-flight - _wikiThumbCache[key] = { url: null, loading: true }; - fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(wikiTitle)}`) - .then(r => r.json()) - .then(d => { - _wikiThumbCache[key] = { url: d.thumbnail?.source || null, loading: false }; - forceUpdate(n => n + 1); - }) - .catch(() => { - _wikiThumbCache[key] = { url: null, loading: false }; - forceUpdate(n => n + 1); - }); + let cancelled = false; + if (!wikiTitle) { + setImgUrl(null); + setLoading(false); + return; + } + setLoading(true); + fetchWikipediaSummary(wikiTitle).then((summary) => { + if (cancelled) return; + setImgUrl(summary?.thumbnail || null); + setLoading(false); + }); + return () => { + cancelled = true; + }; }, [wikiTitle]); if (!wikiTitle) return { imgUrl: null, wikiUrl: null, loading: false }; - const cached = _wikiThumbCache[wikiTitle]; - return { imgUrl: cached?.url || null, wikiUrl, loading: cached?.loading || false }; + return { imgUrl, wikiUrl, loading }; } diff --git a/frontend/src/components/WikiImage.tsx b/frontend/src/components/WikiImage.tsx index f0bbd7e..7f90646 100644 --- a/frontend/src/components/WikiImage.tsx +++ b/frontend/src/components/WikiImage.tsx @@ -1,13 +1,17 @@ 'use client'; import React, { useState, useEffect } from 'react'; import 
ExternalImage from '@/components/ExternalImage'; - -// Module-level cache: Wikipedia article title → thumbnail URL -const _cache: Record = {}; +import { fetchWikipediaSummary } from '@/lib/wikimediaClient'; /** * WikiImage — displays a Wikipedia thumbnail for a given article URL. - * Uses the Wikipedia REST API with a module-level cache (only fetches once per article). + * + * Issue #220 (tg12): this component previously had its own + * module-local Wikipedia fetch + cache. It now delegates to + * `lib/wikimediaClient`, which sends the policy-compliant + * `Api-User-Agent` header and shares one cache across every UI + * component that asks Wikipedia for an article summary (WikiImage, + * NewsFeed, useRegionDossier). * * Props: * wikiUrl: Full Wikipedia URL, e.g. "https://en.wikipedia.org/wiki/Boeing_787_Dreamliner" @@ -26,32 +30,30 @@ export default function WikiImage({ maxH?: string; accent?: string; }) { - const [, forceUpdate] = useState(0); + const [imgUrl, setImgUrl] = useState(null); + const [loading, setLoading] = useState(true); // Extract article title from URL const title = wikiUrl.replace(/^https?:\/\/[^/]+\/wiki\//, ''); useEffect(() => { - if (!title || _cache[title]?.done) return; - if (_cache[title]) return; // In-flight - _cache[title] = { url: null, done: false }; - - fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`) - .then((r) => r.json()) - .then((d) => { - _cache[title] = { url: d.thumbnail?.source || d.originalimage?.source || null, done: true }; - forceUpdate((n) => n + 1); - }) - .catch(() => { - _cache[title] = { url: null, done: true }; - forceUpdate((n) => n + 1); - }); + let cancelled = false; + if (!title) { + setImgUrl(null); + setLoading(false); + return; + } + setLoading(true); + fetchWikipediaSummary(title).then((summary) => { + if (cancelled) return; + setImgUrl(summary?.thumbnail || null); + setLoading(false); + }); + return () => { + cancelled = true; + }; }, [title]); - const cached = 
_cache[title]; - const imgUrl = cached?.url; - const loading = cached && !cached.done; - return (
{loading && ( diff --git a/frontend/src/hooks/useRegionDossier.ts b/frontend/src/hooks/useRegionDossier.ts index b2ece38..5311f2c 100644 --- a/frontend/src/hooks/useRegionDossier.ts +++ b/frontend/src/hooks/useRegionDossier.ts @@ -1,5 +1,6 @@ import { useCallback, useState, useEffect } from 'react'; import type { RegionDossier, SelectedEntity } from '@/types/dashboard'; +import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient'; // ─── CACHE ───────────────────────────────────────────────────────────────── // Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL. @@ -114,7 +115,11 @@ async function fetchCountryData(countryCode: string) { return Array.isArray(data) ? data[0] || {} : data || {}; } -/** Fetch head of state + government type from Wikidata SPARQL (direct browser call). */ +/** Fetch head of state + government type from Wikidata SPARQL. + * + * Issue #218 (tg12): routes through lib/wikimediaClient so the + * Api-User-Agent header is set per Wikimedia's UA policy. + */ async function fetchLeader(countryName: string) { if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' }; const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'"); @@ -127,13 +132,11 @@ async function fetchLeader(countryName: string) { SERVICE wikibase:label { bd:serviceParam wikibase:language "en". 
} } LIMIT 1 `; - const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(sparql)}&format=json`; - const res = await fetch(url, { - headers: { Accept: 'application/sparql-results+json' }, - }); - if (!res.ok) throw new Error(`Wikidata HTTP ${res.status}`); - const results = (await res.json()).results?.bindings || []; - if (results.length > 0) { + const results = await fetchWikidataSparql<{ + leaderLabel?: { value: string }; + govTypeLabel?: { value: string }; + }>(sparql); + if (results && results.length > 0) { return { leader: results[0].leaderLabel?.value || 'Unknown', government_type: results[0].govTypeLabel?.value || 'Unknown', @@ -142,27 +145,25 @@ async function fetchLeader(countryName: string) { return { leader: 'Unknown', government_type: 'Unknown' }; } -/** Fetch Wikipedia summary for a place (direct browser call). */ +/** Fetch Wikipedia summary for a place. + * + * Issue #219 (tg12): routes through lib/wikimediaClient so the + * Api-User-Agent header is set per Wikimedia's UA policy, AND the + * shared cache means consecutive useRegionDossier + WikiImage + + * NewsFeed lookups for the same article all hit the same slot. 
+ */ async function fetchLocalWikiSummary(placeName: string, countryName = '') { if (!placeName) return {}; const candidates = [placeName]; if (countryName) candidates.push(`${placeName}, ${countryName}`); - for (const name of candidates) { - try { - const slug = encodeURIComponent(name.replace(/ /g, '_')); - const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`; - const res = await fetch(url); - if (!res.ok) continue; - const data = await res.json(); - if (data.type === 'disambiguation') continue; + const summary = await fetchWikipediaSummary(name); + if (summary) { return { - description: data.description || '', - extract: data.extract || '', - thumbnail: data.thumbnail?.source || '', + description: summary.description, + extract: summary.extract, + thumbnail: summary.thumbnail, }; - } catch { - continue; } } return {}; diff --git a/frontend/src/lib/wikimediaClient.ts b/frontend/src/lib/wikimediaClient.ts new file mode 100644 index 0000000..8b0bbea --- /dev/null +++ b/frontend/src/lib/wikimediaClient.ts @@ -0,0 +1,157 @@ +/** + * wikimediaClient — single fetch surface for Wikipedia / Wikidata. + * + * Issues #218, #219, #220 (tg12 external audit): + * + * Wikimedia's User-Agent policy asks API clients to identify themselves + * via `Api-User-Agent` when calling from browser JavaScript (because the + * browser does not let JS set `User-Agent` directly). Before this + * module existed, three independent components issued anonymous browser + * fetches against Wikipedia / Wikidata: + * + * - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary) + * - WikiImage (Wikipedia REST summary) + * - NewsFeed (Wikipedia REST summary) + * + * Each component shipped its own copy-pasted fetch + module-local cache. + * Provider-policy compliance was missing in all three places. + * + * This module centralizes: + * + * 1. The `Api-User-Agent` header on every request. + * 2. A single bounded cache for Wikipedia summary lookups (keyed by article + * title).
Multiple components asking for the same article share + * one in-flight request and one cache slot. + * 3. One predictable kill switch — if Wikimedia ever asks us to back + * off, we change `WIKIMEDIA_API_USER_AGENT` here and the whole + * frontend updates. + * + * This does NOT change end-user UX: + * + * - WikiImage still shows the same thumbnails. + * - NewsFeed still shows aircraft thumbnails. + * - useRegionDossier still returns the same place summary + leader. + * + * What changes: + * + * - Wikimedia can distinguish our traffic from the anonymous + * browser visitor pool. + * - Provider-policy fixes happen here once, not in three places. + */ + +// Stable identifier per Wikimedia UA policy. Includes a contact path so +// Wikimedia's operators can reach the project if they need to rate-limit +// or coordinate. Bump the version when the contact path changes. +export const WIKIMEDIA_API_USER_AGENT = + 'Shadowbroker/1.0 (+https://github.com/BigBodyCobain/Shadowbroker; ' + + 'report issues at /issues)'; + +// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier. +// Keyed by Wikipedia article title (NOT slug — we keep the human-readable +// form so debugging the cache is easier). Values track in-flight state +// so concurrent callers for the same title share one network request. +export interface WikipediaSummary { + title: string; + description: string; + extract: string; + thumbnail: string; + type: string; // 'standard' | 'disambiguation' | etc. +} + +interface CacheEntry { + summary: WikipediaSummary | null; + inflight: Promise | null; + loaded: boolean; +} + +const _summaryCache: Map = new Map(); +const SUMMARY_CACHE_MAX = 512; + +function evictIfOverCap() { + if (_summaryCache.size <= SUMMARY_CACHE_MAX) return; + const oldest = _summaryCache.keys().next().value; + if (oldest) _summaryCache.delete(oldest); +} + +/** Fetch a Wikipedia article summary (titles, NOT URLs). + * + * Empty / invalid input resolves to `null`.
Network errors and disambiguation + * pages also resolve to `null`, so callers can render a fallback without + * a try/catch (the audit's "fail forward, not loud" rule). + */ +export async function fetchWikipediaSummary( + title: string, +): Promise { + const trimmed = (title || '').trim(); + if (!trimmed) return null; + + const cached = _summaryCache.get(trimmed); + if (cached?.loaded) return cached.summary; + if (cached?.inflight) return cached.inflight; + + const slug = encodeURIComponent(trimmed.replace(/ /g, '_')); + const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`; + + const promise = fetch(url, { + headers: { 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT }, + }) + .then(async (r) => { + if (!r.ok) return null; + const d = await r.json(); + if (d?.type === 'disambiguation') return null; + const summary: WikipediaSummary = { + title: trimmed, + description: d?.description || '', + extract: d?.extract || '', + thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '', + type: d?.type || 'standard', + }; + return summary; + }) + .catch(() => null) + .then((summary) => { + _summaryCache.set(trimmed, { summary, inflight: null, loaded: true }); + evictIfOverCap(); + return summary; + }); + + _summaryCache.set(trimmed, { summary: null, inflight: promise, loaded: false }); + evictIfOverCap(); + return promise; +} + +/** Fetch a Wikidata SPARQL query result. + * + * Returns the parsed JSON `results.bindings` array on success; `null` + * (not throwing) on any failure so callers can render fallbacks + * silently. Kept as a thin wrapper so the audit-required UA header is + * applied in exactly one place.
+ */ +export async function fetchWikidataSparql>( + sparql: string, +): Promise { + const trimmed = (sparql || '').trim(); + if (!trimmed) return null; + const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent( + trimmed, + )}&format=json`; + try { + const res = await fetch(url, { + headers: { + 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT, + Accept: 'application/sparql-results+json', + }, + }); + if (!res.ok) return null; + const json = await res.json(); + const bindings = json?.results?.bindings; + return Array.isArray(bindings) ? (bindings as T[]) : null; + } catch { + return null; + } +} + +/** Internal: clear the shared cache. Exposed for tests only. */ +export function _resetWikimediaClientCacheForTests() { + _summaryCache.clear(); +}