mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-28 10:01:31 +02:00
Fix #218/#219/#220: identify ShadowBroker on Wikipedia + Wikidata calls (#284)
Wikimedia's User-Agent policy asks API clients to identify themselves with a stable, contactable identifier so their operators can rate-limit or coordinate. Before this change, ShadowBroker was sending: - Backend (region_dossier.py): generic project default UA only; no Api-User-Agent. - Frontend (useRegionDossier.ts, WikiImage.tsx, NewsFeed.tsx): zero identifying header at all; three separate copy-pasted anonymous fetches with their own module-local caches. Three separate components doing the same broken thing meant policy fixes had to happen in three places, with no shared cache or kill switch. Fix (no UX change, zero hostility): == Backend == `backend/services/region_dossier.py` now sets explicit `User-Agent` + `Api-User-Agent` headers on every outbound Wikidata and Wikipedia request via a new `_WIKIMEDIA_REQUEST_HEADERS` constant. The identifier includes a contact path (issues page on the public GitHub repo). == Frontend == New shared helper `frontend/src/lib/wikimediaClient.ts`: - `fetchWikipediaSummary(title)` — single source of truth for Wikipedia REST summary lookups, with one shared LRU cache (in-flight requests deduplicated, 512-entry cap), `Api-User-Agent` on every fetch. - `fetchWikidataSparql(query)` — same shape for Wikidata SPARQL. - `WIKIMEDIA_API_USER_AGENT` — exported constant; one place to update if Wikimedia ever asks us to back off. Refactored three components to use the shared client: - `frontend/src/hooks/useRegionDossier.ts` — fetchLeader() and fetchLocalWikiSummary() now route through the shared helpers. - `frontend/src/components/WikiImage.tsx` — uses fetchWikipediaSummary, proper React state instead of module-mutation + forceUpdate trick. - `frontend/src/components/NewsFeed.tsx` — same shape. UX: byte-for-byte identical. Same thumbnails, same dossier content, same load behavior. The only observable difference is the outgoing request header. Note on #239 (route duplication): an audit-grade inventory shows 166 main.py routes are shadowed by router modules. That cleanup is too large to land safely in this PR; it will be staged as a separate ladder of small PRs grouped by router module. Tests: - `backend/tests/test_region_dossier_wikimedia_ua.py` — 3 tests asserting backend headers are present. - `frontend/src/__tests__/utils/wikimediaClient.test.ts` — 9 tests covering Api-User-Agent presence, shared cache, concurrent deduplication, disambiguation/HTTP-error/network-error fallthroughs, empty-input safety. Local: backend 76/76 security suite green, frontend 716/716 vitest suite green. Credit: tg12 (external security audit).
This commit is contained in:
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, DEFAULT_USER_AGENT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,6 +15,25 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issue #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path. Bare "python-requests/x.y" or generic strings violate
|
||||
# the policy and risk getting blocked. We send the project default UA
|
||||
# (operator-overridable via SHADOWBROKER_USER_AGENT) on EVERY outbound
|
||||
# Wikimedia request, plus the policy-recommended Api-User-Agent which
|
||||
# Wikimedia explicitly accepts on top of the regular UA.
|
||||
#
|
||||
# This is documented and stable so a Wikimedia operator who wants to
|
||||
# rate-limit or contact us has a fixed identifier to grep for.
|
||||
_WIKIMEDIA_REQUEST_HEADERS = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Api-User-Agent": (
|
||||
f"{DEFAULT_USER_AGENT} "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"report issues at /issues)"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||
@@ -121,7 +140,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
"""
|
||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=6)
|
||||
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||
# sends the project default, and we also add the Wikimedia-
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -147,7 +172,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
slug = quote(name.replace(" ", "_"))
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=5)
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Issues #218 / #219 (tg12): outbound Wikipedia + Wikidata calls must
|
||||
identify ShadowBroker via the Wikimedia-recommended User-Agent /
|
||||
Api-User-Agent headers.
|
||||
|
||||
Before this fix, ``backend/services/region_dossier.py`` called
|
||||
``fetch_with_curl(url)`` with no explicit headers, falling back to the
|
||||
generic project default UA. That sent a too-anonymous identifier to
|
||||
Wikimedia. Per Wikimedia's policy
|
||||
(https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy)
|
||||
the API caller should send a stable, contactable identifier so Wikimedia
|
||||
operators can rate-limit or reach the project.
|
||||
|
||||
This test does NOT make network calls. It patches ``fetch_with_curl``
|
||||
and asserts the headers that get passed through.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _fake_resp(payload: dict, status: int = 200) -> MagicMock:
|
||||
r = MagicMock()
|
||||
r.status_code = status
|
||||
r.json.return_value = payload
|
||||
return r
|
||||
|
||||
|
||||
def test_wikidata_call_passes_wikimedia_request_headers():
|
||||
from services import region_dossier
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_fetch(url, **kwargs):
|
||||
calls.append(kwargs.get("headers"))
|
||||
return _fake_resp({"results": {"bindings": []}})
|
||||
|
||||
with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
|
||||
region_dossier._fetch_wikidata_leader("Testlandia")
|
||||
|
||||
assert calls, "fetch_with_curl was not called"
|
||||
headers = calls[0] or {}
|
||||
assert "User-Agent" in headers
|
||||
assert "Api-User-Agent" in headers
|
||||
# Stable identifier should mention the project + a contact path.
|
||||
assert "Shadowbroker" in headers["Api-User-Agent"] or "ShadowBroker" in headers["Api-User-Agent"]
|
||||
assert "github.com" in headers["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikipedia_summary_call_passes_wikimedia_request_headers():
|
||||
from services import region_dossier
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_fetch(url, **kwargs):
|
||||
calls.append((url, kwargs.get("headers")))
|
||||
return _fake_resp(
|
||||
{
|
||||
"type": "standard",
|
||||
"description": "test desc",
|
||||
"extract": "test extract",
|
||||
"thumbnail": {"source": ""},
|
||||
}
|
||||
)
|
||||
|
||||
with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
|
||||
region_dossier._fetch_local_wiki_summary("Paris", "France")
|
||||
|
||||
# At least one Wikipedia REST call was issued.
|
||||
wikipedia_calls = [c for c in calls if "wikipedia.org" in c[0]]
|
||||
assert wikipedia_calls, "no Wikipedia call was issued"
|
||||
for url, headers in wikipedia_calls:
|
||||
headers = headers or {}
|
||||
assert "User-Agent" in headers, f"missing User-Agent on {url}"
|
||||
assert "Api-User-Agent" in headers, f"missing Api-User-Agent on {url}"
|
||||
assert "github.com" in headers["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikimedia_headers_constant_is_stable():
|
||||
"""Regression guard: if someone removes the contact path from the
|
||||
Api-User-Agent we want a loud test failure, not a silent ToS drift.
|
||||
"""
|
||||
from services.region_dossier import _WIKIMEDIA_REQUEST_HEADERS
|
||||
|
||||
aua = _WIKIMEDIA_REQUEST_HEADERS.get("Api-User-Agent", "")
|
||||
assert "Shadowbroker" in aua or "ShadowBroker" in aua
|
||||
assert "github.com" in aua.lower()
|
||||
# Must include a path Wikimedia operators can use to contact us
|
||||
# (we use /issues against the public repo).
|
||||
assert "issues" in aua.lower()
|
||||
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* Issues #218 / #219 / #220 (tg12 external audit):
|
||||
*
|
||||
* Every browser-direct call to Wikipedia or Wikidata must send the
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for. These
|
||||
* tests pin that requirement on the shared `lib/wikimediaClient`
|
||||
* helper that WikiImage, NewsFeed, and useRegionDossier all route
|
||||
* through, so a future refactor that drops the header gets a loud
|
||||
* test failure rather than a silent ToS regression.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
WIKIMEDIA_API_USER_AGENT,
|
||||
fetchWikipediaSummary,
|
||||
fetchWikidataSparql,
|
||||
_resetWikimediaClientCacheForTests,
|
||||
} from '@/lib/wikimediaClient';
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
describe('lib/wikimediaClient', () => {
|
||||
beforeEach(() => {
|
||||
_resetWikimediaClientCacheForTests();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('exposes a stable Api-User-Agent identifier with a contact path', () => {
|
||||
expect(WIKIMEDIA_API_USER_AGENT).toContain('Shadowbroker');
|
||||
expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('github.com');
|
||||
expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('issues');
|
||||
});
|
||||
|
||||
it('sends Api-User-Agent on Wikipedia summary fetch', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Boeing 747',
|
||||
description: 'aircraft',
|
||||
extract: 'long extract',
|
||||
thumbnail: { source: 'https://example.org/thumb.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
const summary = await fetchWikipediaSummary('Boeing 747');
|
||||
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
|
||||
expect(calls).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
|
||||
});
|
||||
|
||||
it('sends Api-User-Agent on Wikidata SPARQL fetch', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
results: {
|
||||
bindings: [
|
||||
{
|
||||
leaderLabel: { value: 'Test Leader' },
|
||||
govTypeLabel: { value: 'Test Government' },
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
|
||||
expect(bindings).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
|
||||
expect(headers['Accept']).toBe('application/sparql-results+json');
|
||||
});
|
||||
|
||||
it('shares cache across consecutive callers for the same Wikipedia title', async () => {
|
||||
let fetchCount = 0;
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Eiffel Tower',
|
||||
description: 'iron lattice tower',
|
||||
extract: '...',
|
||||
thumbnail: { source: 'https://example.org/eiffel.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
const a = await fetchWikipediaSummary('Eiffel Tower');
|
||||
const b = await fetchWikipediaSummary('Eiffel Tower');
|
||||
expect(fetchCount).toBe(1);
|
||||
expect(a?.thumbnail).toBe(b?.thumbnail);
|
||||
});
|
||||
|
||||
it('deduplicates concurrent in-flight requests for the same title', async () => {
|
||||
let fetchCount = 0;
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Mount Fuji',
|
||||
description: 'stratovolcano',
|
||||
extract: '...',
|
||||
thumbnail: { source: 'https://example.org/fuji.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
const [a, b, c] = await Promise.all([
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
]);
|
||||
expect(fetchCount).toBe(1);
|
||||
expect(a?.thumbnail).toBe('https://example.org/fuji.jpg');
|
||||
expect(b).toEqual(a);
|
||||
expect(c).toEqual(a);
|
||||
});
|
||||
|
||||
it('returns null on disambiguation pages without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () =>
|
||||
new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }),
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Mercury');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on HTTP error without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('not found', { status: 404 })) as any;
|
||||
const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on network error without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
throw new Error('network down');
|
||||
}) as any;
|
||||
const summary = await fetchWikipediaSummary('Anything');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on empty input', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any;
|
||||
expect(await fetchWikipediaSummary('')).toBeNull();
|
||||
expect(await fetchWikipediaSummary(' ')).toBeNull();
|
||||
expect(globalThis.fetch).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -5,6 +5,7 @@ import { motion, AnimatePresence } from 'framer-motion';
|
||||
import { AlertTriangle, Clock, Minus, Plus, ExternalLink, Brain, Loader2 } from 'lucide-react';
|
||||
import React, { useEffect, useRef, useCallback } from 'react';
|
||||
import WikiImage from '@/components/WikiImage';
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
import type { SelectedEntity, RegionDossier, FimiData } from "@/types/dashboard";
|
||||
import { useDataKeys } from '@/hooks/useDataStore';
|
||||
import { API_BASE } from '@/lib/api';
|
||||
@@ -203,34 +204,37 @@ function resolveAircraftWikiTitle(model: string | undefined): string | null {
|
||||
return AIRCRAFT_WIKI[model] || resolveAcTypeWiki(model);
|
||||
}
|
||||
|
||||
// Module-level cache for Wikipedia thumbnails (persists across re-renders)
|
||||
const _wikiThumbCache: Record<string, { url: string | null; loading: boolean }> = {};
|
||||
|
||||
// Issue #220 (tg12): the previous implementation kept its own
|
||||
// module-local Wikipedia thumbnail cache and issued anonymous fetches
|
||||
// without `Api-User-Agent`. We now delegate to lib/wikimediaClient,
|
||||
// which sends the policy-compliant header and shares one cache with
|
||||
// WikiImage and useRegionDossier.
|
||||
function useAircraftImage(model: string | undefined): { imgUrl: string | null; wikiUrl: string | null; loading: boolean } {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const wikiTitle = resolveAircraftWikiTitle(model) || undefined;
|
||||
const wikiUrl = wikiTitle ? `https://en.wikipedia.org/wiki/${wikiTitle.replace(/ /g, '_')}` : null;
|
||||
|
||||
useEffect(() => {
|
||||
if (!wikiTitle) return;
|
||||
const key = wikiTitle;
|
||||
if (_wikiThumbCache[key]) return; // Already fetched or in-flight
|
||||
_wikiThumbCache[key] = { url: null, loading: true };
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(wikiTitle)}`)
|
||||
.then(r => r.json())
|
||||
.then(d => {
|
||||
_wikiThumbCache[key] = { url: d.thumbnail?.source || null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_wikiThumbCache[key] = { url: null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!wikiTitle) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(wikiTitle).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [wikiTitle]);
|
||||
|
||||
if (!wikiTitle) return { imgUrl: null, wikiUrl: null, loading: false };
|
||||
const cached = _wikiThumbCache[wikiTitle];
|
||||
return { imgUrl: cached?.url || null, wikiUrl, loading: cached?.loading || false };
|
||||
return { imgUrl, wikiUrl, loading };
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
'use client';
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import ExternalImage from '@/components/ExternalImage';
|
||||
|
||||
// Module-level cache: Wikipedia article title → thumbnail URL
|
||||
const _cache: Record<string, { url: string | null; done: boolean }> = {};
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
|
||||
/**
|
||||
* WikiImage — displays a Wikipedia thumbnail for a given article URL.
|
||||
* Uses the Wikipedia REST API with a module-level cache (only fetches once per article).
|
||||
*
|
||||
* Issue #220 (tg12): this component previously had its own
|
||||
* module-local Wikipedia fetch + cache. It now delegates to
|
||||
* `lib/wikimediaClient`, which sends the policy-compliant
|
||||
* `Api-User-Agent` header and shares one cache across every UI
|
||||
* component that asks Wikipedia for an article summary (WikiImage,
|
||||
* NewsFeed, useRegionDossier).
|
||||
*
|
||||
* Props:
|
||||
* wikiUrl: Full Wikipedia URL, e.g. "https://en.wikipedia.org/wiki/Boeing_787_Dreamliner"
|
||||
@@ -26,32 +30,30 @@ export default function WikiImage({
|
||||
maxH?: string;
|
||||
accent?: string;
|
||||
}) {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
// Extract article title from URL
|
||||
const title = wikiUrl.replace(/^https?:\/\/[^/]+\/wiki\//, '');
|
||||
|
||||
useEffect(() => {
|
||||
if (!title || _cache[title]?.done) return;
|
||||
if (_cache[title]) return; // In-flight
|
||||
_cache[title] = { url: null, done: false };
|
||||
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`)
|
||||
.then((r) => r.json())
|
||||
.then((d) => {
|
||||
_cache[title] = { url: d.thumbnail?.source || d.originalimage?.source || null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_cache[title] = { url: null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!title) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(title).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [title]);
|
||||
|
||||
const cached = _cache[title];
|
||||
const imgUrl = cached?.url;
|
||||
const loading = cached && !cached.done;
|
||||
|
||||
return (
|
||||
<div className="pb-2">
|
||||
{loading && (
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { useCallback, useState, useEffect } from 'react';
|
||||
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
|
||||
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
|
||||
|
||||
// ─── CACHE ─────────────────────────────────────────────────────────────────
|
||||
// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL.
|
||||
@@ -114,7 +115,11 @@ async function fetchCountryData(countryCode: string) {
|
||||
return Array.isArray(data) ? data[0] || {} : data || {};
|
||||
}
|
||||
|
||||
/** Fetch head of state + government type from Wikidata SPARQL (direct browser call). */
|
||||
/** Fetch head of state + government type from Wikidata SPARQL.
|
||||
*
|
||||
* Issue #218 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy.
|
||||
*/
|
||||
async function fetchLeader(countryName: string) {
|
||||
if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'");
|
||||
@@ -127,13 +132,11 @@ async function fetchLeader(countryName: string) {
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
||||
} LIMIT 1
|
||||
`;
|
||||
const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(sparql)}&format=json`;
|
||||
const res = await fetch(url, {
|
||||
headers: { Accept: 'application/sparql-results+json' },
|
||||
});
|
||||
if (!res.ok) throw new Error(`Wikidata HTTP ${res.status}`);
|
||||
const results = (await res.json()).results?.bindings || [];
|
||||
if (results.length > 0) {
|
||||
const results = await fetchWikidataSparql<{
|
||||
leaderLabel?: { value: string };
|
||||
govTypeLabel?: { value: string };
|
||||
}>(sparql);
|
||||
if (results && results.length > 0) {
|
||||
return {
|
||||
leader: results[0].leaderLabel?.value || 'Unknown',
|
||||
government_type: results[0].govTypeLabel?.value || 'Unknown',
|
||||
@@ -142,27 +145,25 @@ async function fetchLeader(countryName: string) {
|
||||
return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
}
|
||||
|
||||
/** Fetch Wikipedia summary for a place (direct browser call). */
|
||||
/** Fetch Wikipedia summary for a place.
|
||||
*
|
||||
* Issue #219 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy, AND the
|
||||
* shared cache means consecutive useRegionDossier + WikiImage +
|
||||
* NewsFeed lookups for the same article all hit the same slot.
|
||||
*/
|
||||
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
|
||||
if (!placeName) return {};
|
||||
const candidates = [placeName];
|
||||
if (countryName) candidates.push(`${placeName}, ${countryName}`);
|
||||
|
||||
for (const name of candidates) {
|
||||
try {
|
||||
const slug = encodeURIComponent(name.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) continue;
|
||||
const data = await res.json();
|
||||
if (data.type === 'disambiguation') continue;
|
||||
const summary = await fetchWikipediaSummary(name);
|
||||
if (summary) {
|
||||
return {
|
||||
description: data.description || '',
|
||||
extract: data.extract || '',
|
||||
thumbnail: data.thumbnail?.source || '',
|
||||
description: summary.description,
|
||||
extract: summary.extract,
|
||||
thumbnail: summary.thumbnail,
|
||||
};
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
|
||||
*
|
||||
* Issues #218, #219, #220 (tg12 external audit):
|
||||
*
|
||||
* Wikimedia's User-Agent policy asks API clients to identify themselves
|
||||
* via `Api-User-Agent` when calling from browser JavaScript (because the
|
||||
* browser does not let JS set `User-Agent` directly). Before this
|
||||
* module existed, three independent components issued anonymous browser
|
||||
* fetches against Wikipedia / Wikidata:
|
||||
*
|
||||
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
|
||||
* - WikiImage (Wikipedia REST summary)
|
||||
* - NewsFeed (Wikipedia REST summary)
|
||||
*
|
||||
* Each component shipped its own copy-pasted fetch + module-local cache.
|
||||
* Provider-policy compliance was missing in all three places.
|
||||
*
|
||||
* This module centralizes:
|
||||
*
|
||||
* 1. The `Api-User-Agent` header on every request.
|
||||
* 2. A single LRU cache for Wikipedia summary lookups (keyed by article
|
||||
* title). Multiple components asking for the same article share
|
||||
* one in-flight request and one cache slot.
|
||||
* 3. One predictable kill switch — if Wikimedia ever asks us to back
|
||||
* off, we change `WIKIMEDIA_API_USER_AGENT` here and the whole
|
||||
* frontend updates.
|
||||
*
|
||||
* This does NOT change end-user UX:
|
||||
*
|
||||
* - WikiImage still shows the same thumbnails.
|
||||
* - NewsFeed still shows aircraft thumbnails.
|
||||
* - useRegionDossier still returns the same place summary + leader.
|
||||
*
|
||||
* What changes:
|
||||
*
|
||||
* - Wikimedia can identify our traffic from any other anonymous
|
||||
* browser visitor pool.
|
||||
* - Provider-policy fixes happen here once, not in three places.
|
||||
*/
|
||||
|
||||
// Stable identifier per Wikimedia UA policy. Includes a contact path so
|
||||
// Wikimedia's operators can reach the project if they need to rate-limit
|
||||
// or coordinate. Bump the version when the contact path changes.
|
||||
export const WIKIMEDIA_API_USER_AGENT =
|
||||
'Shadowbroker/1.0 (+https://github.com/BigBodyCobain/Shadowbroker; ' +
|
||||
'report issues at /issues)';
|
||||
|
||||
// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
|
||||
// Keyed by Wikipedia article title (NOT slug — we keep the human-readable
|
||||
// form so debugging the cache is easier). Values track in-flight state
|
||||
// so concurrent callers for the same title share one network request.
|
||||
export interface WikipediaSummary {
|
||||
title: string;
|
||||
description: string;
|
||||
extract: string;
|
||||
thumbnail: string;
|
||||
type: string; // 'standard' | 'disambiguation' | etc.
|
||||
}
|
||||
|
||||
interface CacheEntry {
|
||||
summary: WikipediaSummary | null;
|
||||
inflight: Promise<WikipediaSummary | null> | null;
|
||||
loaded: boolean;
|
||||
}
|
||||
|
||||
const _summaryCache: Map<string, CacheEntry> = new Map();
|
||||
const SUMMARY_CACHE_MAX = 512;
|
||||
|
||||
function evictIfOverCap() {
|
||||
if (_summaryCache.size <= SUMMARY_CACHE_MAX) return;
|
||||
const oldest = _summaryCache.keys().next().value;
|
||||
if (oldest) _summaryCache.delete(oldest);
|
||||
}
|
||||
|
||||
/** Fetch a Wikipedia article summary (titles, NOT URLs).
|
||||
*
|
||||
* Empty / invalid input resolves to `null`. Network errors and disambig
|
||||
* pages also resolve to `null` so callers can render a fallback without
|
||||
* a try/catch. Per the audit's "fail forward, not loud" rule.
|
||||
*/
|
||||
export async function fetchWikipediaSummary(
|
||||
title: string,
|
||||
): Promise<WikipediaSummary | null> {
|
||||
const trimmed = (title || '').trim();
|
||||
if (!trimmed) return null;
|
||||
|
||||
const cached = _summaryCache.get(trimmed);
|
||||
if (cached?.loaded) return cached.summary;
|
||||
if (cached?.inflight) return cached.inflight;
|
||||
|
||||
const slug = encodeURIComponent(trimmed.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
|
||||
const promise = fetch(url, {
|
||||
headers: { 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT },
|
||||
})
|
||||
.then(async (r) => {
|
||||
if (!r.ok) return null;
|
||||
const d = await r.json();
|
||||
if (d?.type === 'disambiguation') return null;
|
||||
const summary: WikipediaSummary = {
|
||||
title: trimmed,
|
||||
description: d?.description || '',
|
||||
extract: d?.extract || '',
|
||||
thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
|
||||
type: d?.type || 'standard',
|
||||
};
|
||||
return summary;
|
||||
})
|
||||
.catch(() => null)
|
||||
.then((summary) => {
|
||||
_summaryCache.set(trimmed, { summary, inflight: null, loaded: true });
|
||||
evictIfOverCap();
|
||||
return summary;
|
||||
});
|
||||
|
||||
_summaryCache.set(trimmed, { summary: null, inflight: promise, loaded: false });
|
||||
evictIfOverCap();
|
||||
return promise;
|
||||
}
|
||||
|
||||
/** Fetch a Wikidata SPARQL query result.
|
||||
*
|
||||
* Returns the parsed JSON `results.bindings` array on success; `null`
|
||||
* (not throwing) on any failure so callers can render fallbacks
|
||||
* silently. Kept as a thin wrapper so the audit-required UA header is
|
||||
* applied in exactly one place.
|
||||
*/
|
||||
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
|
||||
sparql: string,
|
||||
): Promise<T[] | null> {
|
||||
const trimmed = (sparql || '').trim();
|
||||
if (!trimmed) return null;
|
||||
const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(
|
||||
trimmed,
|
||||
)}&format=json`;
|
||||
try {
|
||||
const res = await fetch(url, {
|
||||
headers: {
|
||||
'Api-User-Agent': WIKIMEDIA_API_USER_AGENT,
|
||||
Accept: 'application/sparql-results+json',
|
||||
},
|
||||
});
|
||||
if (!res.ok) return null;
|
||||
const json = await res.json();
|
||||
const bindings = json?.results?.bindings;
|
||||
return Array.isArray(bindings) ? (bindings as T[]) : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Internal: clear the shared cache. Exposed for tests only. */
|
||||
export function _resetWikimediaClientCacheForTests() {
|
||||
_summaryCache.clear();
|
||||
}
|
||||
Reference in New Issue
Block a user