mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 12:58:11 +02:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a26267700d |
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, DEFAULT_USER_AGENT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,6 +15,25 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issue #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path. Bare "python-requests/x.y" or generic strings violate
|
||||
# the policy and risk getting blocked. We send the project default UA
|
||||
# (operator-overridable via SHADOWBROKER_USER_AGENT) on EVERY outbound
|
||||
# Wikimedia request, plus the policy-recommended Api-User-Agent which
|
||||
# Wikimedia explicitly accepts on top of the regular UA.
|
||||
#
|
||||
# This is documented and stable so a Wikimedia operator who wants to
|
||||
# rate-limit or contact us has a fixed identifier to grep for.
|
||||
# Header set attached to outbound Wikimedia requests made by this module.
# "User-Agent" is the project default; "Api-User-Agent" repeats it and appends
# the public repository URL plus an issue-tracker hint as the contact path.
_WIKIMEDIA_REQUEST_HEADERS = dict(
    [
        ("User-Agent", DEFAULT_USER_AGENT),
        (
            "Api-User-Agent",
            "{} ".format(DEFAULT_USER_AGENT)
            + "(+https://github.com/BigBodyCobain/Shadowbroker; "
            + "report issues at /issues)",
        ),
    ]
)
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||
@@ -121,7 +140,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
"""
|
||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=6)
|
||||
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||
# sends the project default, and we also add the Wikimedia-
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -147,7 +172,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
slug = quote(name.replace(" ", "_"))
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=5)
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Issues #218 / #219 (tg12): outbound Wikipedia + Wikidata calls must
|
||||
identify ShadowBroker via the Wikimedia-recommended User-Agent /
|
||||
Api-User-Agent headers.
|
||||
|
||||
Before this fix, ``backend/services/region_dossier.py`` called
|
||||
``fetch_with_curl(url)`` with no explicit headers, falling back to the
|
||||
generic project default UA. That sent a too-anonymous identifier to
|
||||
Wikimedia. Per Wikimedia's policy
|
||||
(https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy)
|
||||
the API caller should send a stable, contactable identifier so Wikimedia
|
||||
operators can rate-limit or reach the project.
|
||||
|
||||
This test does NOT make network calls. It patches ``fetch_with_curl``
|
||||
and asserts the headers that get passed through.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _fake_resp(payload: dict, status: int = 200) -> MagicMock:
|
||||
r = MagicMock()
|
||||
r.status_code = status
|
||||
r.json.return_value = payload
|
||||
return r
|
||||
|
||||
|
||||
def test_wikidata_call_passes_wikimedia_request_headers():
    """The Wikidata leader lookup must hand both identifying headers to
    ``fetch_with_curl``; no network traffic is generated.
    """
    from services import region_dossier

    seen_headers = []

    def fake_fetch(url, **kwargs):
        # Record only the ``headers`` kwarg and answer with an empty
        # SPARQL result set.
        seen_headers.append(kwargs.get("headers"))
        return _fake_resp({"results": {"bindings": []}})

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_wikidata_leader("Testlandia")

    assert seen_headers, "fetch_with_curl was not called"
    first = seen_headers[0] or {}
    for required in ("User-Agent", "Api-User-Agent"):
        assert required in first
    api_ua = first["Api-User-Agent"]
    # Stable identifier should mention the project + a contact path.
    assert any(spelling in api_ua for spelling in ("Shadowbroker", "ShadowBroker"))
    assert "github.com" in api_ua.lower()
|
||||
|
||||
|
||||
def test_wikipedia_summary_call_passes_wikimedia_request_headers():
    """Every Wikipedia REST call issued by the local summary lookup must
    carry both identifying headers; no network traffic is generated.
    """
    from services import region_dossier

    recorded = []
    canned_summary = {
        "type": "standard",
        "description": "test desc",
        "extract": "test extract",
        "thumbnail": {"source": ""},
    }

    def fake_fetch(url, **kwargs):
        # Keep the URL next to the headers so Wikipedia calls can be
        # filtered out afterwards.
        recorded.append((url, kwargs.get("headers")))
        return _fake_resp(canned_summary)

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_local_wiki_summary("Paris", "France")

    # At least one Wikipedia REST call was issued.
    wikipedia_calls = [entry for entry in recorded if "wikipedia.org" in entry[0]]
    assert wikipedia_calls, "no Wikipedia call was issued"
    for url, sent in wikipedia_calls:
        sent = sent or {}
        assert "User-Agent" in sent, f"missing User-Agent on {url}"
        assert "Api-User-Agent" in sent, f"missing Api-User-Agent on {url}"
        assert "github.com" in sent["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikimedia_headers_constant_is_stable():
    """Regression guard on the shared header constant: dropping the project
    name or the contact path from ``Api-User-Agent`` must fail loudly here
    instead of drifting silently out of policy compliance.
    """
    from services.region_dossier import _WIKIMEDIA_REQUEST_HEADERS

    api_ua = _WIKIMEDIA_REQUEST_HEADERS.get("Api-User-Agent", "")
    lowered = api_ua.lower()

    assert "Shadowbroker" in api_ua or "ShadowBroker" in api_ua
    assert "github.com" in lowered
    # Must include a path Wikimedia operators can use to contact us
    # (we use /issues against the public repo).
    assert "issues" in lowered
|
||||
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* Issues #218 / #219 / #220 (tg12 external audit):
|
||||
*
|
||||
* Every browser-direct call to Wikipedia or Wikidata must send the
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for. These
|
||||
* tests pin that requirement on the shared `lib/wikimediaClient`
|
||||
* helper that WikiImage, NewsFeed, and useRegionDossier all route
|
||||
* through, so a future refactor that drops the header gets a loud
|
||||
* test failure rather than a silent ToS regression.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
WIKIMEDIA_API_USER_AGENT,
|
||||
fetchWikipediaSummary,
|
||||
fetchWikidataSparql,
|
||||
_resetWikimediaClientCacheForTests,
|
||||
} from '@/lib/wikimediaClient';
|
||||
|
||||
/** Signature every `fetch` replacement installed by this suite must satisfy. */
type FetchStub = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;

/** Serialize `body` into a JSON `Response` with the given HTTP status. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), { status });
}

/**
 * Replace the global `fetch` with a typed vitest mock.
 *
 * `vi.stubGlobal` is used instead of assigning to `globalThis.fetch` so no
 * `any` cast is needed and `vi.unstubAllGlobals()` can undo it in `afterEach`.
 *
 * @returns the mock, so tests can inspect call counts and arguments.
 */
function stubFetch(impl: FetchStub) {
  const mock = vi.fn(impl);
  vi.stubGlobal('fetch', mock);
  return mock;
}

/** Read one request header from a recorded `init`, whatever shape `headers` took. */
function sentHeader(init: RequestInit | undefined, name: string): string | null {
  return new Headers(init?.headers).get(name);
}

describe('lib/wikimediaClient', () => {
  beforeEach(() => {
    _resetWikimediaClientCacheForTests();
  });

  afterEach(() => {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });

  it('exposes a stable Api-User-Agent identifier with a contact path', () => {
    expect(WIKIMEDIA_API_USER_AGENT).toContain('Shadowbroker');
    expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('github.com');
    expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('issues');
  });

  it('sends Api-User-Agent on Wikipedia summary fetch', async () => {
    const fetchMock = stubFetch(async () =>
      jsonResponse({
        type: 'standard',
        title: 'Boeing 747',
        description: 'aircraft',
        extract: 'long extract',
        thumbnail: { source: 'https://example.org/thumb.jpg' },
      }),
    );

    const summary = await fetchWikipediaSummary('Boeing 747');
    expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const init = fetchMock.mock.calls[0]?.[1];
    expect(sentHeader(init, 'Api-User-Agent')).toBe(WIKIMEDIA_API_USER_AGENT);
  });

  it('sends Api-User-Agent on Wikidata SPARQL fetch', async () => {
    const fetchMock = stubFetch(async () =>
      jsonResponse({
        results: {
          bindings: [
            {
              leaderLabel: { value: 'Test Leader' },
              govTypeLabel: { value: 'Test Government' },
            },
          ],
        },
      }),
    );

    const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
    expect(bindings).toHaveLength(1);
    // Guard the index below: a missing call should fail here, not as a
    // confusing "undefined header" mismatch.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const init = fetchMock.mock.calls[0]?.[1];
    expect(sentHeader(init, 'Api-User-Agent')).toBe(WIKIMEDIA_API_USER_AGENT);
    expect(sentHeader(init, 'Accept')).toBe('application/sparql-results+json');
  });

  it('shares cache across consecutive callers for the same Wikipedia title', async () => {
    const fetchMock = stubFetch(async () =>
      jsonResponse({
        type: 'standard',
        title: 'Eiffel Tower',
        description: 'iron lattice tower',
        extract: '...',
        thumbnail: { source: 'https://example.org/eiffel.jpg' },
      }),
    );

    const a = await fetchWikipediaSummary('Eiffel Tower');
    const b = await fetchWikipediaSummary('Eiffel Tower');
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(a?.thumbnail).toBe(b?.thumbnail);
  });

  it('deduplicates concurrent in-flight requests for the same title', async () => {
    const fetchMock = stubFetch(async () => {
      // Keep the request pending long enough for all three callers to attach.
      await new Promise((resolve) => setTimeout(resolve, 5));
      return jsonResponse({
        type: 'standard',
        title: 'Mount Fuji',
        description: 'stratovolcano',
        extract: '...',
        thumbnail: { source: 'https://example.org/fuji.jpg' },
      });
    });

    const [a, b, c] = await Promise.all([
      fetchWikipediaSummary('Mount Fuji'),
      fetchWikipediaSummary('Mount Fuji'),
      fetchWikipediaSummary('Mount Fuji'),
    ]);
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(a?.thumbnail).toBe('https://example.org/fuji.jpg');
    expect(b).toEqual(a);
    expect(c).toEqual(a);
  });

  it('returns null on disambiguation pages without throwing', async () => {
    stubFetch(async () => jsonResponse({ type: 'disambiguation' }));
    const summary = await fetchWikipediaSummary('Mercury');
    expect(summary).toBeNull();
  });

  it('returns null on HTTP error without throwing', async () => {
    stubFetch(async () => new Response('not found', { status: 404 }));
    const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
    expect(summary).toBeNull();
  });

  it('returns null on network error without throwing', async () => {
    stubFetch(async () => {
      throw new Error('network down');
    });
    const summary = await fetchWikipediaSummary('Anything');
    expect(summary).toBeNull();
  });

  it('returns null on empty input', async () => {
    const fetchMock = stubFetch(async () => new Response('{}', { status: 200 }));
    expect(await fetchWikipediaSummary('')).toBeNull();
    expect(await fetchWikipediaSummary(' ')).toBeNull();
    expect(fetchMock).not.toHaveBeenCalled();
  });
});
|
||||
@@ -5,6 +5,7 @@ import { motion, AnimatePresence } from 'framer-motion';
|
||||
import { AlertTriangle, Clock, Minus, Plus, ExternalLink, Brain, Loader2 } from 'lucide-react';
|
||||
import React, { useEffect, useRef, useCallback } from 'react';
|
||||
import WikiImage from '@/components/WikiImage';
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
import type { SelectedEntity, RegionDossier, FimiData } from "@/types/dashboard";
|
||||
import { useDataKeys } from '@/hooks/useDataStore';
|
||||
import { API_BASE } from '@/lib/api';
|
||||
@@ -203,34 +204,37 @@ function resolveAircraftWikiTitle(model: string | undefined): string | null {
|
||||
return AIRCRAFT_WIKI[model] || resolveAcTypeWiki(model);
|
||||
}
|
||||
|
||||
// Module-level cache for Wikipedia thumbnails (persists across re-renders)
|
||||
const _wikiThumbCache: Record<string, { url: string | null; loading: boolean }> = {};
|
||||
|
||||
// Issue #220 (tg12): the previous implementation kept its own
|
||||
// module-local Wikipedia thumbnail cache and issued anonymous fetches
|
||||
// without `Api-User-Agent`. We now delegate to lib/wikimediaClient,
|
||||
// which sends the policy-compliant header and shares one cache with
|
||||
// WikiImage and useRegionDossier.
|
||||
function useAircraftImage(model: string | undefined): { imgUrl: string | null; wikiUrl: string | null; loading: boolean } {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const wikiTitle = resolveAircraftWikiTitle(model) || undefined;
|
||||
const wikiUrl = wikiTitle ? `https://en.wikipedia.org/wiki/${wikiTitle.replace(/ /g, '_')}` : null;
|
||||
|
||||
useEffect(() => {
|
||||
if (!wikiTitle) return;
|
||||
const key = wikiTitle;
|
||||
if (_wikiThumbCache[key]) return; // Already fetched or in-flight
|
||||
_wikiThumbCache[key] = { url: null, loading: true };
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(wikiTitle)}`)
|
||||
.then(r => r.json())
|
||||
.then(d => {
|
||||
_wikiThumbCache[key] = { url: d.thumbnail?.source || null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_wikiThumbCache[key] = { url: null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!wikiTitle) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(wikiTitle).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [wikiTitle]);
|
||||
|
||||
if (!wikiTitle) return { imgUrl: null, wikiUrl: null, loading: false };
|
||||
const cached = _wikiThumbCache[wikiTitle];
|
||||
return { imgUrl: cached?.url || null, wikiUrl, loading: cached?.loading || false };
|
||||
return { imgUrl, wikiUrl, loading };
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
'use client';
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import ExternalImage from '@/components/ExternalImage';
|
||||
|
||||
// Module-level cache: Wikipedia article title → thumbnail URL
|
||||
const _cache: Record<string, { url: string | null; done: boolean }> = {};
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
|
||||
/**
|
||||
* WikiImage — displays a Wikipedia thumbnail for a given article URL.
|
||||
* Uses the Wikipedia REST API with a module-level cache (only fetches once per article).
|
||||
*
|
||||
* Issue #220 (tg12): this component previously had its own
|
||||
* module-local Wikipedia fetch + cache. It now delegates to
|
||||
* `lib/wikimediaClient`, which sends the policy-compliant
|
||||
* `Api-User-Agent` header and shares one cache across every UI
|
||||
* component that asks Wikipedia for an article summary (WikiImage,
|
||||
* NewsFeed, useRegionDossier).
|
||||
*
|
||||
* Props:
|
||||
* wikiUrl: Full Wikipedia URL, e.g. "https://en.wikipedia.org/wiki/Boeing_787_Dreamliner"
|
||||
@@ -26,32 +30,30 @@ export default function WikiImage({
|
||||
maxH?: string;
|
||||
accent?: string;
|
||||
}) {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
// Extract article title from URL
|
||||
const title = wikiUrl.replace(/^https?:\/\/[^/]+\/wiki\//, '');
|
||||
|
||||
useEffect(() => {
|
||||
if (!title || _cache[title]?.done) return;
|
||||
if (_cache[title]) return; // In-flight
|
||||
_cache[title] = { url: null, done: false };
|
||||
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`)
|
||||
.then((r) => r.json())
|
||||
.then((d) => {
|
||||
_cache[title] = { url: d.thumbnail?.source || d.originalimage?.source || null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_cache[title] = { url: null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!title) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(title).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [title]);
|
||||
|
||||
const cached = _cache[title];
|
||||
const imgUrl = cached?.url;
|
||||
const loading = cached && !cached.done;
|
||||
|
||||
return (
|
||||
<div className="pb-2">
|
||||
{loading && (
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { useCallback, useState, useEffect } from 'react';
|
||||
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
|
||||
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
|
||||
|
||||
// ─── CACHE ─────────────────────────────────────────────────────────────────
|
||||
// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL.
|
||||
@@ -114,7 +115,11 @@ async function fetchCountryData(countryCode: string) {
|
||||
return Array.isArray(data) ? data[0] || {} : data || {};
|
||||
}
|
||||
|
||||
/** Fetch head of state + government type from Wikidata SPARQL (direct browser call). */
|
||||
/** Fetch head of state + government type from Wikidata SPARQL.
|
||||
*
|
||||
* Issue #218 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy.
|
||||
*/
|
||||
async function fetchLeader(countryName: string) {
|
||||
if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'");
|
||||
@@ -127,13 +132,11 @@ async function fetchLeader(countryName: string) {
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
||||
} LIMIT 1
|
||||
`;
|
||||
const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(sparql)}&format=json`;
|
||||
const res = await fetch(url, {
|
||||
headers: { Accept: 'application/sparql-results+json' },
|
||||
});
|
||||
if (!res.ok) throw new Error(`Wikidata HTTP ${res.status}`);
|
||||
const results = (await res.json()).results?.bindings || [];
|
||||
if (results.length > 0) {
|
||||
const results = await fetchWikidataSparql<{
|
||||
leaderLabel?: { value: string };
|
||||
govTypeLabel?: { value: string };
|
||||
}>(sparql);
|
||||
if (results && results.length > 0) {
|
||||
return {
|
||||
leader: results[0].leaderLabel?.value || 'Unknown',
|
||||
government_type: results[0].govTypeLabel?.value || 'Unknown',
|
||||
@@ -142,27 +145,25 @@ async function fetchLeader(countryName: string) {
|
||||
return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
}
|
||||
|
||||
/** Fetch Wikipedia summary for a place (direct browser call). */
|
||||
/** Fetch Wikipedia summary for a place.
|
||||
*
|
||||
* Issue #219 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy, AND the
|
||||
* shared cache means consecutive useRegionDossier + WikiImage +
|
||||
* NewsFeed lookups for the same article all hit the same slot.
|
||||
*/
|
||||
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
|
||||
if (!placeName) return {};
|
||||
const candidates = [placeName];
|
||||
if (countryName) candidates.push(`${placeName}, ${countryName}`);
|
||||
|
||||
for (const name of candidates) {
|
||||
try {
|
||||
const slug = encodeURIComponent(name.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) continue;
|
||||
const data = await res.json();
|
||||
if (data.type === 'disambiguation') continue;
|
||||
const summary = await fetchWikipediaSummary(name);
|
||||
if (summary) {
|
||||
return {
|
||||
description: data.description || '',
|
||||
extract: data.extract || '',
|
||||
thumbnail: data.thumbnail?.source || '',
|
||||
description: summary.description,
|
||||
extract: summary.extract,
|
||||
thumbnail: summary.thumbnail,
|
||||
};
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
|
||||
*
|
||||
* Issues #218, #219, #220 (tg12 external audit):
|
||||
*
|
||||
* Wikimedia's User-Agent policy asks API clients to identify themselves
|
||||
* via `Api-User-Agent` when calling from browser JavaScript (because the
|
||||
* browser does not let JS set `User-Agent` directly). Before this
|
||||
* module existed, three independent components issued anonymous browser
|
||||
* fetches against Wikipedia / Wikidata:
|
||||
*
|
||||
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
|
||||
* - WikiImage (Wikipedia REST summary)
|
||||
* - NewsFeed (Wikipedia REST summary)
|
||||
*
|
||||
* Each component shipped its own copy-pasted fetch + module-local cache.
|
||||
* Provider-policy compliance was missing in all three places.
|
||||
*
|
||||
* This module centralizes:
|
||||
*
|
||||
* 1. The `Api-User-Agent` header on every request.
|
||||
* 2. A single LRU cache for Wikipedia summary lookups (keyed by article
|
||||
* title). Multiple components asking for the same article share
|
||||
* one in-flight request and one cache slot.
|
||||
* 3. One predictable kill switch — if Wikimedia ever asks us to back
|
||||
* off, we change `WIKIMEDIA_API_USER_AGENT` here and the whole
|
||||
* frontend updates.
|
||||
*
|
||||
* This does NOT change end-user UX:
|
||||
*
|
||||
* - WikiImage still shows the same thumbnails.
|
||||
* - NewsFeed still shows aircraft thumbnails.
|
||||
* - useRegionDossier still returns the same place summary + leader.
|
||||
*
|
||||
* What changes:
|
||||
*
|
||||
* - Wikimedia can distinguish our traffic from the rest of the
|
||||
* anonymous browser visitor pool.
|
||||
* - Provider-policy fixes happen here once, not in three places.
|
||||
*/
|
||||
|
||||
// Stable identifier per Wikimedia UA policy. Includes a contact path so
|
||||
// Wikimedia's operators can reach the project if they need to rate-limit
|
||||
// or coordinate. Bump the version when the contact path changes.
|
||||
export const WIKIMEDIA_API_USER_AGENT =
  'Shadowbroker/1.0 (+https://github.com/BigBodyCobain/Shadowbroker; ' +
  'report issues at /issues)';

// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
// Keyed by Wikipedia article title (NOT slug — we keep the human-readable
// form so debugging the cache is easier). Values track in-flight state
// so concurrent callers for the same title share one network request.

/** Normalized subset of a Wikipedia REST `page/summary` response. */
export interface WikipediaSummary {
  /** The (trimmed) title the caller asked for. */
  title: string;
  description: string;
  extract: string;
  /** Thumbnail URL, falling back to the original image URL; `''` when neither exists. */
  thumbnail: string;
  type: string; // 'standard' | 'disambiguation' | etc.
}

/** One cache slot: either a settled result (`loaded`) or a pending request (`inflight`). */
interface CacheEntry {
  summary: WikipediaSummary | null;
  inflight: Promise<WikipediaSummary | null> | null;
  loaded: boolean;
}

/** Result of one network attempt plus whether it may be remembered. */
interface SummaryOutcome {
  summary: WikipediaSummary | null;
  cacheable: boolean;
}

// A Map iterates in insertion order, so the first key is always the least
// recently used one as long as hits re-insert their entry.
const _summaryCache: Map<string, CacheEntry> = new Map();
const SUMMARY_CACHE_MAX = 512;

/** Drop least-recently-used entries until the cache is back within its cap. */
function evictIfOverCap() {
  while (_summaryCache.size > SUMMARY_CACHE_MAX) {
    const oldest = _summaryCache.keys().next().value;
    // Compare against undefined explicitly; a truthiness check on a key is fragile.
    if (oldest === undefined) return;
    _summaryCache.delete(oldest);
  }
}

/**
 * Perform the HTTP request for one article and classify the result.
 *
 * Never rejects. Anything thrown while fetching or parsing, and any 5xx
 * status, is reported as non-cacheable so a later call can retry. Other
 * non-OK statuses and disambiguation pages are reported as cacheable `null`.
 */
async function requestSummary(url: string, title: string): Promise<SummaryOutcome> {
  try {
    const r = await fetch(url, {
      headers: { 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT },
    });
    if (!r.ok) return { summary: null, cacheable: r.status < 500 };
    const d = await r.json();
    if (d?.type === 'disambiguation') return { summary: null, cacheable: true };
    return {
      summary: {
        title,
        description: d?.description || '',
        extract: d?.extract || '',
        thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
        type: d?.type || 'standard',
      },
      cacheable: true,
    };
  } catch {
    return { summary: null, cacheable: false };
  }
}

/** Fetch a Wikipedia article summary (titles, NOT URLs).
 *
 * Empty / invalid input resolves to `null`. Network errors, HTTP errors and
 * disambiguation pages also resolve to `null`, so callers can render a
 * fallback without a try/catch.
 *
 * Caching:
 *  - Settled results are kept in a shared LRU of `SUMMARY_CACHE_MAX` titles;
 *    a cache hit refreshes the entry's recency.
 *  - Concurrent callers for the same title share one in-flight request.
 *  - Thrown errors and 5xx responses are NOT cached, so a transient outage
 *    does not blank a title for the rest of the session.
 *
 * @param title Article title; surrounding whitespace is ignored.
 * @returns The normalized summary, or `null` when none is available.
 */
export async function fetchWikipediaSummary(
  title: string,
): Promise<WikipediaSummary | null> {
  const trimmed = (title || '').trim();
  if (!trimmed) return null;

  const cached = _summaryCache.get(trimmed);
  if (cached?.loaded) {
    // Re-insert so this title becomes the most recently used entry.
    _summaryCache.delete(trimmed);
    _summaryCache.set(trimmed, cached);
    return cached.summary;
  }
  if (cached?.inflight) return cached.inflight;

  const slug = encodeURIComponent(trimmed.replace(/ /g, '_'));
  const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;

  const pending: Promise<WikipediaSummary | null> = requestSummary(url, trimmed).then(
    ({ summary, cacheable }) => {
      if (cacheable) {
        _summaryCache.delete(trimmed);
        _summaryCache.set(trimmed, { summary, inflight: null, loaded: true });
        evictIfOverCap();
      } else if (_summaryCache.get(trimmed)?.inflight === pending) {
        // Transient failure: release the slot, but only if it is still ours.
        _summaryCache.delete(trimmed);
      }
      return summary;
    },
  );

  _summaryCache.set(trimmed, { summary: null, inflight: pending, loaded: false });
  evictIfOverCap();
  return pending;
}
|
||||
|
||||
/** Fetch a Wikidata SPARQL query result.
|
||||
*
|
||||
* Returns the parsed JSON `results.bindings` array on success; `null`
|
||||
* (not throwing) on any failure so callers can render fallbacks
|
||||
* silently. Kept as a thin wrapper so the audit-required UA header is
|
||||
* applied in exactly one place.
|
||||
*/
|
||||
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
  sparql: string,
): Promise<T[] | null> {
  // Blank or missing queries are rejected before any request is built.
  const query = (sparql || '').trim();
  if (!query) return null;

  const requestUrl = `https://query.wikidata.org/sparql?query=${encodeURIComponent(
    query,
  )}&format=json`;
  const requestInit = {
    headers: {
      'Api-User-Agent': WIKIMEDIA_API_USER_AGENT,
      Accept: 'application/sparql-results+json',
    },
  };

  try {
    const response = await fetch(requestUrl, requestInit);
    if (response.ok) {
      const payload = await response.json();
      const rows = payload?.results?.bindings;
      // Only an actual array counts as a result set.
      if (Array.isArray(rows)) return rows as T[];
    }
  } catch {
    // Fetch and parse failures fall through to the shared null result.
  }
  return null;
}
|
||||
|
||||
/** Internal: clear the shared cache. Exposed for tests only. */
|
||||
export function _resetWikimediaClientCacheForTests(): void {
  // Forget every stored and in-flight summary so each test starts cold.
  _summaryCache.clear();
}
|
||||
Reference in New Issue
Block a user