Files
Shadowbroker/frontend/src/hooks/useRegionDossier.ts
T
Shadowbroker 0fee36e8f7 Fix #218/#219/#220: identify ShadowBroker on Wikipedia + Wikidata calls (#284)
Wikimedia's User-Agent policy asks API clients to identify themselves
with a stable, contactable identifier so their operators can rate-limit
or coordinate. Before this change, ShadowBroker was sending:

- Backend (region_dossier.py): generic project default UA only; no
  Api-User-Agent.
- Frontend (useRegionDossier.ts, WikiImage.tsx, NewsFeed.tsx): zero
  identifying header at all; three separate copy-pasted anonymous
  fetches with their own module-local caches.

Three separate components doing the same broken thing meant policy
fixes had to happen in three places, with no shared cache or kill
switch.

Fix (no UX change, zero hostility):

== Backend ==

`backend/services/region_dossier.py` now sets explicit `User-Agent` +
`Api-User-Agent` headers on every outbound Wikidata and Wikipedia
request via a new `_WIKIMEDIA_REQUEST_HEADERS` constant. The identifier
includes a contact path (issues page on the public GitHub repo).

== Frontend ==

New shared helper `frontend/src/lib/wikimediaClient.ts`:
- `fetchWikipediaSummary(title)` — single source of truth for Wikipedia
  REST summary lookups, with one shared LRU cache (in-flight requests
  deduplicated, 512-entry cap), `Api-User-Agent` on every fetch.
- `fetchWikidataSparql(query)` — same shape for Wikidata SPARQL.
- `WIKIMEDIA_API_USER_AGENT` — exported constant; one place to update
  if Wikimedia ever asks us to back off.

Refactored three components to use the shared client:
- `frontend/src/hooks/useRegionDossier.ts` — fetchLeader() and
  fetchLocalWikiSummary() now route through the shared helpers.
- `frontend/src/components/WikiImage.tsx` — uses fetchWikipediaSummary,
  proper React state instead of module-mutation + forceUpdate trick.
- `frontend/src/components/NewsFeed.tsx` — same shape.

UX: byte-for-byte identical. Same thumbnails, same dossier content,
same load behavior. The only observable difference is the outgoing
request header.

Note on #239 (route duplication): an audit-grade inventory shows 166
main.py routes are shadowed by router modules. That cleanup is too
large to land safely in this PR; it will be staged as a separate
ladder of small PRs grouped by router module.

Tests:
- `backend/tests/test_region_dossier_wikimedia_ua.py` — 3 tests
  asserting backend headers are present.
- `frontend/src/__tests__/utils/wikimediaClient.test.ts` — 9 tests
  covering Api-User-Agent presence, shared cache, concurrent
  deduplication, disambiguation/HTTP-error/network-error fallthroughs,
  empty-input safety.

Local: backend 76/76 security suite green, frontend 716/716 vitest
suite green.

Credit: tg12 (external security audit).
2026-05-21 10:48:05 -06:00

418 lines
16 KiB
TypeScript

import { useCallback, useState, useEffect } from 'react';
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
// ─── CACHE ─────────────────────────────────────────────────────────────────
// Module-level, in-memory dossier store. Entries are keyed by the clicked
// coordinate rounded to one decimal place (0.1° ≈ 11km grid) and carry the
// timestamp they were written at so readers can enforce the 24h TTL.
const _dossierCache: Map<string, { data: RegionDossier; ts: number }> = new Map();
const CACHE_TTL = 24 * 60 * 60 * 1000; // 24 hours in ms
/**
 * Look up a cached dossier for the 0.1° grid cell containing (lat, lng).
 *
 * Returns the stored dossier while it is younger than CACHE_TTL. An expired
 * entry is removed from the cache and reported as a miss (null).
 */
function getCached(lat: number, lng: number): RegionDossier | null {
  const gridLat = Math.round(lat * 10) / 10;
  const gridLng = Math.round(lng * 10) / 10;
  const cacheKey = `${gridLat}_${gridLng}`;

  const hit = _dossierCache.get(cacheKey);
  if (!hit) return null;

  const ageMs = Date.now() - hit.ts;
  if (ageMs < CACHE_TTL) return hit.data;

  // Expired: drop the entry so it no longer occupies a cache slot.
  _dossierCache.delete(cacheKey);
  return null;
}
/**
 * Store a dossier under the 0.1° grid cell containing (lat, lng), stamped
 * with the current time. When the cache grows past 500 entries, the entry
 * at the front of the Map (earliest inserted) is evicted.
 */
function setCache(lat: number, lng: number, data: RegionDossier) {
  const gridCell = [lat, lng].map((deg) => Math.round(deg * 10) / 10).join('_');
  _dossierCache.set(gridCell, { data, ts: Date.now() });

  if (_dossierCache.size <= 500) return;

  // Map iteration follows insertion order, so the first key is the oldest.
  const [firstKey] = _dossierCache.keys();
  if (firstKey) _dossierCache.delete(firstKey);
}
// ─── ESRI WORLD IMAGERY FALLBACK ───────────────────────────────────────────
/**
 * Build a placeholder "sentinel2" payload backed by Esri World Imagery.
 *
 * The requested box spans ±0.18° of latitude and ±0.24° of longitude around
 * the point and is exported at two sizes (640×360 thumbnail, 1600×900 full
 * resolution). No network call is made here — only the URLs are assembled.
 */
function buildLocalSentinelFallback(lat: number, lng: number) {
  const halfHeightDeg = 0.18;
  const halfWidthDeg = 0.24;
  // [west, south, east, north], matching the bboxSR=4326 query parameter.
  const extent = [
    lng - halfWidthDeg,
    lat - halfHeightDeg,
    lng + halfWidthDeg,
    lat + halfHeightDeg,
  ];
  const exportEndpoint =
    'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/export';
  const exportUrl = (size: string) =>
    `${exportEndpoint}?bbox=${extent.join(',')}&bboxSR=4326&imageSR=4326&size=${size}&format=png32&f=image`;

  return {
    found: true,
    scene_id: null,
    datetime: null,
    cloud_cover: null,
    thumbnail_url: exportUrl('640,360'),
    fullres_url: exportUrl('1600,900'),
    bbox: extent,
    platform: 'Esri World Imagery',
    fallback: true,
    message: 'Using local imagery fallback while live satellite search completes.',
  };
}
/**
 * Build the placeholder dossier shown while live enrichment is pending or
 * after it has failed.
 *
 * Country and local sections are filled with fixed "unknown" values; the
 * location label is the coordinate pair formatted to four decimals.
 *
 * @param error Optional warning text; when omitted or empty, a generic
 *   fallback warning is used instead.
 */
function buildLimitedDossier(lat: number, lng: number, error?: string): RegionDossier {
  const coordinateLabel = [lat, lng].map((deg) => deg.toFixed(4)).join(', ');

  const placeholderCountry = {
    name: 'LIMITED INTEL',
    official_name: '',
    leader: 'Unknown',
    government_type: 'Unavailable',
    population: 0,
    capital: 'Unknown',
    languages: [],
    currencies: [],
    region: '',
    subregion: '',
    area_km2: 0,
    flag_emoji: '',
  };

  const placeholderLocal = {
    name: 'Selected coordinates',
    state: '',
    description: 'Fallback dossier',
    summary:
      'Live region enrichment is currently unavailable or slow. Local coordinates and fallback imagery are still available.',
    thumbnail: '',
  };

  const warning = error ? error : 'Region dossier is using local fallback data.';

  return {
    lat,
    lng,
    coordinates: { lat, lng },
    location: { display_name: coordinateLabel },
    country: placeholderCountry,
    local: placeholderLocal,
    warning,
  } as RegionDossier;
}
// ─── BROWSER-DIRECT API CALLS ──────────────────────────────────────────────
// All external APIs below support CORS — no backend proxy needed.
/**
 * Reverse geocode a coordinate via Nominatim (direct browser call).
 *
 * Resolves to a flat record (`city`, `state`, `country`, `country_code`,
 * `display_name`); any field missing from the response becomes ''. The city
 * falls back through town → village → county, and the country code is
 * upper-cased. Throws when Nominatim answers with a non-OK HTTP status.
 *
 * NOTE(review): the request sets a `User-Agent` header from script; browsers
 * may ignore or restrict that header — confirm the identifier is actually sent.
 */
async function reverseGeocode(lat: number, lng: number) {
  const endpoint = `https://nominatim.openstreetmap.org/reverse?lat=${lat}&lon=${lng}&format=json&zoom=10&addressdetails=1&accept-language=en`;
  const response = await fetch(endpoint, {
    headers: { 'User-Agent': 'ShadowBroker-OSINT/1.0 (live-risk-dashboard)' },
  });
  if (!response.ok) {
    throw new Error(`Nominatim HTTP ${response.status}`);
  }

  const payload = await response.json();
  const address = payload.address || {};

  const city = address.city || address.town || address.village || address.county || '';
  const state = address.state || address.region || '';
  const country = address.country || '';
  const country_code = (address.country_code || '').toUpperCase();
  const display_name = payload.display_name || '';

  return { city, state, country, country_code, display_name };
}
/**
 * Fetch country facts from RestCountries (direct browser call).
 *
 * Returns `{}` without issuing a request when `countryCode` is empty. The
 * response may be either an array or a single object; in the array case the
 * first element is used. Throws on a non-OK HTTP status.
 */
async function fetchCountryData(countryCode: string) {
  if (!countryCode) return {};

  const requestedFields = [
    'name',
    'population',
    'capital',
    'languages',
    'region',
    'subregion',
    'area',
    'currencies',
    'borders',
    'flag',
  ];
  const endpoint = `https://restcountries.com/v3.1/alpha/${countryCode}?fields=${requestedFields.join(',')}`;

  const response = await fetch(endpoint);
  if (!response.ok) {
    throw new Error(`RestCountries HTTP ${response.status}`);
  }

  const body = await response.json();
  if (Array.isArray(body)) {
    return body[0] || {};
  }
  return body || {};
}
/**
 * Escape a value for embedding inside a double-quoted SPARQL string literal.
 *
 * Backslashes are escaped first so that the escapes added afterwards are not
 * themselves re-interpreted; raw line breaks are not permitted inside a
 * short string literal, so LF/CR are emitted as `\n` / `\r`.
 */
function escapeSparqlLiteral(value: string): string {
  return value
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/'/g, "\\'")
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
}

/** Fetch head of state + government type from Wikidata SPARQL.
 *
 * Issue #218 (tg12): routes through lib/wikimediaClient so the
 * Api-User-Agent header is set per Wikimedia's UA policy.
 *
 * The country name comes from an external geocoder, so it is escaped before
 * being spliced into the query text. An empty name, an empty/absent result
 * set, or a missing label each yield 'Unknown' for the affected field.
 *
 * @param countryName English country label to match against `rdfs:label`.
 * @returns The leader and government-type labels from the first result row.
 */
async function fetchLeader(
  countryName: string,
): Promise<{ leader: string; government_type: string }> {
  if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' };
  const safeName = escapeSparqlLiteral(countryName);
  const sparql = `
    SELECT ?leaderLabel ?govTypeLabel WHERE {
      ?country wdt:P31 wd:Q6256 ;
               rdfs:label "${safeName}"@en .
      OPTIONAL { ?country wdt:P35 ?leader . }
      OPTIONAL { ?country wdt:P122 ?govType . }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    } LIMIT 1
  `;
  const results = await fetchWikidataSparql<{
    leaderLabel?: { value: string };
    govTypeLabel?: { value: string };
  }>(sparql);
  if (results && results.length > 0) {
    return {
      leader: results[0].leaderLabel?.value || 'Unknown',
      government_type: results[0].govTypeLabel?.value || 'Unknown',
    };
  }
  return { leader: 'Unknown', government_type: 'Unknown' };
}
/** Fetch Wikipedia summary for a place.
 *
 * Issue #219 (tg12): lookups go through lib/wikimediaClient, which sets the
 * Api-User-Agent header per Wikimedia's UA policy and shares one cache
 * across useRegionDossier, WikiImage and NewsFeed.
 *
 * The bare place name is tried first; only if that yields nothing (and a
 * country name was supplied) is "<place>, <country>" tried. Resolves to
 * `{ description, extract, thumbnail }` from the first hit, or `{}` when
 * the place name is empty or neither title produced a summary.
 */
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
  if (!placeName) return {};

  // Look up one title and project the summary down to the fields we keep.
  const lookup = async (title: string) => {
    const article = await fetchWikipediaSummary(title);
    if (!article) return null;
    return {
      description: article.description,
      extract: article.extract,
      thumbnail: article.thumbnail,
    };
  };

  const plainHit = await lookup(placeName);
  if (plainHit) return plainHit;

  if (countryName) {
    const qualifiedHit = await lookup(`${placeName}, ${countryName}`);
    if (qualifiedHit) return qualifiedHit;
  }

  return {};
}
/**
 * Search for Sentinel-2 imagery via Microsoft Planetary Computer STAC
 * (direct browser call).
 *
 * Asks for up to 3 `sentinel-2-l2a` items intersecting the point, from the
 * last 30 days, newest first, with cloud cover below 30. Resolves to the
 * first scene's fields plus a `scenes` array of every returned scene, or
 * `null` when the search returns no features (the caller then keeps its
 * Esri fallback). Throws on a non-OK HTTP status.
 */
async function fetchSentinel2Direct(lat: number, lng: number) {
  const windowEnd = new Date();
  const windowStart = new Date(windowEnd.getTime() - 30 * 24 * 60 * 60 * 1000);

  const response = await fetch('https://planetarycomputer.microsoft.com/api/stac/v1/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      collections: ['sentinel-2-l2a'],
      intersects: { type: 'Point', coordinates: [lng, lat] },
      datetime: `${windowStart.toISOString()}/${windowEnd.toISOString()}`,
      sortby: [{ field: 'datetime', direction: 'desc' }],
      limit: 3,
      query: { 'eo:cloud_cover': { lt: 30 } },
    }),
  });
  if (!response.ok) {
    throw new Error(`Planetary Computer HTTP ${response.status}`);
  }

  const searchResult = await response.json();
  const stacItems = searchResult.features || [];
  if (!stacItems.length) return null; // No scenes — caller uses Esri fallback

  // Flatten one STAC item into the scene shape the dossier consumes. The
  // thumbnail and rendered preview each stand in for the other when absent.
  const toScene = (item: any) => {
    const assets = item.assets || {};
    const previewHref = (assets.rendered_preview || {}).href;
    const thumbHref = (assets.thumbnail || {}).href;
    const props = item.properties;
    return {
      found: true,
      scene_id: item.id,
      datetime: props?.datetime,
      cloud_cover: props?.['eo:cloud_cover'],
      thumbnail_url: thumbHref || previewHref,
      fullres_url: previewHref || thumbHref,
      bbox: item.bbox ? [...item.bbox] : null,
      platform: props?.platform || 'Sentinel-2',
    };
  };

  const scenes = stacItems.map(toScene);
  return { ...scenes[0], scenes };
}
// ─── MAIN HOOK ─────────────────────────────────────────────────────────────
/**
 * React hook that assembles a "region dossier" for a right-clicked map point.
 *
 * `handleMapRightClick` selects a `region_dossier` entity, serves a cached
 * dossier when one exists for the grid cell, and otherwise shows a limited
 * placeholder (with Esri imagery) while it runs two parallel lookup phases:
 *   1. reverse geocode + Sentinel-2 search,
 *   2. country facts + leader + local/country Wikipedia summaries.
 * Individual lookup failures are logged and degrade to defaults rather than
 * failing the whole dossier.
 *
 * Only the most recent request may update state: results from an older
 * right-click, or from a request still in flight after the selection moved
 * to another entity type, are discarded instead of overwriting the view.
 *
 * @param selectedEntity Current selection; when its type is not
 *   `region_dossier`, the dossier and loading flag are cleared.
 * @param setSelectedEntity Setter used to select the region-dossier entity.
 * @returns The current dossier, its loading flag, and the right-click handler.
 */
export function useRegionDossier(
  selectedEntity: SelectedEntity | null,
  setSelectedEntity: (entity: SelectedEntity | null) => void,
) {
  const [regionDossier, setRegionDossier] = useState<RegionDossier | null>(null);
  const [regionDossierLoading, setRegionDossierLoading] = useState(false);
  // Ref-style mutable box, created once and stable across renders. It holds
  // the id of the newest request so that superseded requests can tell they
  // are stale. Mutating it never triggers a re-render.
  const [requestTracker] = useState(() => ({ latest: 0 }));

  const handleMapRightClick = useCallback(
    async (coords: { lat: number; lng: number }) => {
      const { lat, lng } = coords;
      const esriFallback = buildLocalSentinelFallback(lat, lng);

      // Claim a fresh request id; any earlier in-flight request becomes stale.
      requestTracker.latest += 1;
      const requestId = requestTracker.latest;
      const isCurrent = () => requestTracker.latest === requestId;

      setSelectedEntity({
        type: 'region_dossier',
        id: `${lat.toFixed(4)}_${lng.toFixed(4)}`,
        extra: coords,
      });
      setRegionDossierLoading(true);

      // Check cache first
      const cached = getCached(lat, lng);
      if (cached) {
        setRegionDossier(cached);
        setRegionDossierLoading(false);
        return;
      }

      // Show fallback immediately while API calls are in flight
      setRegionDossier({
        ...buildLimitedDossier(lat, lng),
        sentinel2: esriFallback,
      });

      try {
        // ── Phase 1: Geocode + Sentinel-2 in parallel ──────────────────
        const [geoResult, sentinelResult] = await Promise.allSettled([
          reverseGeocode(lat, lng),
          fetchSentinel2Direct(lat, lng),
        ]);

        // A newer click (or a different selection) superseded this request:
        // stop here so no further API calls are spent on a stale dossier.
        if (!isCurrent()) return;

        // Parse geocode
        let geo = { city: '', state: '', country: '', country_code: '', display_name: '' };
        if (geoResult.status === 'fulfilled') {
          geo = geoResult.value;
        } else {
          console.warn('[Dossier] Reverse geocode failed:', geoResult.reason);
        }

        // Parse sentinel
        let sentinel2: Record<string, unknown> = esriFallback;
        if (sentinelResult.status === 'fulfilled' && sentinelResult.value) {
          sentinel2 = sentinelResult.value;
        } else if (sentinelResult.status === 'rejected') {
          console.warn('[Dossier] Sentinel-2 search failed:', sentinelResult.reason);
        }
        // sentinelResult fulfilled but null → no scenes found, keep Esri fallback

        // If no country found (ocean, uninhabited), show limited dossier
        if (!geo.country) {
          const result: RegionDossier = {
            lat,
            lng,
            coordinates: { lat, lng },
            location: geo.display_name
              ? geo
              : { display_name: `${lat.toFixed(4)}, ${lng.toFixed(4)}` },
            country: null,
            local: null,
            error: 'No country data — possibly international waters or uninhabited area',
            sentinel2,
          } as RegionDossier;
          setRegionDossier(result);
          // Only cache a genuine "no country" answer. If the geocoder itself
          // failed, caching would pin a transient outage for the whole TTL.
          if (geoResult.status === 'fulfilled') {
            setCache(lat, lng, result);
          }
          return; // loading flag is cleared in `finally`
        }

        // ── Phase 2: Country + Leader + Wiki in parallel ───────────────
        const [countryResult, leaderResult, localWikiResult, countryWikiResult] =
          await Promise.allSettled([
            fetchCountryData(geo.country_code),
            fetchLeader(geo.country),
            fetchLocalWikiSummary(geo.city || geo.state, geo.country),
            fetchLocalWikiSummary(geo.country, ''),
          ]);

        // Parse country data
        let countryData: Record<string, unknown> = {};
        if (countryResult.status === 'fulfilled') {
          countryData = countryResult.value as Record<string, unknown>;
        } else {
          console.warn('[Dossier] Country data failed:', countryResult.reason);
        }

        // Parse leader data
        let leaderData = { leader: 'Unknown', government_type: 'Unknown' };
        if (leaderResult.status === 'fulfilled') {
          leaderData = leaderResult.value;
        } else {
          console.warn('[Dossier] Leader data failed:', leaderResult.reason);
        }

        // Parse local wiki
        let localData: Record<string, string> = {};
        if (localWikiResult.status === 'fulfilled') {
          localData = localWikiResult.value as Record<string, string>;
        } else {
          console.warn('[Dossier] Local wiki failed:', localWikiResult.reason);
        }

        // If no local data, try country wiki summary
        if (!localData.extract && countryWikiResult.status === 'fulfilled') {
          const cw = countryWikiResult.value as Record<string, string>;
          if (cw.extract) localData = cw;
        }

        // Build languages list
        const languages = countryData.languages as Record<string, string> | undefined;
        const langList = languages ? Object.values(languages) : [];

        // Build currencies list
        const currencies = countryData.currencies as
          | Record<string, { name: string; symbol?: string }>
          | undefined;
        const currencyList: string[] = [];
        if (currencies) {
          for (const v of Object.values(currencies)) {
            if (v && typeof v === 'object') {
              const sym = v.symbol || '';
              const nm = v.name || '';
              currencyList.push(sym ? `${nm} (${sym})` : nm);
            }
          }
        }

        const nameData = countryData.name as
          | { common?: string; official?: string }
          | undefined;
        const capitalData = countryData.capital as string[] | undefined;

        // ── Assemble final dossier (exact same shape as backend) ───────
        const result: RegionDossier = {
          lat,
          lng,
          coordinates: { lat, lng },
          location: {
            city: geo.city,
            state: geo.state,
            country: geo.country,
            country_code: geo.country_code,
            display_name: geo.display_name,
          },
          country: {
            name: nameData?.common || geo.country,
            official_name: nameData?.official || '',
            leader: leaderData.leader,
            government_type: leaderData.government_type,
            population: (countryData.population as number) || 0,
            capital: capitalData?.length ? capitalData[0] : 'Unknown',
            languages: langList,
            currencies: currencyList,
            region: (countryData.region as string) || '',
            subregion: (countryData.subregion as string) || '',
            area_km2: (countryData.area as number) || 0,
            flag_emoji: (countryData.flag as string) || '',
          },
          local: {
            name: geo.city,
            state: geo.state,
            description: localData.description || '',
            summary: localData.extract || '',
            thumbnail: localData.thumbnail || '',
          },
          sentinel2,
        } as RegionDossier;

        // The dossier is valid for its grid cell even if the view has moved
        // on, so cache it regardless; only touch state when still current.
        setCache(lat, lng, result);
        if (isCurrent()) setRegionDossier(result);
      } catch (e) {
        console.error('[Dossier] Unexpected error:', e);
        if (isCurrent()) {
          setRegionDossier({
            ...buildLimitedDossier(lat, lng, 'Region dossier request failed unexpectedly'),
            sentinel2: esriFallback,
          });
        }
      } finally {
        // A stale request must not clear the spinner of the request that
        // replaced it.
        if (isCurrent()) setRegionDossierLoading(false);
      }
    },
    [setSelectedEntity, requestTracker],
  );

  // Clear dossier when selecting a different entity type
  useEffect(() => {
    if (selectedEntity?.type !== 'region_dossier') {
      // Invalidate any in-flight request so it cannot repopulate the dossier.
      requestTracker.latest += 1;
      setRegionDossier(null);
      setRegionDossierLoading(false);
    }
  }, [selectedEntity, requestTracker]);

  return { regionDossier, regionDossierLoading, handleMapRightClick };
}