Route dossier, geocode, and Wikimedia through the backend (#351, #352, #360)

Proxy region dossier, Sentinel search, Wikipedia, and Wikidata via self-hosted
APIs; remove the LocateBar client-side Nominatim fallback; migrate legacy
`shadow-` operator handles to the `operator-` prefix.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
BigBodyCobain
2026-06-02 15:20:44 -06:00
parent c3dd95f6a9
commit f08781bdc9
9 changed files with 250 additions and 622 deletions
+33
View File
@@ -85,6 +85,39 @@ async def api_geocode_reverse(
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ───
@router.get("/api/wikipedia/summary")
@limiter.limit("60/minute")
def api_wikipedia_summary(
    request: Request,
    title: str = Query(..., min_length=1, max_length=256),
):
    """Proxy Wikipedia REST summaries through the self-hosted backend."""
    # Resolved at call time rather than at module import time.
    from services.region_dossier import fetch_wikipedia_page_summary

    payload = fetch_wikipedia_page_summary(title)
    if payload is not None:
        return payload
    # The service layer returns None when it has no summary for this title.
    return JSONResponse(status_code=404, content={"detail": "not_found"})
# Request body for POST /api/wikidata/sparql.
class WikidataSparqlRequest(BaseModel):
    # Raw SPARQL text; the route handler strips it and rejects anything
    # longer than 12,000 characters.
    query: str
@router.post("/api/wikidata/sparql")
@limiter.limit("30/minute")
def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest):
    """Proxy Wikidata SPARQL so the browser never contacts query.wikidata.org."""
    # Resolved at call time rather than at module import time.
    from services.region_dossier import fetch_wikidata_sparql_bindings

    sparql = (body.query or "").strip()
    # The size cap is applied to the stripped query, before anything is forwarded.
    if len(sparql) > 12_000:
        raise HTTPException(400, "SPARQL query too large")
    return {"bindings": fetch_wikidata_sparql_bindings(sparql)}
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
# These three endpoints relay external Sentinel / Planetary Computer
# requests through the backend to avoid browser CORS blocks. They are
+7 -2
View File
@@ -146,7 +146,12 @@ def get_operator_handle() -> str:
# 3. On-disk handle from a previous run.
persisted = _load_persisted_operator_handle()
if persisted:
_OPERATOR_HANDLE_CACHE = _normalize_handle(persisted)
normalized = _normalize_handle(persisted)
# Migrate legacy auto-generated handles (pre-Round-7a ``shadow-`` prefix).
if normalized.startswith("shadow-"):
normalized = f"operator-{normalized[len('shadow-'):]}"
_persist_operator_handle(normalized)
_OPERATOR_HANDLE_CACHE = normalized
return _OPERATOR_HANDLE_CACHE
# 4. Generate, persist, return.
@@ -178,7 +183,7 @@ def outbound_user_agent(purpose: str = "") -> str:
Returns something like::
Shadowbroker/0.9 (operator: shadow-7f3a92; purpose: wikipedia;
Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia;
+https://github.com/BigBodyCobain/Shadowbroker/issues)
The ``purpose`` is optional but recommended — it tells the upstream
+33
View File
@@ -301,3 +301,36 @@ def get_region_dossier(lat: float, lng: float) -> dict:
dossier_cache[cache_key] = result
return result
def fetch_wikipedia_page_summary(title: str) -> dict | None:
"""Wikipedia REST summary for a page title (backend-proxied for #360)."""
trimmed = (title or "").strip()
if not trimmed:
return None
data = _fetch_local_wiki_summary(trimmed, "")
if not data.get("extract") and not data.get("description"):
return None
return {
"title": trimmed,
"description": data.get("description", ""),
"extract": data.get("extract", ""),
"thumbnail": data.get("thumbnail", ""),
"type": "standard",
}
def fetch_wikidata_sparql_bindings(sparql: str) -> list:
    """Run a Wikidata SPARQL query and return its ``results.bindings`` list.

    This is a best-effort helper: it never raises for upstream problems and
    returns an empty list when

    * ``sparql`` is blank,
    * the request fails or the body is not valid JSON,
    * the endpoint answers with a non-200 status, or
    * the JSON payload has no list at ``results.bindings`` (including a
      non-object payload or a null ``results``).

    Args:
        sparql: SPARQL query text; surrounding whitespace is ignored.

    Returns:
        The list found at ``results.bindings``, or ``[]`` in the cases above.
    """
    trimmed = (sparql or "").strip()
    if not trimmed:
        return []
    url = f"https://query.wikidata.org/sparql?query={quote(trimmed)}&format=json"
    try:
        res = fetch_with_curl(url, timeout=8, headers=_wikimedia_request_headers())
        if res.status_code != 200:
            # Log upstream rejections so they are not mistaken for an empty result set.
            logger.warning("Wikidata SPARQL returned HTTP %s", res.status_code)
            return []
        payload = res.json()
    except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
        logger.warning("Wikidata SPARQL failed: %s", e)
        return []
    # Walk the payload defensively: a non-dict body or a null "results" would
    # otherwise raise AttributeError, which the except clause above does not catch.
    results = payload.get("results") if isinstance(payload, dict) else None
    bindings = results.get("bindings") if isinstance(results, dict) else None
    return bindings if isinstance(bindings, list) else []
@@ -0,0 +1,34 @@
"""Backend Wikimedia proxy routes (#360)."""
from __future__ import annotations
from unittest.mock import patch
import pytest
def test_wikipedia_summary_route_returns_payload(client):
    """GET /api/wikipedia/summary answers 200 with the patched service-layer summary."""
    expected = dict(
        title="Paris",
        description="capital",
        extract="Paris is the capital of France.",
        thumbnail="https://example.org/t.jpg",
        type="standard",
    )
    target = "services.region_dossier.fetch_wikipedia_page_summary"
    # Patch the service function so the route never performs a real lookup.
    with patch(target, return_value=expected):
        response = client.get("/api/wikipedia/summary", params={"title": "Paris"})
    assert response.status_code == 200
    assert response.json()["title"] == "Paris"
def test_wikidata_sparql_route_returns_bindings(client):
    """POST /api/wikidata/sparql wraps the patched service-layer bindings under ``bindings``."""
    expected = [{"x": {"value": "1"}}]
    target = "services.region_dossier.fetch_wikidata_sparql_bindings"
    # Patch the service function so the route never sends a real SPARQL request.
    with patch(target, return_value=expected):
        response = client.post(
            "/api/wikidata/sparql",
            json={"query": "SELECT ?x WHERE {}"},
        )
    assert response.status_code == 200
    assert response.json()["bindings"] == expected
@@ -203,6 +203,19 @@ describe('page.tsx decomposition — no admin-session/proxy regression', () => {
const locateBar = readAppFile('LocateBar.tsx');
expect(locateBar).toContain('API_BASE');
expect(locateBar).toContain('/api/geocode/search');
expect(locateBar).not.toContain('nominatim.openstreetmap.org');
});
// Static source check: reads hooks/useRegionDossier.ts as text and pins that it
// references the backend dossier and Sentinel-2 routes while containing none of
// the third-party hostnames listed below.
it('useRegionDossier uses backend dossier APIs (no browser-direct enrichment)', () => {
const hook = fs.readFileSync(
path.resolve(__dirname, '../../hooks/useRegionDossier.ts'),
'utf-8',
);
// Backend routes the hook is expected to call.
expect(hook).toContain('/api/region-dossier');
expect(hook).toContain('/api/sentinel2/search');
// Third-party hosts that must not appear anywhere in the hook source.
expect(hook).not.toContain('nominatim.openstreetmap.org');
expect(hook).not.toContain('planetarycomputer.microsoft.com');
expect(hook).not.toContain('restcountries.com');
});
});
@@ -1,21 +1,8 @@
/**
* Issues #218 / #219 / #220 (tg12 external audit) + Round 7a:
*
* Every browser-direct call to Wikipedia or Wikidata must send the
* `Api-User-Agent` header that Wikimedia's UA policy asks for, AND must
* embed the per-install operator handle so Wikimedia can rate-limit /
* contact the specific operator instead of treating "Shadowbroker" as
* one giant entity.
*
* These tests pin both requirements on the shared `lib/wikimediaClient`
* helper that WikiImage, NewsFeed, and useRegionDossier all route
* through. A future refactor that drops either the header OR the
* per-operator handle gets a loud test failure rather than a silent
* ToS / privacy regression.
* #360: Wikipedia / Wikidata traffic is proxied via the self-hosted backend.
*/
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
buildWikimediaUserAgent,
fetchWikipediaSummary,
fetchWikidataSparql,
_resetWikimediaClientCacheForTests,
@@ -23,18 +10,6 @@ import {
const originalFetch = globalThis.fetch;
// Helper: stub fetch so calls to /api/settings/operator-handle return a
// known handle, and everything else proxies to whatever the test set up.
function withHandle(handle: string, otherFetch: typeof globalThis.fetch) {
return vi.fn(async (input: any, init?: RequestInit) => {
const url = String(input);
if (url.endsWith('/api/settings/operator-handle')) {
return new Response(JSON.stringify({ handle }), { status: 200 });
}
return otherFetch(input, init);
});
}
describe('lib/wikimediaClient', () => {
beforeEach(() => {
_resetWikimediaClientCacheForTests();
@@ -45,194 +20,78 @@ describe('lib/wikimediaClient', () => {
vi.restoreAllMocks();
});
it('builds a stable per-operator Api-User-Agent with contact path', async () => {
globalThis.fetch = withHandle(
'operator-abc123',
vi.fn(async () => new Response('{}', { status: 200 })) as any,
) as any;
const ua = await buildWikimediaUserAgent('wikipedia-summary');
expect(ua).toContain('Shadowbroker');
expect(ua.toLowerCase()).toContain('github.com');
expect(ua.toLowerCase()).toContain('issues');
expect(ua).toContain('operator: operator-abc123');
expect(ua).toContain('purpose: wikipedia-summary');
});
it('falls back to "operator-offline" when handle endpoint is unreachable', async () => {
it('fetches Wikipedia summary through backend proxy', async () => {
const calls: string[] = [];
globalThis.fetch = vi.fn(async (input: any) => {
const url = String(input);
if (url.endsWith('/api/settings/operator-handle')) {
return new Response('forbidden', { status: 403 });
}
return new Response('{}', { status: 200 });
}) as any;
const ua = await buildWikimediaUserAgent('test');
expect(ua).toContain('operator: operator-offline');
});
it('sends per-operator Api-User-Agent on Wikipedia summary fetch', async () => {
const wikiCalls: Array<{ url: string; init?: RequestInit }> = [];
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
wikiCalls.push({ url: String(url), init });
calls.push(String(input));
return new Response(
JSON.stringify({
type: 'standard',
title: 'Boeing 747',
description: 'aircraft',
extract: 'long extract',
thumbnail: { source: 'https://example.org/thumb.jpg' },
}),
{ status: 200 },
);
});
globalThis.fetch = withHandle('operator-test01', baseFetch as any) as any;
const summary = await fetchWikipediaSummary('Boeing 747');
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
// wikiCalls only captures calls to non-handle URLs.
expect(wikiCalls).toHaveLength(1);
const headers = (wikiCalls[0].init?.headers || {}) as Record<string, string>;
expect(headers['Api-User-Agent']).toContain('operator: operator-test01');
expect(headers['Api-User-Agent']).toContain('purpose: wikipedia-summary');
});
it('sends per-operator Api-User-Agent on Wikidata SPARQL fetch', async () => {
const calls: Array<{ url: string; init?: RequestInit }> = [];
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
calls.push({ url: String(url), init });
return new Response(
JSON.stringify({
results: { bindings: [{ leaderLabel: { value: 'Test Leader' } }] },
}),
{ status: 200 },
);
});
globalThis.fetch = withHandle('operator-sparql', baseFetch as any) as any;
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
expect(bindings).toHaveLength(1);
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
expect(headers['Api-User-Agent']).toContain('operator: operator-sparql');
expect(headers['Api-User-Agent']).toContain('purpose: wikidata-sparql');
expect(headers['Accept']).toBe('application/sparql-results+json');
});
it('handle endpoint is queried only ONCE across many wiki fetches', async () => {
let handleCalls = 0;
let wikiCalls = 0;
globalThis.fetch = vi.fn(async (input: any) => {
const url = String(input);
if (url.endsWith('/api/settings/operator-handle')) {
handleCalls++;
return new Response(JSON.stringify({ handle: 'operator-cache' }), { status: 200 });
}
wikiCalls++;
return new Response(
JSON.stringify({
thumbnail: 'https://example.org/thumb.jpg',
type: 'standard',
title: 'X',
description: '',
extract: '',
thumbnail: { source: 'https://example.org/x.jpg' },
}),
{ status: 200 },
);
}) as any;
await fetchWikipediaSummary('Eiffel Tower');
await fetchWikipediaSummary('Mount Fuji');
await fetchWikipediaSummary('Statue of Liberty');
expect(handleCalls).toBe(1);
expect(wikiCalls).toBe(3);
const summary = await fetchWikipediaSummary('Boeing 747');
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
expect(calls).toHaveLength(1);
expect(calls[0]).toContain('/api/wikipedia/summary');
expect(calls[0]).not.toContain('wikipedia.org');
});
it('shares cache across consecutive callers for the same Wikipedia title', async () => {
let fetchCount = 0;
const baseFetch = vi.fn(async () => {
fetchCount++;
it('fetches Wikidata SPARQL through backend proxy', async () => {
const calls: Array<{ url: string; init?: RequestInit }> = [];
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
calls.push({ url: String(url), init });
return new Response(
JSON.stringify({
type: 'standard',
title: 'Eiffel Tower',
description: 'iron lattice tower',
extract: '...',
thumbnail: { source: 'https://example.org/eiffel.jpg' },
bindings: [{ leaderLabel: { value: 'Test Leader' } }],
}),
{ status: 200 },
);
});
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
}) as any;
const a = await fetchWikipediaSummary('Eiffel Tower');
const b = await fetchWikipediaSummary('Eiffel Tower');
expect(fetchCount).toBe(1);
expect(a?.thumbnail).toBe(b?.thumbnail);
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
expect(bindings).toHaveLength(1);
expect(calls).toHaveLength(1);
expect(calls[0].url).toContain('/api/wikidata/sparql');
expect(calls[0].init?.method).toBe('POST');
expect(calls[0].url).not.toContain('wikidata.org');
});
it('deduplicates concurrent in-flight requests for the same title', async () => {
let fetchCount = 0;
const baseFetch = vi.fn(async () => {
fetchCount++;
await new Promise((r) => setTimeout(r, 5));
it('deduplicates concurrent Wikipedia summary requests', async () => {
let hits = 0;
globalThis.fetch = vi.fn(async () => {
hits += 1;
return new Response(
JSON.stringify({
type: 'standard',
title: 'Mount Fuji',
description: 'stratovolcano',
extract: '...',
thumbnail: { source: 'https://example.org/fuji.jpg' },
description: 'mountain',
extract: 'extract',
thumbnail: '',
type: 'standard',
}),
{ status: 200 },
);
});
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
}) as any;
const [a, b, c] = await Promise.all([
fetchWikipediaSummary('Mount Fuji'),
fetchWikipediaSummary('Mount Fuji'),
fetchWikipediaSummary('Mount Fuji'),
]);
expect(fetchCount).toBe(1);
expect(a?.thumbnail).toBe('https://example.org/fuji.jpg');
expect(a?.title).toBe('Mount Fuji');
expect(b).toEqual(a);
expect(c).toEqual(a);
expect(hits).toBe(1);
});
it('returns null on disambiguation pages without throwing', async () => {
globalThis.fetch = withHandle(
'operator-cache',
vi.fn(async () =>
new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }),
) as any,
) as any;
const summary = await fetchWikipediaSummary('Mercury');
expect(summary).toBeNull();
});
it('returns null on HTTP error without throwing', async () => {
globalThis.fetch = withHandle(
'operator-cache',
vi.fn(async () => new Response('not found', { status: 404 })) as any,
) as any;
const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
expect(summary).toBeNull();
});
it('returns null on network error without throwing', async () => {
globalThis.fetch = withHandle(
'operator-cache',
vi.fn(async () => {
throw new Error('network down');
}) as any,
) as any;
const summary = await fetchWikipediaSummary('Anything');
expect(summary).toBeNull();
});
it('returns null on empty input without fetching anything', async () => {
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any;
expect(await fetchWikipediaSummary('')).toBeNull();
expect(await fetchWikipediaSummary(' ')).toBeNull();
expect(globalThis.fetch).not.toHaveBeenCalled();
it('returns null on Wikipedia 404', async () => {
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 404 })) as any;
expect(await fetchWikipediaSummary('Nonexistent Article 12345')).toBeNull();
});
});
+16 -33
View File
@@ -12,6 +12,7 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
const [value, setValue] = useState('');
const [results, setResults] = useState<{ label: string; lat: number; lng: number }[]>([]);
const [loading, setLoading] = useState(false);
const [searchError, setSearchError] = useState<string | null>(null);
const inputRef = useRef<HTMLInputElement>(null);
const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const searchAbortRef = useRef<AbortController | null>(null);
@@ -58,14 +59,15 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
if (searchAbortRef.current) searchAbortRef.current.abort();
if (q.trim().length < 2) {
setResults([]);
setSearchError(null);
return;
}
timerRef.current = setTimeout(async () => {
setLoading(true);
setSearchError(null);
searchAbortRef.current = new AbortController();
const signal = searchAbortRef.current.signal;
try {
// Try backend proxy first (has caching + rate-limit compliance)
const res = await fetch(
`${API_BASE}/api/geocode/search?q=${encodeURIComponent(q)}&limit=5`,
{ signal },
@@ -80,43 +82,19 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
}),
);
setResults(mapped);
if (mapped.length === 0) {
setSearchError('No places found');
}
} else {
// Backend proxy returned an error — fall back to direct Nominatim
console.warn(`[Locate] Proxy returned HTTP ${res.status}, falling back to Nominatim`);
const directRes = await fetch(
`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`,
{ headers: { 'Accept-Language': 'en' }, signal },
);
const data = await directRes.json();
setResults(
data.map((r: { display_name: string; lat: string; lon: string }) => ({
label: r.display_name,
lat: parseFloat(r.lat),
lng: parseFloat(r.lon),
})),
);
console.warn(`[Locate] Geocode proxy HTTP ${res.status}`);
setResults([]);
setSearchError('Place search unavailable — check backend connection');
}
} catch (err) {
if ((err as Error)?.name !== 'AbortError') {
// Proxy completely failed — try direct Nominatim as last resort
try {
const directRes = await fetch(
`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`,
{ headers: { 'Accept-Language': 'en' } },
);
const data = await directRes.json();
setResults(
data.map((r: { display_name: string; lat: string; lon: string }) => ({
label: r.display_name,
lat: parseFloat(r.lat),
lng: parseFloat(r.lon),
})),
);
} catch {
setResults([]);
}
} else {
console.warn('[Locate] Geocode proxy failed:', err);
setResults([]);
setSearchError('Place search unavailable — check backend connection');
}
} finally {
setLoading(false);
@@ -216,6 +194,11 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
</svg>
</button>
</div>
{searchError && results.length === 0 && !loading && value.trim().length >= 2 && (
<div className="absolute bottom-full left-0 right-0 mb-1 bg-[var(--bg-secondary)] border border-amber-800/50 px-3 py-2 text-[10px] font-mono text-amber-200/90">
{searchError}
</div>
)}
{results.length > 0 && (
<div className="absolute bottom-full left-0 right-0 mb-1 bg-[var(--bg-secondary)] border border-[var(--border-primary)] overflow-hidden shadow-[0_-8px_30px_rgba(0,0,0,0.4)] max-h-[200px] overflow-y-auto styled-scrollbar">
{results.map((r, i) => (
+54 -275
View File
@@ -1,11 +1,10 @@
import { useCallback, useState, useEffect } from 'react';
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
import { API_BASE } from '@/lib/api';
// ─── CACHE ─────────────────────────────────────────────────────────────────
// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL.
const _dossierCache = new Map<string, { data: RegionDossier; ts: number }>();
const CACHE_TTL = 86400_000; // 24 hours in ms
const CACHE_TTL = 86400_000;
function getCached(lat: number, lng: number): RegionDossier | null {
const key = `${Math.round(lat * 10) / 10}_${Math.round(lng * 10) / 10}`;
@@ -18,14 +17,12 @@ function getCached(lat: number, lng: number): RegionDossier | null {
function setCache(lat: number, lng: number, data: RegionDossier) {
const key = `${Math.round(lat * 10) / 10}_${Math.round(lng * 10) / 10}`;
_dossierCache.set(key, { data, ts: Date.now() });
// Evict oldest entries if cache exceeds 500
if (_dossierCache.size > 500) {
const oldest = _dossierCache.keys().next().value;
if (oldest) _dossierCache.delete(oldest);
}
}
// ─── ESRI WORLD IMAGERY FALLBACK ───────────────────────────────────────────
function buildLocalSentinelFallback(lat: number, lng: number) {
const latSpan = 0.18;
const lngSpan = 0.24;
@@ -80,140 +77,56 @@ function buildLimitedDossier(lat: number, lng: number, error?: string): RegionDo
} as RegionDossier;
}
// ─── BROWSER-DIRECT API CALLS ──────────────────────────────────────────────
// All external APIs below support CORS — no backend proxy needed.
/** Self-hosted backend routes (#351) — no browser-direct third-party dossier calls. */
async function fetchDossierBundle(
lat: number,
lng: number,
): Promise<{ dossier: Record<string, unknown> | null; sentinel2: Record<string, unknown> }> {
const qs = `lat=${encodeURIComponent(lat)}&lng=${encodeURIComponent(lng)}`;
const [dossierRes, sentinelRes] = await Promise.allSettled([
fetch(`${API_BASE}/api/region-dossier?${qs}`),
fetch(`${API_BASE}/api/sentinel2/search?${qs}`),
]);
/** Reverse geocode via Nominatim (direct browser call). */
async function reverseGeocode(lat: number, lng: number) {
const url =
`https://nominatim.openstreetmap.org/reverse?` +
`lat=${lat}&lon=${lng}&format=json&zoom=10&addressdetails=1&accept-language=en`;
const res = await fetch(url, {
headers: { 'User-Agent': 'ShadowBroker-OSINT/1.0 (live-risk-dashboard)' },
});
if (!res.ok) throw new Error(`Nominatim HTTP ${res.status}`);
const data = await res.json();
const addr = data.address || {};
let dossier: Record<string, unknown> | null = null;
if (dossierRes.status === 'fulfilled' && dossierRes.value.ok) {
dossier = await dossierRes.value.json();
} else if (dossierRes.status === 'fulfilled') {
console.warn('[Dossier] Backend region-dossier HTTP', dossierRes.value.status);
} else {
console.warn('[Dossier] Backend region-dossier failed:', dossierRes.reason);
}
let sentinel2: Record<string, unknown> = buildLocalSentinelFallback(lat, lng);
if (sentinelRes.status === 'fulfilled' && sentinelRes.value.ok) {
sentinel2 = await sentinelRes.value.json();
} else if (sentinelRes.status === 'rejected') {
console.warn('[Dossier] Backend sentinel2/search failed:', sentinelRes.reason);
}
return { dossier, sentinel2 };
}
function dossierFromBackend(
lat: number,
lng: number,
raw: Record<string, unknown>,
sentinel2: Record<string, unknown>,
): RegionDossier {
const coords = (raw.coordinates as { lat?: number; lng?: number }) || { lat, lng };
return {
city: addr.city || addr.town || addr.village || addr.county || '',
state: addr.state || addr.region || '',
country: addr.country || '',
country_code: (addr.country_code || '').toUpperCase(),
display_name: data.display_name || '',
};
lat,
lng,
coordinates: coords,
location: raw.location ?? {},
country: raw.country ?? null,
local: raw.local ?? null,
error: raw.error as string | undefined,
warning: raw.warning as string | undefined,
sentinel2,
} as RegionDossier;
}
/** Fetch country data from RestCountries (direct browser call). */
async function fetchCountryData(countryCode: string) {
if (!countryCode) return {};
const url =
`https://restcountries.com/v3.1/alpha/${countryCode}` +
`?fields=name,population,capital,languages,region,subregion,area,currencies,borders,flag`;
const res = await fetch(url);
if (!res.ok) throw new Error(`RestCountries HTTP ${res.status}`);
const data = await res.json();
return Array.isArray(data) ? data[0] || {} : data || {};
}
/** Fetch head of state + government type from Wikidata SPARQL.
*
* Issue #218 (tg12): routes through lib/wikimediaClient so the
* Api-User-Agent header is set per Wikimedia's UA policy.
*/
async function fetchLeader(countryName: string) {
if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' };
const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'");
const sparql = `
SELECT ?leaderLabel ?govTypeLabel WHERE {
?country wdt:P31 wd:Q6256 ;
rdfs:label "${safeName}"@en .
OPTIONAL { ?country wdt:P35 ?leader . }
OPTIONAL { ?country wdt:P122 ?govType . }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} LIMIT 1
`;
const results = await fetchWikidataSparql<{
leaderLabel?: { value: string };
govTypeLabel?: { value: string };
}>(sparql);
if (results && results.length > 0) {
return {
leader: results[0].leaderLabel?.value || 'Unknown',
government_type: results[0].govTypeLabel?.value || 'Unknown',
};
}
return { leader: 'Unknown', government_type: 'Unknown' };
}
/** Fetch Wikipedia summary for a place.
*
* Issue #219 (tg12): routes through lib/wikimediaClient so the
* Api-User-Agent header is set per Wikimedia's UA policy, AND the
* shared cache means consecutive useRegionDossier + WikiImage +
* NewsFeed lookups for the same article all hit the same slot.
*/
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
if (!placeName) return {};
const candidates = [placeName];
if (countryName) candidates.push(`${placeName}, ${countryName}`);
for (const name of candidates) {
const summary = await fetchWikipediaSummary(name);
if (summary) {
return {
description: summary.description,
extract: summary.extract,
thumbnail: summary.thumbnail,
};
}
}
return {};
}
/** Search for Sentinel-2 imagery via Microsoft Planetary Computer STAC (direct browser call). */
async function fetchSentinel2Direct(lat: number, lng: number) {
const now = new Date();
const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
const payload = {
collections: ['sentinel-2-l2a'],
intersects: { type: 'Point', coordinates: [lng, lat] },
datetime: `${thirtyDaysAgo.toISOString()}/${now.toISOString()}`,
sortby: [{ field: 'datetime', direction: 'desc' }],
limit: 3,
query: { 'eo:cloud_cover': { lt: 30 } },
};
const res = await fetch('https://planetarycomputer.microsoft.com/api/stac/v1/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!res.ok) throw new Error(`Planetary Computer HTTP ${res.status}`);
const data = await res.json();
const features = data.features || [];
if (!features.length) return null; // No scenes — caller uses Esri fallback
const scenes = features.map((item: any) => {
const assets = item.assets || {};
const rendered = assets.rendered_preview || {};
const thumbnail = assets.thumbnail || {};
return {
found: true,
scene_id: item.id,
datetime: item.properties?.datetime,
cloud_cover: item.properties?.['eo:cloud_cover'],
thumbnail_url: thumbnail.href || rendered.href,
fullres_url: rendered.href || thumbnail.href,
bbox: item.bbox ? [...item.bbox] : null,
platform: item.properties?.platform || 'Sentinel-2',
};
});
return { ...scenes[0], scenes };
}
// ─── MAIN HOOK ─────────────────────────────────────────────────────────────
export function useRegionDossier(
selectedEntity: SelectedEntity | null,
setSelectedEntity: (entity: SelectedEntity | null) => void,
@@ -233,7 +146,6 @@ export function useRegionDossier(
});
setRegionDossierLoading(true);
// Check cache first
const cached = getCached(lat, lng);
if (cached) {
setRegionDossier(cached);
@@ -241,155 +153,23 @@ export function useRegionDossier(
return;
}
// Show fallback immediately while API calls are in flight
setRegionDossier({
...buildLimitedDossier(lat, lng),
sentinel2: esriFallback,
});
try {
// ── Phase 1: Geocode + Sentinel-2 in parallel ──────────────────
const [geoResult, sentinelResult] = await Promise.allSettled([
reverseGeocode(lat, lng),
fetchSentinel2Direct(lat, lng),
]);
const { dossier, sentinel2 } = await fetchDossierBundle(lat, lng);
// Parse geocode
let geo = { city: '', state: '', country: '', country_code: '', display_name: '' };
if (geoResult.status === 'fulfilled') {
geo = geoResult.value;
} else {
console.warn('[Dossier] Reverse geocode failed:', geoResult.reason);
}
// Parse sentinel
let sentinel2: Record<string, unknown> = esriFallback;
if (sentinelResult.status === 'fulfilled' && sentinelResult.value) {
sentinel2 = sentinelResult.value;
} else if (sentinelResult.status === 'rejected') {
console.warn('[Dossier] Sentinel-2 search failed:', sentinelResult.reason);
}
// sentinelResult fulfilled but null → no scenes found, keep Esri fallback
// If no country found (ocean, uninhabited), show limited dossier
if (!geo.country) {
const result: RegionDossier = {
lat,
lng,
coordinates: { lat, lng },
location: geo.display_name
? geo
: { display_name: `${lat.toFixed(4)}, ${lng.toFixed(4)}` },
country: null,
local: null,
error: 'No country data — possibly international waters or uninhabited area',
if (!dossier) {
setRegionDossier({
...buildLimitedDossier(lat, lng, 'Region dossier unavailable — check backend connection'),
sentinel2,
} as RegionDossier;
setRegionDossier(result);
setCache(lat, lng, result);
setRegionDossierLoading(false);
});
return;
}
// ── Phase 2: Country + Leader + Wiki in parallel ───────────────
const [countryResult, leaderResult, localWikiResult, countryWikiResult] =
await Promise.allSettled([
fetchCountryData(geo.country_code),
fetchLeader(geo.country),
fetchLocalWikiSummary(geo.city || geo.state, geo.country),
fetchLocalWikiSummary(geo.country, ''),
]);
// Parse country data
let countryData: Record<string, unknown> = {};
if (countryResult.status === 'fulfilled') {
countryData = countryResult.value as Record<string, unknown>;
} else {
console.warn('[Dossier] Country data failed:', countryResult.reason);
}
// Parse leader data
let leaderData = { leader: 'Unknown', government_type: 'Unknown' };
if (leaderResult.status === 'fulfilled') {
leaderData = leaderResult.value;
} else {
console.warn('[Dossier] Leader data failed:', leaderResult.reason);
}
// Parse local wiki
let localData: Record<string, string> = {};
if (localWikiResult.status === 'fulfilled') {
localData = localWikiResult.value as Record<string, string>;
} else {
console.warn('[Dossier] Local wiki failed:', localWikiResult.reason);
}
// If no local data, try country wiki summary
if (!localData.extract && countryWikiResult.status === 'fulfilled') {
const cw = countryWikiResult.value as Record<string, string>;
if (cw.extract) localData = cw;
}
// Build languages list
const languages = countryData.languages as Record<string, string> | undefined;
const langList = languages ? Object.values(languages) : [];
// Build currencies list
const currencies = countryData.currencies as
| Record<string, { name: string; symbol?: string }>
| undefined;
const currencyList: string[] = [];
if (currencies) {
for (const v of Object.values(currencies)) {
if (v && typeof v === 'object') {
const sym = v.symbol || '';
const nm = v.name || '';
currencyList.push(sym ? `${nm} (${sym})` : nm);
}
}
}
const nameData = countryData.name as
| { common?: string; official?: string }
| undefined;
const capitalData = countryData.capital as string[] | undefined;
// ── Assemble final dossier (exact same shape as backend) ───────
const result: RegionDossier = {
lat,
lng,
coordinates: { lat, lng },
location: {
city: geo.city,
state: geo.state,
country: geo.country,
country_code: geo.country_code,
display_name: geo.display_name,
},
country: {
name: nameData?.common || geo.country,
official_name: nameData?.official || '',
leader: leaderData.leader,
government_type: leaderData.government_type,
population: (countryData.population as number) || 0,
capital: capitalData?.length ? capitalData[0] : 'Unknown',
languages: langList,
currencies: currencyList,
region: (countryData.region as string) || '',
subregion: (countryData.subregion as string) || '',
area_km2: (countryData.area as number) || 0,
flag_emoji: (countryData.flag as string) || '',
},
local: {
name: geo.city,
state: geo.state,
description: localData.description || '',
summary: localData.extract || '',
thumbnail: localData.thumbnail || '',
},
sentinel2,
} as RegionDossier;
const result = dossierFromBackend(lat, lng, dossier, sentinel2);
setRegionDossier(result);
setCache(lat, lng, result);
} catch (e) {
@@ -405,7 +185,6 @@ export function useRegionDossier(
[setSelectedEntity],
);
// Clear dossier when selecting a different entity type
useEffect(() => {
if (selectedEntity?.type !== 'region_dossier') {
setRegionDossier(null);
+24 -135
View File
@@ -1,47 +1,18 @@
/**
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
* wikimediaClient — Wikipedia / Wikidata via the self-hosted backend (#360).
*
* Issues #218, #219, #220 (tg12 external audit) + Round 7a:
*
* Wikimedia's User-Agent policy asks API clients to identify themselves
* via `Api-User-Agent` when calling from browser JavaScript (because the
* browser does not let JS set `User-Agent` directly). Three independent
* components used to issue anonymous browser fetches against Wikipedia /
* Wikidata:
*
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
* - WikiImage (Wikipedia REST summary)
* - NewsFeed (Wikipedia REST summary)
*
* PR #284 collapsed them into this shared helper with one stable
* `Api-User-Agent`. That fixed compliance but introduced a new problem:
* the `Api-User-Agent` was project-wide, so from Wikimedia's perspective
* every Shadowbroker install looked like one giant scraper. If one
* install misbehaved, Wikimedia's only recourse was to block the project
* as a whole.
*
* Round 7a fixes that. The frontend fetches the per-install operator
* handle from `GET /api/settings/operator-handle` once on first use and
* embeds it in the `Api-User-Agent`. Wikimedia can now rate-limit /
* contact the specific install instead of the project. The handle is
* auto-generated on the backend (`shadow-XXXXXX`) or operator-chosen via
* the `OPERATOR_HANDLE` setting.
*
* UX impact: zero. Same thumbnails, same summaries, same load behavior.
* The only observable change is the value of the outgoing
* `Api-User-Agent` header.
* The browser only calls `/api/wikipedia/summary` and `/api/wikidata/sparql`.
* Outbound Wikimedia traffic (with per-install operator attribution from
* Round 7a) is handled server-side in `services/region_dossier.py`.
*/
import { API_BASE } from '@/lib/api';
// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
// Keyed by Wikipedia article title (NOT slug — we keep the human-readable
// form so debugging the cache is easier). Values track in-flight state
// so concurrent callers for the same title share one network request.
/** Normalized article summary produced by `fetchWikipediaSummary`. */
export interface WikipediaSummary {
  title: string;
  description: string;
  extract: string;
  thumbnail: string;
  /** Page type, e.g. 'standard' | 'disambiguation' | etc. */
  type: string;
}
interface CacheEntry {
@@ -59,72 +30,6 @@ function evictIfOverCap() {
if (oldest) _summaryCache.delete(oldest);
}
// ─── Per-operator handle (Round 7a) ────────────────────────────────────────
// Fetched once from the backend on first need and cached for the page
// lifetime. The handle is NOT a secret — Wikimedia will see it on every
// Wikipedia / Wikidata request we make — but caching it locally avoids a
// round-trip on every Wikipedia fetch and lets the offline / no-backend
// case still produce a stable UA (the fallback handle).
let _handlePromise: Promise<string> | null = null;
let _cachedHandle: string | null = null;
const FALLBACK_HANDLE = 'operator-offline';
const HANDLE_ENDPOINT = '/api/settings/operator-handle';
/**
 * Ask the backend for this install's operator handle.
 *
 * Never rejects: resolves to `FALLBACK_HANDLE` when the response is not OK,
 * when the payload carries no non-blank string `handle`, or when the request
 * or JSON parsing throws.
 *
 * @returns The trimmed handle from the backend, or `FALLBACK_HANDLE`.
 */
async function fetchOperatorHandle(): Promise<string> {
  try {
    // Relative path + same-origin credentials: the request goes through the
    // app's own proxy, which is where the admin-key injection for legitimate
    // browser sessions is expected to happen.
    const response = await fetch(HANDLE_ENDPOINT, { credentials: 'same-origin' });
    if (response.ok) {
      const payload = await response.json();
      const rawHandle =
        payload && typeof payload.handle === 'string' ? payload.handle : '';
      const handle = rawHandle.trim();
      if (handle) {
        return handle;
      }
    }
  } catch {
    // Network / parse failure: fall through to the stable fallback below.
  }
  return FALLBACK_HANDLE;
}
/**
 * Return the operator handle, running the backend lookup at most once while
 * the module-level caches stay populated.
 *
 * A resolved handle is kept in `_cachedHandle`; while the first lookup is
 * still pending, concurrent callers share it through `_handlePromise`.
 */
async function getOperatorHandle(): Promise<string> {
  if (_cachedHandle) {
    return _cachedHandle;
  }
  if (_handlePromise) {
    return _handlePromise;
  }
  const pending = fetchOperatorHandle().then((handle) => {
    _cachedHandle = handle;
    return handle;
  });
  _handlePromise = pending;
  return pending;
}
/**
 * Compose the Wikimedia `Api-User-Agent` value for this install.
 *
 * The per-install operator handle is embedded so Wikimedia can rate-limit or
 * contact one operator rather than the whole project. Exported for tests;
 * production callers should let `fetchWikipediaSummary` /
 * `fetchWikidataSparql` build it implicitly.
 *
 * @param purpose Short label for the call site; every character outside
 *   `[a-zA-Z0-9_-]` becomes `-` and the result is lower-cased.
 * @returns The complete user-agent string.
 */
export async function buildWikimediaUserAgent(purpose: string): Promise<string> {
  const operator = await getOperatorHandle();
  const purposeSlug = (purpose || '').replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase();
  const identity = `operator: ${operator}; purpose: ${purposeSlug}`;
  const contact = '+https://github.com/BigBodyCobain/Shadowbroker; report issues at /issues';
  return `Shadowbroker/1.0 (${identity}; ${contact})`;
}
// ─── Wikipedia summary fetch ───────────────────────────────────────────────
/** Fetch a Wikipedia article summary (titles, NOT URLs).
*
* Empty / invalid input resolves to `null`. Network errors and disambig
* pages also resolve to `null` so callers can render a fallback without
* a try/catch. Per the audit's "fail forward, not loud" rule.
*/
export async function fetchWikipediaSummary(
title: string,
): Promise<WikipediaSummary | null> {
@@ -135,22 +40,19 @@ export async function fetchWikipediaSummary(
if (cached?.loaded) return cached.summary;
if (cached?.inflight) return cached.inflight;
const slug = encodeURIComponent(trimmed.replace(/ /g, '_'));
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
const promise = (async (): Promise<WikipediaSummary | null> => {
try {
const ua = await buildWikimediaUserAgent('wikipedia-summary');
const r = await fetch(url, { headers: { 'Api-User-Agent': ua } });
const url = `${API_BASE}/api/wikipedia/summary?title=${encodeURIComponent(trimmed)}`;
const r = await fetch(url);
if (r.status === 404) return null;
if (!r.ok) return null;
const d = await r.json();
if (d?.type === 'disambiguation') return null;
return {
title: trimmed,
description: d?.description || '',
extract: d?.extract || '',
thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
type: d?.type || 'standard',
title: (d?.title as string) || trimmed,
description: (d?.description as string) || '',
extract: (d?.extract as string) || '',
thumbnail: (d?.thumbnail as string) || '',
type: (d?.type as string) || 'standard',
};
} catch {
return null;
@@ -166,45 +68,32 @@ export async function fetchWikipediaSummary(
return promise;
}
// ─── Wikidata SPARQL ───────────────────────────────────────────────────────
/**
 * Run a Wikidata SPARQL query through the self-hosted backend.
 *
 * The query is POSTed as `{ query }` to `${API_BASE}/api/wikidata/sparql`;
 * the browser never contacts query.wikidata.org itself. The backend answers
 * with `{ bindings: [...] }`.
 *
 * Fails quietly so callers can render a fallback without a try/catch:
 * a blank query, a non-OK response (the backend rejects oversized queries
 * with a 400), a payload without a `bindings` array, or a network / parse
 * error all resolve to `null`.
 *
 * @param sparql SPARQL query text; surrounding whitespace is trimmed.
 * @returns The `bindings` array on success, otherwise `null`.
 */
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
  sparql: string,
): Promise<T[] | null> {
  const trimmed = (sparql || '').trim();
  if (!trimmed) return null;
  try {
    const res = await fetch(`${API_BASE}/api/wikidata/sparql`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query: trimmed }),
    });
    if (!res.ok) return null;
    const json = await res.json();
    const bindings = json?.bindings;
    return Array.isArray(bindings) ? (bindings as T[]) : null;
  } catch {
    return null;
  }
}
// ─── Test helpers ──────────────────────────────────────────────────────────
/**
 * @deprecated Browser no longer builds Wikimedia UA; kept for tests that import it.
 *
 * @param purpose Accepted for signature compatibility only; it does not
 *   influence the returned string.
 * @returns A fixed placeholder user-agent string.
 */
export async function buildWikimediaUserAgent(purpose: string): Promise<string> {
  // Explicitly discard the argument so the unused parameter is intentional.
  void purpose;
  const proxiedAgent = 'Shadowbroker/1.0 (backend-proxied; purpose: wikimedia)';
  return proxiedAgent;
}
/**
 * Test-only reset of this module's state: forgets the cached operator handle
 * and any in-flight handle lookup, then empties the shared summary cache.
 */
export function _resetWikimediaClientCacheForTests() {
  _cachedHandle = null;
  _handlePromise = null;
  _summaryCache.clear();
}