mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 21:08:13 +02:00
Proxy region dossier, Sentinel search, Wikipedia, and Wikidata via self-hosted APIs; remove LocateBar client-side Nominatim fallback; migrate legacy shadow- operator handles to operator- prefix. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -85,6 +85,39 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ───
|
||||
@router.get("/api/wikipedia/summary")
@limiter.limit("60/minute")
def api_wikipedia_summary(
    request: Request,
    title: str = Query(..., min_length=1, max_length=256),
):
    """Serve a Wikipedia page summary from the self-hosted backend.

    Args:
        request: Incoming request object (not read by this handler's body).
        title: Page title to look up; validated to 1-256 characters.

    Returns:
        Whatever ``fetch_wikipedia_page_summary`` returns for ``title``, or a
        404 ``JSONResponse`` with ``{"detail": "not_found"}`` when that lookup
        yields ``None``.
    """
    # Resolved at call time, so the service module is only imported on use.
    from services.region_dossier import fetch_wikipedia_page_summary as lookup_summary

    page = lookup_summary(title)
    if page is not None:
        return page
    return JSONResponse(status_code=404, content={"detail": "not_found"})
|
||||
|
||||
|
||||
class WikidataSparqlRequest(BaseModel):
    """JSON body accepted by ``POST /api/wikidata/sparql``."""

    # Raw SPARQL text; the route trims it and enforces the size limit.
    query: str


@router.post("/api/wikidata/sparql")
@limiter.limit("30/minute")
def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest):
    """Relay a SPARQL query to Wikidata so the browser never contacts query.wikidata.org.

    Args:
        request: Incoming request object (not read by this handler's body).
        body: Parsed request body carrying the ``query`` string.

    Returns:
        ``{"bindings": [...]}`` with the list produced by
        ``fetch_wikidata_sparql_bindings`` for the trimmed query.

    Raises:
        HTTPException: 400 when the trimmed query is longer than 12,000 characters.
    """
    from services.region_dossier import fetch_wikidata_sparql_bindings as run_sparql

    max_query_chars = 12_000
    sparql_text = (body.query or "").strip()
    if len(sparql_text) > max_query_chars:
        raise HTTPException(400, "SPARQL query too large")
    return {"bindings": run_sparql(sparql_text)}
|
||||
|
||||
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
|
||||
@@ -146,7 +146,12 @@ def get_operator_handle() -> str:
|
||||
# 3. On-disk handle from a previous run.
|
||||
persisted = _load_persisted_operator_handle()
|
||||
if persisted:
|
||||
_OPERATOR_HANDLE_CACHE = _normalize_handle(persisted)
|
||||
normalized = _normalize_handle(persisted)
|
||||
# Migrate legacy auto-generated handles (pre-Round-7a ``shadow-`` prefix).
|
||||
if normalized.startswith("shadow-"):
|
||||
normalized = f"operator-{normalized[len('shadow-'):]}"
|
||||
_persist_operator_handle(normalized)
|
||||
_OPERATOR_HANDLE_CACHE = normalized
|
||||
return _OPERATOR_HANDLE_CACHE
|
||||
|
||||
# 4. Generate, persist, return.
|
||||
@@ -178,7 +183,7 @@ def outbound_user_agent(purpose: str = "") -> str:
|
||||
|
||||
Returns something like::
|
||||
|
||||
Shadowbroker/0.9 (operator: shadow-7f3a92; purpose: wikipedia;
|
||||
Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia;
|
||||
+https://github.com/BigBodyCobain/Shadowbroker/issues)
|
||||
|
||||
The ``purpose`` is optional but recommended — it tells the upstream
|
||||
|
||||
@@ -301,3 +301,36 @@ def get_region_dossier(lat: float, lng: float) -> dict:
|
||||
|
||||
dossier_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
def fetch_wikipedia_page_summary(title: str) -> dict | None:
|
||||
"""Wikipedia REST summary for a page title (backend-proxied for #360)."""
|
||||
trimmed = (title or "").strip()
|
||||
if not trimmed:
|
||||
return None
|
||||
data = _fetch_local_wiki_summary(trimmed, "")
|
||||
if not data.get("extract") and not data.get("description"):
|
||||
return None
|
||||
return {
|
||||
"title": trimmed,
|
||||
"description": data.get("description", ""),
|
||||
"extract": data.get("extract", ""),
|
||||
"thumbnail": data.get("thumbnail", ""),
|
||||
"type": "standard",
|
||||
}
|
||||
|
||||
|
||||
def fetch_wikidata_sparql_bindings(sparql: str) -> list:
    """Run a Wikidata SPARQL query; returns bindings list (empty on failure).

    Args:
        sparql: SPARQL query text; surrounding whitespace is ignored.

    Returns:
        The ``results.bindings`` list from the JSON response. An empty list is
        returned for a blank query, a non-200 response, a transport/parse
        error, or a response body that does not have the expected shape.
    """
    trimmed = (sparql or "").strip()
    if not trimmed:
        return []

    url = f"https://query.wikidata.org/sparql?query={quote(trimmed)}&format=json"
    try:
        res = fetch_with_curl(url, timeout=8, headers=_wikimedia_request_headers())
        if res.status_code != 200:
            # Previously dropped silently; log so upstream failures are diagnosable.
            logger.warning("Wikidata SPARQL returned HTTP %s", res.status_code)
            return []
        payload = res.json()
    except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
        logger.warning("Wikidata SPARQL failed: %s", e)
        return []

    # Validate each level before calling ``.get`` so an unexpected JSON shape
    # (e.g. a list body or ``"results": null``) yields [] instead of raising
    # AttributeError, which the except clause above does not cover.
    results = payload.get("results") if isinstance(payload, dict) else None
    bindings = results.get("bindings") if isinstance(results, dict) else None
    return bindings if isinstance(bindings, list) else []
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
"""Backend Wikimedia proxy routes (#360)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_wikipedia_summary_route_returns_payload(client):
    """The summary route returns the service payload unchanged and forwards ``title``."""
    sample = {
        "title": "Paris",
        "description": "capital",
        "extract": "Paris is the capital of France.",
        "thumbnail": "https://example.org/t.jpg",
        "type": "standard",
    }
    with patch(
        "services.region_dossier.fetch_wikipedia_page_summary",
        return_value=sample,
    ) as fetch_mock:
        r = client.get("/api/wikipedia/summary", params={"title": "Paris"})

    assert r.status_code == 200
    # Compare the whole body, not just ``title``, so a dropped field is caught.
    assert r.json() == sample
    # The route must hand the query parameter straight to the service function.
    fetch_mock.assert_called_once_with("Paris")
|
||||
|
||||
|
||||
def test_wikidata_sparql_route_returns_bindings(client):
    """The SPARQL route wraps the service result in ``bindings`` and forwards the trimmed query."""
    bindings = [{"x": {"value": "1"}}]
    with patch(
        "services.region_dossier.fetch_wikidata_sparql_bindings",
        return_value=bindings,
    ) as sparql_mock:
        # Surrounding whitespace exercises the route's ``strip()`` before forwarding.
        r = client.post("/api/wikidata/sparql", json={"query": "  SELECT ?x WHERE {}  "})

    assert r.status_code == 200
    assert r.json() == {"bindings": bindings}
    sparql_mock.assert_called_once_with("SELECT ?x WHERE {}")
|
||||
@@ -203,6 +203,19 @@ describe('page.tsx decomposition — no admin-session/proxy regression', () => {
|
||||
const locateBar = readAppFile('LocateBar.tsx');
|
||||
expect(locateBar).toContain('API_BASE');
|
||||
expect(locateBar).toContain('/api/geocode/search');
|
||||
expect(locateBar).not.toContain('nominatim.openstreetmap.org');
|
||||
});
|
||||
|
||||
it('useRegionDossier uses backend dossier APIs (no browser-direct enrichment)', () => {
|
||||
const hook = fs.readFileSync(
|
||||
path.resolve(__dirname, '../../hooks/useRegionDossier.ts'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(hook).toContain('/api/region-dossier');
|
||||
expect(hook).toContain('/api/sentinel2/search');
|
||||
expect(hook).not.toContain('nominatim.openstreetmap.org');
|
||||
expect(hook).not.toContain('planetarycomputer.microsoft.com');
|
||||
expect(hook).not.toContain('restcountries.com');
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,21 +1,8 @@
|
||||
/**
|
||||
* Issues #218 / #219 / #220 (tg12 external audit) + Round 7a:
|
||||
*
|
||||
* Every browser-direct call to Wikipedia or Wikidata must send the
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for, AND must
|
||||
* embed the per-install operator handle so Wikimedia can rate-limit /
|
||||
* contact the specific operator instead of treating "Shadowbroker" as
|
||||
* one giant entity.
|
||||
*
|
||||
* These tests pin both requirements on the shared `lib/wikimediaClient`
|
||||
* helper that WikiImage, NewsFeed, and useRegionDossier all route
|
||||
* through. A future refactor that drops either the header OR the
|
||||
* per-operator handle gets a loud test failure rather than a silent
|
||||
* ToS / privacy regression.
|
||||
* #360: Wikipedia / Wikidata traffic is proxied via the self-hosted backend.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
buildWikimediaUserAgent,
|
||||
fetchWikipediaSummary,
|
||||
fetchWikidataSparql,
|
||||
_resetWikimediaClientCacheForTests,
|
||||
@@ -23,18 +10,6 @@ import {
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
// Helper: stub fetch so calls to /api/settings/operator-handle return a
|
||||
// known handle, and everything else proxies to whatever the test set up.
|
||||
function withHandle(handle: string, otherFetch: typeof globalThis.fetch) {
|
||||
return vi.fn(async (input: any, init?: RequestInit) => {
|
||||
const url = String(input);
|
||||
if (url.endsWith('/api/settings/operator-handle')) {
|
||||
return new Response(JSON.stringify({ handle }), { status: 200 });
|
||||
}
|
||||
return otherFetch(input, init);
|
||||
});
|
||||
}
|
||||
|
||||
describe('lib/wikimediaClient', () => {
|
||||
beforeEach(() => {
|
||||
_resetWikimediaClientCacheForTests();
|
||||
@@ -45,194 +20,78 @@ describe('lib/wikimediaClient', () => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('builds a stable per-operator Api-User-Agent with contact path', async () => {
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-abc123',
|
||||
vi.fn(async () => new Response('{}', { status: 200 })) as any,
|
||||
) as any;
|
||||
const ua = await buildWikimediaUserAgent('wikipedia-summary');
|
||||
expect(ua).toContain('Shadowbroker');
|
||||
expect(ua.toLowerCase()).toContain('github.com');
|
||||
expect(ua.toLowerCase()).toContain('issues');
|
||||
expect(ua).toContain('operator: operator-abc123');
|
||||
expect(ua).toContain('purpose: wikipedia-summary');
|
||||
});
|
||||
|
||||
it('falls back to "operator-offline" when handle endpoint is unreachable', async () => {
|
||||
it('fetches Wikipedia summary through backend proxy', async () => {
|
||||
const calls: string[] = [];
|
||||
globalThis.fetch = vi.fn(async (input: any) => {
|
||||
const url = String(input);
|
||||
if (url.endsWith('/api/settings/operator-handle')) {
|
||||
return new Response('forbidden', { status: 403 });
|
||||
}
|
||||
return new Response('{}', { status: 200 });
|
||||
}) as any;
|
||||
const ua = await buildWikimediaUserAgent('test');
|
||||
expect(ua).toContain('operator: operator-offline');
|
||||
});
|
||||
|
||||
it('sends per-operator Api-User-Agent on Wikipedia summary fetch', async () => {
|
||||
const wikiCalls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
wikiCalls.push({ url: String(url), init });
|
||||
calls.push(String(input));
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Boeing 747',
|
||||
description: 'aircraft',
|
||||
extract: 'long extract',
|
||||
thumbnail: { source: 'https://example.org/thumb.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-test01', baseFetch as any) as any;
|
||||
|
||||
const summary = await fetchWikipediaSummary('Boeing 747');
|
||||
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
|
||||
// wikiCalls only captures calls to non-handle URLs.
|
||||
expect(wikiCalls).toHaveLength(1);
|
||||
const headers = (wikiCalls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toContain('operator: operator-test01');
|
||||
expect(headers['Api-User-Agent']).toContain('purpose: wikipedia-summary');
|
||||
});
|
||||
|
||||
it('sends per-operator Api-User-Agent on Wikidata SPARQL fetch', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
results: { bindings: [{ leaderLabel: { value: 'Test Leader' } }] },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-sparql', baseFetch as any) as any;
|
||||
|
||||
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
|
||||
expect(bindings).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toContain('operator: operator-sparql');
|
||||
expect(headers['Api-User-Agent']).toContain('purpose: wikidata-sparql');
|
||||
expect(headers['Accept']).toBe('application/sparql-results+json');
|
||||
});
|
||||
|
||||
it('handle endpoint is queried only ONCE across many wiki fetches', async () => {
|
||||
let handleCalls = 0;
|
||||
let wikiCalls = 0;
|
||||
globalThis.fetch = vi.fn(async (input: any) => {
|
||||
const url = String(input);
|
||||
if (url.endsWith('/api/settings/operator-handle')) {
|
||||
handleCalls++;
|
||||
return new Response(JSON.stringify({ handle: 'operator-cache' }), { status: 200 });
|
||||
}
|
||||
wikiCalls++;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
thumbnail: 'https://example.org/thumb.jpg',
|
||||
type: 'standard',
|
||||
title: 'X',
|
||||
description: '',
|
||||
extract: '',
|
||||
thumbnail: { source: 'https://example.org/x.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
await fetchWikipediaSummary('Eiffel Tower');
|
||||
await fetchWikipediaSummary('Mount Fuji');
|
||||
await fetchWikipediaSummary('Statue of Liberty');
|
||||
expect(handleCalls).toBe(1);
|
||||
expect(wikiCalls).toBe(3);
|
||||
const summary = await fetchWikipediaSummary('Boeing 747');
|
||||
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
|
||||
expect(calls).toHaveLength(1);
|
||||
expect(calls[0]).toContain('/api/wikipedia/summary');
|
||||
expect(calls[0]).not.toContain('wikipedia.org');
|
||||
});
|
||||
|
||||
it('shares cache across consecutive callers for the same Wikipedia title', async () => {
|
||||
let fetchCount = 0;
|
||||
const baseFetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
it('fetches Wikidata SPARQL through backend proxy', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Eiffel Tower',
|
||||
description: 'iron lattice tower',
|
||||
extract: '...',
|
||||
thumbnail: { source: 'https://example.org/eiffel.jpg' },
|
||||
bindings: [{ leaderLabel: { value: 'Test Leader' } }],
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
|
||||
}) as any;
|
||||
|
||||
const a = await fetchWikipediaSummary('Eiffel Tower');
|
||||
const b = await fetchWikipediaSummary('Eiffel Tower');
|
||||
expect(fetchCount).toBe(1);
|
||||
expect(a?.thumbnail).toBe(b?.thumbnail);
|
||||
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
|
||||
expect(bindings).toHaveLength(1);
|
||||
expect(calls).toHaveLength(1);
|
||||
expect(calls[0].url).toContain('/api/wikidata/sparql');
|
||||
expect(calls[0].init?.method).toBe('POST');
|
||||
expect(calls[0].url).not.toContain('wikidata.org');
|
||||
});
|
||||
|
||||
it('deduplicates concurrent in-flight requests for the same title', async () => {
|
||||
let fetchCount = 0;
|
||||
const baseFetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
it('deduplicates concurrent Wikipedia summary requests', async () => {
|
||||
let hits = 0;
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
hits += 1;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'Mount Fuji',
|
||||
description: 'stratovolcano',
|
||||
extract: '...',
|
||||
thumbnail: { source: 'https://example.org/fuji.jpg' },
|
||||
description: 'mountain',
|
||||
extract: 'extract',
|
||||
thumbnail: '',
|
||||
type: 'standard',
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
|
||||
}) as any;
|
||||
|
||||
const [a, b, c] = await Promise.all([
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
]);
|
||||
expect(fetchCount).toBe(1);
|
||||
expect(a?.thumbnail).toBe('https://example.org/fuji.jpg');
|
||||
expect(a?.title).toBe('Mount Fuji');
|
||||
expect(b).toEqual(a);
|
||||
expect(c).toEqual(a);
|
||||
expect(hits).toBe(1);
|
||||
});
|
||||
|
||||
it('returns null on disambiguation pages without throwing', async () => {
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () =>
|
||||
new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }),
|
||||
) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Mercury');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on HTTP error without throwing', async () => {
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () => new Response('not found', { status: 404 })) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on network error without throwing', async () => {
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () => {
|
||||
throw new Error('network down');
|
||||
}) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Anything');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on empty input without fetching anything', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any;
|
||||
expect(await fetchWikipediaSummary('')).toBeNull();
|
||||
expect(await fetchWikipediaSummary(' ')).toBeNull();
|
||||
expect(globalThis.fetch).not.toHaveBeenCalled();
|
||||
it('returns null on Wikipedia 404', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 404 })) as any;
|
||||
expect(await fetchWikipediaSummary('Nonexistent Article 12345')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,6 +12,7 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
|
||||
const [value, setValue] = useState('');
|
||||
const [results, setResults] = useState<{ label: string; lat: number; lng: number }[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [searchError, setSearchError] = useState<string | null>(null);
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const searchAbortRef = useRef<AbortController | null>(null);
|
||||
@@ -58,14 +59,15 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
|
||||
if (searchAbortRef.current) searchAbortRef.current.abort();
|
||||
if (q.trim().length < 2) {
|
||||
setResults([]);
|
||||
setSearchError(null);
|
||||
return;
|
||||
}
|
||||
timerRef.current = setTimeout(async () => {
|
||||
setLoading(true);
|
||||
setSearchError(null);
|
||||
searchAbortRef.current = new AbortController();
|
||||
const signal = searchAbortRef.current.signal;
|
||||
try {
|
||||
// Try backend proxy first (has caching + rate-limit compliance)
|
||||
const res = await fetch(
|
||||
`${API_BASE}/api/geocode/search?q=${encodeURIComponent(q)}&limit=5`,
|
||||
{ signal },
|
||||
@@ -80,43 +82,19 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
|
||||
}),
|
||||
);
|
||||
setResults(mapped);
|
||||
if (mapped.length === 0) {
|
||||
setSearchError('No places found');
|
||||
}
|
||||
} else {
|
||||
// Backend proxy returned an error — fall back to direct Nominatim
|
||||
console.warn(`[Locate] Proxy returned HTTP ${res.status}, falling back to Nominatim`);
|
||||
const directRes = await fetch(
|
||||
`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`,
|
||||
{ headers: { 'Accept-Language': 'en' }, signal },
|
||||
);
|
||||
const data = await directRes.json();
|
||||
setResults(
|
||||
data.map((r: { display_name: string; lat: string; lon: string }) => ({
|
||||
label: r.display_name,
|
||||
lat: parseFloat(r.lat),
|
||||
lng: parseFloat(r.lon),
|
||||
})),
|
||||
);
|
||||
console.warn(`[Locate] Geocode proxy HTTP ${res.status}`);
|
||||
setResults([]);
|
||||
setSearchError('Place search unavailable — check backend connection');
|
||||
}
|
||||
} catch (err) {
|
||||
if ((err as Error)?.name !== 'AbortError') {
|
||||
// Proxy completely failed — try direct Nominatim as last resort
|
||||
try {
|
||||
const directRes = await fetch(
|
||||
`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(q)}&format=json&limit=5`,
|
||||
{ headers: { 'Accept-Language': 'en' } },
|
||||
);
|
||||
const data = await directRes.json();
|
||||
setResults(
|
||||
data.map((r: { display_name: string; lat: string; lon: string }) => ({
|
||||
label: r.display_name,
|
||||
lat: parseFloat(r.lat),
|
||||
lng: parseFloat(r.lon),
|
||||
})),
|
||||
);
|
||||
} catch {
|
||||
setResults([]);
|
||||
}
|
||||
} else {
|
||||
console.warn('[Locate] Geocode proxy failed:', err);
|
||||
setResults([]);
|
||||
setSearchError('Place search unavailable — check backend connection');
|
||||
}
|
||||
} finally {
|
||||
setLoading(false);
|
||||
@@ -216,6 +194,11 @@ export function LocateBar({ onLocate, onOpenChange }: { onLocate: (lat: number,
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{searchError && results.length === 0 && !loading && value.trim().length >= 2 && (
|
||||
<div className="absolute bottom-full left-0 right-0 mb-1 bg-[var(--bg-secondary)] border border-amber-800/50 px-3 py-2 text-[10px] font-mono text-amber-200/90">
|
||||
{searchError}
|
||||
</div>
|
||||
)}
|
||||
{results.length > 0 && (
|
||||
<div className="absolute bottom-full left-0 right-0 mb-1 bg-[var(--bg-secondary)] border border-[var(--border-primary)] overflow-hidden shadow-[0_-8px_30px_rgba(0,0,0,0.4)] max-h-[200px] overflow-y-auto styled-scrollbar">
|
||||
{results.map((r, i) => (
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import { useCallback, useState, useEffect } from 'react';
|
||||
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
|
||||
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
|
||||
import { API_BASE } from '@/lib/api';
|
||||
|
||||
// ─── CACHE ─────────────────────────────────────────────────────────────────
|
||||
// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL.
|
||||
const _dossierCache = new Map<string, { data: RegionDossier; ts: number }>();
|
||||
const CACHE_TTL = 86400_000; // 24 hours in ms
|
||||
const CACHE_TTL = 86400_000;
|
||||
|
||||
function getCached(lat: number, lng: number): RegionDossier | null {
|
||||
const key = `${Math.round(lat * 10) / 10}_${Math.round(lng * 10) / 10}`;
|
||||
@@ -18,14 +17,12 @@ function getCached(lat: number, lng: number): RegionDossier | null {
|
||||
/**
 * Store a dossier under its rounded-coordinate key (one decimal place each)
 * with the current timestamp, then drop one entry if the map grew past 500.
 */
function setCache(lat: number, lng: number, data: RegionDossier) {
  const gridLat = Math.round(lat * 10) / 10;
  const gridLng = Math.round(lng * 10) / 10;
  _dossierCache.set(`${gridLat}_${gridLng}`, { data, ts: Date.now() });

  if (_dossierCache.size <= 500) return;

  // Map iterates keys in insertion order, so the first key is the earliest insert.
  const [firstKey] = _dossierCache.keys();
  if (firstKey) _dossierCache.delete(firstKey);
}
|
||||
|
||||
// ─── ESRI WORLD IMAGERY FALLBACK ───────────────────────────────────────────
|
||||
function buildLocalSentinelFallback(lat: number, lng: number) {
|
||||
const latSpan = 0.18;
|
||||
const lngSpan = 0.24;
|
||||
@@ -80,140 +77,56 @@ function buildLimitedDossier(lat: number, lng: number, error?: string): RegionDo
|
||||
} as RegionDossier;
|
||||
}
|
||||
|
||||
// ─── BROWSER-DIRECT API CALLS ──────────────────────────────────────────────
|
||||
// All external APIs below support CORS — no backend proxy needed.
|
||||
/** Self-hosted backend routes (#351) — no browser-direct third-party dossier calls. */
|
||||
async function fetchDossierBundle(
|
||||
lat: number,
|
||||
lng: number,
|
||||
): Promise<{ dossier: Record<string, unknown> | null; sentinel2: Record<string, unknown> }> {
|
||||
const qs = `lat=${encodeURIComponent(lat)}&lng=${encodeURIComponent(lng)}`;
|
||||
const [dossierRes, sentinelRes] = await Promise.allSettled([
|
||||
fetch(`${API_BASE}/api/region-dossier?${qs}`),
|
||||
fetch(`${API_BASE}/api/sentinel2/search?${qs}`),
|
||||
]);
|
||||
|
||||
/** Reverse geocode via Nominatim (direct browser call). */
|
||||
async function reverseGeocode(lat: number, lng: number) {
|
||||
const url =
|
||||
`https://nominatim.openstreetmap.org/reverse?` +
|
||||
`lat=${lat}&lon=${lng}&format=json&zoom=10&addressdetails=1&accept-language=en`;
|
||||
const res = await fetch(url, {
|
||||
headers: { 'User-Agent': 'ShadowBroker-OSINT/1.0 (live-risk-dashboard)' },
|
||||
});
|
||||
if (!res.ok) throw new Error(`Nominatim HTTP ${res.status}`);
|
||||
const data = await res.json();
|
||||
const addr = data.address || {};
|
||||
let dossier: Record<string, unknown> | null = null;
|
||||
if (dossierRes.status === 'fulfilled' && dossierRes.value.ok) {
|
||||
dossier = await dossierRes.value.json();
|
||||
} else if (dossierRes.status === 'fulfilled') {
|
||||
console.warn('[Dossier] Backend region-dossier HTTP', dossierRes.value.status);
|
||||
} else {
|
||||
console.warn('[Dossier] Backend region-dossier failed:', dossierRes.reason);
|
||||
}
|
||||
|
||||
let sentinel2: Record<string, unknown> = buildLocalSentinelFallback(lat, lng);
|
||||
if (sentinelRes.status === 'fulfilled' && sentinelRes.value.ok) {
|
||||
sentinel2 = await sentinelRes.value.json();
|
||||
} else if (sentinelRes.status === 'rejected') {
|
||||
console.warn('[Dossier] Backend sentinel2/search failed:', sentinelRes.reason);
|
||||
}
|
||||
|
||||
return { dossier, sentinel2 };
|
||||
}
|
||||
|
||||
function dossierFromBackend(
|
||||
lat: number,
|
||||
lng: number,
|
||||
raw: Record<string, unknown>,
|
||||
sentinel2: Record<string, unknown>,
|
||||
): RegionDossier {
|
||||
const coords = (raw.coordinates as { lat?: number; lng?: number }) || { lat, lng };
|
||||
return {
|
||||
city: addr.city || addr.town || addr.village || addr.county || '',
|
||||
state: addr.state || addr.region || '',
|
||||
country: addr.country || '',
|
||||
country_code: (addr.country_code || '').toUpperCase(),
|
||||
display_name: data.display_name || '',
|
||||
};
|
||||
lat,
|
||||
lng,
|
||||
coordinates: coords,
|
||||
location: raw.location ?? {},
|
||||
country: raw.country ?? null,
|
||||
local: raw.local ?? null,
|
||||
error: raw.error as string | undefined,
|
||||
warning: raw.warning as string | undefined,
|
||||
sentinel2,
|
||||
} as RegionDossier;
|
||||
}
|
||||
|
||||
/**
 * Look up a country record on RestCountries by alpha code (direct browser call).
 *
 * Returns `{}` for an empty code. Throws when the response is not OK. An array
 * response is unwrapped to its first element; falsy results become `{}`.
 */
async function fetchCountryData(countryCode: string) {
  if (!countryCode) return {};

  const fields = 'name,population,capital,languages,region,subregion,area,currencies,borders,flag';
  const res = await fetch(`https://restcountries.com/v3.1/alpha/${countryCode}?fields=${fields}`);
  if (!res.ok) throw new Error(`RestCountries HTTP ${res.status}`);

  const data = await res.json();
  if (Array.isArray(data)) return data[0] || {};
  return data || {};
}
|
||||
|
||||
/**
 * Fetch head of state + government type from Wikidata SPARQL.
 *
 * Issue #218 (tg12): routes through lib/wikimediaClient (`fetchWikidataSparql`)
 * rather than calling the SPARQL endpoint directly.
 *
 * @param countryName English country label to match via `rdfs:label`.
 * @returns `{ leader, government_type }`; each field is `'Unknown'` when the
 *   name is empty, no row comes back, or the row lacks that label.
 */
async function fetchLeader(countryName: string) {
  const unknown = { leader: 'Unknown', government_type: 'Unknown' };
  if (!countryName) return unknown;

  // Escape for a double-quoted SPARQL string literal. Backslashes MUST be
  // escaped first: otherwise an input like `\"` becomes `\\"`, whose quote is
  // unescaped and terminates the literal early. Raw line breaks are not legal
  // inside a short string literal either, so they are escaped as well.
  const safeName = countryName
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/'/g, "\\'")
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');

  const sparql = `
    SELECT ?leaderLabel ?govTypeLabel WHERE {
      ?country wdt:P31 wd:Q6256 ;
               rdfs:label "${safeName}"@en .
      OPTIONAL { ?country wdt:P35 ?leader . }
      OPTIONAL { ?country wdt:P122 ?govType . }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    } LIMIT 1
  `;

  const results = await fetchWikidataSparql<{
    leaderLabel?: { value: string };
    govTypeLabel?: { value: string };
  }>(sparql);

  if (results && results.length > 0) {
    return {
      leader: results[0].leaderLabel?.value || 'Unknown',
      government_type: results[0].govTypeLabel?.value || 'Unknown',
    };
  }
  return unknown;
}
|
||||
|
||||
/**
 * Fetch a Wikipedia summary for a place via `fetchWikipediaSummary`
 * (lib/wikimediaClient, Issue #219).
 *
 * Tries the bare place name first and, when a country name is given,
 * "<place>, <country>" second. The first truthy summary wins and is reduced to
 * `{ description, extract, thumbnail }`; otherwise `{}` is returned.
 */
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
  if (!placeName) return {};

  const titles = countryName ? [placeName, `${placeName}, ${countryName}`] : [placeName];
  for (const title of titles) {
    const hit = await fetchWikipediaSummary(title);
    if (!hit) continue;
    const { description, extract, thumbnail } = hit;
    return { description, extract, thumbnail };
  }
  return {};
}
|
||||
|
||||
/**
 * Search for Sentinel-2 imagery via Microsoft Planetary Computer STAC (direct browser call).
 *
 * Posts a STAC search for `sentinel-2-l2a` items intersecting the point, dated
 * within the last 30 days, newest first, cloud cover below 30, at most 3 items.
 *
 * @returns `null` when the search has no features; otherwise the first scene's
 *   fields spread at top level plus a `scenes` array with every scene.
 * @throws Error when the HTTP response is not OK.
 */
async function fetchSentinel2Direct(lat: number, lng: number) {
  const lookbackMs = 30 * 24 * 60 * 60 * 1000;
  const windowEnd = new Date();
  const windowStart = new Date(windowEnd.getTime() - lookbackMs);

  const searchBody = JSON.stringify({
    collections: ['sentinel-2-l2a'],
    // GeoJSON order is [lng, lat].
    intersects: { type: 'Point', coordinates: [lng, lat] },
    datetime: `${windowStart.toISOString()}/${windowEnd.toISOString()}`,
    sortby: [{ field: 'datetime', direction: 'desc' }],
    limit: 3,
    query: { 'eo:cloud_cover': { lt: 30 } },
  });

  const res = await fetch('https://planetarycomputer.microsoft.com/api/stac/v1/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: searchBody,
  });
  if (!res.ok) throw new Error(`Planetary Computer HTTP ${res.status}`);

  const data = await res.json();
  const features = data.features || [];
  if (features.length === 0) return null; // No scenes found

  const toScene = (item: any) => {
    const previewHref = item.assets?.rendered_preview?.href;
    const thumbHref = item.assets?.thumbnail?.href;
    return {
      found: true,
      scene_id: item.id,
      datetime: item.properties?.datetime,
      cloud_cover: item.properties?.['eo:cloud_cover'],
      // Each URL falls back to the other asset when its preferred one is missing.
      thumbnail_url: thumbHref || previewHref,
      fullres_url: previewHref || thumbHref,
      bbox: item.bbox ? [...item.bbox] : null,
      platform: item.properties?.platform || 'Sentinel-2',
    };
  };

  const scenes = features.map(toScene);
  return { ...scenes[0], scenes };
}
|
||||
|
||||
// ─── MAIN HOOK ─────────────────────────────────────────────────────────────
|
||||
|
||||
export function useRegionDossier(
|
||||
selectedEntity: SelectedEntity | null,
|
||||
setSelectedEntity: (entity: SelectedEntity | null) => void,
|
||||
@@ -233,7 +146,6 @@ export function useRegionDossier(
|
||||
});
|
||||
setRegionDossierLoading(true);
|
||||
|
||||
// Check cache first
|
||||
const cached = getCached(lat, lng);
|
||||
if (cached) {
|
||||
setRegionDossier(cached);
|
||||
@@ -241,155 +153,23 @@ export function useRegionDossier(
|
||||
return;
|
||||
}
|
||||
|
||||
// Show fallback immediately while API calls are in flight
|
||||
setRegionDossier({
|
||||
...buildLimitedDossier(lat, lng),
|
||||
sentinel2: esriFallback,
|
||||
});
|
||||
|
||||
try {
|
||||
// ── Phase 1: Geocode + Sentinel-2 in parallel ──────────────────
|
||||
const [geoResult, sentinelResult] = await Promise.allSettled([
|
||||
reverseGeocode(lat, lng),
|
||||
fetchSentinel2Direct(lat, lng),
|
||||
]);
|
||||
const { dossier, sentinel2 } = await fetchDossierBundle(lat, lng);
|
||||
|
||||
// Parse geocode
|
||||
let geo = { city: '', state: '', country: '', country_code: '', display_name: '' };
|
||||
if (geoResult.status === 'fulfilled') {
|
||||
geo = geoResult.value;
|
||||
} else {
|
||||
console.warn('[Dossier] Reverse geocode failed:', geoResult.reason);
|
||||
}
|
||||
|
||||
// Parse sentinel
|
||||
let sentinel2: Record<string, unknown> = esriFallback;
|
||||
if (sentinelResult.status === 'fulfilled' && sentinelResult.value) {
|
||||
sentinel2 = sentinelResult.value;
|
||||
} else if (sentinelResult.status === 'rejected') {
|
||||
console.warn('[Dossier] Sentinel-2 search failed:', sentinelResult.reason);
|
||||
}
|
||||
// sentinelResult fulfilled but null → no scenes found, keep Esri fallback
|
||||
|
||||
// If no country found (ocean, uninhabited), show limited dossier
|
||||
if (!geo.country) {
|
||||
const result: RegionDossier = {
|
||||
lat,
|
||||
lng,
|
||||
coordinates: { lat, lng },
|
||||
location: geo.display_name
|
||||
? geo
|
||||
: { display_name: `${lat.toFixed(4)}, ${lng.toFixed(4)}` },
|
||||
country: null,
|
||||
local: null,
|
||||
error: 'No country data — possibly international waters or uninhabited area',
|
||||
if (!dossier) {
|
||||
setRegionDossier({
|
||||
...buildLimitedDossier(lat, lng, 'Region dossier unavailable — check backend connection'),
|
||||
sentinel2,
|
||||
} as RegionDossier;
|
||||
setRegionDossier(result);
|
||||
setCache(lat, lng, result);
|
||||
setRegionDossierLoading(false);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Phase 2: Country + Leader + Wiki in parallel ───────────────
|
||||
const [countryResult, leaderResult, localWikiResult, countryWikiResult] =
|
||||
await Promise.allSettled([
|
||||
fetchCountryData(geo.country_code),
|
||||
fetchLeader(geo.country),
|
||||
fetchLocalWikiSummary(geo.city || geo.state, geo.country),
|
||||
fetchLocalWikiSummary(geo.country, ''),
|
||||
]);
|
||||
|
||||
// Parse country data
|
||||
let countryData: Record<string, unknown> = {};
|
||||
if (countryResult.status === 'fulfilled') {
|
||||
countryData = countryResult.value as Record<string, unknown>;
|
||||
} else {
|
||||
console.warn('[Dossier] Country data failed:', countryResult.reason);
|
||||
}
|
||||
|
||||
// Parse leader data
|
||||
let leaderData = { leader: 'Unknown', government_type: 'Unknown' };
|
||||
if (leaderResult.status === 'fulfilled') {
|
||||
leaderData = leaderResult.value;
|
||||
} else {
|
||||
console.warn('[Dossier] Leader data failed:', leaderResult.reason);
|
||||
}
|
||||
|
||||
// Parse local wiki
|
||||
let localData: Record<string, string> = {};
|
||||
if (localWikiResult.status === 'fulfilled') {
|
||||
localData = localWikiResult.value as Record<string, string>;
|
||||
} else {
|
||||
console.warn('[Dossier] Local wiki failed:', localWikiResult.reason);
|
||||
}
|
||||
|
||||
// If no local data, try country wiki summary
|
||||
if (!localData.extract && countryWikiResult.status === 'fulfilled') {
|
||||
const cw = countryWikiResult.value as Record<string, string>;
|
||||
if (cw.extract) localData = cw;
|
||||
}
|
||||
|
||||
// Build languages list
|
||||
const languages = countryData.languages as Record<string, string> | undefined;
|
||||
const langList = languages ? Object.values(languages) : [];
|
||||
|
||||
// Build currencies list
|
||||
const currencies = countryData.currencies as
|
||||
| Record<string, { name: string; symbol?: string }>
|
||||
| undefined;
|
||||
const currencyList: string[] = [];
|
||||
if (currencies) {
|
||||
for (const v of Object.values(currencies)) {
|
||||
if (v && typeof v === 'object') {
|
||||
const sym = v.symbol || '';
|
||||
const nm = v.name || '';
|
||||
currencyList.push(sym ? `${nm} (${sym})` : nm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const nameData = countryData.name as
|
||||
| { common?: string; official?: string }
|
||||
| undefined;
|
||||
const capitalData = countryData.capital as string[] | undefined;
|
||||
|
||||
// ── Assemble final dossier (exact same shape as backend) ───────
|
||||
const result: RegionDossier = {
|
||||
lat,
|
||||
lng,
|
||||
coordinates: { lat, lng },
|
||||
location: {
|
||||
city: geo.city,
|
||||
state: geo.state,
|
||||
country: geo.country,
|
||||
country_code: geo.country_code,
|
||||
display_name: geo.display_name,
|
||||
},
|
||||
country: {
|
||||
name: nameData?.common || geo.country,
|
||||
official_name: nameData?.official || '',
|
||||
leader: leaderData.leader,
|
||||
government_type: leaderData.government_type,
|
||||
population: (countryData.population as number) || 0,
|
||||
capital: capitalData?.length ? capitalData[0] : 'Unknown',
|
||||
languages: langList,
|
||||
currencies: currencyList,
|
||||
region: (countryData.region as string) || '',
|
||||
subregion: (countryData.subregion as string) || '',
|
||||
area_km2: (countryData.area as number) || 0,
|
||||
flag_emoji: (countryData.flag as string) || '',
|
||||
},
|
||||
local: {
|
||||
name: geo.city,
|
||||
state: geo.state,
|
||||
description: localData.description || '',
|
||||
summary: localData.extract || '',
|
||||
thumbnail: localData.thumbnail || '',
|
||||
},
|
||||
sentinel2,
|
||||
} as RegionDossier;
|
||||
|
||||
const result = dossierFromBackend(lat, lng, dossier, sentinel2);
|
||||
setRegionDossier(result);
|
||||
setCache(lat, lng, result);
|
||||
} catch (e) {
|
||||
@@ -405,7 +185,6 @@ export function useRegionDossier(
|
||||
[setSelectedEntity],
|
||||
);
|
||||
|
||||
// Clear dossier when selecting a different entity type
|
||||
useEffect(() => {
|
||||
if (selectedEntity?.type !== 'region_dossier') {
|
||||
setRegionDossier(null);
|
||||
|
||||
@@ -1,47 +1,18 @@
|
||||
/**
|
||||
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
|
||||
* wikimediaClient — Wikipedia / Wikidata via the self-hosted backend (#360).
|
||||
*
|
||||
* Issues #218, #219, #220 (tg12 external audit) + Round 7a:
|
||||
*
|
||||
* Wikimedia's User-Agent policy asks API clients to identify themselves
|
||||
* via `Api-User-Agent` when calling from browser JavaScript (because the
|
||||
* browser does not let JS set `User-Agent` directly). Three independent
|
||||
* components used to issue anonymous browser fetches against Wikipedia /
|
||||
* Wikidata:
|
||||
*
|
||||
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
|
||||
* - WikiImage (Wikipedia REST summary)
|
||||
* - NewsFeed (Wikipedia REST summary)
|
||||
*
|
||||
* PR #284 collapsed them into this shared helper with one stable
|
||||
* `Api-User-Agent`. That fixed compliance but introduced a new problem:
|
||||
* the `Api-User-Agent` was project-wide, so from Wikimedia's perspective
|
||||
* every Shadowbroker install looked like one giant scraper. If one
|
||||
* install misbehaved, Wikimedia's only recourse was to block the project
|
||||
* as a whole.
|
||||
*
|
||||
* Round 7a fixes that. The frontend fetches the per-install operator
|
||||
* handle from `GET /api/settings/operator-handle` once on first use and
|
||||
* embeds it in the `Api-User-Agent`. Wikimedia can now rate-limit /
|
||||
* contact the specific install instead of the project. The handle is
|
||||
* auto-generated on the backend (`shadow-XXXXXX`) or operator-chosen via
|
||||
* the `OPERATOR_HANDLE` setting.
|
||||
*
|
||||
* UX impact: zero. Same thumbnails, same summaries, same load behavior.
|
||||
* The only observable change is the value of the outgoing
|
||||
* `Api-User-Agent` header.
|
||||
* The browser only calls `/api/wikipedia/summary` and `/api/wikidata/sparql`.
|
||||
* Outbound Wikimedia traffic (with per-install operator attribution from
|
||||
* Round 7a) is handled server-side in `services/region_dossier.py`.
|
||||
*/
|
||||
import { API_BASE } from '@/lib/api';
|
||||
|
||||
// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
|
||||
// Keyed by Wikipedia article title (NOT slug — we keep the human-readable
|
||||
// form so debugging the cache is easier). Values track in-flight state
|
||||
// so concurrent callers for the same title share one network request.
|
||||
export interface WikipediaSummary {
|
||||
title: string;
|
||||
description: string;
|
||||
extract: string;
|
||||
thumbnail: string;
|
||||
type: string; // 'standard' | 'disambiguation' | etc.
|
||||
type: string;
|
||||
}
|
||||
|
||||
interface CacheEntry {
|
||||
@@ -59,72 +30,6 @@ function evictIfOverCap() {
|
||||
if (oldest) _summaryCache.delete(oldest);
|
||||
}
|
||||
|
||||
// ─── Per-operator handle (Round 7a) ────────────────────────────────────────
|
||||
|
||||
// Fetched once from the backend on first need and cached for the page
|
||||
// lifetime. The handle is NOT a secret — Wikimedia will see it on every
|
||||
// Wikipedia / Wikidata request we make — but caching it locally avoids a
|
||||
// round-trip on every Wikipedia fetch and lets the offline / no-backend
|
||||
// case still produce a stable UA (the fallback handle).
|
||||
let _handlePromise: Promise<string> | null = null;
|
||||
let _cachedHandle: string | null = null;
|
||||
|
||||
const FALLBACK_HANDLE = 'operator-offline';
|
||||
const HANDLE_ENDPOINT = '/api/settings/operator-handle';
|
||||
|
||||
async function fetchOperatorHandle(): Promise<string> {
|
||||
try {
|
||||
const res = await fetch(HANDLE_ENDPOINT, {
|
||||
// Use the standard relative-path proxy so the Next.js admin-key
|
||||
// injection (same-origin) flows naturally for legitimate browser
|
||||
// sessions. A cross-origin scanner will be blocked by the proxy
|
||||
// before this even leaves their browser.
|
||||
credentials: 'same-origin',
|
||||
});
|
||||
if (!res.ok) return FALLBACK_HANDLE;
|
||||
const data = await res.json();
|
||||
const h = (data && typeof data.handle === 'string' && data.handle.trim()) || '';
|
||||
return h || FALLBACK_HANDLE;
|
||||
} catch {
|
||||
return FALLBACK_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
async function getOperatorHandle(): Promise<string> {
|
||||
if (_cachedHandle) return _cachedHandle;
|
||||
if (!_handlePromise) {
|
||||
_handlePromise = fetchOperatorHandle().then((h) => {
|
||||
_cachedHandle = h;
|
||||
return h;
|
||||
});
|
||||
}
|
||||
return _handlePromise;
|
||||
}
|
||||
|
||||
/** Build the Wikimedia Api-User-Agent for this install.
|
||||
*
|
||||
* Includes the per-install operator handle so Wikimedia can rate-limit /
|
||||
* contact the specific operator instead of the project as a whole.
|
||||
* Exported for tests; production callers should let
|
||||
* `fetchWikipediaSummary` / `fetchWikidataSparql` build it implicitly.
|
||||
*/
|
||||
export async function buildWikimediaUserAgent(purpose: string): Promise<string> {
|
||||
const handle = await getOperatorHandle();
|
||||
const safePurpose = (purpose || '').replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase();
|
||||
return (
|
||||
`Shadowbroker/1.0 (operator: ${handle}; purpose: ${safePurpose}; ` +
|
||||
'+https://github.com/BigBodyCobain/Shadowbroker; report issues at /issues)'
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Wikipedia summary fetch ───────────────────────────────────────────────
|
||||
|
||||
/** Fetch a Wikipedia article summary (titles, NOT URLs).
|
||||
*
|
||||
* Empty / invalid input resolves to `null`. Network errors and disambig
|
||||
* pages also resolve to `null` so callers can render a fallback without
|
||||
* a try/catch. Per the audit's "fail forward, not loud" rule.
|
||||
*/
|
||||
export async function fetchWikipediaSummary(
|
||||
title: string,
|
||||
): Promise<WikipediaSummary | null> {
|
||||
@@ -135,22 +40,19 @@ export async function fetchWikipediaSummary(
|
||||
if (cached?.loaded) return cached.summary;
|
||||
if (cached?.inflight) return cached.inflight;
|
||||
|
||||
const slug = encodeURIComponent(trimmed.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
|
||||
const promise = (async (): Promise<WikipediaSummary | null> => {
|
||||
try {
|
||||
const ua = await buildWikimediaUserAgent('wikipedia-summary');
|
||||
const r = await fetch(url, { headers: { 'Api-User-Agent': ua } });
|
||||
const url = `${API_BASE}/api/wikipedia/summary?title=${encodeURIComponent(trimmed)}`;
|
||||
const r = await fetch(url);
|
||||
if (r.status === 404) return null;
|
||||
if (!r.ok) return null;
|
||||
const d = await r.json();
|
||||
if (d?.type === 'disambiguation') return null;
|
||||
return {
|
||||
title: trimmed,
|
||||
description: d?.description || '',
|
||||
extract: d?.extract || '',
|
||||
thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
|
||||
type: d?.type || 'standard',
|
||||
title: (d?.title as string) || trimmed,
|
||||
description: (d?.description as string) || '',
|
||||
extract: (d?.extract as string) || '',
|
||||
thumbnail: (d?.thumbnail as string) || '',
|
||||
type: (d?.type as string) || 'standard',
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
@@ -166,45 +68,32 @@ export async function fetchWikipediaSummary(
|
||||
return promise;
|
||||
}
|
||||
|
||||
// ─── Wikidata SPARQL ───────────────────────────────────────────────────────
|
||||
|
||||
/** Fetch a Wikidata SPARQL query result.
|
||||
*
|
||||
* Returns the parsed JSON `results.bindings` array on success; `null`
|
||||
* (not throwing) on any failure so callers can render fallbacks
|
||||
* silently. Per-install operator handle threaded through `Api-User-Agent`
|
||||
* (Round 7a).
|
||||
*/
|
||||
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
|
||||
sparql: string,
|
||||
): Promise<T[] | null> {
|
||||
const trimmed = (sparql || '').trim();
|
||||
if (!trimmed) return null;
|
||||
const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(
|
||||
trimmed,
|
||||
)}&format=json`;
|
||||
try {
|
||||
const ua = await buildWikimediaUserAgent('wikidata-sparql');
|
||||
const res = await fetch(url, {
|
||||
headers: {
|
||||
'Api-User-Agent': ua,
|
||||
Accept: 'application/sparql-results+json',
|
||||
},
|
||||
const res = await fetch(`${API_BASE}/api/wikidata/sparql`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: trimmed }),
|
||||
});
|
||||
if (!res.ok) return null;
|
||||
const json = await res.json();
|
||||
const bindings = json?.results?.bindings;
|
||||
const bindings = json?.bindings;
|
||||
return Array.isArray(bindings) ? (bindings as T[]) : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Test helpers ──────────────────────────────────────────────────────────
|
||||
/** @deprecated Browser no longer builds Wikimedia UA; kept for tests that import it. */
|
||||
export async function buildWikimediaUserAgent(purpose: string): Promise<string> {
|
||||
void purpose;
|
||||
return 'Shadowbroker/1.0 (backend-proxied; purpose: wikimedia)';
|
||||
}
|
||||
|
||||
/** Internal: clear the shared cache + the handle cache. Exposed for tests only. */
|
||||
export function _resetWikimediaClientCacheForTests() {
|
||||
_summaryCache.clear();
|
||||
_handlePromise = null;
|
||||
_cachedHandle = null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user