fix(security): k-anon suppression in community-pulse attack aggregate

Top-N attacked domains + layer distribution previously listed every
value with count>=1. With a small gstack community, that leaks
single-user attribution: if only one user is getting hit on
example.com, example.com appears in the aggregate as "1 attack,
1 domain" — and anyone who knows (or can guess) who is being targeted
via example.com can tie that entry back to that one user.

Add K_ANON=5 threshold: a domain (or layer) must be reported by at
least 5 distinct installations before appearing in the aggregate.
Verdict distribution stays unfiltered (block/warn/log_only is
low-cardinality + population-wide, no re-id risk).

Raw rows already locked to service_role only (002_tighten_rls.sql);
this closes the aggregate-channel leak.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-20 19:39:42 +08:00
parent 24274c97b0
commit 10a7ab8a62
+22 -1
View File
@@ -108,31 +108,52 @@ Deno.serve(async () => {
// security_layer, security_verdict.
const { data: attackRows } = await supabase
.from("telemetry_events")
.select("security_url_domain, security_layer, security_verdict")
.select("security_url_domain, security_layer, security_verdict, installation_id")
.eq("event_type", "attack_attempt")
.gte("event_timestamp", weekAgo)
.limit(5000);
// k-anonymity threshold. A domain (or layer) must be reported by at least
// K_ANON distinct installations to appear in the aggregate. Without this,
// a single user's attack log leaks their targeted domains to every other
// gstack user who polls /community-pulse. With it, the dashboard shows
// only community-wide patterns.
const K_ANON = 5;
// Total number of attack rows returned by the query (0 when there is no
// data). This total is not k-anon filtered.
// NOTE(review): the query feeding attackRows appears to be capped with
// .limit(5000), so every count and distinct-installation set below would
// reflect only the fetched rows — confirm this is acceptable for the
// threshold.
const attacksTotal = attackRows?.length ?? 0;
// *Counts: number of rows per value. *Installations: set of distinct
// installation ids that reported that value (used only for the k-anon gate).
const domainCounts: Record<string, number> = {};
const domainInstallations: Record<string, Set<string>> = {};
const layerCounts: Record<string, number> = {};
const layerInstallations: Record<string, Set<string>> = {};
const verdictCounts: Record<string, number> = {};
for (const row of attackRows ?? []) {
// Rows without an installation_id fall back to "". They still increment
// the row counts below but are never added to an installation set, so
// they cannot help a value reach the K_ANON threshold.
const iid = row.installation_id ?? "";
if (row.security_url_domain) {
domainCounts[row.security_url_domain] = (domainCounts[row.security_url_domain] ?? 0) + 1;
if (iid) {
// Create the per-domain set on first sight, then record the installation.
(domainInstallations[row.security_url_domain] ??= new Set()).add(iid);
}
}
if (row.security_layer) {
layerCounts[row.security_layer] = (layerCounts[row.security_layer] ?? 0) + 1;
if (iid) {
// Create the per-layer set on first sight, then record the installation.
(layerInstallations[row.security_layer] ??= new Set()).add(iid);
}
}
if (row.security_verdict) {
// Verdict distribution is low-cardinality (block/warn/log_only) and
// aggregates population-wide with no re-identification risk, so no
// k-anon filter.
verdictCounts[row.security_verdict] = (verdictCounts[row.security_verdict] ?? 0) + 1;
}
}
// Keep only domains seen from at least K_ANON distinct installations
// (a domain with no recorded installations has size 0 and is dropped),
// order by row count descending, and keep the first 10. The reported
// `count` is the row count, not the number of installations.
const topAttackDomains = Object.entries(domainCounts)
.filter(([domain]) => (domainInstallations[domain]?.size ?? 0) >= K_ANON)
.sort(([, a], [, b]) => b - a)
.slice(0, 10)
.map(([domain, count]) => ({ domain, count }));
// Same k-anon gate and descending row-count ordering for layers; unlike
// domains, the layer list is not truncated.
const topAttackLayers = Object.entries(layerCounts)
.filter(([layer]) => (layerInstallations[layer]?.size ?? 0) >= K_ANON)
.sort(([, a], [, b]) => b - a)
.map(([layer, count]) => ({ layer, count }));
const attackVerdictDistribution = Object.entries(verdictCounts)