fix(security): k-anon suppression in community-pulse attack aggregate

Top-N attacked domains and the layer distribution previously listed every
value with count >= 1. With a small gstack community, that leaks
single-user attribution: if only one user is getting hit on example.com,
example.com appears in the aggregate as "1 attack, 1 domain", which is easy
to de-anonymize when you know who is being targeted.

Add a K_ANON=5 threshold: a domain (or layer) must be reported by at least
5 distinct installations before it appears in the aggregate. The verdict
distribution stays unfiltered (block/warn/log_only is low-cardinality and
population-wide, so it carries no re-identification risk).

Raw rows are already locked to service_role only (002_tighten_rls.sql);
this closes the aggregate-channel leak.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-21 17:20:02 +02:00 · 2026-04-20 19:39:42 +08:00
parent 24274c97b0
commit 10a7ab8a62
1 changed files with 22 additions and 1 deletions
@@ -108,31 +108,52 @@ Deno.serve(async () => {
    //   security_layer, security_verdict.
    const { data: attackRows } = await supabase
      .from("telemetry_events")
-      .select("security_url_domain, security_layer, security_verdict")
+      .select("security_url_domain, security_layer, security_verdict, installation_id")
      .eq("event_type", "attack_attempt")
      .gte("event_timestamp", weekAgo)
      .limit(5000);

+    // k-anonymity threshold. A domain (or layer) must be reported by at least
+    // K_ANON distinct installations to appear in the aggregate. Without this,
+    // a single user's attack log leaks their targeted domains to every other
+    // gstack user who polls /community-pulse. With it, the dashboard shows
+    // only community-wide patterns.
+    const K_ANON = 5;
+
    const attacksTotal = attackRows?.length ?? 0;
    const domainCounts: Record<string, number> = {};
+    const domainInstallations: Record<string, Set<string>> = {};
    const layerCounts: Record<string, number> = {};
+    const layerInstallations: Record<string, Set<string>> = {};
    const verdictCounts: Record<string, number> = {};
    for (const row of attackRows ?? []) {
+      const iid = row.installation_id ?? "";
      if (row.security_url_domain) {
        domainCounts[row.security_url_domain] = (domainCounts[row.security_url_domain] ?? 0) + 1;
+        if (iid) {
+          (domainInstallations[row.security_url_domain] ??= new Set()).add(iid);
+        }
      }
      if (row.security_layer) {
        layerCounts[row.security_layer] = (layerCounts[row.security_layer] ?? 0) + 1;
+        if (iid) {
+          (layerInstallations[row.security_layer] ??= new Set()).add(iid);
+        }
      }
      if (row.security_verdict) {
+        // Verdict distribution is low-cardinality (block/warn/log_only) and
+        // aggregates population-wide with no re-identification risk, so no
+        // k-anon filter.
        verdictCounts[row.security_verdict] = (verdictCounts[row.security_verdict] ?? 0) + 1;
      }
    }
    const topAttackDomains = Object.entries(domainCounts)
+      .filter(([domain]) => (domainInstallations[domain]?.size ?? 0) >= K_ANON)
      .sort(([, a], [, b]) => b - a)
      .slice(0, 10)
      .map(([domain, count]) => ({ domain, count }));
    const topAttackLayers = Object.entries(layerCounts)
+      .filter(([layer]) => (layerInstallations[layer]?.size ?? 0) >= K_ANON)
      .sort(([, a], [, b]) => b - a)
      .map(([layer, count]) => ({ layer, count }));
    const attackVerdictDistribution = Object.entries(verdictCounts)