fix(identify): stop flagging multi-actor C2PA manifests as integrity clashes

The C2PA issuer attribution (`c2pa`) and the SynthID proxy (`synthid`) are
derived from the same manifest, so treating them as independent signals made
rule 1 fire on legitimate multi-actor manifests where a product wraps another
vendor's engine (Microsoft Designer on OpenAI, Microsoft on Google) or an edit
chain re-signs (Adobe over a Gemini original). Before this change, 19 such
files in the 2026-06-01/02 spaces batches were reported as "likely
spoofed/laundered".

Group `c2pa` + `synthid` into one provenance source via `_CLASH_SOURCE`; rule 1
now requires two vendors from different sources. A manifest vendor still clashes
with a genuinely independent stamp (EXIF/XMP generator, IPTC AISystemUsed, AIGC,
xAI).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Victor Kuznetsov
2026-06-02 19:02:35 -07:00
parent 9cb66992bd
commit df0fafe94e
3 changed files with 47 additions and 5 deletions
+1 -1
View File
File diff suppressed because one or more lines are too long
+24 -4
View File
@@ -19,6 +19,7 @@ never as "clean". See CLAUDE.md "SynthID detection is metadata-only".
from __future__ import annotations
import itertools
import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
@@ -282,6 +283,17 @@ def _vendor_of(text: str | None) -> str | None:
return None
# Clash-detection provenance sources. Rule 1 (below) flags two AI vendors only
# when they come from *independent* signals. The C2PA issuer attribution and the
# SynthID proxy are NOT independent -- the proxy is inferred from the same C2PA
# manifest -- so they share one source. A multi-actor manifest (a product wrapping
# another vendor's engine, e.g. Microsoft+OpenAI or Microsoft+Google; or an edit
# chain like Adobe over a Gemini original) legitimately names several vendors in
# one valid chain and must not read as spoofing. Families not listed here are each
# their own independent source (EXIF/XMP generator, IPTC AISystemUsed, AIGC, ...).
_CLASH_SOURCE: dict[str, str] = {"c2pa": "c2pa_manifest", "synthid": "c2pa_manifest"}
def _integrity_clashes(
ai_vendors: dict[str, str], camera_label: str | None, *, camera_has_ai_marker: bool
) -> list[str]:
@@ -301,10 +313,18 @@ def _integrity_clashes(
"""
clashes: list[str] = []
by_vendor: dict[str, list[str]] = {}
for family, vendor in ai_vendors.items():
by_vendor.setdefault(vendor, []).append(family)
if len(by_vendor) >= 2:
# Rule 1: two genuinely INDEPENDENT signals naming different AI vendors. Two
# families clash only when they belong to different provenance sources (see
# _CLASH_SOURCE) AND name different vendors -- so multiple vendors named within
# one C2PA manifest (c2pa issuer + synthid proxy) do not flag.
source = {fam: _CLASH_SOURCE.get(fam, fam) for fam in ai_vendors}
independent_conflict = any(
source[a] != source[b] and ai_vendors[a] != ai_vendors[b] for a, b in itertools.combinations(ai_vendors, 2)
)
if independent_conflict:
by_vendor: dict[str, list[str]] = {}
for family, vendor in ai_vendors.items():
by_vendor.setdefault(vendor, []).append(family)
parts = [f"{vendor} (via {', '.join(sorted(fams))})" for vendor, fams in sorted(by_vendor.items())]
clashes.append(
"Conflicting AI-origin attributions from independent signals: "
+22
View File
@@ -731,6 +731,28 @@ class TestIntegrityClashesHelper:
# C2PA Google + SynthID-Google proxy is consistent, not a contradiction.
assert _integrity_clashes({"c2pa": "Google", "synthid": "Google"}, None, camera_has_ai_marker=True) == []
def test_multi_actor_manifest_no_clash(self):
    """Differing vendors inside a single C2PA manifest must not be flagged.

    A multi-actor manifest names a product and the engine it wraps in ONE
    valid chain (Microsoft Designer on OpenAI, Microsoft on Google, Adobe
    over a Gemini original). The ``c2pa`` issuer attribution and the
    ``synthid`` proxy come from that same manifest, so a vendor mismatch
    between them is expected to yield no clash.
    """
    vendor_pairs = [
        ("Microsoft", "OpenAI"),
        ("Microsoft", "Google"),
        ("Adobe", "Google"),
    ]
    for issuer, proxy in vendor_pairs:
        attributions = {"c2pa": issuer, "synthid": proxy}
        result = _integrity_clashes(attributions, None, camera_has_ai_marker=True)
        assert result == []
def test_manifest_vendor_vs_independent_signal_clashes(self):
    """A manifest-only vendor still clashes with an independent stamp.

    Here the manifest names Microsoft (``c2pa``) and Google (``synthid``),
    while a separate EXIF/XMP generator tag names a third vendor. Exactly
    one clash message is expected, and it must mention that third vendor.
    """
    attributions = {
        "c2pa": "Microsoft",
        "synthid": "Google",
        "exif_generator": "Ideogram",
    }
    found = _integrity_clashes(attributions, None, camera_has_ai_marker=True)
    assert len(found) == 1
    assert "Ideogram" in found[0]
def test_single_vendor_no_clash(self):
assert _integrity_clashes({"c2pa": "OpenAI"}, None, camera_has_ai_marker=True) == []