mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-04 18:18:00 +02:00
fix(identify): stop flagging multi-actor C2PA manifests as integrity clashes
The C2PA issuer attribution (`c2pa`) and the SynthID proxy (`synthid`) are derived from the same manifest, so treating them as independent signals made rule 1 fire on legitimate multi-actor manifests: ones where a product wraps another vendor's engine (Microsoft Designer on OpenAI, Microsoft on Google) or where an edit chain re-signs the file (Adobe over a Gemini original). Before this change, 19 such files in the 2026-06-01/02 spaces batches were reported as "likely spoofed/laundered". The fix groups `c2pa` + `synthid` into one provenance source via `_CLASH_SOURCE`, so rule 1 now fires only when two different vendors come from different sources. A manifest vendor still clashes with a genuinely independent stamp (EXIF/XMP generator, IPTC AISystemUsed, AIGC, xAI). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ never as "clean". See CLAUDE.md "SynthID detection is metadata-only".
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -282,6 +283,17 @@ def _vendor_of(text: str | None) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
# Clash-detection provenance sources. Rule 1 (below) flags two AI vendors only
|
||||
# when they come from *independent* signals. The C2PA issuer attribution and the
|
||||
# SynthID proxy are NOT independent -- the proxy is inferred from the same C2PA
|
||||
# manifest -- so they share one source. A multi-actor manifest (a product wrapping
|
||||
# another vendor's engine, e.g. Microsoft+OpenAI or Microsoft+Google; or an edit
|
||||
# chain like Adobe over a Gemini original) legitimately names several vendors in
|
||||
# one valid chain and must not read as spoofing. Families not listed here are each
|
||||
# their own independent source (EXIF/XMP generator, IPTC AISystemUsed, AIGC, ...).
|
||||
_CLASH_SOURCE: dict[str, str] = {"c2pa": "c2pa_manifest", "synthid": "c2pa_manifest"}
|
||||
|
||||
|
||||
def _integrity_clashes(
|
||||
ai_vendors: dict[str, str], camera_label: str | None, *, camera_has_ai_marker: bool
|
||||
) -> list[str]:
|
||||
@@ -301,10 +313,18 @@ def _integrity_clashes(
|
||||
"""
|
||||
clashes: list[str] = []
|
||||
|
||||
by_vendor: dict[str, list[str]] = {}
|
||||
for family, vendor in ai_vendors.items():
|
||||
by_vendor.setdefault(vendor, []).append(family)
|
||||
if len(by_vendor) >= 2:
|
||||
# Rule 1: two genuinely INDEPENDENT signals naming different AI vendors. Two
|
||||
# families clash only when they belong to different provenance sources (see
|
||||
# _CLASH_SOURCE) AND name different vendors -- so multiple vendors named within
|
||||
# one C2PA manifest (c2pa issuer + synthid proxy) do not flag.
|
||||
source = {fam: _CLASH_SOURCE.get(fam, fam) for fam in ai_vendors}
|
||||
independent_conflict = any(
|
||||
source[a] != source[b] and ai_vendors[a] != ai_vendors[b] for a, b in itertools.combinations(ai_vendors, 2)
|
||||
)
|
||||
if independent_conflict:
|
||||
by_vendor: dict[str, list[str]] = {}
|
||||
for family, vendor in ai_vendors.items():
|
||||
by_vendor.setdefault(vendor, []).append(family)
|
||||
parts = [f"{vendor} (via {', '.join(sorted(fams))})" for vendor, fams in sorted(by_vendor.items())]
|
||||
clashes.append(
|
||||
"Conflicting AI-origin attributions from independent signals: "
|
||||
|
||||
@@ -731,6 +731,28 @@ class TestIntegrityClashesHelper:
|
||||
# C2PA Google + SynthID-Google proxy is consistent, not a contradiction.
|
||||
assert _integrity_clashes({"c2pa": "Google", "synthid": "Google"}, None, camera_has_ai_marker=True) == []
|
||||
|
||||
def test_multi_actor_manifest_no_clash(self):
|
||||
# A multi-actor C2PA manifest names a product + the engine it wraps in ONE
|
||||
# valid chain (Microsoft Designer on OpenAI, Microsoft on Google, Adobe over
|
||||
# a Gemini original). The c2pa issuer attribution and the SynthID proxy share
|
||||
# the same manifest source, so the differing vendors must NOT read as a clash.
|
||||
for c2pa_vendor, synthid_vendor in (("Microsoft", "OpenAI"), ("Microsoft", "Google"), ("Adobe", "Google")):
|
||||
assert (
|
||||
_integrity_clashes({"c2pa": c2pa_vendor, "synthid": synthid_vendor}, None, camera_has_ai_marker=True)
|
||||
== []
|
||||
)
|
||||
|
||||
def test_manifest_vendor_vs_independent_signal_clashes(self):
|
||||
# A vendor named only inside the manifest still clashes with a genuinely
|
||||
# independent stamp (here an EXIF/XMP generator tag) naming a third vendor.
|
||||
clashes = _integrity_clashes(
|
||||
{"c2pa": "Microsoft", "synthid": "Google", "exif_generator": "Ideogram"},
|
||||
None,
|
||||
camera_has_ai_marker=True,
|
||||
)
|
||||
assert len(clashes) == 1
|
||||
assert "Ideogram" in clashes[0]
|
||||
|
||||
def test_single_vendor_no_clash(self):
|
||||
assert _integrity_clashes({"c2pa": "OpenAI"}, None, camera_has_ai_marker=True) == []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user