From 18160fe269491c73de4ace61068e4c156c2a3923 Mon Sep 17 00:00:00 2001
From: test-user <kuznetsov.va@gmail.com>
Date: Wed, 27 May 2026 13:27:25 -0700
Subject: [PATCH] feat(identify): integrity-clash detection for contradictory
 provenance (v0.6.7)

Surface contradictions between independent provenance signals instead of
collapsing to a single verdict -- a strong tell of spoofed, transplanted, or
laundered metadata. Inspired by arXiv:2603.02378.

Two rules in the new _integrity_clashes helper:
- Conflicting AI-origin attributions: two or more distinct AI vendors named by
  independent generator stamps (e.g. a C2PA OpenAI manifest on an image whose
  EXIF says Make="Ideogram AI").
- Camera + AI: a camera-capture C2PA device (Pixel/Leica/Sony/Nikon/Truepic)
  coexisting with an AI-generation marker -- a genuine capture is not AI.

High-precision by design: only hard generator stamps feed it (C2PA issuer when
the source is AI, SynthID proxy, EXIF/XMP generator, IPTC AISystemUsed, xAI,
AIGC). The fuzzy visible sparkle and the open invisible watermark are excluded
-- the latter can be a by-product of our own SDXL removal pass. Vendor
normalization (_vendor_of over _AI_VENDOR_TOKENS) keeps consistent signals from
clashing (C2PA "Google (Gemini)" + SynthID-Google agree); the C2PA vendor is
read from the issuer attribution, not the resolved platform, so a camera label
like "Google Pixel" cannot mis-normalize to an AI vendor.

Surfaced as ProvenanceReport.integrity_clashes (red in the table view, included
in --json). 19 new tests; all real single-origin fixtures (chatgpt/firefly/
doubao/grok/mj) verified to produce zero clashes (false-positive guard).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                            |   2 +-
 README.md                            |   2 +-
 pyproject.toml                       |   2 +-
 src/remove_ai_watermarks/__init__.py |   2 +-
 src/remove_ai_watermarks/cli.py      |   5 ++
 src/remove_ai_watermarks/identify.py | 114 ++++++++++++++++++++++++++-
 tests/test_identify.py               | 100 +++++++++++++++++++++++
 uv.lock                              |   2 +-
 8 files changed, 223 insertions(+), 6 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index 8601b54..d422ac7 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -30,7 +30,7 @@ You are a **principal Python engineer** maintaining a CLI tool and library for r
 - `noai/c2pa.py` — PNG chunk parser; use `extract_c2pa_chunk(path)` to get raw caBX payload, `has_c2pa_metadata(path)` to detect. Do not reimplement chunk parsing. `extract_c2pa_info(path)` sets `synthid_watermark`/`synthid_vendors` when the manifest is signed by a SynthID-using vendor, and `soft_binding`/`soft_binding_vendors` when a `c2pa.soft-binding` `alg` names a forensic-watermark vendor (`soft_binding_vendors_in(buffer)` is the shared byte-scan, used by both the PNG parser and the non-PNG binary path).
 - `noai/constants.py` — PNG_SIGNATURE, C2PA_CHUNK_TYPE, C2PA_SIGNATURES, C2PA_ISSUERS, `SYNTHID_C2PA_ISSUERS` (issuers that pair SynthID with C2PA: Google, OpenAI), and `C2PA_SOFT_BINDINGS` (soft-binding `alg` prefix → forensic-watermark vendor: Adobe TrustMark, Digimarc, Imatag, Steg.AI, Microsoft, ...). Add a new issuer/binding here, not inline.
 - `metadata.py` — `synthid_source(path)` returns the vendor name(s) if the C2PA manifest implies a SynthID pixel watermark, else None. Format-agnostic: PNG via the caBX parser, JPEG/WebP/AVIF/HEIF/JXL via a binary scan (C2PA marker + SynthID issuer + AI-source marker). `get_ai_metadata` surfaces the verdict, and `metadata --check` prints it as a callout. Both `get_ai_metadata` and `has_ai_metadata` guard the PIL open with `except Exception` (HEIC/unknown formats raise non-OSError) and fall through to the binary scan. `xai_signature(path)` detects xAI/Grok's EXIF-only scheme (`ImageDescription` = `Signature: <base64>` + UUID `Artist`); it feeds `has_ai_metadata`, `get_ai_metadata` (key `xai_signature`), and `identify`. `iptc_ai_system(path)` detects the IPTC Photo Metadata 2025.1 AI-disclosure XMP properties (`IPTC_AI_FIELD_MARKERS` = `AISystemUsed`/`AISystemVersionUsed`/`AIPromptInformation`/`AIPromptWriterName`) and returns the `AISystemUsed` generator name (or `"fields present"`). `remove_ai_metadata` routes **ISOBMFF video** (`.mp4`/`.mov`/`.m4v`) through the same `isobmff.strip_c2pa_boxes` as AVIF/HEIF (MP4 is ISOBMFF), and `_scrub_ai_exif` removes the xAI signature + AI-generator EXIF tags on JPEG output.
-- `identify.py` — `identify(path)` aggregates every locally-readable signal (C2PA issuer→platform, C2PA soft-binding forensic-watermark vendor, IPTC "Made with AI" + IPTC 2025.1 `AISystemUsed`, embedded SD/ComfyUI params, SynthID proxy, xAI/Grok EXIF signature via `metadata.xai_signature`, visible Gemini sparkle, open invisible watermark, Adobe TrustMark via `trustmark_detector`) into one `ProvenanceReport`. `is_ai_generated` is True or None (never asserted False — stripped metadata is not proof of clean origin). Visible-sparkle is promoted only at confidence ≥ `_SPARKLE_THRESHOLD` (0.5; corpus-tuned to separate Gemini sparkles ≥0.56 from non-sparkle ≤0.49). The cv2 dependency lives in `gemini_engine.detect_sparkle_confidence`, not here. **C2PA platform attribution is device-token-first, issuer-scan fallback** (`_device_platform` scans manifest bytes for `_DEVICE_C2PA_PLATFORM` tokens, then `_attribute_platform`/`_ISSUER_PLATFORM`). **Why, verified on real signed files 2026-05-26:** the old issuer-only byte-scan matched ANY issuer substring anywhere, so multi-entity manifests mis-attributed -- Leica→"Truepic" (a signing authority in the trust chain), Nikon→"Adobe Firefly" (XMP-toolkit "Adobe" + the sample's "Adobe_MAX" name), Pixel→"Google (Gemini)" ("Google LLC" cert org), Truepic→"Google". A distinctive device token wins instead. **Token distinctiveness is load-bearing:** bare `b"Truepic"` mis-fires (it appears in unrelated trust chains -- it mis-attributed the OpenAI `chatgpt-1.png` fixture), so the token is the specific `b"Truepic_Lens"` from the Lens SDK claim generator; likewise `b"Pixel Camera"` (cert CN) not bare `b"Pixel"`. 
`_DEVICE_C2PA_PLATFORM` lists ONLY tokens **verified against a real C2PA file**: Leica (`lc_c2pa`/`Leica Camera`), Nikon (`NIKON`), Pixel (`Pixel Camera` -- from a real Pixel 10 Pro file attached to c2pa-rs issue #1609/#1554), Sony (`sony.sig`/`sony.cert` -- Sony's own C2PA assertion namespace, verified on a real Sony PXW-Z300 file; NOT bare "Sony" which is a common EXIF Make), Truepic (`Truepic_Lens`). Canon/Samsung/Bria have **no public direct-download C2PA sample** (checked exhaustively: GitHub issue/PR attachments, contentcredentials gallery, HF datasets -- all upload-to-verify or token-gated; Canon's only public file was a self-signed hobbyist CR3, not factory), so they stay unmapped until a real file is captured (same fixture discipline as Grok/Doubao). The Sony sample is video (MP4) -- our ISOBMFF C2PA path detects it; Sony Alpha stills likely share the `sony.*` namespace but are not separately verified. Camera C2PA marks capture authenticity, not AI (Pixel carries `computationalCapture`, not `trainedAlgorithmicMedia`), so these never set `is_ai` -- that stays driven by digital-source-type. `c2pa.cbor_text_after` (now public) is best-effort for the `generator` detail string only and can be None when the manifest keys it `claim_generator_info` (Pixel). **Issuer→generator mapping is `is_ai`-gated** (`_attribute_platform(issuers, is_ai=c2pa_is_ai)`): a specific AI-generator platform is named only when the digital-source-type is `trainedAlgorithmicMedia`; on a non-AI source an issuer substring is treated as incidental (an "Adobe XMP" toolkit string in an *unmapped* Canon/Sony capture would otherwise mislabel it "Adobe Firefly"), so it degrades to the neutral "C2PA signer: X" label. Real Firefly/OpenAI/Google output carries the AI source-type, so it is unaffected (verified: chatgpt-1.png→OpenAI, firefly-1.png→Adobe Firefly still attribute). `_attribute_platform` defaults `is_ai=True` so the mapping stays unit-testable in isolation. 
Add device tokens to `_DEVICE_C2PA_PLATFORM`, generator/issuer platforms to `_ISSUER_PLATFORM`, not inline. For non-PNG containers (JPEG/WebP/AVIF/HEIF/JXL) the caBX parser returns nothing, so issuer (`_issuers_in`) and generator (`_ai_tools_in`, reusing `C2PA_AI_TOOLS`) are recovered by binary-scanning the first MB. EXIF `Software` / `Make` / `Artist` / `ImageDescription` and XMP `CreatorTool` generator tags are read by `metadata.exif_generator` (PIL+piexif for any format PIL opens incl. AVIF, plus a container-agnostic XMP raw-byte scan that also covers HEIF/JXL), matched against `AI_GENERATOR_TOKENS` so ordinary editors (plain "Adobe Photoshop") and real-camera `Make` ("Apple"/"Canon") are not flagged. **Ideogram tags its output with EXIF `Make="Ideogram AI"`** (verified on a real download 2026-05-24) — that's why `Make` is read.
+- `identify.py` — `identify(path)` aggregates every locally-readable signal (C2PA issuer→platform, C2PA soft-binding forensic-watermark vendor, IPTC "Made with AI" + IPTC 2025.1 `AISystemUsed`, embedded SD/ComfyUI params, SynthID proxy, xAI/Grok EXIF signature via `metadata.xai_signature`, visible Gemini sparkle, open invisible watermark, Adobe TrustMark via `trustmark_detector`) into one `ProvenanceReport`. `is_ai_generated` is True or None (never asserted False — stripped metadata is not proof of clean origin). Visible-sparkle is promoted only at confidence ≥ `_SPARKLE_THRESHOLD` (0.5; corpus-tuned to separate Gemini sparkles ≥0.56 from non-sparkle ≤0.49). The cv2 dependency lives in `gemini_engine.detect_sparkle_confidence`, not here. **C2PA platform attribution is device-token-first, issuer-scan fallback** (`_device_platform` scans manifest bytes for `_DEVICE_C2PA_PLATFORM` tokens, then `_attribute_platform`/`_ISSUER_PLATFORM`). **Why, verified on real signed files 2026-05-26:** the old issuer-only byte-scan matched ANY issuer substring anywhere, so multi-entity manifests mis-attributed -- Leica→"Truepic" (a signing authority in the trust chain), Nikon→"Adobe Firefly" (XMP-toolkit "Adobe" + the sample's "Adobe_MAX" name), Pixel→"Google (Gemini)" ("Google LLC" cert org), Truepic→"Google". A distinctive device token wins instead. **Token distinctiveness is load-bearing:** bare `b"Truepic"` mis-fires (it appears in unrelated trust chains -- it mis-attributed the OpenAI `chatgpt-1.png` fixture), so the token is the specific `b"Truepic_Lens"` from the Lens SDK claim generator; likewise `b"Pixel Camera"` (cert CN) not bare `b"Pixel"`. 
`_DEVICE_C2PA_PLATFORM` lists ONLY tokens **verified against a real C2PA file**: Leica (`lc_c2pa`/`Leica Camera`), Nikon (`NIKON`), Pixel (`Pixel Camera` -- from a real Pixel 10 Pro file attached to c2pa-rs issue #1609/#1554), Sony (`sony.sig`/`sony.cert` -- Sony's own C2PA assertion namespace, verified on a real Sony PXW-Z300 file; NOT bare "Sony" which is a common EXIF Make), Truepic (`Truepic_Lens`). Canon/Samsung/Bria have **no public direct-download C2PA sample** (checked exhaustively: GitHub issue/PR attachments, contentcredentials gallery, HF datasets -- all upload-to-verify or token-gated; Canon's only public file was a self-signed hobbyist CR3, not factory), so they stay unmapped until a real file is captured (same fixture discipline as Grok/Doubao). The Sony sample is video (MP4) -- our ISOBMFF C2PA path detects it; Sony Alpha stills likely share the `sony.*` namespace but are not separately verified. Camera C2PA marks capture authenticity, not AI (Pixel carries `computationalCapture`, not `trainedAlgorithmicMedia`), so these never set `is_ai` -- that stays driven by digital-source-type. `c2pa.cbor_text_after` (now public) is best-effort for the `generator` detail string only and can be None when the manifest keys it `claim_generator_info` (Pixel). **Issuer→generator mapping is `is_ai`-gated** (`_attribute_platform(issuers, is_ai=c2pa_is_ai)`): a specific AI-generator platform is named only when the digital-source-type is `trainedAlgorithmicMedia`; on a non-AI source an issuer substring is treated as incidental (an "Adobe XMP" toolkit string in an *unmapped* Canon/Sony capture would otherwise mislabel it "Adobe Firefly"), so it degrades to the neutral "C2PA signer: X" label. Real Firefly/OpenAI/Google output carries the AI source-type, so it is unaffected (verified: chatgpt-1.png→OpenAI, firefly-1.png→Adobe Firefly still attribute). `_attribute_platform` defaults `is_ai=True` so the mapping stays unit-testable in isolation. 
Add device tokens to `_DEVICE_C2PA_PLATFORM`, generator/issuer platforms to `_ISSUER_PLATFORM`, not inline. For non-PNG containers (JPEG/WebP/AVIF/HEIF/JXL) the caBX parser returns nothing, so issuer (`_issuers_in`) and generator (`_ai_tools_in`, reusing `C2PA_AI_TOOLS`) are recovered by binary-scanning the first MB. EXIF `Software` / `Make` / `Artist` / `ImageDescription` and XMP `CreatorTool` generator tags are read by `metadata.exif_generator` (PIL+piexif for any format PIL opens incl. AVIF, plus a container-agnostic XMP raw-byte scan that also covers HEIF/JXL), matched against `AI_GENERATOR_TOKENS` so ordinary editors (plain "Adobe Photoshop") and real-camera `Make` ("Apple"/"Canon") are not flagged. **Ideogram tags its output with EXIF `Make="Ideogram AI"`** (verified on a real download 2026-05-24) — that's why `Make` is read. **Integrity-clash detection** (`_integrity_clashes`, surfaced as `ProvenanceReport.integrity_clashes`, printed in red by `identify` and serialized to `--json`): contradictions between independent generator stamps are a laundering/spoofing tell. Two rules: (1) two or more distinct AI-origin vendors named by independent signals (e.g. C2PA OpenAI + EXIF `Make="Ideogram AI"`), and (2) a camera-capture C2PA device (`_DEVICE_C2PA_PLATFORM`) coexisting with any AI-generation marker. Vendor normalization is `_vendor_of` over `_AI_VENDOR_TOKENS` (so a C2PA "Google (Gemini)" issuer and a SynthID-Google proxy agree, while different vendors clash). **High-precision by design:** only hard generator stamps feed it (C2PA-issuer when source is AI, SynthID, EXIF/XMP generator, IPTC `AISystemUsed`, xAI, AIGC); the fuzzy visible sparkle and the open invisible watermark are **excluded** (the latter can be a by-product of our own SDXL removal pass). The c2pa vendor is classified from the issuer attribution / generator, NOT the resolved `platform` (a camera label like "Google Pixel" would mis-normalize to "Google"). 
All real single-origin fixtures (chatgpt/firefly/doubao/grok/mj) verified to produce **zero** clashes (false-positive guard in `test_identify.py::TestRealSamplesHaveNoClash`).
 - `gemini_engine.py` — visible Gemini-sparkle remover/detector (cv2/numpy, no GPU). `detect_sparkle_confidence(path)` is the file-level entry point used by `identify.py`.
 - `doubao_engine.py` — visible Doubao "豆包AI生成" remover/detector (cv2/numpy, no GPU). `DoubaoEngine.locate` anchors a bottom-right box by **geometry** (mark scales with image WIDTH, fractions in module constants; no bundled template), `extract_mask` pulls the light low-saturation glyphs with a **polarity-aware white top-hat** (brighter-than-blurred-local-bg, so white-paper documents are left untouched instead of smeared), `detect` thresholds glyph coverage (`DETECT_MIN_COVERAGE` 0.16 separates real marks ≥0.20 from corner noise, which stays ≤0.06 on large images but can spike to ~0.15 on tiny ones), `remove_watermark` inpaints (cv2 Telea/NS) and **bails when coverage > `MAX_INPAINT_COVERAGE` 0.50** (dense-text background → would smear). Wired into `visible --mark` via `cli._run_doubao_if_selected`. **Logo is near-white (~253), not the gray some third-party tools assume.** Best on photo/illustration backgrounds; high-contrast edges leave faint residue (cv2-inpaint limit). Clean per-pixel reverse-alpha (Gemini-style) is the future upgrade but needs a captured/distilled alpha map — see below.
 - `region_eraser.py` — universal region eraser (`erase` CLI). `erase(image, boxes=|mask=, backend=)`: `boxes_to_mask` → `cv2.inpaint` (`cv2` backend, default, no deps) or big-LaMa via onnxruntime (`lama` backend, extra `lama`, `Carve/LaMa-ONNX` Apache-2.0 model downloaded on first use, never bundled). `erase_lama` crops a padded region around the mask, runs LaMa at its fixed 512² input, pastes only masked pixels back (untouched areas stay pixel-exact). Lazy `_get_lama_session` singleton; `lama_available()` guards the optional import. **LaMa-ONNX costs ~3.5-4 GB peak RAM and ~5-6 s/call on CPU** (FFC working set, not arena — `enable_cpu_mem_arena=False` does not help), so it does NOT fit a minimal droplet; the cv2 backend (tens of MB, ~30 ms) does. LaMa quality at low RAM = serverless/GPU, mirroring how raiw.cc offloads SDXL to fal.
diff --git a/README.md b/README.md
index 4ab9616..2575574 100644
--- a/README.md
+++ b/README.md
@@ -325,7 +325,7 @@ Tracked but not yet implemented:
 - **Real non-PNG C2PA fixtures**. SynthID-source detection for JPEG / WebP / AVIF is currently covered only by synthetic byte blobs; replace with real vendor-emitted files to ground the binary-scan path.
 - **Maintenance debt**. Clear strict-pyright debt in `remove_ai_metadata` / `cli.py` (untyped piexif / PIL / click / rich) so `maintain.sh` can finish green. (`uv-secure` is already clean since `idna` was bumped to 3.16.)
 - **AVIF / HEIF EXIF/XMP inside the `meta` box**. Removal already strips top-level C2PA `uuid` / JUMBF `jumb` boxes and any AI-labelled top-level XMP `uuid` box, and non-ISOBMFF audio/video (WebM, MP3, WAV, FLAC, OGG) is stripped losslessly via ffmpeg. Still open: EXIF/XMP stored as *items inside the `meta` box* (typical for AVIF/HEIF stills) — needs `meta`-box surgery (iinf/iloc + mdat splice) or `exiftool` (a non-bundled binary dependency).
-- **Multi-signal contradiction reporting ("Integrity Clash")**. When a C2PA manifest claims human authorship but a watermark / IPTC signal indicates AI (or signals otherwise disagree), `identify` should surface the *contradiction* rather than collapse to one verdict (per [arXiv:2603.02378](https://arxiv.org/abs/2603.02378)). Pure aggregation logic — no new dependency or sample needed.
+- **Multi-signal contradiction reporting ("Integrity Clash")** — *shipped (v0.6.7)*. `identify` now surfaces contradictions between independent provenance signals (two different AI vendors named by separate stamps, or camera-capture C2PA credentials next to AI-generation markers) as `integrity_clashes` (shown in red in the table view and in `--json`), rather than collapsing to a single verdict. Inspired by [arXiv:2603.02378](https://arxiv.org/abs/2603.02378).
 - **More C2PA device signers**. Leica, Nikon, Google Pixel, Sony, and Truepic are mapped (each verified against a real signed file). Canon and Samsung Galaxy (AI-edit) are deferred until a real signed sample surfaces — no public direct-download C2PA file exists for them today (upload-to-verify / news-agency-licensed only).
 - **C2PA detection window for streaming MP4**. Non-PNG detection scans the first 1 MB; a manifest placed after a large `mdat` in a streaming MP4 can be missed (front-placed manifests, the common case, are caught).
 - **Resemble PerTh audio detection** — evaluated, not feasible with the public API: `get_watermark()` returns a raw bit array with no presence/confidence flag, so watermarked vs. clean audio can't be reliably separated without Resemble's fixed payload or a confidence service. Same wall as the SynthID pixel detector.
diff --git a/pyproject.toml b/pyproject.toml
index 8b3d762..83dae0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "remove-ai-watermarks"
-version = "0.6.6"
+version = "0.6.7"
 description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/remove_ai_watermarks/__init__.py b/src/remove_ai_watermarks/__init__.py
index 5ea2d25..16eef5c 100644
--- a/src/remove_ai_watermarks/__init__.py
+++ b/src/remove_ai_watermarks/__init__.py
@@ -1,3 +1,3 @@
 """Remove-AI-Watermarks: Unified tool for removing visible and invisible AI watermarks."""
 
-__version__ = "0.6.6"
+__version__ = "0.6.7"
diff --git a/src/remove_ai_watermarks/cli.py b/src/remove_ai_watermarks/cli.py
index 435848e..ce90936 100644
--- a/src/remove_ai_watermarks/cli.py
+++ b/src/remove_ai_watermarks/cli.py
@@ -618,6 +618,11 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo
     console.print(f"\n  Verdict: {verdict}  [dim](confidence: {report.confidence})[/]")
     console.print(f"  Platform: {report.platform or '[dim]undetermined[/]'}")
 
+    if report.integrity_clashes:
+        console.print("\n  [bold red]⚠ Integrity clash[/] [dim](provenance signals contradict each other)[/]")
+        for clash in report.integrity_clashes:
+            console.print(f"  [red]- {clash}[/]")
+
     if report.watermarks:
         table = Table(show_header=True, header_style="bold", title="Watermarks / provenance markers")
         table.add_column("Marker", style="cyan")
diff --git a/src/remove_ai_watermarks/identify.py b/src/remove_ai_watermarks/identify.py
index ab72762..560b9d2 100644
--- a/src/remove_ai_watermarks/identify.py
+++ b/src/remove_ai_watermarks/identify.py
@@ -110,6 +110,11 @@ class ProvenanceReport:
     watermarks: list[str] = field(default_factory=list[str])
     signals: list[Signal] = field(default_factory=list["Signal"])
     caveats: list[str] = field(default_factory=list[str])
+    # Contradictions between independent provenance signals (e.g. two different
+    # AI vendors both claiming the image, or camera-capture credentials next to
+    # AI-generation markers). Non-empty means the provenance is internally
+    # inconsistent -- a strong tell of spoofed, transplanted, or laundered metadata.
+    integrity_clashes: list[str] = field(default_factory=list[str])
 
 
 def _issuers_in(data: bytes) -> list[str]:
@@ -188,6 +193,88 @@ def _attribute_platform(issuers: list[str], *, is_ai: bool = True) -> str | None
     return None
 
 
+# Coarse origin-vendor normalization for integrity-clash detection. Two signals
+# that resolve to the SAME key are consistent (a C2PA "Google (Gemini)" issuer
+# and a SynthID-Google proxy); two DIFFERENT keys from independent generator
+# stamps are a contradiction (a C2PA OpenAI manifest on an image whose EXIF says
+# "Ideogram AI"). Plain substring match on the lowercased platform/detail
+# string, first hit wins: order specific tokens before brand umbrellas, and keep
+# tokens distinctive ("bing", "dall" also match inside unrelated words).
+_AI_VENDOR_TOKENS: tuple[tuple[str, str], ...] = (
+    ("gpt-image", "OpenAI"),
+    ("dall", "OpenAI"),
+    ("sora", "OpenAI"),
+    ("openai", "OpenAI"),
+    ("gemini", "Google"),
+    ("imagen", "Google"),
+    ("nano banana", "Google"),
+    ("google", "Google"),
+    ("firefly", "Adobe"),
+    ("adobe", "Adobe"),
+    ("bing", "Microsoft"),
+    ("designer", "Microsoft"),
+    ("microsoft", "Microsoft"),
+    ("stability", "Stability AI"),
+    ("stable diffusion", "Stability AI"),
+    ("sdxl", "Stability AI"),
+    ("ideogram", "Ideogram"),
+    ("grok", "xAI"),
+    ("aurora", "xAI"),
+    ("xai", "xAI"),
+)
+
+
+def _vendor_of(text: str | None) -> str | None:
+    """Map a platform/generator string to a coarse origin-vendor key, else None."""
+    if not text:
+        return None
+    lowered = text.lower()
+    # Case-insensitive substring scan; the first matching table entry wins,
+    # so the ordering of _AI_VENDOR_TOKENS resolves overlapping tokens.
+    matches = (vendor for token, vendor in _AI_VENDOR_TOKENS if token in lowered)
+    return next(matches, None)
+
+
+def _integrity_clashes(
+    ai_vendors: dict[str, str], camera_label: str | None, *, camera_has_ai_marker: bool
+) -> list[str]:
+    """Describe every contradiction found among independent provenance signals.
+
+    Args:
+        ai_vendors: signal family -> normalized AI-origin vendor; one entry per
+            generator-stamped signal (C2PA issuer on an AI source, SynthID
+            proxy, EXIF/XMP generator tag, IPTC AISystemUsed, xAI, AIGC label).
+        camera_label: label of the camera/verified-capture C2PA device that was
+            identified (Pixel, Leica, Sony, Nikon, Truepic), or None.
+        camera_has_ai_marker: whether an AI-generation stamp sits alongside the
+            camera credentials.
+
+    Returns:
+        Human-readable clash sentences, vendor conflict first; empty when the signals agree.
+    """
+    found: list[str] = []
+
+    # Rule 1: group the signal families under the vendor each one names; two or
+    # more distinct vendors means the stamps disagree about who made the image.
+    families_by_vendor: dict[str, list[str]] = {}
+    for family, vendor in ai_vendors.items():
+        families_by_vendor.setdefault(vendor, []).append(family)
+    if len(families_by_vendor) > 1:
+        sides = " vs ".join(f"{v} (via {', '.join(sorted(fams))})" for v, fams in sorted(families_by_vendor.items()))
+        tail = " -- one provenance set was likely spoofed, transplanted, or laundered."
+        found.append("Conflicting AI-origin attributions from independent signals: " + sides + tail)
+
+    # Rule 2: capture credentials and an AI-generation stamp cannot both be true.
+    if camera_label and camera_has_ai_marker:
+        named = ", ".join(sorted(set(ai_vendors.values()))) or "present"
+        found.append(
+            f"Camera-capture C2PA credentials ({camera_label}) coexist with AI-generation markers "
+            f"({named}) -- a genuine camera capture is not AI-generated, so the provenance is inconsistent."
+        )
+
+    return found
+
+
 def _visible_sparkle(image_path: Path) -> float | None:
     """Visible Gemini-sparkle confidence in [0, 1], or None if unavailable.
 
@@ -251,6 +338,13 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
     signals: list[Signal] = []
     watermarks: list[str] = []
     caveats: list[str] = []
+    # One normalized origin vendor per generator-stamped signal, for integrity-
+    # clash detection (see _integrity_clashes). Visible sparkle and the open
+    # invisible watermark are deliberately excluded: the former is a fuzzy visual
+    # score, the latter can be a by-product of our own SDXL removal pass, so
+    # neither is a trustworthy "the generator stamped its identity" claim.
+    ai_vendor_claims: dict[str, str] = {}
+    camera_label = _device_platform(head)
 
     # ── C2PA Content Credentials ────────────────────────────────────
     has_c2pa = bool(info) or b"c2pa" in head.lower() or C2PA_UUID in head
@@ -271,11 +365,17 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
     # signer/producer), with the issuer byte-scan only as fallback. The issuer
     # scan alone mis-attributed real samples (Leica->Truepic timestamp authority,
     # Nikon->Adobe namespace, Pixel->Google Gemini) -- the device scan fixes that.
-    platform = (_device_platform(head) or _attribute_platform(issuers, is_ai=c2pa_is_ai)) if has_c2pa else None
+    platform = (camera_label or _attribute_platform(issuers, is_ai=c2pa_is_ai)) if has_c2pa else None
     if has_c2pa:
         detail = ", ".join(filter(None, [", ".join(issuers), generator, info.get("source_type")]))
         signals.append(Signal("c2pa", detail or "C2PA manifest present", "high"))
         watermarks.append(f"C2PA Content Credentials ({', '.join(issuers) or 'unknown signer'})")
+        # Record the AI-origin vendor for clash detection only when the source is
+        # actually AI -- classify the issuer attribution / generator, NOT the
+        # resolved `platform` (which may be a camera device token whose label,
+        # e.g. "Google Pixel", would mis-normalize to an AI vendor).
+        if c2pa_is_ai and (v := (_vendor_of(_attribute_platform(issuers, is_ai=True)) or _vendor_of(generator))):
+            ai_vendor_claims["c2pa"] = v
 
     # ── SynthID metadata proxy ──────────────────────────────────────
     # get_ai_metadata already sets synthid_watermark for both PNG (caBX parser)
@@ -286,6 +386,8 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         caveats.append(_SYNTHID_CAVEAT)
         if "OpenAI" in (" ".join(issuers) + synthid):
             caveats.append(_OPENAI_CAVEAT)
+        if v := _vendor_of(synthid):
+            ai_vendor_claims["synthid"] = v
 
     # ── C2PA soft-binding: a named forensic/third-party watermark vendor ─
     # (Adobe TrustMark, Digimarc, Imatag, ...). Present in the manifest even when
@@ -315,6 +417,8 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         watermarks.append(f"IPTC 2025.1 AI disclosure ({system})" if named else "IPTC 2025.1 AI disclosure fields")
         if platform is None and named:
             platform = f"{system} (IPTC AISystemUsed)"
+        if named and (v := _vendor_of(system)):
+            ai_vendor_claims["iptc_ai_system"] = v
 
     # ── China TC260 AIGC label (Doubao and other China-served gens) ──
     aigc = any(m in head for m in AIGC_MARKERS)
@@ -324,6 +428,7 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         watermarks.append("China AIGC label (TC260 standard)")
         if platform is None:
             platform = "China AIGC-labeled generator (TC260; e.g. Doubao)"
+        ai_vendor_claims["aigc"] = "China AIGC (TC260)"
 
     # ── Local diffusion parameters (Stable Diffusion / ComfyUI) ──────
     local_keys = sorted(k for k in meta if k.lower() in _LOCAL_GEN_KEYS)
@@ -340,6 +445,8 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         watermarks.append(f"Embedded generator tag: {generator_tag}")
         if platform is None:
             platform = f"{generator_tag} (EXIF/XMP generator tag)"
+        if v := _vendor_of(generator_tag):
+            ai_vendor_claims["exif_generator"] = v
 
     # ── xAI / Grok EXIF signature scheme (no C2PA/SynthID/IPTC) ──────
     # Grok's only provenance signal: EXIF ImageDescription "Signature: <base64>"
@@ -350,6 +457,7 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         watermarks.append("xAI/Grok EXIF signature")
         if platform is None:
             platform = "xAI (Grok / Aurora)"
+        ai_vendor_claims["xai"] = "xAI"
 
     # ── Open invisible watermark (SD / SDXL / FLUX, dwtDct) ──────────
     # Public decoder, no key -- a definitive embedded signal on pristine files.
@@ -404,6 +512,9 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         is_ai = None
         confidence = "none"
 
+    # ── Integrity clashes: the camera rule needs an actual C2PA manifest ─
+    c2pa_camera = camera_label if has_c2pa else None  # token scan of `head` runs even without C2PA
+    clashes = _integrity_clashes(ai_vendor_claims, c2pa_camera, camera_has_ai_marker=bool(ai_vendor_claims))
     caveats.append(_STRIP_CAVEAT)
     # De-duplicate while preserving order.
     caveats = list(dict.fromkeys(caveats))
@@ -416,4 +527,5 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b
         watermarks=watermarks,
         signals=signals,
         caveats=caveats,
+        integrity_clashes=clashes,
     )
diff --git a/tests/test_identify.py b/tests/test_identify.py
index aa517b4..fb9104c 100644
--- a/tests/test_identify.py
+++ b/tests/test_identify.py
@@ -17,7 +17,9 @@ from remove_ai_watermarks.identify import (
     ProvenanceReport,
     _ai_tools_in,
     _attribute_platform,
+    _integrity_clashes,
     _issuers_in,
+    _vendor_of,
     identify,
 )
 
@@ -447,3 +449,101 @@ class TestIdentifyAIGC:
         r = identify(self._aigc_png(tmp_path), check_visible=False)
         sig = next(s for s in r.signals if s.name == "aigc")
         assert "BYTEDANCE001" in sig.detail
+
+
+# ── Integrity clashes (contradictions between independent signals) ──────
+
+
+class TestVendorOf:
+    """Raw generator strings normalize to a canonical AI vendor name, or to None."""
+
+    def test_openai_variants(self):
+        for label in ("OpenAI (ChatGPT / gpt-image / DALL-E / Sora)", "DALL-E 3"):
+            assert _vendor_of(label) == "OpenAI"
+
+    def test_google_variants(self):
+        for label in ("Google (Gemini / Imagen)", "Imagen 3"):
+            assert _vendor_of(label) == "Google"
+
+    def test_other_vendors(self):
+        cases = {"Ideogram AI": "Ideogram", "Adobe Firefly": "Adobe", "Stability AI (Stable Image)": "Stability AI"}
+        assert {label: _vendor_of(label) for label in cases} == cases
+
+    def test_camera_label_is_not_an_ai_vendor(self):
+        # A camera label that resolved to an AI vendor would make a genuine capture look AI-generated.
+        assert _vendor_of("Leica (camera, C2PA capture)") is None
+
+    def test_unknown_is_none(self):
+        for value in ("a regular photo", None):
+            assert _vendor_of(value) is None
+
+
+class TestIntegrityClashesHelper:
+    """Direct checks of _integrity_clashes against hand-built claim maps."""
+
+    def test_two_ai_vendors_clash(self):
+        claims = {"c2pa": "OpenAI", "exif_generator": "Ideogram"}
+        found = _integrity_clashes(claims, None, camera_has_ai_marker=True)
+        assert len(found) == 1
+        assert all(vendor in found[0] for vendor in ("OpenAI", "Ideogram"))
+
+    def test_same_vendor_two_signals_no_clash(self):
+        # Two signals naming the same vendor (C2PA Google + SynthID-Google proxy) agree.
+        agreeing = {"c2pa": "Google", "synthid": "Google"}
+        assert _integrity_clashes(agreeing, None, camera_has_ai_marker=True) == []
+
+    def test_single_vendor_no_clash(self):
+        assert _integrity_clashes({"c2pa": "OpenAI"}, None, camera_has_ai_marker=True) == []
+
+    def test_empty_no_clash(self):
+        assert _integrity_clashes({}, None, camera_has_ai_marker=False) == []
+
+    def test_camera_plus_ai_marker_clashes(self):
+        camera = "Google Pixel (camera, C2PA capture)"
+        found = _integrity_clashes({"exif_generator": "Ideogram"}, camera, camera_has_ai_marker=True)
+        assert any("Camera-capture" in text and "Pixel" in text for text in found)
+
+    def test_camera_without_ai_marker_no_clash(self):
+        # The normal case for our Pixel/Leica/Sony files: a clean capture raises nothing.
+        camera = "Leica (camera, C2PA capture)"
+        assert _integrity_clashes({}, camera, camera_has_ai_marker=False) == []
+
+
+class TestIntegrityClashEndToEnd:
+    """Drive identify() over synthetic JPEGs carrying hand-crafted provenance bytes."""
+
+    def _c2pa_jpeg(self, tmp_path: Path, blob: bytes) -> Path:
+        target = tmp_path / "img.jpg"
+        target.write_bytes(b"".join((b"\xff\xd8\xff\xe1jumbc2pa", blob, b"\xff\xd9")))
+        return target
+
+    def test_two_generator_stamps_clash(self, tmp_path: Path):
+        # OpenAI C2PA manifest (AI source) plus a China TC260 AIGC label: two
+        # independent generator stamps naming different origins -> laundering tell.
+        image = self._c2pa_jpeg(tmp_path, b"OpenAI ... trainedAlgorithmicMedia ... TC260:AIGC label")
+        report = identify(image, check_visible=False, check_invisible=False)
+        assert report.integrity_clashes
+        assert any("Conflicting AI-origin" in text for text in report.integrity_clashes)
+
+    def test_single_stamp_no_clash(self, tmp_path: Path):
+        image = self._c2pa_jpeg(tmp_path, b"OpenAI ... trainedAlgorithmicMedia")
+        assert identify(image, check_visible=False, check_invisible=False).integrity_clashes == []
+
+    def test_clash_serializes_to_json(self, tmp_path: Path):
+        image = self._c2pa_jpeg(tmp_path, b"OpenAI ... trainedAlgorithmicMedia ... TC260:AIGC label")
+        report = identify(image, check_visible=False, check_invisible=False)
+        round_tripped = json.loads(json.dumps(asdict(report), default=str))
+        assert round_tripped["integrity_clashes"] == report.integrity_clashes
+
+
+@pytest.mark.skipif(not SAMPLES_DIR.exists(), reason="data/samples not present")
+@pytest.mark.parametrize("fixture", ["chatgpt-1.png", "firefly-1.png", "doubao-1.png", "grok-1.jpg", "mj-1.png"])
+class TestRealSamplesHaveNoClash:
+    """False-positive guard: a real single-origin fixture must never report a clash."""
+
+    def test_no_false_positive_clash(self, fixture: str):
+        sample = SAMPLES_DIR / fixture
+        if not sample.exists():
+            pytest.skip(f"{fixture} not present")
+        report = identify(sample, check_visible=False, check_invisible=False)
+        assert report.integrity_clashes == []
diff --git a/uv.lock b/uv.lock
index 606c0a4..fe8bd8d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2865,7 +2865,7 @@ wheels = [
 
 [[package]]
 name = "remove-ai-watermarks"
-version = "0.6.6"
+version = "0.6.7"
 source = { editable = "." }
 dependencies = [
     { name = "click" },