diff --git a/CLAUDE.md b/CLAUDE.md index eefb5ca..769f641 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,7 +36,7 @@ You are a **principal Python engineer** maintaining a CLI tool and library for r Who embeds what, and whether it is locally detectable (so we know which gaps are fillable). See `identify.py` for what we read. - **Locally detectable (open decoder, no key/API):** Stable Diffusion / SDXL / FLUX via `imwatermark` DWT-DCT (now covered by `invisible_watermark.py`). FLUX uses the same library (`black-forest-labs/flux2` `src/flux2/watermark.py`, 48-bit `0b001010101111111010000111100111001111010100101110`); SDXL is the diffusers `WATERMARK_MESSAGE` (`0b101100111110110010010000011110111011000110011110`). Caveat: fragile to re-encoding. -- **C2PA / IPTC (covered by the issuer/marker scan):** OpenAI, Google, Adobe Firefly, Microsoft Designer (have positives); Bing Image Creator, Canva, Getty, Shutterstock, Stability hosted (no positives yet — collect to validate). Midjourney embeds NO C2PA and no invisible watermark (our `mj-*` sample carried only the IPTC tag). +- **C2PA / IPTC (covered by the issuer/marker scan):** OpenAI, Google, Adobe Firefly, Microsoft (Designer + **Bing Image Creator** — collected 2026-05-24; Bing now runs Microsoft's own **MAI-Image** model, signs C2PA as "Microsoft", NOT OpenAI/DALL-E), and **Stability AI** (collected from Brand Studio / DreamStudio successor; signs C2PA as "Stability AI", cert org "Stability AI Ltd"; no SynthID, no imwatermark on its current Stable Image model — issuer added to `C2PA_ISSUERS`). Still unsampled: Canva (its downloads are re-encoded design *exports* that strip C2PA, so a Canva "positive" is inconclusive — skipped), Getty, Shutterstock. Midjourney embeds NO C2PA and no invisible watermark (our `mj-*` sample carried only the IPTC tag). 
- **Invisible but NOT locally detectable (proprietary, API/oracle only — same wall as SynthID):** Amazon Titan Image Generator + Nova Canvas (Bedrock `DetectGeneratedContent` API), Kakao (new SynthID image adopter, May 2026), NVIDIA Cosmos (SynthID video). No local detector possible; treat like SynthID. ## Known limitations diff --git a/data/synthid_corpus/manifest.csv b/data/synthid_corpus/manifest.csv index 064f7af..9fa589c 100644 --- a/data/synthid_corpus/manifest.csv +++ b/data/synthid_corpus/manifest.csv @@ -108,3 +108,5 @@ c86973424817f62510e2a312b85c52e05adf47ace87a8e717fd442607596f501,c8697342-aistud 1f81827c06d67cf6f6c7f5d53ec8f9738183942a6d1d2717b161fea0fdcc540a,1f81827c-Designer.png,pos,Microsoft Designer,dall-e (Designer),1024,1024,png,"OpenAI, Microsoft",yes,c2pa-metadata,2026-05-24T22:18:40Z,C2PA issuer OpenAI+Microsoft; synthid_source=OpenAI (DALL-E surface inherits OpenAI SynthID+C2PA) f3ebe4683fc40aa2a0b80cc3ced3dc8062385ae32b488f4b33cb2330949e658d,f3ebe468-leonardo_lake.jpg,neg,Leonardo.ai,lucid-origin,1024,1024,jpeg,,,none,2026-05-24T22:22:15Z,non-SynthID AI; NO provenance metadata (no C2PA/IPTC/SynthID) 12775f4c0aec1ecd615e88c4941a788a053d0258d08d31ad34d330d3e21969bd,12775f4c-grok_original.jpg,neg,Grok (xAI),aurora,784,1168,jpeg,,,none,2026-05-24T22:47:46Z,full-res original via Download button; xAI non-adopter; NO C2PA/IPTC/SynthID (has benign EXIF block) +74b605deb102d74443a182f70fbea34a6a470bed381d62ae0bfa0083a6ecc5a3,74b605de-_e1f7984d-98e7-4cdc-91d2-f72314a4bc56.jpeg,neg,Bing Image Creator,MAI-Image-2e,1024,1024,jpeg,Microsoft,,c2pa-metadata,2026-05-25T00:10:28Z,C2PA issuer Microsoft; non-SynthID; issuer-coverage fixture +f9bb6b1039ee228221d2eead7daebd46422a36a4a8959a74fa0198e1aa19f9b8,f9bb6b10-a_red_ceramic_coffee_mug_on_a_wooden_table_soft_morning_light_photorealistic_20260525000725_04.png,neg,Stability Brand Studio,Stable Image,1024,1024,png,Stability AI,,c2pa-metadata,2026-05-25T00:10:31Z,C2PA issuer Stability AI; non-SynthID; no imwatermark diff 
--git a/src/remove_ai_watermarks/identify.py b/src/remove_ai_watermarks/identify.py index e0abbc8..355c37c 100644 --- a/src/remove_ai_watermarks/identify.py +++ b/src/remove_ai_watermarks/identify.py @@ -50,10 +50,13 @@ _SPARKLE_THRESHOLD = 0.5 # manifest names several issuers (Microsoft Designer signs as "OpenAI, # Microsoft"), the first match wins so the product, not the backend, is named. _ISSUER_PLATFORM: tuple[tuple[str, str], ...] = ( - ("Microsoft", "Microsoft Designer (DALL-E / OpenAI backend)"), + # Microsoft signs both Designer and Bing Image Creator; Bing now runs its + # own MAI-Image model (not DALL-E), so the label stays model-neutral. + ("Microsoft", "Microsoft (Bing Image Creator / Designer)"), ("Adobe", "Adobe Firefly"), ("OpenAI", "OpenAI (ChatGPT / gpt-image / DALL-E / Sora)"), ("Google", "Google (Gemini / Imagen)"), + ("Stability AI", "Stability AI (Stable Image / DreamStudio)"), ) # PNG-text / EXIF keys that indicate a local diffusion pipeline (vs. a hosted diff --git a/src/remove_ai_watermarks/noai/constants.py b/src/remove_ai_watermarks/noai/constants.py index dfcd2ef..8dec2e8 100644 --- a/src/remove_ai_watermarks/noai/constants.py +++ b/src/remove_ai_watermarks/noai/constants.py @@ -85,6 +85,9 @@ C2PA_ISSUERS = { b"Microsoft": "Microsoft", b"OpenAI": "OpenAI", b"Truepic": "Truepic", + # Stability AI signs C2PA as "Stability AI" (cert org "Stability AI Ltd"). + # Verified on a live Brand Studio (DreamStudio successor) output, 2026-05-24. + b"Stability AI": "Stability AI", } # C2PA issuers whose signed outputs also carry an invisible SynthID pixel diff --git a/tests/test_identify.py b/tests/test_identify.py index 737fa80..e725bdc 100644 --- a/tests/test_identify.py +++ b/tests/test_identify.py @@ -51,6 +51,17 @@ class TestAttributePlatform: assert platform assert "signer" in platform.lower() + def test_microsoft_label_is_model_neutral(self): + # Bing now runs MAI-Image, not DALL-E; the label must not claim DALL-E. 
+ platform = _attribute_platform(["Microsoft"]) + assert platform + assert "DALL-E" not in platform + + def test_stability(self): + platform = _attribute_platform(["Stability AI"]) + assert platform + assert "Stability AI" in platform + def test_empty_is_none(self): assert _attribute_platform([]) is None @@ -100,6 +111,14 @@ class TestIdentifyNonPng: r = identify(path, check_visible=False) assert any("SynthID" in w for w in r.watermarks) + def test_stability_ai_issuer_attributed_no_synthid(self, tmp_path: Path): + path = self._c2pa_jpeg(tmp_path, b"Stability AI ... trainedAlgorithmicMedia") + r = identify(path, check_visible=False) + assert r.is_ai_generated is True + assert r.platform is not None + assert "Stability AI" in r.platform + assert not any("SynthID" in w for w in r.watermarks) # Stability does not use SynthID + def test_c2pa_without_ai_marker_is_unknown(self, tmp_path: Path): # Adobe signs C2PA on plain Photoshop edits too. Without an AI digital- # source marker, the honest verdict is unknown -- the C2PA watermark is