mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-05-26 14:17:47 +02:00
feat(c2pa): recognize Stability AI issuer; fix Microsoft platform label
Collected live C2PA positives from Bing Image Creator and Stability Brand Studio (the DreamStudio successor) and learned two things our scan got wrong:

- Bing now runs Microsoft's own MAI-Image model, not DALL-E, and signs C2PA as 'Microsoft'. The scan caught it, but the platform label claimed 'Microsoft Designer (DALL-E / OpenAI backend)'. Relabeled to be model-neutral: 'Microsoft (Bing Image Creator / Designer)'.
- Stability signs C2PA as 'Stability AI' (cert 'Stability AI Ltd'), which was not in C2PA_ISSUERS, so it read as 'unknown signer'. Added the issuer and a platform mapping. Stability uses no SynthID and (on its current Stable Image model) no imwatermark watermark -- verified, both negative.

Both samples were ingested as SynthID-negative corpus fixtures (they are AI but not SynthID) for issuer coverage. Canva was skipped: its downloads are re-encoded design exports that strip C2PA, so a Canva sample would be inconclusive.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -36,7 +36,7 @@ You are a **principal Python engineer** maintaining a CLI tool and library for r
|
||||
|
||||
Who embeds what, and whether it is locally detectable (so we know which gaps are fillable). See `identify.py` for what we read.
|
||||
- **Locally detectable (open decoder, no key/API):** Stable Diffusion / SDXL / FLUX via `imwatermark` DWT-DCT (now covered by `invisible_watermark.py`). FLUX uses the same library (`black-forest-labs/flux2` `src/flux2/watermark.py`, 48-bit `0b001010101111111010000111100111001111010100101110`); SDXL is the diffusers `WATERMARK_MESSAGE` (`0b101100111110110010010000011110111011000110011110`). Caveat: fragile to re-encoding.
|
||||
- **C2PA / IPTC (covered by the issuer/marker scan):** OpenAI, Google, Adobe Firefly, Microsoft Designer (have positives); Bing Image Creator, Canva, Getty, Shutterstock, Stability hosted (no positives yet — collect to validate). Midjourney embeds NO C2PA and no invisible watermark (our `mj-*` sample carried only the IPTC tag).
|
||||
- **C2PA / IPTC (covered by the issuer/marker scan):** OpenAI, Google, Adobe Firefly, Microsoft (Designer + **Bing Image Creator** — collected 2026-05-24; Bing now runs Microsoft's own **MAI-Image** model, signs C2PA as "Microsoft", NOT OpenAI/DALL-E), and **Stability AI** (collected from Brand Studio / DreamStudio successor; signs C2PA as "Stability AI" with cert org "Stability AI Ltd", no SynthID, no imwatermark on its current Stable Image model — issuer added to `C2PA_ISSUERS`). Still unsampled: Canva (its downloads are re-encoded design *exports* that strip C2PA, so a Canva "positive" is inconclusive — skipped), Getty, Shutterstock. Midjourney embeds NO C2PA and no invisible watermark (our `mj-*` sample carried only the IPTC tag).
|
||||
- **Invisible but NOT locally detectable (proprietary, API/oracle only — same wall as SynthID):** Amazon Titan Image Generator + Nova Canvas (Bedrock `DetectGeneratedContent` API), Kakao (new SynthID image adopter, May 2026), NVIDIA Cosmos (SynthID video). No local detector possible; treat like SynthID.
|
||||
|
||||
## Known limitations
|
||||
|
||||
@@ -108,3 +108,5 @@ c86973424817f62510e2a312b85c52e05adf47ace87a8e717fd442607596f501,c8697342-aistud
|
||||
1f81827c06d67cf6f6c7f5d53ec8f9738183942a6d1d2717b161fea0fdcc540a,1f81827c-Designer.png,pos,Microsoft Designer,dall-e (Designer),1024,1024,png,"OpenAI, Microsoft",yes,c2pa-metadata,2026-05-24T22:18:40Z,C2PA issuer OpenAI+Microsoft; synthid_source=OpenAI (DALL-E surface inherits OpenAI SynthID+C2PA)
|
||||
f3ebe4683fc40aa2a0b80cc3ced3dc8062385ae32b488f4b33cb2330949e658d,f3ebe468-leonardo_lake.jpg,neg,Leonardo.ai,lucid-origin,1024,1024,jpeg,,,none,2026-05-24T22:22:15Z,non-SynthID AI; NO provenance metadata (no C2PA/IPTC/SynthID)
|
||||
12775f4c0aec1ecd615e88c4941a788a053d0258d08d31ad34d330d3e21969bd,12775f4c-grok_original.jpg,neg,Grok (xAI),aurora,784,1168,jpeg,,,none,2026-05-24T22:47:46Z,full-res original via Download button; xAI non-adopter; NO C2PA/IPTC/SynthID (has benign EXIF block)
|
||||
74b605deb102d74443a182f70fbea34a6a470bed381d62ae0bfa0083a6ecc5a3,74b605de-_e1f7984d-98e7-4cdc-91d2-f72314a4bc56.jpeg,neg,Bing Image Creator,MAI-Image-2e,1024,1024,jpeg,Microsoft,,c2pa-metadata,2026-05-25T00:10:28Z,C2PA issuer Microsoft; non-SynthID; issuer-coverage fixture
|
||||
f9bb6b1039ee228221d2eead7daebd46422a36a4a8959a74fa0198e1aa19f9b8,f9bb6b10-a_red_ceramic_coffee_mug_on_a_wooden_table_soft_morning_light_photorealistic_20260525000725_04.png,neg,Stability Brand Studio,Stable Image,1024,1024,png,Stability AI,,c2pa-metadata,2026-05-25T00:10:31Z,C2PA issuer Stability AI; non-SynthID; no imwatermark
|
||||
|
||||
|
@@ -50,10 +50,13 @@ _SPARKLE_THRESHOLD = 0.5
|
||||
# manifest names several issuers (Microsoft Designer signs as "OpenAI,
|
||||
# Microsoft"), the first match wins so the product, not the backend, is named.
|
||||
_ISSUER_PLATFORM: tuple[tuple[str, str], ...] = (
|
||||
("Microsoft", "Microsoft Designer (DALL-E / OpenAI backend)"),
|
||||
# Microsoft signs both Designer and Bing Image Creator; Bing now runs its
|
||||
# own MAI-Image model (not DALL-E), so the label stays model-neutral.
|
||||
("Microsoft", "Microsoft (Bing Image Creator / Designer)"),
|
||||
("Adobe", "Adobe Firefly"),
|
||||
("OpenAI", "OpenAI (ChatGPT / gpt-image / DALL-E / Sora)"),
|
||||
("Google", "Google (Gemini / Imagen)"),
|
||||
("Stability AI", "Stability AI (Stable Image / DreamStudio)"),
|
||||
)
|
||||
|
||||
# PNG-text / EXIF keys that indicate a local diffusion pipeline (vs. a hosted
|
||||
|
||||
@@ -85,6 +85,9 @@ C2PA_ISSUERS = {
|
||||
b"Microsoft": "Microsoft",
|
||||
b"OpenAI": "OpenAI",
|
||||
b"Truepic": "Truepic",
|
||||
# Stability AI signs C2PA as "Stability AI" (cert org "Stability AI Ltd").
|
||||
# Verified on a live Brand Studio (DreamStudio successor) output, 2026-05-24.
|
||||
b"Stability AI": "Stability AI",
|
||||
}
|
||||
|
||||
# C2PA issuers whose signed outputs also carry an invisible SynthID pixel
|
||||
|
||||
@@ -51,6 +51,17 @@ class TestAttributePlatform:
|
||||
assert platform
|
||||
assert "signer" in platform.lower()
|
||||
|
||||
def test_microsoft_label_is_model_neutral(self):
|
||||
# Bing now runs MAI-Image, not DALL-E; the label must not claim DALL-E.
|
||||
platform = _attribute_platform(["Microsoft"])
|
||||
assert platform
|
||||
assert "DALL-E" not in platform
|
||||
|
||||
def test_stability(self):
|
||||
platform = _attribute_platform(["Stability AI"])
|
||||
assert platform
|
||||
assert "Stability AI" in platform
|
||||
|
||||
def test_empty_is_none(self):
|
||||
assert _attribute_platform([]) is None
|
||||
|
||||
@@ -100,6 +111,14 @@ class TestIdentifyNonPng:
|
||||
r = identify(path, check_visible=False)
|
||||
assert any("SynthID" in w for w in r.watermarks)
|
||||
|
||||
def test_stability_ai_issuer_attributed_no_synthid(self, tmp_path: Path):
|
||||
path = self._c2pa_jpeg(tmp_path, b"Stability AI ... trainedAlgorithmicMedia")
|
||||
r = identify(path, check_visible=False)
|
||||
assert r.is_ai_generated is True
|
||||
assert r.platform is not None
|
||||
assert "Stability AI" in r.platform
|
||||
assert not any("SynthID" in w for w in r.watermarks) # Stability does not use SynthID
|
||||
|
||||
def test_c2pa_without_ai_marker_is_unknown(self, tmp_path: Path):
|
||||
# Adobe signs C2PA on plain Photoshop edits too. Without an AI digital-
|
||||
# source marker, the honest verdict is unknown -- the C2PA watermark is
|
||||
|
||||
Reference in New Issue
Block a user