From e27f24f52069b8495c2259920295da2d55021515 Mon Sep 17 00:00:00 2001 From: test-user Date: Mon, 25 May 2026 12:37:15 -0700 Subject: [PATCH] test(samples): commit real Doubao fixture + AIGC real-sample test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit data/samples/doubao-1.png is the real #13 sample: carries the China TC260 XMP label and a visible '豆包AI生成' text mark (bottom-right). Grounds the AIGC detection on a real file (alongside the synthetic tests) and serves as the fixture for visible-watermark removal work. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 2 +- tests/test_metadata.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5e0dd92..59b7d3d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -14,7 +14,7 @@ You are a **principal Python engineer** maintaining a CLI tool and library for r - `bash maintain.sh` — uv-outdated, uv-secure, ruff check/fix, ruff format, pyright, pytest -n auto - `maintain.sh` may not finish fully green (pre-existing, not per-change): strict pyright carries debt in `remove_ai_metadata` / `cli.py` (untyped piexif/PIL/click/rich). (`uv-secure` is clean since idna was bumped 3.11 -> 3.16, fixing GHSA-65pc-fj4g-8rjx.) To gate a change, run `uv run ruff check`, `uv run pyright `, `uv run pytest` directly. - Run `uv run` from the repo root — from another cwd it falls back to a bare env without numpy/cv2/torch. -- Metadata/C2PA tests assert against real committed fixtures in `data/samples/` (`chatgpt-*.png` = OpenAI C2PA, `firefly-1.png` = Adobe, `not-ai-*` = clean); synthetic byte blobs cover the JPEG/ISOBMFF format paths. 
+- Metadata/C2PA tests assert against real committed fixtures in `data/samples/` (`chatgpt-*.png` = OpenAI C2PA, `firefly-1.png` = Adobe, `mj-*` = Midjourney IPTC, `doubao-1.png` = ByteDance Doubao with the China TC260 `<TC260:AIGC>` XMP label **and** a visible "豆包AI生成" text mark bottom-right, `not-ai-*` = clean); synthetic byte blobs cover the JPEG/ISOBMFF format paths. - SynthID reference corpus: `scripts/synthid_corpus.py` ingests labeled images into `data/synthid_corpus/` (`manifest.csv` tracked, `images/` gitignored); see its README for the collection protocol and verification oracles. ## Configuration diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 7dafa8a..1ee3889 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -433,3 +433,20 @@ class TestAIGCLabel: meta = get_ai_metadata(self._aigc_png(tmp_path)) assert "aigc_label" in meta assert "TC260" in meta["aigc_label"] + + +@pytest.mark.skipif(not (SAMPLES_DIR / "doubao-1.png").exists(), reason="doubao sample not present") +class TestAIGCRealSample: + """Real Doubao (ByteDance) sample carries the China TC260 AIGC XMP label.""" + + def test_doubao_aigc_label(self): + from remove_ai_watermarks.metadata import aigc_label + + info = aigc_label(SAMPLES_DIR / "doubao-1.png") + assert info is not None + assert info["Label"] == "1" + assert info["ContentProducer"] # ByteDance producer code present + + def test_doubao_detected_as_ai(self): + assert has_ai_metadata(SAMPLES_DIR / "doubao-1.png") + assert "aigc_label" in get_ai_metadata(SAMPLES_DIR / "doubao-1.png")