From 0c215b5b2fe3b4643e1016815c48fe1a756ee92d Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Sat, 20 Jun 2026 15:34:20 -0700 Subject: [PATCH 1/4] feat(identify): C2PA vendor coverage, AI-enhanced split, detect/remove threshold unify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Retained-corpus mining (2026-06-20) surfaced three provenance gaps; all are oracle-free and regression-guarded. - C2PA vendor coverage (roadmap): register Volcano Engine under its Chinese legal entity 北京火山引擎科技有限公司 (the latin "volcengine" needle misses those certs) -> normalizes to the same ByteDance platform; register ElevenLabs ("Eleven Labs Inc.", pure generative-AI) as a generator. Document the deliberate exclusion of TikTok Inc. and PixelBin.io/"Fynd" (provenance/transform signers, not generators) so they are not re-added. - AI-generated vs AI-enhanced (roadmap): ProvenanceReport.ai_source_kind splits the C2PA digital-source-type into "generated" (trainedAlgorithmicMedia) vs "enhanced" (compositeWithTrainedAlgorithmicMedia) so a caller branches a full-frame scrub from a region-targeted clean. Parsed once in noai.c2pa._populate_registry_fields (PNG + any c2pa-python-readable container), with a raw head-scan fallback in identify for the non-PNG raw-blob path. CLI verdict reads "AI-generated (fully synthetic)" vs "AI-enhanced (real content with an AI-composited region)"; surfaced in --json. - Detect-vs-remove threshold desync (P0#7): identify's sparkle threshold and the removal arbitration gate were two independent 0.5 constants. Unify them into the single GEMINI_SPARKLE_TRUST_CONF (identify imports it) so they can never drift. 
Lowering the gate to recover faint sub-0.5 sparkles was evaluated and REJECTED: a real Doubao text mark scores ~0.40-0.42 as a gemini match with a higher core-ring brightness margin than a genuine faint sparkle, so neither confidence nor the brightness gate separates them in [0.35, 0.5) -- lowering would trade a rare miss for false-positive removals on clean images. Regression-guarded by TestSparkleDetectRemoveAlignment (real demo sparkle at borderline opacities; identify and best_auto_mark must agree on either side of the line). Co-Authored-By: Claude Opus 4.8 --- src/remove_ai_watermarks/cli.py | 7 ++ src/remove_ai_watermarks/identify.py | 40 +++++-- src/remove_ai_watermarks/noai/c2pa.py | 8 ++ src/remove_ai_watermarks/noai/constants.py | 32 +++++ .../watermark_registry.py | 28 +++-- tests/test_identify.py | 109 ++++++++++++++++++ 6 files changed, 209 insertions(+), 15 deletions(-) diff --git a/src/remove_ai_watermarks/cli.py b/src/remove_ai_watermarks/cli.py index 667258d..40918d4 100644 --- a/src/remove_ai_watermarks/cli.py +++ b/src/remove_ai_watermarks/cli.py @@ -881,6 +881,13 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo _banner() verdict = {True: "AI-generated", False: "not AI", None: "unknown"}[report.is_ai_generated] + # Sharpen the True verdict when the C2PA source type says the image is a real + # photo with an AI-composited region rather than a full AI generation, so the + # caller (and the user) can tell "scrub the whole frame" from "scrub the AI region". 
+ if report.is_ai_generated and report.ai_source_kind == "enhanced": + verdict = "AI-enhanced (real content with an AI-composited region)" + elif report.is_ai_generated and report.ai_source_kind == "generated": + verdict = "AI-generated (fully synthetic)" console.print(f"\n Verdict: {verdict} (confidence: {report.confidence})") console.print(f" Platform: {report.platform or 'undetermined'}") diff --git a/src/remove_ai_watermarks/identify.py b/src/remove_ai_watermarks/identify.py index 63175fe..1d3d10d 100644 --- a/src/remove_ai_watermarks/identify.py +++ b/src/remove_ai_watermarks/identify.py @@ -42,6 +42,7 @@ from remove_ai_watermarks.metadata import ( ) from remove_ai_watermarks.noai.c2pa import cbor_text_after, extract_c2pa_info, soft_binding_vendors_in from remove_ai_watermarks.noai.constants import C2PA_AI_TOOLS, C2PA_AI_VENDORS, C2PA_ISSUERS +from remove_ai_watermarks.watermark_registry import GEMINI_SPARKLE_TRUST_CONF if TYPE_CHECKING: from pathlib import Path @@ -57,11 +58,14 @@ log = logging.getLogger(__name__) _SCAN_BYTES = 1024 * 1024 # Visible-sparkle confidence above which the signal is trusted as provenance. -# Stricter than the removal default (0.25): on the corpus, Gemini-family -# sparkles score >= 0.56 while non-sparkle images top out at 0.49, so 0.5 -# cleanly separates them and avoids false positives when sparkle is the only -# signal (e.g. an OpenAI image scored 0.37 -- below threshold, correctly dropped). -_SPARKLE_THRESHOLD = 0.5 +# Shared with the removal arbitration (watermark_registry.GEMINI_SPARKLE_TRUST_CONF) +# so the provenance "is there a sparkle" verdict and the removal "take the sparkle" +# decision can never drift apart -- the detect-vs-remove desync the retained-corpus +# mining surfaced (2026-06-20). On the corpus Gemini-family sparkles score >= 0.56 +# while non-sparkle images top out at 0.49, so 0.5 cleanly separates them and avoids +# false positives when the sparkle is the only signal (e.g. 
an OpenAI image scored +# 0.37 -- below threshold, correctly dropped). +_SPARKLE_THRESHOLD = GEMINI_SPARKLE_TRUST_CONF # Issuer (C2PA signer) -> human-readable generating platform, derived from the # single C2PA_AI_VENDORS registry. Ordered: when a manifest names several issuers @@ -132,6 +136,14 @@ class ProvenanceReport: is_ai_generated: bool | None # True / False is never asserted; None = unknown platform: str | None confidence: str # "high" | "medium" | "none" + # Coarse AI-origin kind from the C2PA digital-source-type, so a caller can + # branch on full generation vs an AI-touched real photo: + # "generated" -- digitalSourceType trainedAlgorithmicMedia (fully AI). + # "enhanced" -- compositeWithTrainedAlgorithmicMedia (real content with an + # AI-composited region; scrub the AI region, keep the photo). + # None -- no C2PA AI source-type (verdict, if AI, came from another + # signal: IPTC, AIGC, local gen params, xAI, ...). + ai_source_kind: str | None = None watermarks: list[str] = field(default_factory=list[str]) signals: list[Signal] = field(default_factory=list["Signal"]) caveats: list[str] = field(default_factory=list[str]) @@ -484,9 +496,18 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b # ── C2PA Content Credentials ──────────────────────────────────── has_c2pa = bool(info) or c2pa_marker_in(head) issuers = [info["issuer"]] if info.get("issuer") else _issuers_in(head) - c2pa_is_ai = "trainedAlgorithmicMedia" in info.get("source_type", "") or any( - m in head for m in (b"trainedAlgorithmicMedia", b"compositeWithTrainedAlgorithmicMedia") - ) + # Full AI generation (trainedAlgorithmicMedia) vs an AI-enhanced real photo + # (compositeWithTrainedAlgorithmicMedia). The structured kind is parsed once in + # noai.c2pa._populate_registry_fields (covers PNG + any container the c2pa-python + # reader handles); fall back to a raw head scan for the non-PNG raw-blob path + # where extract_c2pa_info returns {}. 
Full generation wins when both appear. + c2pa_source_kind = info.get("ai_source_kind") + if c2pa_source_kind is None: + if b"trainedAlgorithmicMedia" in head: + c2pa_source_kind = "generated" + elif b"compositeWithTrainedAlgorithmicMedia" in head: + c2pa_source_kind = "enhanced" + c2pa_is_ai = c2pa_source_kind is not None # Generator string (for the signal detail): structured for PNG, CBOR-scanned # for other containers. Best-effort -- some manifests key it as # `claim_generator_info` (Pixel), so this can be None even when a device is @@ -734,6 +755,9 @@ def identify(image_path: Path, *, check_visible: bool = True, check_invisible: b is_ai_generated=is_ai, platform=platform, confidence=confidence, + # Only meaningful when the AI verdict actually came from the C2PA source + # type; a non-C2PA AI signal (IPTC/AIGC/local gen) leaves it None. + ai_source_kind=c2pa_source_kind if (is_ai and has_c2pa) else None, watermarks=watermarks, signals=signals, caveats=caveats, diff --git a/src/remove_ai_watermarks/noai/c2pa.py b/src/remove_ai_watermarks/noai/c2pa.py index 8b882af..72a5386 100644 --- a/src/remove_ai_watermarks/noai/c2pa.py +++ b/src/remove_ai_watermarks/noai/c2pa.py @@ -363,14 +363,22 @@ def _populate_registry_fields(buf: bytes, c2pa_info: dict[str, Any]) -> bool: # Digital source type (matched anywhere in the store, including ingredient # manifests -- a ChatGPT edit of a Sora generation carries the AI marker on # the parent, not the active manifest). + # ``ai_source_kind`` is the structured generated-vs-enhanced split the caller + # branches on (full-frame scrub vs region-targeted clean); ``source_type`` is the + # human-readable form. The two byte strings are unambiguous: + # "compositeWithTrainedAlgorithmicMedia" capitalizes the inner "Trained", so a + # lowercase "trainedAlgorithmicMedia" match is standalone full generation, which + # wins when both appear (an edit chain). 
ai_source = False if b"trainedAlgorithmicMedia" in buf: c2pa_info["source_type"] = "trainedAlgorithmicMedia (AI-generated)" + c2pa_info["ai_source_kind"] = "generated" ai_source = True elif b"algorithmicMedia" in buf: c2pa_info["source_type"] = "algorithmicMedia" elif b"compositeWithTrainedAlgorithmicMedia" in buf: c2pa_info["source_type"] = "compositeWithTrainedAlgorithmicMedia (AI-enhanced)" + c2pa_info["ai_source_kind"] = "enhanced" ai_source = True # SynthID pixel-watermark proxy: a C2PA manifest from a SynthID-using diff --git a/src/remove_ai_watermarks/noai/constants.py b/src/remove_ai_watermarks/noai/constants.py index 764000f..84e5855 100644 --- a/src/remove_ai_watermarks/noai/constants.py +++ b/src/remove_ai_watermarks/noai/constants.py @@ -122,6 +122,20 @@ C2PA_AI_VENDORS: tuple[C2paAiVendor, ...] = ( C2paAiVendor( b"volcengine", "ByteDance (Volcano Engine)", "ByteDance (Doubao / Jimeng / Volcano Engine)", "ByteDance" ), + # Some Volcano Engine certs name the signer with the Chinese legal entity + # "北京火山引擎科技有限公司" (Beijing Volcano Engine Technology Co., Ltd.) rather + # than the latin "volcengine" -- the latin needle misses it entirely, so real + # ByteDance output was un-attributed in production traffic. The issuer is the + # UTF-8 of the Chinese name (it appears UTF-8-encoded in the manifest-store + # JSON and the raw caBX bytes alike); it normalizes to the same "ByteDance" + # needle and platform as the volcengine row, so the two collapse together for + # clash detection. Verified against the mined retained corpus, 2026-06-20. + C2paAiVendor( + "北京火山引擎科技有限公司".encode(), + "ByteDance (Volcano Engine)", + "ByteDance (Doubao / Jimeng / Volcano Engine)", + "ByteDance", + ), # ByteDance's international brand (BytePlus / Seedream / Seededit) signs its # cert as "Byteplus Pte. Ltd." 
-- the bare ``volcengine`` needle misses it, so # real BytePlus AI output was mis-attributed (an incidental "Adobe XMP" string @@ -136,11 +150,29 @@ C2PA_AI_VENDORS: tuple[C2paAiVendor, ...] = ( # source read AI but no platform was attributed. Verified on real signed files # in production traffic, 2026-06-19. Canva does not use SynthID. C2paAiVendor(b"Canva", "Canva", "Canva (Magic Media)", "Canva"), + # ElevenLabs is a pure generative-AI company (AI voice / audio, and image / + # video via its API); it signs output as "Eleven Labs Inc.", so the C2PA + # manifest alone marks AI generation. Verified against the mined retained + # corpus, 2026-06-20. ElevenLabs does not use SynthID. + C2paAiVendor(b"Eleven Labs", "ElevenLabs", "ElevenLabs", "ElevenLabs"), # Truepic is a C2PA signing authority, not an AI generator: no platform label, # never asserts is_ai (the verdict comes from the digital-source-type). C2paAiVendor(b"Truepic", "Truepic", None, None), ) +# Deliberately NOT registered as AI-generation vendors (mined-corpus candidates +# evaluated 2026-06-20): +# - TikTok Inc.: signs C2PA as a content-provenance / AI-labeling authority on +# uploads, not as an image generator. The is_ai verdict keys off the +# digitalSourceType (trainedAlgorithmicMedia), which is already honored; a +# bare TikTok signer marks distribution provenance, not generation, so adding +# it as a generator needle would mis-label human uploads as AI. +# - PixelBin.io (issuer "Fynd"): an image transformation / optimization / CDN +# service. Its C2PA stamps a transform/upload step, not a generation event. +# Both are excluded to avoid false-positive AI attribution; re-evaluate only +# against a real signed file whose manifest carries a trainedAlgorithmicMedia +# digital-source type produced by the vendor itself. + # Derived view -- add a vendor to C2PA_AI_VENDORS above, not here. # C2PA issuer signature -> resolved org name, for the manifest byte-scan. 
C2PA_ISSUERS: dict[bytes, str] = {v.issuer: v.org for v in C2PA_AI_VENDORS} diff --git a/src/remove_ai_watermarks/watermark_registry.py b/src/remove_ai_watermarks/watermark_registry.py index bcc3935..39e042c 100644 --- a/src/remove_ai_watermarks/watermark_registry.py +++ b/src/remove_ai_watermarks/watermark_registry.py @@ -90,13 +90,27 @@ class KnownMark: return self._remove(image, inpaint_method, inpaint, inpaint_strength, force) -# Gemini-sparkle confidence above which the registry treats it as a confident -# detection for arbitration. Matches identify's corpus-validated sparkle -# threshold (0.5): the gemini engine's own detect flag uses a looser internal -# threshold and weakly fires (~0.36) on unrelated bottom-right text (e.g. the -# Doubao mark), which would otherwise let it hijack `--mark auto`. 0.5 gives 0 -# false positives on the corpus. -_GEMINI_AUTO_MIN_CONF = 0.5 +# Single source of truth for the Gemini-sparkle "trust this as a real mark" +# confidence, shared by BOTH the removal arbitration here (`best_auto_mark` / +# `_gemini_detect`) and the provenance detector in `identify` (which imports it +# as its sparkle threshold). Defining it once removes the detect-vs-remove +# threshold drift the retained-corpus mining surfaced (2026-06-20): identify +# would report a sparkle while removal declined it, or vice versa, whenever the +# two independently-maintained 0.5 constants fell out of step. Now they cannot. +# +# Value 0.5 is corpus-validated: the gemini engine's own `detected` flag uses a +# looser internal threshold (0.35) and weakly fires (~0.36-0.42) on unrelated +# bottom-right text -- a real Doubao mark scores ~0.40-0.42 as a gemini match, +# and its core-ring brightness margin is HIGHER than a genuine faint sparkle's, +# so neither confidence nor the brightness gate separates them in the [0.35, 0.5) +# band. 
Lowering this gate to recover faint sparkles was evaluated against that +# band (2026-06-20) and REJECTED: it cannot be done without re-admitting the +# Doubao-text / content false positives, trading a rare miss for false-positive +# removals on clean images. The band below the gate is therefore intentionally +# left to the higher-strength / metadata paths. 0.5 gives 0 false positives on +# the corpus. +GEMINI_SPARKLE_TRUST_CONF = 0.5 +_GEMINI_AUTO_MIN_CONF = GEMINI_SPARKLE_TRUST_CONF # ── Engine adapters (lazy singletons; engines are cv2-only, no model load) ── diff --git a/tests/test_identify.py b/tests/test_identify.py index 91413f3..f0c39fb 100644 --- a/tests/test_identify.py +++ b/tests/test_identify.py @@ -24,6 +24,7 @@ from remove_ai_watermarks.identify import ( _vendor_of, identify, ) +from remove_ai_watermarks.watermark_registry import GEMINI_SPARKLE_TRUST_CONF # Where the lazy import inside identify._visible_sparkle resolves the detector. _SPARKLE_TARGET = "remove_ai_watermarks.gemini_engine.detect_sparkle_confidence" @@ -140,6 +141,23 @@ class TestIdentifyNonPng: assert r.is_ai_generated is True assert "ByteDance" in (r.platform or "") + def test_bytedance_chinese_legal_name_attributed(self, tmp_path: Path): + # Some Volcano Engine certs name the signer with the Chinese legal entity + # rather than the latin "volcengine"; the latin needle misses it, so the + # Chinese-name registry entry is what attributes real ByteDance output. + blob = "北京火山引擎科技有限公司".encode() + b" ... trainedAlgorithmicMedia" + path = self._c2pa_jpeg(tmp_path, blob) + r = identify(path, check_visible=False, check_invisible=False) + assert r.is_ai_generated is True + assert "ByteDance" in (r.platform or "") + + def test_elevenlabs_attributed(self, tmp_path: Path): + path = self._c2pa_jpeg(tmp_path, b"Eleven Labs Inc. ... 
trainedAlgorithmicMedia") + r = identify(path, check_visible=False, check_invisible=False) + assert r.is_ai_generated is True + assert r.platform == "ElevenLabs" + assert not any("SynthID" in w for w in r.watermarks) # ElevenLabs does not use SynthID + def test_stability_ai_issuer_attributed_no_synthid(self, tmp_path: Path): path = self._c2pa_jpeg(tmp_path, b"Stability AI ... trainedAlgorithmicMedia") r = identify(path, check_visible=False) @@ -148,6 +166,21 @@ class TestIdentifyNonPng: assert "Stability AI" in r.platform assert not any("SynthID" in w for w in r.watermarks) # Stability does not use SynthID + def test_trained_source_is_generated_kind(self, tmp_path: Path): + path = self._c2pa_jpeg(tmp_path, b"OpenAI ... trainedAlgorithmicMedia") + r = identify(path, check_visible=False, check_invisible=False) + assert r.is_ai_generated is True + assert r.ai_source_kind == "generated" + + def test_composite_source_is_enhanced_kind(self, tmp_path: Path): + # compositeWithTrainedAlgorithmicMedia: a real photo with an AI-composited + # region. Still AI (is_ai True), but the kind must read "enhanced" so a + # caller can do region-targeted cleaning instead of a full-frame regen. + path = self._c2pa_jpeg(tmp_path, b"Adobe ... compositeWithTrainedAlgorithmicMedia") + r = identify(path, check_visible=False, check_invisible=False) + assert r.is_ai_generated is True + assert r.ai_source_kind == "enhanced" + def test_c2pa_without_ai_marker_is_unknown(self, tmp_path: Path): # Adobe signs C2PA on plain Photoshop edits too. Without an AI digital- # source marker, the honest verdict is unknown -- the C2PA watermark is @@ -202,6 +235,16 @@ class TestIdentifySamsungGalaxy: assert r.platform == "ASUS Gallery (C2PA signer)" assert any("C2PA" in w for w in r.watermarks) + def test_galaxy_capture_without_ai_marker_is_not_ai(self, tmp_path: Path): + # A genuine Galaxy phone capture carries Samsung Galaxy C2PA provenance but + # NO AI source-type / genAIType. 
It must stay is_ai=None -- the device cert + # is authenticity provenance of a real photo, not an AI-generation signal. + path = self._jpeg(tmp_path, "s25_capture.jpg", b"Samsung Galaxy Galaxy S25 c2pa-rs no ai marker") + r = identify(path, check_visible=False, check_invisible=False) + assert r.is_ai_generated is None + assert r.platform == "Samsung Galaxy (C2PA)" + assert any("C2PA" in w for w in r.watermarks) + # ── End-to-end verdicts on real fixtures ──────────────────────────── @@ -277,6 +320,12 @@ class TestIdentifyLocalParams: assert "parameters" in signal.detail assert signal.confidence == "high" + def test_local_gen_params_have_no_c2pa_source_kind(self, tmp_png_with_ai_metadata: Path): + # AI verdict from local SD params (not C2PA) -> ai_source_kind stays None. + r = identify(tmp_png_with_ai_metadata, check_visible=False) + assert r.is_ai_generated is True + assert r.ai_source_kind is None + def test_clean_png_is_unknown(self, tmp_clean_png: Path): r = identify(tmp_clean_png, check_visible=False) assert r.is_ai_generated is None @@ -399,6 +448,66 @@ class TestIdentifyVisibleSparkle: assert r.confidence == "high" +REPO_ROOT = Path(__file__).resolve().parent.parent +_DEMO_BEFORE = REPO_ROOT / "demo_banana_before.png" +_DEMO_AFTER = REPO_ROOT / "demo_banana_after.png" + + +@pytest.mark.skipif(not (_DEMO_BEFORE.exists() and _DEMO_AFTER.exists()), reason="demo banana pair not present") +class TestSparkleDetectRemoveAlignment: + """Detect (identify) and remove (registry.best_auto_mark) must agree on the same + image -- the retained-corpus desync where identify reported a sparkle the removal + arbitration declined (or vice versa). Both gate on the shared GEMINI_SPARKLE_TRUST_CONF, + so a sparkle just over the line is taken by BOTH and one just under is declined by BOTH. + NOTE: the detect side is modelled as detector confidence >= that constant; identify() is not + called. Fixtures composite the captured sparkle (before-minus-after) back at reduced opacity to straddle it. 
+ """ + + @staticmethod + def _faint_sparkle(tmp_path: Path, opacity: float) -> Path: + import numpy as np + + from remove_ai_watermarks import image_io + + before = image_io.imread(_DEMO_BEFORE).astype("float32") + after = image_io.imread(_DEMO_AFTER).astype("float32") + faint = np.clip(after + opacity * (before - after), 0, 255).astype("uint8") + out = tmp_path / f"sparkle_{int(opacity * 100)}.png" + image_io.imwrite(out, faint) + return out + + def _detect_remove(self, path: Path) -> tuple[bool, bool, float]: + from remove_ai_watermarks import image_io, watermark_registry + from remove_ai_watermarks.gemini_engine import detect_sparkle_confidence + + conf = detect_sparkle_confidence(path) or 0.0 + identify_fires = conf >= GEMINI_SPARKLE_TRUST_CONF + best = watermark_registry.best_auto_mark(image_io.imread(path)) + remove_takes_gemini = best is not None and best.key == "gemini" + return identify_fires, remove_takes_gemini, conf + + def test_above_threshold_both_fire(self, tmp_path: Path): + path = self._faint_sparkle(tmp_path, 0.7) # ~0.55 conf, just over the line + identify_fires, remove_takes, conf = self._detect_remove(path) + assert conf >= GEMINI_SPARKLE_TRUST_CONF + assert identify_fires, f"identify declined a sparkle above threshold (conf={conf:.3f})" + assert remove_takes, f"removal declined a sparkle above threshold (conf={conf:.3f})" + + def test_below_threshold_both_decline(self, tmp_path: Path): + path = self._faint_sparkle(tmp_path, 0.5) # ~0.37 conf, just under the line + identify_fires, remove_takes, conf = self._detect_remove(path) + assert conf < GEMINI_SPARKLE_TRUST_CONF + assert not identify_fires, f"identify fired below threshold (conf={conf:.3f})" + assert not remove_takes, f"removal fired below threshold (conf={conf:.3f})" + + def test_full_strength_both_fire(self): + # The shipped demo sparkle at full strength: unambiguous agreement. 
+ identify_fires, remove_takes, conf = self._detect_remove(_DEMO_BEFORE) + assert conf >= GEMINI_SPARKLE_TRUST_CONF + assert identify_fires + assert remove_takes + + class TestIdentifyImportIsLight: """`import identify` must stay torch-free (lazy noai/__init__): the package is deployed on a 512 MB host where eagerly pulling torch/diffusers OOMs.""" From 33fddbc6fac01a5c5d77d86e2e0d610e4630dd78 Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Sat, 20 Jun 2026 15:34:39 -0700 Subject: [PATCH 2/4] fix(visible): over-subtraction guard for Doubao/Jimeng/Samsung text marks Port the Gemini sparkle dark-pit guard (commit 41f6797) to the shared TextMarkEngine reverse-alpha base (roadmap P0#8): on a dark or mid-tone background the captured alpha can over-estimate this image's mark opacity, and reverse-alpha leaves a darker-than-background glyph ghost instead of recovering the true pixels. The sparkle-only fix left the text marks unhandled. _reverse_alpha_oversubtracts predicts the reverse-alpha output PER PIXEL over the glyph body from the INPUT ((obs - a*logo)/(1-a), the remover's own math); when the predicted body lands more than _OVERSUB_DARK_MARGIN (25) gray levels below the local background ring it abandons the reverse-alpha output for the footprint and inpaints it from the original surroundings (_inpaint_footprint, wider dilate/ radius than the thin residual pass). Predicting per-pixel from the input (not the produced output, which depends on which placement the remover picked) keeps a cleanly captured full-strength mark byte-identical -- it predicts back to the background everywhere, so the guard never trips on it (verified across all three engines on white/mid/dark/midgray backgrounds). Regression-guarded by tests/test_text_mark_oversubtraction.py: predicate True on faint / False on clean, end-to-end no-dark-pit acceptance, clean-mark byte identity, and textured-background footprint recovery. 
Co-Authored-By: Claude Opus 4.8 --- src/remove_ai_watermarks/_text_mark_engine.py | 97 ++++++++++++++ tests/test_text_mark_oversubtraction.py | 120 ++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 tests/test_text_mark_oversubtraction.py diff --git a/src/remove_ai_watermarks/_text_mark_engine.py b/src/remove_ai_watermarks/_text_mark_engine.py index 5e4fbc8..226a3f2 100644 --- a/src/remove_ai_watermarks/_text_mark_engine.py +++ b/src/remove_ai_watermarks/_text_mark_engine.py @@ -37,6 +37,28 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# Reverse-alpha over-subtraction guard (ported from gemini_engine, 2026-06-20). +# The reverse-alpha blend ``(wm - a*logo)/(1-a)`` over-subtracts when the captured +# alpha over-estimates THIS image's mark opacity: on a dark or mid-tone background +# it drives the glyph footprint into a visibly DARKER-than-background ghost (a +# "dark pit") instead of recovering the true pixels. The retained-corpus mining +# (2026-06-20) showed the sparkle-only fix (commit 41f6797) left this unhandled +# for the Doubao/Jimeng text marks. Mirror the sparkle gate: when the recovered +# glyph body lands more than this many gray levels below the local background +# ring, abandon the reverse-alpha output for the footprint and inpaint it from +# the surroundings instead. Calibrated to the same 25-level margin the sparkle +# gate uses -- clean text-mark removals recover within ~10 of the ring, the dark +# pit lands tens of levels below. +_OVERSUB_DARK_MARGIN = 25.0 +# Glyph-body / background-ring sampling for the guard. The ring is a pad around +# the glyph box (excluding the box); the body is the bright-core glyph pixels. 
+_OVERSUB_RING_PAD_FRAC = 0.6 # ring pad as a fraction of the glyph-box height +_OVERSUB_BODY_ALPHA_FLOOR = 0.15 # alpha above which a block pixel counts as glyph body +# Footprint inpaint when the guard trips: dilate the glyph mask wider than the +# thin residual pass so the whole darkened ghost is reconstructed, not just its edge. +_OVERSUB_INPAINT_DILATE = 9 +_OVERSUB_INPAINT_RADIUS = 4 + @dataclass(frozen=True) class TextMarkConfig: @@ -335,6 +357,74 @@ class TextMarkEngine: out[y1:y2, x1:x2] = np.clip((roi - a3 * logo) / np.clip(1.0 - a3, 0.25, 1.0), 0, 255).astype(np.uint8) return out + def _reverse_alpha_oversubtracts( + self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int] + ) -> bool: + """True when reverse-alpha would darken the glyph footprint into a dark pit. + + Ported from ``gemini_engine._reverse_alpha_oversubtracts`` (2026-06-20): + PREDICT the reverse-alpha output at the bright glyph core directly from the + INPUT and the captured alpha, ``(core_obs - a*logo)/(1-a)``, and trip when it + lands more than ``_OVERSUB_DARK_MARGIN`` gray levels below the local + background ring. Predicting from the input (not the produced output) keeps the + gate independent of which placement the reverse-alpha picked, so a clean + full-strength mark (whose strokes predict back to the background) never trips, + while a mark fainter than the capture (over-subtracted into a ghost) does. 
+ """ + ax, ay, gw, gh = region + ih, iw = image.shape[:2] + if gw < 4 or gh < 4: + return False + if float(amap.max()) < 0.2: # too faint a capture to over-subtract meaningfully + return False + body_box = amap >= _OVERSUB_BODY_ALPHA_FLOOR # glyph strokes + if not bool(body_box.any()): + return False + pad = max(4, int(gh * _OVERSUB_RING_PAD_FRAC)) + ry1, ry2 = max(0, ay - pad), min(ih, ay + gh + pad) + rx1, rx2 = max(0, ax - pad), min(iw, ax + gw + pad) + ring = image[ry1:ry2, rx1:rx2].astype(np.float32).mean(axis=2) + fy1, fy2, fx1, fx2 = ay - ry1, ay - ry1 + gh, ax - rx1, ax - rx1 + gw + ring_mask = np.ones(ring.shape, dtype=bool) + ring_mask[fy1:fy2, fx1:fx2] = False + if int(ring_mask.sum()) < 10: + return False + # Predict the reverse-alpha output PER PIXEL over the glyph body with the + # (obs - a*logo)/(1-a) blend inverse (per-pixel alpha, channel-mean luma), so a + # cleanly captured mark predicts back to the true background (no trip), while a + # mark fainter than the capture predicts a body far below the local ring. NOTE: + # the remover floors its divisor at 0.25 and clips to 0..255; this prediction + # only caps alpha at 0.99, so the two diverge where alpha exceeds 0.75. + obs = ring[fy1:fy2, fx1:fx2] + a = np.clip(amap, 0.0, 0.99) + logo = float(np.mean(self.config.alpha_logo_bgr)) + predicted = (obs - a * logo) / (1.0 - a) + predicted_core = float(np.median(predicted[body_box])) + bg = float(np.median(ring[ring_mask])) + oversub = predicted_core < bg - _OVERSUB_DARK_MARGIN + if oversub: + logger.debug( + "%s reverse-alpha over-subtracts: predicted core=%.1f bg=%.1f (margin %.0f) -> footprint inpaint", + self.config.name, + predicted_core, + bg, + _OVERSUB_DARK_MARGIN, + ) + return oversub + + def _inpaint_footprint( + self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int] + ) -> NDArray[Any]: + """Reconstruct the glyph footprint from its surroundings (used when + reverse-alpha would over-subtract into a dark pit). 
Inpaints the ORIGINAL + image over a dilated glyph mask, so the result never contains the darkened + reverse-alpha pixels.""" + ax, ay, gw, gh = region + mask = np.zeros(image.shape[:2], np.uint8) + mask[ay : ay + gh, ax : ax + gw] = (amap > self.config.residual_alpha_floor).astype(np.uint8) * 255 + mask = cv2.dilate(mask, np.ones((_OVERSUB_INPAINT_DILATE, _OVERSUB_INPAINT_DILATE), np.uint8)) + return cv2.inpaint(image, mask, _OVERSUB_INPAINT_RADIUS, cv2.INPAINT_NS) + def remove_watermark_reverse_alpha(self, image: NDArray[Any], *, residual_inpaint: bool = True) -> NDArray[Any]: """Recover the original pixels by inverting the alpha blend, then clear the residual outline with a thin inpaint over the glyph footprint. @@ -370,6 +460,13 @@ class TextMarkEngine: best_residual, best_out, best_amap, best_region = residual, out, amap, region if best_out is None or best_amap is None or best_region is None: # pragma: no cover - maps is non-empty return image.copy() + # Over-subtraction guard: on a dark/mid-tone background the captured alpha can + # over-estimate the mark's opacity and reverse-alpha leaves a darker-than- + # background ghost. When the recovered glyph body sits far below the local + # ring, reconstruct the footprint from its surroundings instead of shipping the + # dark pit (the thin residual inpaint cannot fix a footprint-wide darkening). + if self._reverse_alpha_oversubtracts(image, best_amap, best_region): + return self._inpaint_footprint(image, best_amap, best_region) if residual_inpaint: # Embed the glyph-sized alpha block into a full-frame uint8 mask only for # the inpaint (cv2.inpaint needs a mask matching best_out). One uint8 diff --git a/tests/test_text_mark_oversubtraction.py b/tests/test_text_mark_oversubtraction.py new file mode 100644 index 0000000..d24d71b --- /dev/null +++ b/tests/test_text_mark_oversubtraction.py @@ -0,0 +1,120 @@ +"""Reverse-alpha over-subtraction guard for the visible text-mark engines. 
+ +Ported from the Gemini sparkle fix (commit 41f6797) to Doubao/Jimeng/Samsung +(retained-corpus mining 2026-06-20, roadmap P0#8): on a dark or mid-tone +background the captured alpha can over-estimate THIS image's mark opacity, and +reverse-alpha leaves a darker-than-background glyph ghost (a "dark pit") instead +of recovering the true pixels. The guard predicts the reverse-alpha output per +pixel and, when the glyph body lands far below the local ring, reconstructs the +footprint from the original surroundings instead of shipping the pit. + +These assert visual residual (pixel levels vs the local background), not just a +detector re-fire -- a dark pit can clear the NCC detector while still looking wrong. +""" + +from __future__ import annotations + +import numpy as np +import pytest + +from remove_ai_watermarks import image_io +from remove_ai_watermarks._text_mark_engine import _OVERSUB_DARK_MARGIN +from remove_ai_watermarks.doubao_engine import DoubaoEngine +from remove_ai_watermarks.jimeng_engine import JimengEngine +from remove_ai_watermarks.samsung_engine import SamsungEngine + +_ENGINES = [DoubaoEngine, JimengEngine, SamsungEngine] + + +def _compose(engine, bg: float, opacity_gain: float, w: int = 1024, h: int = 1024): + """Composite the engine's captured mark onto a flat ``bg`` at ``opacity_gain``. + + ``opacity_gain < 1`` makes the mark FAINTER than the capture, so reverse-alpha + at the full captured alpha over-subtracts into a dark pit -- the case the guard + must catch. Returns ``(watermarked_uint8, alpha_block, region)`` where the block + and region are exactly what the engine's reverse-alpha receives. 
+ """ + img = np.full((h, w, 3), float(bg), np.float32) + block, (ax, ay, gw, gh) = engine._fixed_alpha_map(img) + a = np.clip(block * opacity_gain, 0.0, 0.99)[:, :, None] + logo = np.array(engine.config.alpha_logo_bgr, np.float32) + img[ay : ay + gh, ax : ax + gw] = img[ay : ay + gh, ax : ax + gw] * (1 - a) + logo * a + return np.clip(img, 0, 255).astype(np.uint8), block, (ax, ay, gw, gh) + + +def _body_vs_ring(out, region, block) -> tuple[float, float]: + """Median luma of the glyph body vs the local background ring in ``out``.""" + ax, ay, gw, gh = region + g = out.astype(np.float32).mean(axis=2) + body = block >= 0.15 + pad = max(4, int(gh * 0.6)) + ry1, ry2 = max(0, ay - pad), min(g.shape[0], ay + gh + pad) + rx1, rx2 = max(0, ax - pad), min(g.shape[1], ax + gw + pad) + ring = g[ry1:ry2, rx1:rx2] + fy1, fy2, fx1, fx2 = ay - ry1, ay - ry1 + gh, ax - rx1, ax - rx1 + gw + ring_mask = np.ones(ring.shape, dtype=bool) + ring_mask[fy1:fy2, fx1:fx2] = False + core = float(np.median(g[ay : ay + gh, ax : ax + gw][body])) + return core, float(np.median(ring[ring_mask])) + + +@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__) +class TestOversubtractionGuard: + @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4), (90, 0.5)]) + def test_guard_trips_on_faint_mark(self, Engine, bg, gain): + eng = Engine() + wm, block, region = _compose(eng, bg, gain) + assert eng._reverse_alpha_oversubtracts(image_io.to_bgr(wm), block, region) + + @pytest.mark.parametrize("bg", [255, 200, 128, 60]) + def test_guard_skips_clean_full_strength_mark(self, Engine, bg): + # A cleanly captured (gain 1.0) mark predicts back to the background, so the + # guard must NOT trip -- no regression of the common clean-removal path. 
+ eng = Engine() + wm, block, region = _compose(eng, bg, 1.0) + assert not eng._reverse_alpha_oversubtracts(image_io.to_bgr(wm), block, region) + + @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4)]) + def test_faint_removal_leaves_no_dark_pit(self, Engine, bg, gain): + # End-to-end acceptance (roadmap P0#8): after removal the glyph footprint is + # not a region more than _OVERSUB_DARK_MARGIN below the local background. + eng = Engine() + wm, block, region = _compose(eng, bg, gain) + out = eng.remove_watermark_reverse_alpha(wm) + core, ring_bg = _body_vs_ring(out, region, block) + assert core >= ring_bg - _OVERSUB_DARK_MARGIN, f"dark pit: body {core:.0f} vs ring {ring_bg:.0f}" + + def test_clean_mark_removal_unchanged_by_guard(self, Engine, monkeypatch): + # On a clean mark the guard must be a no-op: forcing it off yields the same + # output (the guard only ever diverts the over-subtraction case). + eng = Engine() + wm, _block, _region = _compose(eng, 200, 1.0) + guarded = eng.remove_watermark_reverse_alpha(wm) + monkeypatch.setattr(type(eng), "_reverse_alpha_oversubtracts", lambda self, *a, **k: False) + unguarded = eng.remove_watermark_reverse_alpha(wm) + assert np.array_equal(guarded, unguarded) + + +@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__) +def test_guard_recovers_pit_on_textured_background(Engine): + """The guard's footprint inpaint reconstructs from the ORIGINAL surroundings, + so a faint mark over-subtracted on a textured background recovers to roughly the + local content level rather than a glyph-shaped dark ghost.""" + eng = Engine() + w = h = 1024 + yy, xx = np.mgrid[0:h, 0:w].astype(np.float32) + base = 120 + 35 * np.sin(xx / 80.0) + 25 * np.cos(yy / 60.0) + bg_img = np.clip(np.stack([base, base * 0.95, base * 1.05], axis=-1), 0, 255) + block, (ax, ay, gw, gh) = eng._fixed_alpha_map(bg_img) + a = np.clip(block * 0.45, 0.0, 0.99)[:, :, None] + logo = np.array(eng.config.alpha_logo_bgr, np.float32) + bg_img[ay 
: ay + gh, ax : ax + gw] = bg_img[ay : ay + gh, ax : ax + gw] * (1 - a) + logo * a + wm = np.clip(bg_img, 0, 255).astype(np.uint8) + out = eng.remove_watermark_reverse_alpha(wm).astype(np.float32) + # Compare the recovered glyph body to the clean texture under the mark. + clean = np.clip(np.stack([base, base * 0.95, base * 1.05], axis=-1), 0, 255) + body = block >= 0.15 + region_out = out[ay : ay + gh, ax : ax + gw].mean(axis=2) + region_clean = clean[ay : ay + gh, ax : ax + gw].mean(axis=2) + err = float(np.abs(region_out[body] - region_clean[body]).mean()) + assert err < 25.0, f"glyph body not recovered (mean abs err {err:.1f})" From c1971a3e8d9be7a89afdf8ca67d5bd8a586449fc Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Sat, 20 Jun 2026 15:34:39 -0700 Subject: [PATCH 3/4] feat(invisible): region-targeted regeneration for AI-enhanced composites For AI-enhanced composites (digitalSourceType compositeWithTrainedAlgorithmicMedia, identify ai_source_kind == "enhanced"; roadmap P1#8): regenerate ONLY the AI region and preserve the real photo elsewhere, instead of regenerating the whole frame. - noai.tiling.feather_region_composite(base, regenerated, box, *, feather): pure, model-free compositor that blends the regenerated AI box back over the original with a feathered seam, leaving pixels OUTSIDE the box exactly equal to base. Fully unit-tested (outside-box exactness, interior == regenerated, hard paste at feather 0, monotonic seam ramp, dtype/grayscale/clamp/empty-box/shape-mismatch). - WatermarkRemover.remove_watermark(region=, region_feather=) and the module-level convenience function thread it through: the remover regenerates (or tiles) the frame, then composites only the AI box back over the original input. The box is caller-supplied -- a C2PA composite manifest carries no reliable machine-readable region, so none is fabricated. The no-model lossless region path stays region_eraser.erase. 
Co-Authored-By: Claude Opus 4.8 --- src/remove_ai_watermarks/noai/tiling.py | 53 ++++++++++++++ .../noai/watermark_remover.py | 33 +++++++++ tests/test_tiling.py | 70 +++++++++++++++++++ 3 files changed, 156 insertions(+) diff --git a/src/remove_ai_watermarks/noai/tiling.py b/src/remove_ai_watermarks/noai/tiling.py index 4eb6502..a858c91 100644 --- a/src/remove_ai_watermarks/noai/tiling.py +++ b/src/remove_ai_watermarks/noai/tiling.py @@ -100,6 +100,59 @@ def feather_weights(width: int, height: int, overlap: int) -> NDArray[Any]: return weights +def feather_region_composite( + base: NDArray[Any], + regenerated: NDArray[Any], + box: tuple[int, int, int, int], + *, + feather: int = 64, +) -> NDArray[Any]: + """Composite ``regenerated`` over ``base`` inside ``box`` only, feathering the seam. + + For AI-ENHANCED composites (digitalSourceType ``compositeWithTrainedAlgorithmicMedia``): + the diffusion remover regenerates the whole frame, but only the AI-composited + REGION should change -- the rest is a real photo that must be preserved. This + blends the regenerated pixels in over ``box = (x, y, w, h)`` with a separable + linear taper of ``feather`` px at the box edges, so the result equals ``base`` + EXACTLY outside the box and ramps smoothly (no hard seam) at the boundary. + + Pure and model-free (unit-tested): ``base`` and ``regenerated`` must be the same + shape (H x W, or H x W x C). The output preserves ``base``'s dtype. ``feather`` is + clamped to half the box on each axis, so a small region still tapers symmetrically; + ``feather=0`` is a hard-edged paste. 
+ """ + import numpy as np + + if base.shape != regenerated.shape: + raise ValueError(f"shape mismatch: base {base.shape} vs regenerated {regenerated.shape}") + h, w = base.shape[:2] + x, y, bw, bh = box + x0, y0 = max(0, x), max(0, y) + x1, y1 = min(w, x + bw), min(h, y + bh) + out = base.copy() + if x1 <= x0 or y1 <= y0: + return out # empty / off-image box -> nothing regenerated + + def taper(n: int) -> NDArray[Any]: + win = np.ones(n, dtype=np.float32) + f = min(max(feather, 0), n // 2) + if f > 0: + ramp = (np.arange(f, dtype=np.float32) + 1.0) / (f + 1.0) # in (0, 1), 0 at the edge + win[:f] = ramp + win[n - f :] = ramp[::-1] + return win + + rh, rw = y1 - y0, x1 - x0 + wmap = np.outer(taper(rh), taper(rw)) # ~0 at the box edge, 1 in the interior + if base.ndim == 3: + wmap = wmap[:, :, None] + roi_base = base[y0:y1, x0:x1].astype(np.float32) + roi_gen = regenerated[y0:y1, x0:x1].astype(np.float32) + blended = roi_base * (1.0 - wmap) + roi_gen * wmap + out[y0:y1, x0:x1] = np.clip(blended, 0, 255).astype(base.dtype) + return out + + def run_tiled( generate_tile: Callable[[PILImage.Image], PILImage.Image], image: PILImage.Image, diff --git a/src/remove_ai_watermarks/noai/watermark_remover.py b/src/remove_ai_watermarks/noai/watermark_remover.py index 3c4a2c6..fa38b9f 100644 --- a/src/remove_ai_watermarks/noai/watermark_remover.py +++ b/src/remove_ai_watermarks/noai/watermark_remover.py @@ -566,6 +566,8 @@ class WatermarkRemover: tile: bool = False, tile_size: int = 1024, tile_overlap: int = 128, + region: tuple[int, int, int, int] | None = None, + region_feather: int = 64, ) -> Path: """Remove watermark from an image using regeneration attack. @@ -589,6 +591,15 @@ class WatermarkRemover: tile_size: Tile dimension in px (default 1024, SDXL's training size). tile_overlap: Overlap between adjacent tiles in px (default 128), feather- blended so there is no visible seam. 
+ region: Restrict the regeneration to the AI-composited box ``(x, y, w, h)`` + and feather-composite it back over the ORIGINAL pixels everywhere else. + For AI-ENHANCED composites (digitalSourceType + ``compositeWithTrainedAlgorithmicMedia``, surfaced as + ``identify.ProvenanceReport.ai_source_kind == "enhanced"``): the real + photo outside the box is preserved exactly, only the AI region is + scrubbed. The box is supplied by the caller (a C2PA composite manifest + does not carry a reliable machine-readable region). None -> whole frame. + region_feather: Seam taper in px for ``region`` compositing (default 64). Returns: Path to the cleaned image. @@ -660,6 +671,22 @@ class WatermarkRemover: self._controlnet_pipeline = None cleaned_image = _generate() + # Region-targeted regeneration for AI-enhanced composites: keep the real photo + # outside the AI box pixel-exact, blend only the regenerated AI region back in. + if region is not None: + import numpy as np + + from remove_ai_watermarks.noai.tiling import feather_region_composite + + gen = cleaned_image.convert("RGB") + if gen.size != init_image.size: # a downscaled/tiled pass can resize + gen = gen.resize(init_image.size) + cleaned_image = gen + base_rgb = np.asarray(init_image) # original RGB, untouched outside the box + merged = feather_region_composite(base_rgb, np.asarray(gen), region, feather=region_feather) + cleaned_image = Image.fromarray(merged) + self._set_progress(f"Region-targeted regeneration: AI box {region}, real photo preserved") + self._set_progress(f"Regeneration complete · Output: {w}x{h}px {cleaned_image.mode}") output_path.parent.mkdir(parents=True, exist_ok=True) @@ -877,12 +904,17 @@ def remove_watermark( model_id: str | None = None, device: str | None = None, hf_token: str | None = None, + region: tuple[int, int, int, int] | None = None, ) -> Path: """Convenience function to remove watermark from an image. 
``strength=None`` lets the profile pick its vendor-adaptive default (0.20 OpenAI / 0.30 Google / 0.30 unknown, from the C2PA SynthID proxy on the input; same ladder for the controlnet and sdxl pipelines). Pass a value to override. + + ``region=(x, y, w, h)`` restricts the regeneration to that box and preserves the + real photo elsewhere -- for AI-enhanced composites (see + ``WatermarkRemover.remove_watermark``). """ from remove_ai_watermarks.noai.watermark_profiles import vendor_for_strength @@ -892,4 +924,5 @@ def remove_watermark( output_path=output_path, strength=strength, vendor=vendor_for_strength(image_path), + region=region, ) diff --git a/tests/test_tiling.py b/tests/test_tiling.py index fbe440f..ab6df75 100644 --- a/tests/test_tiling.py +++ b/tests/test_tiling.py @@ -15,6 +15,7 @@ from PIL import Image from remove_ai_watermarks.noai.tiling import ( Tile, _axis_positions, + feather_region_composite, feather_weights, plan_tiles, run_tiled, @@ -138,3 +139,72 @@ class TestRunTiled: image = Image.new("RGB", (1500, 1100), (200, 100, 50)) out = run_tiled(generate, image, tile_size=1024, overlap=128) assert out.size == (1500, 1100) + + +class TestFeatherRegionComposite: + """Region-targeted compositing for AI-enhanced composites: only the AI box is + regenerated, the real photo outside it stays pixel-exact (roadmap P1#8).""" + + @staticmethod + def _frames(h=200, w=300): + base = np.full((h, w, 3), 80, np.uint8) + regenerated = np.full((h, w, 3), 200, np.uint8) + return base, regenerated + + def test_outside_box_is_pixel_exact(self): + base, regen = self._frames() + out = feather_region_composite(base, regen, (100, 60, 80, 50), feather=8) + # Far corners are well outside the box -> identical to base. 
+ assert np.array_equal(out[:50, :80], base[:50, :80]) + assert np.array_equal(out[150:, 220:], base[150:, 220:]) + + def test_interior_equals_regenerated(self): + base, regen = self._frames() + out = feather_region_composite(base, regen, (100, 60, 80, 50), feather=8) + # Deep interior of the box (past the feather ramp) is fully regenerated. + assert np.array_equal(out[80:90, 130:150], regen[80:90, 130:150]) + + def test_hard_paste_when_no_feather(self): + base, regen = self._frames() + out = feather_region_composite(base, regen, (100, 60, 80, 50), feather=0) + assert np.array_equal(out[60:110, 100:180], regen[60:110, 100:180]) + assert np.array_equal(out[:60], base[:60]) + + def test_seam_is_monotonic_ramp(self): + base, regen = self._frames() + out = feather_region_composite(base, regen, (100, 60, 80, 50), feather=10).astype(np.float32) + # Along a horizontal line crossing the left edge, values rise from base(80) + # toward regenerated(200) monotonically through the feather band. + row = out[85, 100:115, 0] + assert row[0] < row[-1] + assert np.all(np.diff(row) >= -1e-3) + + def test_dtype_preserved(self): + base, regen = self._frames() + out = feather_region_composite(base, regen, (50, 50, 40, 40), feather=4) + assert out.dtype == base.dtype + + def test_grayscale_2d_supported(self): + base = np.full((100, 120), 30, np.uint8) + regen = np.full((100, 120), 220, np.uint8) + out = feather_region_composite(base, regen, (40, 30, 30, 30), feather=4) + assert out.shape == base.shape + assert np.array_equal(out[:30], base[:30]) + + def test_empty_or_offimage_box_returns_base(self): + base, regen = self._frames() + assert np.array_equal(feather_region_composite(base, regen, (0, 0, 0, 0)), base) + assert np.array_equal(feather_region_composite(base, regen, (500, 500, 40, 40)), base) + + def test_box_clamped_to_image_bounds(self): + base, regen = self._frames() + # Box overhangs the bottom-right; only the in-image part is composited. 
+ out = feather_region_composite(base, regen, (280, 180, 60, 60), feather=0) + assert np.array_equal(out[180:, 280:], regen[180:, 280:]) + assert out.shape == base.shape + + def test_shape_mismatch_raises(self): + base, _ = self._frames(200, 300) + bad = np.full((100, 100, 3), 200, np.uint8) + with pytest.raises(ValueError, match="shape mismatch"): + feather_region_composite(base, bad, (10, 10, 20, 20)) From 737305858d2e4854e2206482d59c9ba77a502125 Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Sat, 20 Jun 2026 15:34:39 -0700 Subject: [PATCH 4/4] docs: sync module map for the corpus-mining provenance + removal fixes Update CLAUDE.md and docs/module-internals.md for: ProvenanceReport.ai_source_kind (generated vs enhanced) and the shared GEMINI_SPARKLE_TRUST_CONF; the text-mark over-subtraction guard; noai/tiling.feather_region_composite + the region-targeted WatermarkRemover.remove_watermark(region=) path; the new C2PA vendor rows (Volcano Engine Chinese legal name, ElevenLabs) and the documented TikTok/PixelBin exclusion. Record the rejected gemini-gate-lowering experiment. Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 8 ++++---- docs/module-internals.md | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 483f844..39e7b9c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -53,16 +53,16 @@ Compact map. The full per-module detail (design decisions, tuned thresholds, cal - `noai/c2pa.py` — C2PA reading. `extract_c2pa_info(path)` uses the official **c2pa-python `Reader`** first (core dep, any container; `read_manifest_store_json` returns the WHOLE store JSON — active + ingredient manifests — so an AI marker on a parent manifest is seen), and falls back to the hand-rolled caBX/CBOR parser (`has_c2pa_metadata` / `extract_c2pa_chunk` / `_extract_c2pa_info_png`) for synthetic/partial blobs the validator rejects or a broken/absent wheel. 
The registry scan (issuer / source-type / SynthID / soft-binding) is shared by both paths via `_populate_registry_fields`, so the return-dict shape is identical. Do not reimplement chunk parsing; chunk reads are clamped to the remaining file size by design. `extract_c2pa_chunk`/`inject_c2pa_chunk` stay PNG-only (raw caBX bytes, test/extractor use). - `noai/constants.py` — the single `C2PA_AI_VENDORS` registry (+ `C2PA_SOFT_BINDINGS`) from which `C2PA_ISSUERS` / `SYNTHID_C2PA_ISSUERS` / `identify._ISSUER_PLATFORM` are all derived. Add a new vendor as one registry entry; never edit the derived dicts and never add inline. - `metadata.py` — `scan_head(path)` is the shared (memoized) input for every C2PA/AIGC/IPTC byte scan; use it instead of `open().read(1MB)` for any new marker scan. Also home to `synthid_source`, `xai_signature`, `iptc_ai_system`, `aigc_label`, `huggingface_job`, `samsung_genai`, and `remove_ai_metadata` (fail-safe `strip_c2pa_boxes`). -- `identify.py` — aggregates every locally-readable signal into one `ProvenanceReport`; `is_ai_generated` is True or None, never asserted False. `import identify` is deliberately light (lazy `noai/__init__`, fits a 512 MB host) — keep heavy imports out. Add capture-camera tokens to `_DEVICE_C2PA_PLATFORM` only when verified against a real C2PA file; editing-app/AI-device signer tokens go to `_SIGNER_C2PA_PLATFORM`; generator/issuer platforms to `C2PA_AI_VENDORS` in `constants.py`. Integrity-clash detection is high-precision by design (only hard generator stamps feed it, source-grouped independence). +- `identify.py` — aggregates every locally-readable signal into one `ProvenanceReport`; `is_ai_generated` is True or None, never asserted False. 
`ProvenanceReport.ai_source_kind` exposes the C2PA digital-source-type split — `"generated"` (trainedAlgorithmicMedia, fully AI) vs `"enhanced"` (compositeWithTrainedAlgorithmicMedia, a real photo with an AI-composited region), else None — so a caller branches full-frame scrub vs region-targeted clean (see `noai/tiling.feather_region_composite` + `WatermarkRemover.remove_watermark(region=...)`). The sparkle provenance threshold is the SHARED `watermark_registry.GEMINI_SPARKLE_TRUST_CONF` (imported, not a private copy) so the provenance "is there a sparkle" verdict and the removal "take the sparkle" decision can never drift. `import identify` is deliberately light (lazy `noai/__init__`, fits a 512 MB host) — keep heavy imports out (the `watermark_registry` constant import stays light: engines are lazy there). Add capture-camera tokens to `_DEVICE_C2PA_PLATFORM` only when verified against a real C2PA file; editing-app/AI-device signer tokens go to `_SIGNER_C2PA_PLATFORM`; generator/issuer platforms to `C2PA_AI_VENDORS` in `constants.py`. Integrity-clash detection is high-precision by design (only hard generator stamps feed it, source-grouped independence). - `watermark_registry.py` — the single catalog of known visible watermarks (gemini / doubao / jimeng / samsung), reverse-alpha based by policy. Add a new visible text mark = one `_text_mark(...)` row + a `TextMarkConfig` with a captured alpha map; do not re-add per-mark `if` branches. `cli._write_bgr_with_alpha` must NOT zero alpha in the watermark bbox (issue #30 white-box regression). - `gemini_engine.py` — visible Gemini-sparkle remover/detector (cv2/numpy, no GPU): top-K size-weighted fusion candidate selection (`_SELECT_TOPK`), corner-promote, over/under-subtraction guards, false-positive gate, self-verify repair. 
Detection scores the top-K size-weighted matches by full fusion (spatial+gradient+variance) and keeps the highest — NOT the raw-NCC argmax, which re-admits the tiny-patch FPs the size weight suppresses (the osachub 2026-06-12 sub-0.85 corner-sparkle regression; see `docs/module-internals.md`). Keep the 0.85 corner-promote NCC gate; a margin/chroma-gated lower promote was measured and REJECTED 2026-06-11 (~33% FP on non-Google content). Gate any removal candidate on a physical brightness check, not the detector alone. - `_text_mark_engine.py` — shared base for the three reverse-alpha text-mark engines (extracted 2026-06-09); the per-engine modules are config-only subclasses. New text mark = a `TextMarkConfig` + a thin subclass + one registry row. Gemini stays a separate engine (different model). -- `doubao_engine.py` / `jimeng_engine.py` / `samsung_engine.py` — thin `TextMarkEngine` subclasses: Doubao "豆包AI生成" (bottom-right), Jimeng "★ 即梦AI" (bottom-right), Samsung Galaxy AI "✦ Contenuti generati dall'AI" (bottom-LEFT, locale-specific — Italian variant calibrated). Removal = reverse-alpha (always-align) + thin residual inpaint. A detector-only removal test is insufficient — assert visual residual (the textured-shift tests). +- `doubao_engine.py` / `jimeng_engine.py` / `samsung_engine.py` — thin `TextMarkEngine` subclasses: Doubao "豆包AI生成" (bottom-right), Jimeng "★ 即梦AI" (bottom-right), Samsung Galaxy AI "✦ Contenuti generati dall'AI" (bottom-LEFT, locale-specific — Italian variant calibrated). 
Removal = reverse-alpha (always-align) + thin residual inpaint, **with an over-subtraction guard ported from `gemini_engine` (2026-06-20)**: `_reverse_alpha_oversubtracts` predicts the reverse-alpha output PER PIXEL over the glyph body from the INPUT, and when the recovered body lands more than `_OVERSUB_DARK_MARGIN` (25) gray levels below the local ring it abandons the reverse-alpha pixels and inpaints the footprint from the original surroundings (`_inpaint_footprint`) — fixing the dark-pit ghost on dark/mid-tone backgrounds (roadmap P0#8). Predicting per-pixel from the input (not the produced output) keeps a clean full-strength mark byte-identical (no false trip). A detector-only removal test is insufficient — assert visual residual (the textured-shift tests + `tests/test_text_mark_oversubtraction.py`). - `region_eraser.py` — universal region eraser (`erase` CLI): cv2 backend default (no deps), optional big-LaMa via onnxruntime (~3.5-4 GB peak RAM, ~5-6 s/call CPU — does not fit a minimal droplet). - `invisible_watermark.py` — decodes the OPEN DWT-DCT watermarks (SD / SDXL / FLUX) via `imwatermark` (extra `detect`, pulls torch). Fragile two ways: (1) does not survive JPEG re-encode/resize; (2) **carrier-fragile on a broad class of pristine images** -- a clean encode->decode round-trip recovers 48/48 on chatgpt/firefly/random but FAILS (28-39/48, below the `_MATCH_48`=44 gate) on the FLUX fox, doubao, a flat FLUX generation, AND a clean synthetic flat fill with no watermark. The failure does NOT track texture; it goes with a degenerate **all-ones decode that is a CARRIER ARTIFACT, not a watermark** (synthetic clean image reproduces it). So `detect_invisible_watermark` is **positive-only**: trust a hit; a `None` is inconclusive unless a same-carrier positive-control embed first recovers >=44. Verified 2026-06-19; full caveat in `docs/watermarking-landscape.md`. - `trustmark_detector.py` — Adobe TrustMark open decoder (extra `trustmark`). 
Do NOT remove the JPEG re-encode false-positive gate — a lone TrustMark hit without it is almost always content noise. -- `noai/watermark_remover.py` — `WatermarkRemover` with three diffusion pipelines selected by the explicit `pipeline` ctor arg, never inferred from `model_id`: `sdxl` (plain SDXL img2img), `controlnet` (SDXL + canny ControlNet, **the DEFAULT since 2026-06-09**), and `qwen` (Qwen-Image 20B MMDiT img2img, Apache-2.0, CUDA/cloud-class — best **text** preservation (incl. CJK); `_load_qwen_pipeline`/`_run_qwen`, bf16, no MPS fallback; call shape in the pure `_build_qwen_kwargs` using `true_cfg_scale`). Removal comes from the img2img `strength`; ControlNet only preserves text/face STRUCTURE — SynthID CAN survive controlnet on photoreal content at low strength. Qwen prototype oracle floors (single-seed, pending seed-repeat cert): OpenAI ~0.10, Gemini ~0.30 (higher than the controlnet Gemini floor — pass explicit `--strength` for Gemini on `qwen` until certified). Fidelity measured by `scripts/fidelity_metrics.py` (OCR-CER / ArcFace / LPIPS / Laplacian-var, NOT eyeball), compared ONLY at each pipeline's oracle-confirmed scrub floor (where SynthID is removed in BOTH — equal-strength is invalid where it leaves one un-scrubbed): Qwen wins TEXT (incl. CJK), controlnet wins FACES (Qwen smooths faces more) — Qwen is the text-preserving remover, not a universal fidelity win. No face-restore extra ships, by validated decision (every restore approach looked MORE AI-generated). -- `noai/tiling.py` — sliding-window tiled diffusion for large inputs (CLI `--tile`). `WatermarkRemover.remove_watermark` branches to `run_tiled` when `tile` is set AND the long side exceeds `tile_size`, refactoring the single-pass `_generate` into a per-tile `_generate_one` (the ControlNet edge map is rebuilt per tile inside it). 
Pure helpers `plan_tiles` (uniform-size tiles, last one flush to the edge) and `feather_weights` (strictly-positive separable taper -> partition-of-unity blend) are unit-tested without the model. New tile-blend tuning goes in those pure helpers; do not inline blend math into the runner. +- `noai/watermark_remover.py` — `WatermarkRemover` with three diffusion pipelines selected by the explicit `pipeline` ctor arg, never inferred from `model_id`: `sdxl` (plain SDXL img2img), `controlnet` (SDXL + canny ControlNet, **the DEFAULT since 2026-06-09**), and `qwen` (Qwen-Image 20B MMDiT img2img, Apache-2.0, CUDA/cloud-class — best **text** preservation (incl. CJK); `_load_qwen_pipeline`/`_run_qwen`, bf16, no MPS fallback; call shape in the pure `_build_qwen_kwargs` using `true_cfg_scale`). Removal comes from the img2img `strength`; ControlNet only preserves text/face STRUCTURE — SynthID CAN survive controlnet on photoreal content at low strength. Qwen prototype oracle floors (single-seed, pending seed-repeat cert): OpenAI ~0.10, Gemini ~0.30 (higher than the controlnet Gemini floor — pass explicit `--strength` for Gemini on `qwen` until certified). Fidelity measured by `scripts/fidelity_metrics.py` (OCR-CER / ArcFace / LPIPS / Laplacian-var, NOT eyeball), compared ONLY at each pipeline's oracle-confirmed scrub floor (where SynthID is removed in BOTH — equal-strength is invalid where it leaves one un-scrubbed): Qwen wins TEXT (incl. CJK), controlnet wins FACES (Qwen smooths faces more) — Qwen is the text-preserving remover, not a universal fidelity win. No face-restore extra ships, by validated decision (every restore approach looked MORE AI-generated). 
`remove_watermark(region=(x,y,w,h), region_feather=...)` runs the regeneration but feather-composites only the AI box back over the original (via `noai/tiling.feather_region_composite`), preserving the real photo elsewhere — the **AI-enhanced composite** path (`identify` `ai_source_kind == "enhanced"`); the box is supplied by the caller (a C2PA composite manifest carries no reliable machine-readable region, so we do not fabricate one). +- `noai/tiling.py` — sliding-window tiled diffusion for large inputs (CLI `--tile`). `WatermarkRemover.remove_watermark` branches to `run_tiled` when `tile` is set AND the long side exceeds `tile_size`, refactoring the single-pass `_generate` into a per-tile `_generate_one` (the ControlNet edge map is rebuilt per tile inside it). Pure helpers `plan_tiles` (uniform-size tiles, last one flush to the edge) and `feather_weights` (strictly-positive separable taper -> partition-of-unity blend) are unit-tested without the model. Also home to `feather_region_composite(base, regenerated, box, *, feather)` — the pure region-targeted compositor for **AI-enhanced composites** (`ai_source_kind == "enhanced"`): blends the regenerated AI box back over the original with a feathered seam, leaving the real photo OUTSIDE the box pixel-exact. It backs `WatermarkRemover.remove_watermark(region=...)` (the remover still regenerates or tiles the whole frame; only the AI box is composited back, so only that region changes); the no-model lossless region path stays `region_eraser.erase`. New tile/region-blend tuning goes in these pure helpers; do not inline blend math into the runner. - `auto_config.py` + the content-detection layer were REMOVED 2026-06-09; `--auto` is a deprecated no-op (controlnet is the default pipeline and the adaptive polish is ON by default and self-gates to a no-op where there is no detail deficit). - `upscaler.py` — optional Real-ESRGAN pre-diffusion super-resolution for small inputs (extra `esrgan`, spandrel only). 
Manual opt-in; the default `--upscaler` stays `lanczos` and the engine always falls back to Lanczos on absence/error. ESRGAN can degrade faces and thin text. - `image_io.py` — Unicode-safe cv2 IO (issue #17). Every cv2 file read/write in the package routes through `imread`/`imwrite`; do not call `cv2.imread`/`cv2.imwrite` directly. `to_bgr(image)` is the shared channel normalizer — use it instead of inlining `cvtColor` branches. diff --git a/docs/module-internals.md b/docs/module-internals.md index cdcc824..7a7c7a0 100644 --- a/docs/module-internals.md +++ b/docs/module-internals.md @@ -15,7 +15,7 @@ module. ## `noai/constants.py` -`noai/constants.py` — PNG_SIGNATURE, C2PA_CHUNK_TYPE, C2PA_SIGNATURES, and `C2PA_AI_VENDORS` — the single `C2paAiVendor` registry of C2PA-signing vendors (issuer byte, resolved org name, the `identify` platform label, and a `synthid` flag), from which `C2PA_ISSUERS`, `SYNTHID_C2PA_ISSUERS` (issuers that pair SynthID with C2PA: Google, OpenAI), and `identify._ISSUER_PLATFORM` are all **derived** — plus `C2PA_SOFT_BINDINGS` (soft-binding `alg` prefix → forensic-watermark vendor: Adobe TrustMark, Digimarc, Imatag, Steg.AI, Microsoft, ...). Add a new C2PA vendor as one `C2PA_AI_VENDORS` entry (never edit the derived dicts), a new soft-binding to `C2PA_SOFT_BINDINGS`; not inline. +`noai/constants.py` — PNG_SIGNATURE, C2PA_CHUNK_TYPE, C2PA_SIGNATURES, and `C2PA_AI_VENDORS` — the single `C2paAiVendor` registry of C2PA-signing vendors (issuer byte, resolved org name, the `identify` platform label, and a `synthid` flag), from which `C2PA_ISSUERS`, `SYNTHID_C2PA_ISSUERS` (issuers that pair SynthID with C2PA: Google, OpenAI), and `identify._ISSUER_PLATFORM` are all **derived** — plus `C2PA_SOFT_BINDINGS` (soft-binding `alg` prefix → forensic-watermark vendor: Adobe TrustMark, Digimarc, Imatag, Steg.AI, Microsoft, ...). 
Add a new C2PA vendor as one `C2PA_AI_VENDORS` entry (never edit the derived dicts), a new soft-binding to `C2PA_SOFT_BINDINGS`; not inline. A vendor that signs under multiple legal names needs one entry PER distinctive issuer byte string: e.g. ByteDance's Volcano Engine is registered both as latin `volcengine` AND the Chinese legal entity `北京火山引擎科技有限公司` (UTF-8; the latin needle misses the Chinese-named certs entirely) — both normalize to the same "ByteDance" needle/platform. ElevenLabs ("Eleven Labs Inc.", pure generative-AI) is registered as a generator. Deliberately EXCLUDED (mined-corpus candidates 2026-06-20, documented in the file): TikTok Inc. (a content-provenance / AI-labeling signer on uploads, not a generator) and PixelBin.io / "Fynd" (an image transform / CDN signer) — registering either as a generator would mis-label human uploads as AI; the `is_ai` verdict keys off the digitalSourceType, which is already honored. ## `metadata.py` @@ -27,7 +27,9 @@ module. `identify.py` — the OpenAI rollout caveat is keyed on `_vendor_of(synthid) == "OpenAI"` (not a raw substring over the issuer + verdict blob). 
`identify(path)` aggregates every locally-readable signal (C2PA issuer→platform, C2PA soft-binding forensic-watermark vendor, **C2PA cloud-manifest reference** via `metadata.c2pa_cloud_manifest` — signal `c2pa_cloud`, **medium**, provenance-only (does NOT set `is_ai`, excluded from `ai_from_metadata` + clash vendors): a C2PA 2.4 Durable-Content-Credentials case where the embedded manifest is stripped but an XMP `dcterms:provenance` pointer to the vendor's cloud manifest store (`_C2PA_MANIFEST_REPOSITORIES`, today `cai-manifests.adobe.com` → "Adobe Content Authenticity") survives, so the credentials stay recoverable server-side; only emitted when no embedded manifest already attributed the file — surfaced on 2 corpus PNGs 2026-06-10 that read fully `unknown` before, IPTC "Made with AI" + IPTC 2025.1 `AISystemUsed`, embedded SD/ComfyUI params, SynthID proxy, xAI/Grok EXIF signature via `metadata.xai_signature`, the China TC260 AIGC label via `metadata.aigc_label`, the HuggingFace `hf-job-id` job marker via `metadata.huggingface_job`, the Samsung Galaxy AI editing marker via `metadata.samsung_genai`, the visible marks — Gemini sparkle plus the ByteDance Doubao 豆包AI生成 / Jimeng 即梦AI / Samsung Galaxy AI "Contenuti generati dall'AI" text marks via the `watermark_registry` — open invisible watermark, Adobe TrustMark via `trustmark_detector`) into one `ProvenanceReport`. `is_ai_generated` is True or None (never asserted False — stripped metadata is not proof of clean origin). The `hf_job`, visible-mark, and Samsung `samsung_genai` signals are **medium** confidence: each lifts an otherwise-Unknown verdict to a tentative AI (`hf_only` / `visible_only` / `samsung_only`, parallel branches; `visible_only` fires on any `visible_*` signal) but is excluded from the high-confidence `ai_from_metadata` set, so none overrides a hard metadata signal. 
-**Visible-mark detection** (`check_visible`, signals `visible_sparkle` / `visible_doubao` / `visible_jimeng` / `visible_samsung`): the Gemini sparkle keeps its own file-level path (`_visible_sparkle` → `gemini_engine.detect_sparkle_confidence`, promoted only at confidence ≥ `_SPARKLE_THRESHOLD` 0.5; corpus-tuned to separate Gemini sparkles ≥0.56 from non-sparkle ≤0.49), while Doubao/Jimeng/Samsung reuse the registry detectors (`_visible_text_marks` → `watermark_registry`, iterating `_VISIBLE_MARK_PLATFORM`), each gated by its own engine NCC threshold via `MarkDetection.detected` (Doubao 0.4, Jimeng 0.45, Samsung 0.4). Doubao/Jimeng are normally also caught by the TC260 AIGC metadata label and Samsung by its C2PA + `genAIType` marker, so the visible path is their stripped-metadata fallback. Visible marks set `platform` only when no harder signal already did, and (like the sparkle) are excluded from integrity-clash vendor claims. The cv2 dependency lives in the engines, not here. +**AI-generated vs AI-enhanced** (`ProvenanceReport.ai_source_kind`, roadmap item): the C2PA digital-source-type is split into `"generated"` (trainedAlgorithmicMedia, fully synthetic) vs `"enhanced"` (compositeWithTrainedAlgorithmicMedia, a real photo with an AI-composited region) — the two byte strings are unambiguous (`compositeWithTrainedAlgorithmicMedia` capitalizes the inner "Trained", so a lowercase `trainedAlgorithmicMedia` match is standalone full generation; full generation wins when both appear). `ai_source_kind` is set only when the AI verdict actually came from the C2PA source type (a non-C2PA AI signal — IPTC/AIGC/local gen/xAI — leaves it None). It lets a caller branch a full-frame scrub (`generated`) from a region-targeted clean that preserves the real photo (`enhanced`; see `noai/tiling.feather_region_composite`). The CLI verdict line reads "AI-generated (fully synthetic)" vs "AI-enhanced (real content with an AI-composited region)". 
+ +**Visible-mark detection** (`check_visible`, signals `visible_sparkle` / `visible_doubao` / `visible_jimeng` / `visible_samsung`): the Gemini sparkle keeps its own file-level path (`_visible_sparkle` → `gemini_engine.detect_sparkle_confidence`, promoted only at confidence ≥ `_SPARKLE_THRESHOLD`, which is the SHARED `watermark_registry.GEMINI_SPARKLE_TRUST_CONF` (0.5) — imported, not a private copy, so the provenance detect threshold and the removal `best_auto_mark` / `_gemini_detect` arbitration gate can never drift (the detect-vs-remove desync from roadmap P0#7; regression-guarded by `tests/test_identify.py::TestSparkleDetectRemoveAlignment`, which composites the real demo sparkle at borderline opacities and asserts identify and `best_auto_mark` AGREE on either side of the line). Lowering the gate to recover faint sub-0.5 sparkles was evaluated 2026-06-20 and REJECTED: a real Doubao text mark scores ~0.40-0.42 as a gemini match with a HIGHER core-ring brightness margin than a genuine faint sparkle, so neither confidence nor the brightness gate separates them in the [0.35, 0.5) band — lowering trades a rare miss for false-positive removals on clean images. The 0.5 value is corpus-tuned to separate Gemini sparkles ≥0.56 from non-sparkle ≤0.49), while Doubao/Jimeng/Samsung reuse the registry detectors (`_visible_text_marks` → `watermark_registry`, iterating `_VISIBLE_MARK_PLATFORM`), each gated by its own engine NCC threshold via `MarkDetection.detected` (Doubao 0.4, Jimeng 0.45, Samsung 0.4). Doubao/Jimeng are normally also caught by the TC260 AIGC metadata label and Samsung by its C2PA + `genAIType` marker, so the visible path is their stripped-metadata fallback. Visible marks set `platform` only when no harder signal already did, and (like the sparkle) are excluded from integrity-clash vendor claims. The cv2 dependency lives in the engines, not here.
**`import identify` is deliberately light** (~26 MB; ~36 MB with cv2 loaded by a visible-mark run, ~106 MB for a full `check_visible` run): it imports the `noai.c2pa`/`noai.constants` submodules, and `noai/__init__` is lazy (see "Test and lint"), so torch/diffusers are NOT pulled at import even in a full `gpu`/`detect` install — fits a 512 MB host. `noai.c2pa` does eagerly import the **c2pa-python** binary (Rust + cryptography, ~+5 MB RSS, no torch) for the primary `Reader` path — light enough to stay on the dependency-light host; a broken/absent wheel degrades to the byte-scan parser (`reader_available()` False). The heavy paths are opt-in: `check_invisible=True` needs the `detect`/`trustmark` extras (each pulls **torch**; TrustMark also **downloads weights**), so on a core-only deploy leave `check_invisible` off (it is a no-op there anyway). Before the lazy `__init__`, the mere presence of torch in the env inflated `import identify` to ~420 MB. @@ -105,6 +107,8 @@ The 11 survivors are near-white ill-conditioning (reverse-alpha divides by `1-a` **`_apply_reverse_alpha` runs on the glyph crop only:** the blend is a no-op outside the glyph `region` (x, y, w, h) (`(wm - 0)/(1 - 0) == wm`, and a uint8→float32→uint8 round-trip is exact). It copies the frame through and computes the reverse-alpha math on the `region` crop only — byte-identical to the old full-frame pass (verified: Doubao 130 + Jimeng 22 placements, 0 mismatches) but O(glyph) not O(image). The full-frame pass cost ~275 ms on a 12 MP frame for a glyph that is <0.1% of it, once per candidate placement (fixed + aligned ≈ 2×/removal); the crop drops that to ~2 ms. Mirror of the Gemini `_core_and_bg` crop. 
+**Over-subtraction guard (`_reverse_alpha_oversubtracts` → `_inpaint_footprint`, ported from `gemini_engine` 2026-06-20, roadmap P0#8):** on a dark or mid-tone background the captured alpha can over-estimate THIS image's mark opacity, and reverse-alpha leaves a darker-than-background glyph ghost (a "dark pit") instead of recovering the true pixels — the sparkle-only fix (commit 41f6797) left the text marks unhandled. After `remove_watermark_reverse_alpha` selects the winning placement, the guard PREDICTS the reverse-alpha output PER PIXEL over the glyph body from the INPUT (`(obs - a*logo)/(1-a)`, exactly the remover's math) and, when the predicted body lands more than `_OVERSUB_DARK_MARGIN` (25) gray levels below the local background ring, abandons the reverse-alpha output for the footprint and inpaints it from the ORIGINAL surroundings (`_inpaint_footprint`, a wider dilate/radius than the thin residual pass). Predicting per-pixel from the INPUT (not the produced output, which depends on which placement the remover picked) is what keeps a cleanly captured full-strength mark byte-identical — it predicts back to the background everywhere, so the guard never trips on it (verified across Doubao/Jimeng/Samsung on white/mid/dark/midgray backgrounds). A faint mark predicts a body far below the ring and diverts to the inpaint. Regression-guarded by `tests/test_text_mark_oversubtraction.py` (predicate True on faint / False on clean, end-to-end no-dark-pit acceptance, clean-mark byte-identity, textured-background recovery). A flat synthetic background cannot exhibit the residual-inpaint failure (inpaint-from-flat is perfect regardless), so the value shows on textured/real content where the footprint inpaint samples un-darkened original pixels instead of the darkened reverse-alpha halo. 
+ **`_fixed/_aligned_alpha_map` and `extract_mask` return footprint-sized arrays, not full frames (memory):** the alpha-map helpers return the glyph-sized alpha **block** (`(gh, gw)` float32) plus its placement `(ax, ay, gw, gh)`, and `extract_mask` returns the box-sized glyph mask (`(loc.h, loc.w)` uint8) — both used to allocate a full `(h, w)` array that is read only inside the small glyph/box. A full-frame float32 alpha map is ~48 MB on a 12 MP frame and two were held at once during removal (fixed + aligned ≈ 96 MB of mostly-zeros); the box mask was a ~12 MB uint8 allocation rebuilt per text-mark `detect` on the memory-tight `identify` path. `_apply_reverse_alpha` consumes the block directly; the residual inpaint embeds it into one full-frame uint8 mask only at `cv2.inpaint` time (which needs a full-frame mask). Byte-identical to the old full-frame path — the block equals the old map's `[ay:ay+gh, ax:ax+gw]` slice and the box equals the old mask cropped to `loc.bbox` (regression-guarded by `tests/test_text_mark_memory.py`, which reconstructs the old full-frame path inline and asserts equality, so the proof survives a cv2/asset bump). `remove_watermark_reverse_alpha` tracks the winning `region` alongside `best_amap` to place that mask. ## `doubao_engine.py` @@ -199,6 +203,8 @@ At the shared low removal strength the canny edge-conditioning keeps the regener Pure sliding-window tiling for the diffusion path (no torch import; numpy/PIL only). `plan_tiles(w, h, tile_size, overlap)` returns a row-major grid of uniform-size `Tile` boxes — every tile is exactly `tile_size` (the SDXL training size), with the last tile on each axis pulled back flush to the far edge (`_axis_positions` clamps a pathological `overlap >= tile` to `tile - 1` so the step stays >= 1). 
`feather_weights(w, h, overlap)` is a separable linear taper (1 in the interior, ramping toward each edge) floored at `_WEIGHT_EPS` so it is **strictly positive everywhere** — that makes the normalised `accum / weight_sum` blend a partition of unity, so identical/unchanged tiles reconstruct the input exactly (the seam-free guarantee). `run_tiled(generate_tile, image, tile_size, overlap, set_progress)` is the orchestration loop: crop each planned tile, call `generate_tile` (one diffusion pass on a single PIL tile — injected, so this stays decoupled from the pipeline), resize a latent-grid-rounded result back to the exact tile size, and feather-accumulate. All three are unit-tested without the model (`tests/test_tiling.py`: axis math, grid coverage, taper shape/symmetry/positivity, identity reconstruction, per-tile call count, and the resize-back path). New blend tuning belongs in these pure helpers, not inlined into the runner. +`feather_region_composite(base, regenerated, box, *, feather)` is the pure region-targeted compositor for **AI-enhanced composites** (roadmap P1#8; `identify` `ai_source_kind == "enhanced"`, digitalSourceType `compositeWithTrainedAlgorithmicMedia`). It blends `regenerated` over `base` inside `box = (x, y, w, h)` with a separable linear taper of `feather` px at the box edges (the taper anchors to ~0 at the boundary, so unlike `feather_weights` it is NOT floored — the result equals `base` EXACTLY outside the box), preserving dtype and supporting HxW or HxWxC. It backs `WatermarkRemover.remove_watermark(region=..., region_feather=...)`: the remover regenerates the frame (or tiles), then composites only the AI box back over the original input, so the real photo outside the box stays pixel-exact and only the AI region is scrubbed. The box is caller-supplied (a C2PA composite manifest carries no reliable machine-readable region); the no-model lossless region path remains `region_eraser.erase`. 
Unit-tested in `tests/test_tiling.py::TestFeatherRegionComposite` (outside-box exactness, interior == regenerated, hard-paste at feather 0, monotonic seam ramp, dtype/grayscale/clamp/empty-box/shape-mismatch). + ## `auto_config.py` (REMOVED 2026-06-09) **`auto_config.py` + the content-detection layer were REMOVED 2026-06-09.**