fix(visible): over-subtraction guard for Doubao/Jimeng/Samsung text marks

Port the Gemini sparkle dark-pit guard (commit 41f6797) to the shared TextMarkEngine reverse-alpha base (roadmap P0#8): on a dark or mid-tone background the captured alpha can over-estimate this image's mark opacity, and reverse-alpha leaves a darker-than-background glyph ghost instead of recovering the true pixels. The sparkle-only fix left the text marks unhandled. _reverse_alpha_oversubtracts predicts the reverse-alpha output PER PIXEL over the glyph body from the INPUT ((obs - a*logo)/(1-a), the remover's own math); when the predicted body lands more than _OVERSUB_DARK_MARGIN (25) gray levels below the local background ring it abandons the reverse-alpha output for the footprint and inpaints it from the original surroundings (_inpaint_footprint, wider dilate/radius than the thin residual pass). Predicting per-pixel from the input (not the produced output, which depends on which placement the remover picked) keeps a cleanly captured full-strength mark byte-identical -- it predicts back to the background everywhere, so the guard never trips on it (verified across all three engines on white/mid/dark/midgray backgrounds). Regression-guarded by tests/test_text_mark_oversubtraction.py: predicate True on faint / False on clean, end-to-end no-dark-pit acceptance, clean-mark byte identity, and textured-background footprint recovery. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-04 23:47:49 +02:00 · 2026-06-20 15:34:39 -07:00
parent 0c215b5b2f
commit 33fddbc6fa
2 changed files with 217 additions and 0 deletions
@@ -37,6 +37,28 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+# Reverse-alpha over-subtraction guard (ported from gemini_engine, 2026-06-20).
+# The reverse-alpha blend ``(wm - a*logo)/(1-a)`` over-subtracts when the captured
+# alpha over-estimates THIS image's mark opacity: on a dark or mid-tone background
+# it drives the glyph footprint into a visibly DARKER-than-background ghost (a
+# "dark pit") instead of recovering the true pixels. The retained-corpus mining
+# (2026-06-20) showed the sparkle-only fix (commit 41f6797) left this unhandled
+# for the Doubao/Jimeng text marks. Mirror the sparkle gate: when the predicted
+# glyph body lands more than this many gray levels (channel-mean luma, 0-255
+# scale) below the local background ring, abandon the reverse-alpha output for
+# the footprint and inpaint it from the surroundings instead. Calibrated to the
+# same 25-level margin the sparkle gate uses -- clean text-mark removals recover
+# within ~10 of the ring, the dark pit lands tens of levels below.
+_OVERSUB_DARK_MARGIN = 25.0
+# Glyph-body / background-ring sampling for the guard. The ring is a pad around
+# the glyph box (excluding the box itself); the body is every alpha-block pixel
+# whose alpha reaches the floor below.
+_OVERSUB_RING_PAD_FRAC = 0.6  # ring pad as a fraction of the glyph-box height (the guard floors the pad at 4 px)
+_OVERSUB_BODY_ALPHA_FLOOR = 0.15  # alpha at or above which a block pixel counts as glyph body
+# Footprint inpaint when the guard trips: dilate the glyph mask wider than the
+# thin residual pass so the whole darkened ghost is reconstructed, not just its edge.
+_OVERSUB_INPAINT_DILATE = 9  # side length of the square all-ones kernel given to cv2.dilate
+_OVERSUB_INPAINT_RADIUS = 4  # radius argument given to cv2.inpaint
+

@dataclass(frozen=True)
 class TextMarkConfig:
@@ -335,6 +357,74 @@ class TextMarkEngine:
        out[y1:y2, x1:x2] = np.clip((roi - a3 * logo) / np.clip(1.0 - a3, 0.25, 1.0), 0, 255).astype(np.uint8)
        return out

+    def _reverse_alpha_oversubtracts(
+        self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
+    ) -> bool:
+        """Return True when reverse-alpha would darken the glyph footprint into a dark pit.
+
+        Ported from ``gemini_engine._reverse_alpha_oversubtracts`` (2026-06-20).
+        The reverse-alpha result is PREDICTED per pixel over the glyph body from the
+        INPUT and the captured alpha, ``(obs - a*logo)/(1-a)`` on channel-mean luma.
+        The guard trips when the median prediction over the body lands more than
+        ``_OVERSUB_DARK_MARGIN`` gray levels below the median of the local
+        background ring. Predicting from the input (not the produced output) keeps
+        the gate independent of which placement the reverse-alpha picked, so a clean
+        full-strength mark (whose strokes predict back to the background) never
+        trips, while a mark fainter than the capture (over-subtracted into a ghost)
+        does.
+
+        Args:
+            image: Input frame indexed ``[row, col, channel]``; it is only read.
+            amap: Alpha block for the glyph box, one value per box pixel.
+            region: Glyph box as ``(x, y, width, height)`` in image coordinates.
+                A box overhanging the frame is judged on its visible part only.
+
+        Returns:
+            ``True`` when the footprint should be inpainted instead of shipping the
+            reverse-alpha output. ``False`` otherwise, including every case where
+            the prediction cannot be evaluated (visible box or ring too small,
+            capture too faint, no body pixels).
+        """
+        ax, ay, gw, gh = region
+        ih, iw = image.shape[:2]
+        # Clip the glyph box to the frame and crop the alpha block to match. An
+        # overhanging box would otherwise yield wrapped (negative origin) or
+        # truncated slices whose shape no longer matches the alpha block, and the
+        # blend below would raise a broadcast error.
+        by1, by2 = max(0, ay), min(ih, ay + gh)
+        bx1, bx2 = max(0, ax), min(iw, ax + gw)
+        if by2 - by1 < 4 or bx2 - bx1 < 4:
+            return False
+        alpha = amap[by1 - ay : by2 - ay, bx1 - ax : bx2 - ax]
+        if float(alpha.max()) < 0.2:  # too faint a capture to over-subtract meaningfully
+            return False
+        body_box = alpha >= _OVERSUB_BODY_ALPHA_FLOOR  # glyph strokes
+        if not bool(body_box.any()):
+            return False
+        # Ring window: the glyph box grown by ``pad`` on every side, clipped to the frame.
+        pad = max(4, int(gh * _OVERSUB_RING_PAD_FRAC))
+        ry1, ry2 = max(0, ay - pad), min(ih, ay + gh + pad)
+        rx1, rx2 = max(0, ax - pad), min(iw, ax + gw + pad)
+        ring = image[ry1:ry2, rx1:rx2].astype(np.float32).mean(axis=2)
+        # Visible glyph box expressed in ring-window coordinates.
+        fy1, fy2, fx1, fx2 = by1 - ry1, by2 - ry1, bx1 - rx1, bx2 - rx1
+        ring_mask = np.ones(ring.shape, dtype=bool)
+        ring_mask[fy1:fy2, fx1:fx2] = False
+        if int(ring_mask.sum()) < 10:  # not enough background to estimate a level
+            return False
+        # Predict the reverse-alpha output PER PIXEL over the glyph body with the
+        # same blend inversion, (obs - a*logo)/(1-a), so a cleanly captured mark
+        # predicts back to the true background everywhere (no trip), while a mark
+        # fainter than the capture predicts a body far below the local ring. The
+        # per-pixel alpha (not a single peak value) keeps the prediction faithful
+        # across the glyph's anti-aliased alpha gradient.
+        # NOTE(review): the reverse-alpha assignment earlier in this class divides
+        # by ``np.clip(1 - a, 0.25, 1.0)`` and clips its result to [0, 255]; this
+        # prediction only caps alpha at 0.99, so the two diverge where alpha
+        # exceeds 0.75 -- confirm that is intended.
+        obs = ring[fy1:fy2, fx1:fx2]
+        a = np.clip(alpha, 0.0, 0.99)
+        logo = float(np.mean(self.config.alpha_logo_bgr))
+        predicted = (obs - a * logo) / (1.0 - a)
+        predicted_core = float(np.median(predicted[body_box]))
+        bg = float(np.median(ring[ring_mask]))
+        oversub = predicted_core < bg - _OVERSUB_DARK_MARGIN
+        if oversub:
+            logger.debug(
+                "%s reverse-alpha over-subtracts: predicted core=%.1f bg=%.1f (margin %.0f) -> footprint inpaint",
+                self.config.name,
+                predicted_core,
+                bg,
+                _OVERSUB_DARK_MARGIN,
+            )
+        return oversub
+
+    def _inpaint_footprint(
+        self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
+    ) -> NDArray[Any]:
+        """Reconstruct the glyph footprint from its surroundings.
+
+        Used when reverse-alpha would over-subtract into a dark pit. Inpaints the
+        ORIGINAL image over a dilated glyph mask, so the result never contains the
+        darkened reverse-alpha pixels.
+
+        Args:
+            image: Original (still watermarked) frame handed to ``cv2.inpaint``.
+            amap: Alpha block for the glyph box, one value per box pixel.
+            region: Glyph box as ``(x, y, width, height)`` in image coordinates.
+                A box overhanging the frame contributes only its visible part.
+
+        Returns:
+            The array produced by ``cv2.inpaint`` for ``image`` and the dilated mask.
+        """
+        ax, ay, gw, gh = region
+        ih, iw = image.shape[:2]
+        mask = np.zeros((ih, iw), np.uint8)
+        # Clip the glyph box to the frame and crop the alpha block to match. An
+        # overhanging box would otherwise make the slice assignment below fail
+        # with a shape mismatch (or wrap around for a negative origin).
+        by1, by2 = max(0, ay), min(ih, ay + gh)
+        bx1, bx2 = max(0, ax), min(iw, ax + gw)
+        if by2 > by1 and bx2 > bx1:
+            visible = amap[by1 - ay : by2 - ay, bx1 - ax : bx2 - ax]
+            mask[by1:by2, bx1:bx2] = (visible > self.config.residual_alpha_floor).astype(np.uint8) * 255
+        # Grow the mask so the inpaint covers the whole ghost, not just the strokes.
+        mask = cv2.dilate(mask, np.ones((_OVERSUB_INPAINT_DILATE, _OVERSUB_INPAINT_DILATE), np.uint8))
+        return cv2.inpaint(image, mask, _OVERSUB_INPAINT_RADIUS, cv2.INPAINT_NS)
+
    def remove_watermark_reverse_alpha(self, image: NDArray[Any], *, residual_inpaint: bool = True) -> NDArray[Any]:
        """Recover the original pixels by inverting the alpha blend, then clear the
        residual outline with a thin inpaint over the glyph footprint.
@@ -370,6 +460,13 @@ class TextMarkEngine:
                best_residual, best_out, best_amap, best_region = residual, out, amap, region
        if best_out is None or best_amap is None or best_region is None:  # pragma: no cover - maps is non-empty
            return image.copy()
+        # Over-subtraction guard: on a dark/mid-tone background the captured alpha can
+        # over-estimate the mark's opacity and reverse-alpha leaves a darker-than-
+        # background ghost. When the recovered glyph body sits far below the local
+        # ring, reconstruct the footprint from its surroundings instead of shipping the
+        # dark pit (the thin residual inpaint cannot fix a footprint-wide darkening).
+        if self._reverse_alpha_oversubtracts(image, best_amap, best_region):
+            return self._inpaint_footprint(image, best_amap, best_region)
        if residual_inpaint:
            # Embed the glyph-sized alpha block into a full-frame uint8 mask only for
            # the inpaint (cv2.inpaint needs a mask matching best_out). One uint8
@@ -0,0 +1,120 @@
+"""Reverse-alpha over-subtraction guard for the visible text-mark engines.
+
+Ported from the Gemini sparkle fix (commit 41f6797) to Doubao/Jimeng/Samsung
+(retained-corpus mining 2026-06-20, roadmap P0#8): on a dark or mid-tone
+background the captured alpha can over-estimate THIS image's mark opacity, and
+reverse-alpha leaves a darker-than-background glyph ghost (a "dark pit") instead
+of recovering the true pixels. The guard predicts the reverse-alpha output per
+pixel and, when the glyph body lands far below the local ring, reconstructs the
+footprint from the original surroundings instead of shipping the pit.
+
+These assert visual residual (pixel levels vs the local background), not just a
+detector re-fire -- a dark pit can clear the NCC detector while still looking wrong.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from remove_ai_watermarks import image_io
+from remove_ai_watermarks._text_mark_engine import _OVERSUB_DARK_MARGIN
+from remove_ai_watermarks.doubao_engine import DoubaoEngine
+from remove_ai_watermarks.jimeng_engine import JimengEngine
+from remove_ai_watermarks.samsung_engine import SamsungEngine
+
+# Engine classes exercised by every test below (via ``parametrize``).
+_ENGINES = [DoubaoEngine, JimengEngine, SamsungEngine]
+
+
+def _compose(engine, bg: float, opacity_gain: float, w: int = 1024, h: int = 1024):
+    """Blend the engine's captured mark onto a flat ``bg`` canvas.
+
+    The captured alpha is scaled by ``opacity_gain`` before blending, so a gain
+    below 1 yields a mark FAINTER than the capture -- reverse-alpha at the full
+    captured alpha then over-subtracts into a dark pit, which is the case the guard
+    must catch.
+
+    Returns ``(watermarked_uint8, alpha_block, region)``; the block and region are
+    exactly what the engine's reverse-alpha receives.
+    """
+    canvas = np.full((h, w, 3), float(bg), np.float32)
+    block, (ax, ay, gw, gh) = engine._fixed_alpha_map(canvas)
+    footprint = (slice(ay, ay + gh), slice(ax, ax + gw))
+    # Scaled per-pixel opacity, with a trailing axis so it broadcasts over BGR.
+    alpha = np.clip(block * opacity_gain, 0.0, 0.99)[:, :, None]
+    logo_bgr = np.array(engine.config.alpha_logo_bgr, np.float32)
+    canvas[footprint] = canvas[footprint] * (1 - alpha) + logo_bgr * alpha
+    watermarked = np.clip(canvas, 0, 255).astype(np.uint8)
+    return watermarked, block, (ax, ay, gw, gh)
+
+
+def _body_vs_ring(out, region, block) -> tuple[float, float]:
+    """Return ``(body_median, ring_median)`` of channel-mean luma in ``out``.
+
+    The body is every glyph-box pixel whose ``block`` alpha is at least 0.15; the
+    ring is the box grown by ``max(4, int(0.6 * height))`` pixels on each side
+    (clipped to the image) with the box itself left out.
+    """
+    ax, ay, gw, gh = region
+    luma = out.astype(np.float32).mean(axis=2)
+    rows, cols = luma.shape[0], luma.shape[1]
+    margin = max(4, int(gh * 0.6))
+    top, bottom = max(0, ay - margin), min(rows, ay + gh + margin)
+    left, right = max(0, ax - margin), min(cols, ax + gw + margin)
+    window = luma[top:bottom, left:right]
+    # Mark the glyph box inside the window, then sample everything else as the ring.
+    inside_box = np.zeros(window.shape, dtype=bool)
+    inside_box[ay - top : ay - top + gh, ax - left : ax - left + gw] = True
+    ring_level = float(np.median(window[~inside_box]))
+    glyph_box = luma[ay : ay + gh, ax : ax + gw]
+    body_level = float(np.median(glyph_box[block >= 0.15]))
+    return body_level, ring_level
+
+
+@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
+class TestOversubtractionGuard:
+    """Predicate and end-to-end checks for the guard, run once per engine class."""
+
+    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4), (90, 0.5)])
+    def test_guard_trips_on_faint_mark(self, Engine, bg, gain):
+        # A mark fainter than the capture must be flagged as over-subtracting.
+        engine = Engine()
+        marked, alpha_block, box = _compose(engine, bg, gain)
+        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(marked), alpha_block, box)
+        assert tripped
+
+    @pytest.mark.parametrize("bg", [255, 200, 128, 60])
+    def test_guard_skips_clean_full_strength_mark(self, Engine, bg):
+        # A cleanly captured (gain 1.0) mark predicts back to the background, so the
+        # guard must NOT trip -- no regression of the common clean-removal path.
+        engine = Engine()
+        marked, alpha_block, box = _compose(engine, bg, 1.0)
+        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(marked), alpha_block, box)
+        assert not tripped
+
+    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4)])
+    def test_faint_removal_leaves_no_dark_pit(self, Engine, bg, gain):
+        # End-to-end acceptance (roadmap P0#8): after removal the glyph footprint is
+        # not a region more than _OVERSUB_DARK_MARGIN below the local background.
+        engine = Engine()
+        marked, alpha_block, box = _compose(engine, bg, gain)
+        cleaned = engine.remove_watermark_reverse_alpha(marked)
+        core, ring_bg = _body_vs_ring(cleaned, box, alpha_block)
+        darkest_allowed = ring_bg - _OVERSUB_DARK_MARGIN
+        assert core >= darkest_allowed, f"dark pit: body {core:.0f} vs ring {ring_bg:.0f}"
+
+    def test_clean_mark_removal_unchanged_by_guard(self, Engine, monkeypatch):
+        # On a clean mark the guard must be a no-op: forcing it off yields the same
+        # output (the guard only ever diverts the over-subtraction case).
+        def _never_trips(self, *args, **kwargs):
+            return False
+
+        engine = Engine()
+        marked, _alpha_block, _box = _compose(engine, 200, 1.0)
+        with_guard = engine.remove_watermark_reverse_alpha(marked)
+        monkeypatch.setattr(type(engine), "_reverse_alpha_oversubtracts", _never_trips)
+        without_guard = engine.remove_watermark_reverse_alpha(marked)
+        assert np.array_equal(with_guard, without_guard)
+
+
+@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
+def test_guard_recovers_pit_on_textured_background(Engine):
+    """A faint mark on a smooth textured background is recovered to roughly the
+    clean content level: the mean absolute luma error over the glyph body, measured
+    against the unmarked texture, must stay under 25 levels (no glyph-shaped ghost)."""
+    engine = Engine()
+    size = 1024
+    yy, xx = np.mgrid[0:size, 0:size].astype(np.float32)
+    base = 120 + 35 * np.sin(xx / 80.0) + 25 * np.cos(yy / 60.0)
+    # Unmarked reference texture; the marked canvas starts as a copy of it.
+    clean = np.clip(np.stack([base, base * 0.95, base * 1.05], axis=-1), 0, 255)
+    canvas = clean.copy()
+    block, (ax, ay, gw, gh) = engine._fixed_alpha_map(canvas)
+    footprint = (slice(ay, ay + gh), slice(ax, ax + gw))
+    # Blend the mark in at 45% of the captured opacity (the over-subtraction case).
+    alpha = np.clip(block * 0.45, 0.0, 0.99)[:, :, None]
+    logo_bgr = np.array(engine.config.alpha_logo_bgr, np.float32)
+    canvas[footprint] = canvas[footprint] * (1 - alpha) + logo_bgr * alpha
+    marked = np.clip(canvas, 0, 255).astype(np.uint8)
+    recovered = engine.remove_watermark_reverse_alpha(marked).astype(np.float32)
+    # Compare the recovered glyph body to the clean texture under the mark.
+    glyph_body = block >= 0.15
+    luma_out = recovered[footprint].mean(axis=2)
+    luma_clean = clean[footprint].mean(axis=2)
+    err = float(np.abs(luma_out[glyph_body] - luma_clean[glyph_body]).mean())
+    assert err < 25.0, f"glyph body not recovered (mean abs err {err:.1f})"