fix(visible): over-subtraction guard for Doubao/Jimeng/Samsung text marks

Port the Gemini sparkle dark-pit guard (commit 41f6797) to the shared
TextMarkEngine reverse-alpha base (roadmap P0#8): on a dark or mid-tone
background the captured alpha can over-estimate this image's mark opacity, and
reverse-alpha leaves a darker-than-background glyph ghost instead of recovering
the true pixels. The sparkle-only fix left the text marks unhandled.

_reverse_alpha_oversubtracts predicts the reverse-alpha output PER PIXEL over the
glyph body from the INPUT ((obs - a*logo)/(1-a), the remover's own math); when
the predicted body lands more than _OVERSUB_DARK_MARGIN (25) gray levels below
the local background ring it abandons the reverse-alpha output for the footprint
and inpaints it from the original surroundings (_inpaint_footprint, wider dilate/
radius than the thin residual pass). Predicting per-pixel from the input (not the
produced output, which depends on which placement the remover picked) keeps a
cleanly captured full-strength mark byte-identical -- it predicts back to the
background everywhere, so the guard never trips on it (verified across all three
engines on white/mid/dark/midgray backgrounds).

Regression-guarded by tests/test_text_mark_oversubtraction.py: predicate True on
faint / False on clean, end-to-end no-dark-pit acceptance, clean-mark byte
identity, and textured-background footprint recovery.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Victor Kuznetsov
2026-06-20 15:34:39 -07:00
parent 0c215b5b2f
commit 33fddbc6fa
2 changed files with 217 additions and 0 deletions
@@ -37,6 +37,28 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# Reverse-alpha over-subtraction guard (ported from gemini_engine, 2026-06-20).
# The reverse-alpha blend ``(wm - a*logo)/(1-a)`` over-subtracts when the captured
# alpha over-estimates THIS image's mark opacity: on a dark or mid-tone background
# it drives the glyph footprint into a visibly DARKER-than-background ghost (a
# "dark pit") instead of recovering the true pixels. The retained-corpus mining
# (2026-06-20) showed the sparkle-only fix (commit 41f6797) left this unhandled
# for the Doubao/Jimeng text marks. Mirror the sparkle gate: when the recovered
# glyph body lands more than this many gray levels below the local background
# ring, abandon the reverse-alpha output for the footprint and inpaint it from
# the surroundings instead. Calibrated to the same 25-level margin the sparkle
# gate uses -- clean text-mark removals recover within ~10 of the ring, the dark
# pit lands tens of levels below.
# Unit: gray levels of the channel-mean luma, compared median(body) vs median(ring).
_OVERSUB_DARK_MARGIN = 25.0
# Glyph-body / background-ring sampling for the guard. The ring is a pad around
# the glyph box (excluding the box); the body is the bright-core glyph pixels.
_OVERSUB_RING_PAD_FRAC = 0.6  # ring pad as a fraction of the glyph-box height (the guard floors the pad at 4 px)
_OVERSUB_BODY_ALPHA_FLOOR = 0.15  # alpha at or above which (``>=``) a block pixel counts as glyph body
# Footprint inpaint when the guard trips: dilate the glyph mask wider than the
# thin residual pass so the whole darkened ghost is reconstructed, not just its edge.
_OVERSUB_INPAINT_DILATE = 9  # side length of the square all-ones dilation kernel
_OVERSUB_INPAINT_RADIUS = 4  # radius argument handed to cv2.inpaint
@dataclass(frozen=True)
class TextMarkConfig:
@@ -335,6 +357,74 @@ class TextMarkEngine:
out[y1:y2, x1:x2] = np.clip((roi - a3 * logo) / np.clip(1.0 - a3, 0.25, 1.0), 0, 255).astype(np.uint8)
return out
def _reverse_alpha_oversubtracts(
    self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
) -> bool:
    """Return True when reverse-alpha would darken the glyph footprint into a dark pit.

    Ported from ``gemini_engine._reverse_alpha_oversubtracts`` (2026-06-20).
    The reverse-alpha output is PREDICTED per pixel from the INPUT and the
    captured alpha, ``(obs - a*logo)/(1-a)`` on channel-mean luma, and the
    guard trips when the median prediction over the glyph body lands more than
    ``_OVERSUB_DARK_MARGIN`` gray levels below the median of the local
    background ring. Predicting from the input (not the produced output) keeps
    the gate independent of which placement the reverse-alpha picked, so a
    clean full-strength mark (whose strokes predict back to the background)
    never trips, while a mark fainter than the capture (over-subtracted into a
    ghost) does.

    Args:
        image: Watermarked input, indexed ``(row, col, channel)``; the channels
            are averaged into a single luma plane.
        amap: Glyph-sized alpha block, expected to be exactly ``(gh, gw)``.
        region: ``(ax, ay, gw, gh)`` -- left/top origin and width/height of the
            glyph box in image pixels.

    Returns:
        ``True`` only when the predicted glyph body is darker than the ring by
        more than the margin. Every "cannot judge" case returns ``False``: a
        glyph box under 4 px on a side, a box that is not fully inside the
        image or does not match ``amap``'s shape, a peak alpha under 0.2, no
        body pixels, or a ring of fewer than 10 pixels.
    """
    ax, ay, gw, gh = region
    ih, iw = image.shape[:2]
    if gw < 4 or gh < 4:
        return False
    # The footprint window below is sliced out of the image and combined
    # element-wise with ``amap``. If the box pokes outside the frame, or the alpha
    # block is not exactly gh x gw, that slice comes back short or empty and the
    # per-pixel math raises. The guard is advisory, so decline to trip rather than
    # take the whole removal down with it.
    if ax < 0 or ay < 0 or ax + gw > iw or ay + gh > ih or amap.shape != (gh, gw):
        return False
    if float(amap.max()) < 0.2:  # too faint a capture to over-subtract meaningfully
        return False
    body_box = amap >= _OVERSUB_BODY_ALPHA_FLOOR  # glyph strokes
    if not bool(body_box.any()):
        return False

    # Luma window = glyph box plus a pad on every side, clamped to the frame.
    pad = max(4, int(gh * _OVERSUB_RING_PAD_FRAC))
    ry1, ry2 = max(0, ay - pad), min(ih, ay + gh + pad)
    rx1, rx2 = max(0, ax - pad), min(iw, ax + gw + pad)
    ring = image[ry1:ry2, rx1:rx2].astype(np.float32).mean(axis=2)
    # Glyph box expressed in window-local coordinates.
    fy1, fy2, fx1, fx2 = ay - ry1, ay - ry1 + gh, ax - rx1, ax - rx1 + gw
    # The ring is everything in the window EXCEPT the glyph box itself.
    ring_mask = np.ones(ring.shape, dtype=bool)
    ring_mask[fy1:fy2, fx1:fx2] = False
    if int(ring_mask.sum()) < 10:
        return False

    # Predict the reverse-alpha output PER PIXEL over the glyph body with the
    # (obs - a*logo)/(1-a) blend inversion, so a cleanly captured mark predicts
    # back to the true background everywhere (no trip), while a mark fainter than
    # the capture predicts a body far below the local ring. The per-pixel alpha
    # (not a single peak value) keeps the prediction faithful across the glyph's
    # anti-aliased alpha gradient.
    # NOTE(review): the reverse-alpha blend earlier in this class divides by
    # ``np.clip(1.0 - a3, 0.25, 1.0)`` and clips the result to 0..255, whereas this
    # prediction only caps alpha at 0.99 and does not clip -- the denominators
    # agree only for alpha <= 0.75. Confirm the divergence above that is intended.
    obs = ring[fy1:fy2, fx1:fx2]
    a = np.clip(amap, 0.0, 0.99)  # cap keeps the denominator away from zero
    logo = float(np.mean(self.config.alpha_logo_bgr))  # logo colour as a single luma level
    predicted = (obs - a * logo) / (1.0 - a)
    predicted_core = float(np.median(predicted[body_box]))
    bg = float(np.median(ring[ring_mask]))
    oversub = predicted_core < bg - _OVERSUB_DARK_MARGIN
    if oversub:
        logger.debug(
            "%s reverse-alpha over-subtracts: predicted core=%.1f bg=%.1f (margin %.0f) -> footprint inpaint",
            self.config.name,
            predicted_core,
            bg,
            _OVERSUB_DARK_MARGIN,
        )
    return oversub
def _inpaint_footprint(
    self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
) -> NDArray[Any]:
    """Fill the glyph footprint by inpainting the ORIGINAL image.

    Fallback for the case where reverse-alpha would over-subtract into a dark
    pit. Because the inpaint runs on ``image`` itself over a dilated glyph mask,
    none of the darkened reverse-alpha pixels can end up in the result.

    ``region`` is ``(ax, ay, gw, gh)`` and ``amap`` is the alpha block placed at
    that box; pixels whose alpha exceeds ``config.residual_alpha_floor`` seed the
    mask.
    """
    left, top, width, height = region
    # 0/255 stamp of the glyph pixels inside the box.
    glyph = np.where(amap > self.config.residual_alpha_floor, 255, 0).astype(np.uint8)
    footprint = np.zeros(image.shape[:2], np.uint8)
    footprint[top : top + height, left : left + width] = glyph
    # Grow the stamp so the whole ghost is covered, not only the stroke cores.
    kernel = np.ones((_OVERSUB_INPAINT_DILATE, _OVERSUB_INPAINT_DILATE), np.uint8)
    footprint = cv2.dilate(footprint, kernel)
    return cv2.inpaint(image, footprint, _OVERSUB_INPAINT_RADIUS, cv2.INPAINT_NS)
def remove_watermark_reverse_alpha(self, image: NDArray[Any], *, residual_inpaint: bool = True) -> NDArray[Any]:
"""Recover the original pixels by inverting the alpha blend, then clear the
residual outline with a thin inpaint over the glyph footprint.
@@ -370,6 +460,13 @@ class TextMarkEngine:
best_residual, best_out, best_amap, best_region = residual, out, amap, region
if best_out is None or best_amap is None or best_region is None: # pragma: no cover - maps is non-empty
return image.copy()
# Over-subtraction guard: on a dark/mid-tone background the captured alpha can
# over-estimate the mark's opacity and reverse-alpha leaves a darker-than-
# background ghost. When the recovered glyph body sits far below the local
# ring, reconstruct the footprint from its surroundings instead of shipping the
# dark pit (the thin residual inpaint cannot fix a footprint-wide darkening).
if self._reverse_alpha_oversubtracts(image, best_amap, best_region):
return self._inpaint_footprint(image, best_amap, best_region)
if residual_inpaint:
# Embed the glyph-sized alpha block into a full-frame uint8 mask only for
# the inpaint (cv2.inpaint needs a mask matching best_out). One uint8
+120
View File
@@ -0,0 +1,120 @@
"""Reverse-alpha over-subtraction guard for the visible text-mark engines.
Ported from the Gemini sparkle fix (commit 41f6797) to Doubao/Jimeng/Samsung
(retained-corpus mining 2026-06-20, roadmap P0#8): on a dark or mid-tone
background the captured alpha can over-estimate THIS image's mark opacity, and
reverse-alpha leaves a darker-than-background glyph ghost (a "dark pit") instead
of recovering the true pixels. The guard predicts the reverse-alpha output per
pixel and, when the glyph body lands far below the local ring, reconstructs the
footprint from the original surroundings instead of shipping the pit.
These tests assert the visual residual (pixel levels vs the local background), not
just a detector re-fire -- a dark pit can clear the NCC detector while still looking wrong.
"""
from __future__ import annotations
import numpy as np
import pytest
from remove_ai_watermarks import image_io
from remove_ai_watermarks._text_mark_engine import _OVERSUB_DARK_MARGIN
from remove_ai_watermarks.doubao_engine import DoubaoEngine
from remove_ai_watermarks.jimeng_engine import JimengEngine
from remove_ai_watermarks.samsung_engine import SamsungEngine
# Engine classes under test; the tests below are parametrized over this list.
_ENGINES = [DoubaoEngine, JimengEngine, SamsungEngine]
def _compose(engine, bg: float, opacity_gain: float, w: int = 1024, h: int = 1024):
"""Composite the engine's captured mark onto a flat ``bg`` at ``opacity_gain``.
``opacity_gain < 1`` makes the mark FAINTER than the capture, so reverse-alpha
at the full captured alpha over-subtracts into a dark pit -- the case the guard
must catch. Returns ``(watermarked_uint8, alpha_block, region)`` where the block
and region are exactly what the engine's reverse-alpha receives.
"""
img = np.full((h, w, 3), float(bg), np.float32)
block, (ax, ay, gw, gh) = engine._fixed_alpha_map(img)
a = np.clip(block * opacity_gain, 0.0, 0.99)[:, :, None]
logo = np.array(engine.config.alpha_logo_bgr, np.float32)
img[ay : ay + gh, ax : ax + gw] = img[ay : ay + gh, ax : ax + gw] * (1 - a) + logo * a
return np.clip(img, 0, 255).astype(np.uint8), block, (ax, ay, gw, gh)
def _body_vs_ring(out, region, block) -> tuple[float, float]:
"""Median luma of the glyph body vs the local background ring in ``out``."""
ax, ay, gw, gh = region
g = out.astype(np.float32).mean(axis=2)
body = block >= 0.15
pad = max(4, int(gh * 0.6))
ry1, ry2 = max(0, ay - pad), min(g.shape[0], ay + gh + pad)
rx1, rx2 = max(0, ax - pad), min(g.shape[1], ax + gw + pad)
ring = g[ry1:ry2, rx1:rx2]
fy1, fy2, fx1, fx2 = ay - ry1, ay - ry1 + gh, ax - rx1, ax - rx1 + gw
ring_mask = np.ones(ring.shape, dtype=bool)
ring_mask[fy1:fy2, fx1:fx2] = False
core = float(np.median(g[ay : ay + gh, ax : ax + gw][body]))
return core, float(np.median(ring[ring_mask]))
@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
class TestOversubtractionGuard:
    """Predicate and end-to-end checks for the guard, run once per engine class."""

    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4), (90, 0.5)])
    def test_guard_trips_on_faint_mark(self, Engine, bg, gain):
        """A mark composited fainter than the capture must trip the predicate."""
        engine = Engine()
        watermarked, alpha_block, region = _compose(engine, bg, gain)
        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(watermarked), alpha_block, region)
        assert tripped

    @pytest.mark.parametrize("bg", [255, 200, 128, 60])
    def test_guard_skips_clean_full_strength_mark(self, Engine, bg):
        """A gain-1.0 mark predicts back to the background, so the predicate stays
        False -- the common clean-removal path must not regress."""
        engine = Engine()
        watermarked, alpha_block, region = _compose(engine, bg, 1.0)
        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(watermarked), alpha_block, region)
        assert not tripped

    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4)])
    def test_faint_removal_leaves_no_dark_pit(self, Engine, bg, gain):
        """End-to-end acceptance (roadmap P0#8): after removal the glyph body is not
        more than _OVERSUB_DARK_MARGIN below the local background ring."""
        engine = Engine()
        watermarked, alpha_block, region = _compose(engine, bg, gain)
        restored = engine.remove_watermark_reverse_alpha(watermarked)
        core, ring_bg = _body_vs_ring(restored, region, alpha_block)
        darkest_allowed = ring_bg - _OVERSUB_DARK_MARGIN
        assert core >= darkest_allowed, f"dark pit: body {core:.0f} vs ring {ring_bg:.0f}"

    def test_clean_mark_removal_unchanged_by_guard(self, Engine, monkeypatch):
        """On a clean mark the guard is a no-op: disabling it gives the same output
        (it only ever diverts the over-subtraction case)."""

        def _never_trips(self, *args, **kwargs):
            return False

        engine = Engine()
        watermarked, _alpha_block, _region = _compose(engine, 200, 1.0)
        with_guard = engine.remove_watermark_reverse_alpha(watermarked)
        monkeypatch.setattr(type(engine), "_reverse_alpha_oversubtracts", _never_trips)
        without_guard = engine.remove_watermark_reverse_alpha(watermarked)
        assert np.array_equal(with_guard, without_guard)
@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
def test_guard_recovers_pit_on_textured_background(Engine):
    """A faint mark on a textured background is recovered to roughly the local
    content level, not left as a glyph-shaped dark ghost, because the guard's
    footprint inpaint rebuilds from the ORIGINAL surroundings."""
    engine = Engine()
    side = 1024
    yy, xx = np.mgrid[0:side, 0:side].astype(np.float32)
    base = 120 + 35 * np.sin(xx / 80.0) + 25 * np.cos(yy / 60.0)

    # Clean reference texture; the mark is blended onto a separate copy of it.
    clean = np.clip(np.stack([base, base * 0.95, base * 1.05], axis=-1), 0, 255)
    canvas = clean.copy()

    block, (ax, ay, gw, gh) = engine._fixed_alpha_map(canvas)
    rows, cols = slice(ay, ay + gh), slice(ax, ax + gw)
    alpha = np.clip(block * 0.45, 0.0, 0.99)[:, :, None]
    logo = np.array(engine.config.alpha_logo_bgr, np.float32)
    canvas[rows, cols] = canvas[rows, cols] * (1 - alpha) + logo * alpha
    watermarked = np.clip(canvas, 0, 255).astype(np.uint8)

    restored = engine.remove_watermark_reverse_alpha(watermarked).astype(np.float32)

    # Compare the recovered glyph body to the clean texture under the mark.
    body = block >= 0.15
    restored_luma = restored[rows, cols].mean(axis=2)
    clean_luma = clean[rows, cols].mean(axis=2)
    err = float(np.abs(restored_luma[body] - clean_luma[body]).mean())
    assert err < 25.0, f"glyph body not recovered (mean abs err {err:.1f})"