mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-07-04 23:47:49 +02:00
fix(visible): over-subtraction guard for Doubao/Jimeng/Samsung text marks
Port the Gemini sparkle dark-pit guard (commit 41f6797) to the shared
TextMarkEngine reverse-alpha base (roadmap P0#8): on a dark or mid-tone
background the captured alpha can over-estimate this image's mark opacity, and
reverse-alpha leaves a darker-than-background glyph ghost instead of recovering
the true pixels. The sparkle-only fix left the text marks unhandled.
_reverse_alpha_oversubtracts predicts the reverse-alpha output PER PIXEL over the
glyph body from the INPUT ((obs - a*logo)/(1-a), the remover's own math); when
the predicted body lands more than _OVERSUB_DARK_MARGIN (25) gray levels below
the local background ring it abandons the reverse-alpha output for the footprint
and inpaints it from the original surroundings (_inpaint_footprint, wider dilate/
radius than the thin residual pass). Predicting per-pixel from the input (not the
produced output, which depends on which placement the remover picked) keeps a
cleanly captured full-strength mark byte-identical -- it predicts back to the
background everywhere, so the guard never trips on it (verified across all three
engines on white/mid/dark/midgray backgrounds).
Regression-guarded by tests/test_text_mark_oversubtraction.py: predicate True on
faint / False on clean, end-to-end no-dark-pit acceptance, clean-mark byte
identity, and textured-background footprint recovery.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -37,6 +37,28 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Reverse-alpha over-subtraction guard (ported from gemini_engine, 2026-06-20) ---
# Inverting the blend with ``(wm - a*logo)/(1-a)`` subtracts too much whenever the
# captured alpha is larger than the opacity the mark actually has in THIS image.
# On a dark or mid-tone background the result is a glyph-shaped ghost that is
# visibly DARKER than its surroundings (a "dark pit") rather than the true pixels.
# Retained-corpus mining (2026-06-20) showed that the sparkle-only fix (commit
# 41f6797) did not cover the Doubao/Jimeng text marks, so the sparkle gate is
# mirrored here: if the recovered glyph body ends up more than this many gray
# levels under the local background ring, the reverse-alpha output is discarded
# for the footprint and the footprint is inpainted from its surroundings.
# The 25-level margin is the one the sparkle gate uses: clean text-mark removals
# recover to within ~10 levels of the ring, a dark pit sits tens of levels below.
_OVERSUB_DARK_MARGIN = 25.0

# Sampling geometry for the guard. The background ring is a pad placed around the
# glyph box (the box itself is excluded); the body is the bright-core glyph pixels.
_OVERSUB_RING_PAD_FRAC = 0.6  # ring pad, expressed as a fraction of the glyph-box height
_OVERSUB_BODY_ALPHA_FLOOR = 0.15  # a block pixel with alpha above this counts as glyph body

# Footprint inpaint used once the guard trips. The glyph mask is dilated wider
# than in the thin residual pass so that the whole darkened ghost is rebuilt,
# not only its outline.
_OVERSUB_INPAINT_DILATE = 9
_OVERSUB_INPAINT_RADIUS = 4
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TextMarkConfig:
|
||||
@@ -335,6 +357,74 @@ class TextMarkEngine:
|
||||
out[y1:y2, x1:x2] = np.clip((roi - a3 * logo) / np.clip(1.0 - a3, 0.25, 1.0), 0, 255).astype(np.uint8)
|
||||
return out
|
||||
|
||||
def _reverse_alpha_oversubtracts(
    self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
) -> bool:
    """Return True when reverse-alpha would darken the glyph footprint into a dark pit.

    Ported from ``gemini_engine._reverse_alpha_oversubtracts`` (2026-06-20).
    The reverse-alpha result is PREDICTED from the input luma and the captured
    alpha, ``(obs - a*logo)/(1-a)``, at every glyph-body pixel. The guard trips
    when the median prediction lands more than ``_OVERSUB_DARK_MARGIN`` gray
    levels below the median of the surrounding background ring. Predicting from
    the input (not from a produced output) keeps the gate independent of which
    placement the reverse-alpha picked: a cleanly captured full-strength mark
    predicts back to the background and never trips, while a mark fainter than
    the capture predicts a body far below the ring.

    NOTE(review): the prediction works on channel-mean luma with alpha clamped
    to 0.99, whereas the reverse-alpha blend in this file clamps its denominator
    to >= 0.25 and clips to uint8. For alpha above 0.75 the two are therefore
    not identical -- confirm this difference is intended.

    Args:
        image: Input (still watermarked) image, indexed as ``(H, W, C)``; the
            channels are averaged into a single luma plane.
        amap: Captured alpha block for the glyph box, expected shape ``(gh, gw)``.
        region: Glyph box as ``(x, y, width, height)`` in image pixels.

    Returns:
        ``True`` when the predicted glyph body is more than
        ``_OVERSUB_DARK_MARGIN`` below the ring median. ``False`` otherwise,
        including every case where no reliable measurement is possible (box
        smaller than 4 px, box not fully inside the image or not matching
        ``amap``, capture peak below 0.2, no body pixels, fewer than 10 ring
        pixels).
    """
    ax, ay, gw, gh = region
    ih, iw = image.shape[:2]
    if gw < 4 or gh < 4:
        return False
    # The footprint slice below is only aligned with ``amap`` when the glyph box
    # lies fully inside the image and the block has the box's shape. Otherwise
    # the slice is clipped (or wraps on a negative offset) and the broadcast /
    # boolean index would raise, so decline to judge instead of crashing.
    if ax < 0 or ay < 0 or ax + gw > iw or ay + gh > ih or amap.shape != (gh, gw):
        return False
    if float(amap.max()) < 0.2:  # too faint a capture to over-subtract meaningfully
        return False
    body_box = amap >= _OVERSUB_BODY_ALPHA_FLOOR  # glyph strokes
    if not bool(body_box.any()):
        return False

    # Luma window: the glyph box grown by ``pad`` on every side, clipped to the image.
    pad = max(4, int(gh * _OVERSUB_RING_PAD_FRAC))
    ry1, ry2 = max(0, ay - pad), min(ih, ay + gh + pad)
    rx1, rx2 = max(0, ax - pad), min(iw, ax + gw + pad)
    ring = image[ry1:ry2, rx1:rx2].astype(np.float32).mean(axis=2)

    # Glyph box in window coordinates; everything outside it is the background ring.
    fy1, fy2, fx1, fx2 = ay - ry1, ay - ry1 + gh, ax - rx1, ax - rx1 + gw
    ring_mask = np.ones(ring.shape, dtype=bool)
    ring_mask[fy1:fy2, fx1:fx2] = False
    if int(ring_mask.sum()) < 10:
        return False

    # Predict the reverse-alpha output PER PIXEL over the glyph body with the
    # (obs - a*logo)/(1-a) inversion, so a cleanly captured mark predicts back to
    # the true background (no trip), while a mark fainter than the capture
    # predicts a body far below the local ring. Using the per-pixel alpha (not a
    # single peak value) keeps the prediction faithful across the glyph's
    # anti-aliased alpha gradient.
    obs = ring[fy1:fy2, fx1:fx2]
    a = np.clip(amap, 0.0, 0.99)  # keep the denominator away from zero
    logo = float(np.mean(self.config.alpha_logo_bgr))
    predicted = (obs - a * logo) / (1.0 - a)
    predicted_core = float(np.median(predicted[body_box]))
    bg = float(np.median(ring[ring_mask]))

    oversub = predicted_core < bg - _OVERSUB_DARK_MARGIN
    if oversub:
        logger.debug(
            "%s reverse-alpha over-subtracts: predicted core=%.1f bg=%.1f (margin %.0f) -> footprint inpaint",
            self.config.name,
            predicted_core,
            bg,
            _OVERSUB_DARK_MARGIN,
        )
    return oversub
|
||||
|
||||
def _inpaint_footprint(
    self, image: NDArray[Any], amap: NDArray[Any], region: tuple[int, int, int, int]
) -> NDArray[Any]:
    """Rebuild the glyph footprint from the pixels around it.

    Used when reverse-alpha would over-subtract into a dark pit. The inpaint runs
    on the ORIGINAL ``image`` over a dilated glyph mask, so none of the darkened
    reverse-alpha pixels can appear in the result.
    """
    left, top, width, height = region

    # Binary glyph mask (255 where alpha exceeds the residual floor), placed into
    # a full-frame mask at the glyph box.
    glyph = np.where(amap > self.config.residual_alpha_floor, 255, 0).astype(np.uint8)
    footprint = np.zeros(image.shape[:2], dtype=np.uint8)
    footprint[top : top + height, left : left + width] = glyph

    # Grow the mask with a square kernel before inpainting.
    kernel_shape = (_OVERSUB_INPAINT_DILATE, _OVERSUB_INPAINT_DILATE)
    footprint = cv2.dilate(footprint, np.ones(kernel_shape, dtype=np.uint8))
    return cv2.inpaint(image, footprint, _OVERSUB_INPAINT_RADIUS, cv2.INPAINT_NS)
|
||||
|
||||
def remove_watermark_reverse_alpha(self, image: NDArray[Any], *, residual_inpaint: bool = True) -> NDArray[Any]:
|
||||
"""Recover the original pixels by inverting the alpha blend, then clear the
|
||||
residual outline with a thin inpaint over the glyph footprint.
|
||||
@@ -370,6 +460,13 @@ class TextMarkEngine:
|
||||
best_residual, best_out, best_amap, best_region = residual, out, amap, region
|
||||
if best_out is None or best_amap is None or best_region is None: # pragma: no cover - maps is non-empty
|
||||
return image.copy()
|
||||
# Over-subtraction guard: on a dark/mid-tone background the captured alpha can
|
||||
# over-estimate the mark's opacity and reverse-alpha leaves a darker-than-
|
||||
# background ghost. When the recovered glyph body sits far below the local
|
||||
# ring, reconstruct the footprint from its surroundings instead of shipping the
|
||||
# dark pit (the thin residual inpaint cannot fix a footprint-wide darkening).
|
||||
if self._reverse_alpha_oversubtracts(image, best_amap, best_region):
|
||||
return self._inpaint_footprint(image, best_amap, best_region)
|
||||
if residual_inpaint:
|
||||
# Embed the glyph-sized alpha block into a full-frame uint8 mask only for
|
||||
# the inpaint (cv2.inpaint needs a mask matching best_out). One uint8
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
"""Reverse-alpha over-subtraction guard for the visible text-mark engines.
|
||||
|
||||
Ported from the Gemini sparkle fix (commit 41f6797) to Doubao/Jimeng/Samsung
|
||||
(retained-corpus mining 2026-06-20, roadmap P0#8): on a dark or mid-tone
|
||||
background the captured alpha can over-estimate THIS image's mark opacity, and
|
||||
reverse-alpha leaves a darker-than-background glyph ghost (a "dark pit") instead
|
||||
of recovering the true pixels. The guard predicts the reverse-alpha output per
|
||||
pixel and, when the glyph body lands far below the local ring, reconstructs the
|
||||
footprint from the original surroundings instead of shipping the pit.
|
||||
|
||||
These assert visual residual (pixel levels vs the local background), not just a
|
||||
detector re-fire -- a dark pit can clear the NCC detector while still looking wrong.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from remove_ai_watermarks import image_io
|
||||
from remove_ai_watermarks._text_mark_engine import _OVERSUB_DARK_MARGIN
|
||||
from remove_ai_watermarks.doubao_engine import DoubaoEngine
|
||||
from remove_ai_watermarks.jimeng_engine import JimengEngine
|
||||
from remove_ai_watermarks.samsung_engine import SamsungEngine
|
||||
|
||||
_ENGINES = [DoubaoEngine, JimengEngine, SamsungEngine]
|
||||
|
||||
|
||||
def _compose(engine, bg: float, opacity_gain: float, w: int = 1024, h: int = 1024):
    """Blend the engine's captured mark onto a flat ``bg`` canvas at ``opacity_gain``.

    With ``opacity_gain < 1`` the composited mark is FAINTER than the capture, so
    reverse-alpha at the full captured alpha over-subtracts into a dark pit -- the
    case the guard has to catch. Returns ``(watermarked_uint8, alpha_block, region)``;
    the block and region are exactly what the engine's reverse-alpha receives.
    """
    canvas = np.full((h, w, 3), float(bg), np.float32)
    block, (ax, ay, gw, gh) = engine._fixed_alpha_map(canvas)

    # Scaled alpha, broadcast over the three colour channels.
    alpha = np.clip(block * opacity_gain, 0.0, 0.99)[:, :, None]
    logo_bgr = np.array(engine.config.alpha_logo_bgr, np.float32)

    rows, cols = slice(ay, ay + gh), slice(ax, ax + gw)
    canvas[rows, cols] = canvas[rows, cols] * (1 - alpha) + logo_bgr * alpha

    watermarked = np.clip(canvas, 0, 255).astype(np.uint8)
    return watermarked, block, (ax, ay, gw, gh)
|
||||
|
||||
|
||||
def _body_vs_ring(out, region, block) -> tuple[float, float]:
    """Return ``(body_median, ring_median)`` luma for the glyph box in ``out``.

    The body is every pixel of the glyph box whose ``block`` alpha is >= 0.15; the
    ring is the box grown by ``max(4, int(0.6 * height))`` pixels on each side
    (clipped to the image) with the box itself excluded.
    """
    ax, ay, gw, gh = region
    luma = out.astype(np.float32).mean(axis=2)
    img_h, img_w = luma.shape[0], luma.shape[1]

    pad = max(4, int(gh * 0.6))
    top, bottom = max(0, ay - pad), min(img_h, ay + gh + pad)
    left, right = max(0, ax - pad), min(img_w, ax + gw + pad)
    window = luma[top:bottom, left:right]

    # Everything in the window except the glyph box is background ring.
    outside_box = np.ones(window.shape, dtype=bool)
    outside_box[ay - top : ay - top + gh, ax - left : ax - left + gw] = False

    glyph_box = luma[ay : ay + gh, ax : ax + gw]
    body_level = float(np.median(glyph_box[block >= 0.15]))
    ring_level = float(np.median(window[outside_box]))
    return body_level, ring_level
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
class TestOversubtractionGuard:
    """Over-subtraction guard checks, run once per text-mark engine on flat backgrounds."""

    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4), (90, 0.5)])
    def test_guard_trips_on_faint_mark(self, Engine, bg, gain):
        # A mark composited fainter than the capture must be flagged by the predicate.
        engine = Engine()
        watermarked, alpha_block, glyph_region = _compose(engine, bg, gain)
        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(watermarked), alpha_block, glyph_region)
        assert tripped

    @pytest.mark.parametrize("bg", [255, 200, 128, 60])
    def test_guard_skips_clean_full_strength_mark(self, Engine, bg):
        # At gain 1.0 the mark is captured cleanly and predicts back to the
        # background, so the guard has to stay quiet -- the common clean-removal
        # path must not regress.
        engine = Engine()
        watermarked, alpha_block, glyph_region = _compose(engine, bg, 1.0)
        tripped = engine._reverse_alpha_oversubtracts(image_io.to_bgr(watermarked), alpha_block, glyph_region)
        assert not tripped

    @pytest.mark.parametrize(("bg", "gain"), [(120, 0.45), (150, 0.4)])
    def test_faint_removal_leaves_no_dark_pit(self, Engine, bg, gain):
        # End-to-end acceptance (roadmap P0#8): once the mark is removed, the glyph
        # footprint must not sit more than _OVERSUB_DARK_MARGIN under the local
        # background.
        engine = Engine()
        watermarked, alpha_block, glyph_region = _compose(engine, bg, gain)
        cleaned = engine.remove_watermark_reverse_alpha(watermarked)
        core, ring_bg = _body_vs_ring(cleaned, glyph_region, alpha_block)
        assert core >= ring_bg - _OVERSUB_DARK_MARGIN, f"dark pit: body {core:.0f} vs ring {ring_bg:.0f}"

    def test_clean_mark_removal_unchanged_by_guard(self, Engine, monkeypatch):
        # For a clean mark the guard is a no-op: disabling it must give the very
        # same output (the guard only ever diverts the over-subtraction case).
        def _never_trips(self, *args, **kwargs):
            return False

        engine = Engine()
        watermarked, _alpha_block, _glyph_region = _compose(engine, 200, 1.0)
        with_guard = engine.remove_watermark_reverse_alpha(watermarked)
        monkeypatch.setattr(type(engine), "_reverse_alpha_oversubtracts", _never_trips)
        without_guard = engine.remove_watermark_reverse_alpha(watermarked)
        assert np.array_equal(with_guard, without_guard)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Engine", _ENGINES, ids=lambda e: e.__name__)
def test_guard_recovers_pit_on_textured_background(Engine):
    """A faint mark on a textured background is rebuilt to roughly the local
    content level: the guard's footprint inpaint draws on the ORIGINAL
    surroundings, so no glyph-shaped dark ghost is left behind."""
    engine = Engine()
    size = 1024

    # Smooth synthetic texture with a slight per-channel tint.
    yy, xx = np.mgrid[0:size, 0:size].astype(np.float32)
    base = 120 + 35 * np.sin(xx / 80.0) + 25 * np.cos(yy / 60.0)
    clean = np.clip(np.stack([base, base * 0.95, base * 1.05], axis=-1), 0, 255)

    # Composite the captured mark at 45% of its captured opacity onto a copy.
    canvas = clean.copy()
    block, (ax, ay, gw, gh) = engine._fixed_alpha_map(canvas)
    alpha = np.clip(block * 0.45, 0.0, 0.99)[:, :, None]
    logo_bgr = np.array(engine.config.alpha_logo_bgr, np.float32)
    rows, cols = slice(ay, ay + gh), slice(ax, ax + gw)
    canvas[rows, cols] = canvas[rows, cols] * (1 - alpha) + logo_bgr * alpha
    watermarked = np.clip(canvas, 0, 255).astype(np.uint8)

    recovered = engine.remove_watermark_reverse_alpha(watermarked).astype(np.float32)

    # Compare the recovered glyph body to the clean texture under the mark.
    body = block >= 0.15
    luma_out = recovered[rows, cols].mean(axis=2)
    luma_clean = clean[rows, cols].mean(axis=2)
    err = float(np.abs(luma_out[body] - luma_clean[body]).mean())
    assert err < 25.0, f"glyph body not recovered (mean abs err {err:.1f})"
|
||||
Reference in New Issue
Block a user