Files
remove-ai-watermarks/tests/test_text_mark_memory.py
Victor Kuznetsov 99e57c872f perf(text-mark): footprint-sized arrays in reverse-alpha CPU path
The reverse-alpha text-mark engine (Doubao/Jimeng/Samsung) allocated
full-frame arrays where only the glyph footprint is ever read:

  - _fixed_alpha_map / _aligned_alpha_map each built a full (h, w) float32
    alpha map non-zero only inside the glyph box, and two were held at once
    during removal (~96 MB of mostly-zeros on a 12 MP frame);
  - extract_mask built a full (h, w) uint8 mask that every caller cropped to
    the located box (~12 MB, rebuilt per text-mark detector on the
    memory-tight identify path).

Both now return footprint-sized arrays: the alpha helpers return the
glyph-sized block plus its placement (ax, ay, gw, gh), and extract_mask
returns the box-sized mask. _apply_reverse_alpha consumes the block
directly; the residual inpaint embeds it into one full-frame uint8 mask only
at cv2.inpaint time (which needs a full-frame mask).
remove_watermark_reverse_alpha tracks the winning region alongside best_amap
to place it.

Peak allocation drops from O(image*4)x2 + O(image) to O(footprint)x2 +
one gated O(image*1) uint8 mask -- a win every consumer gets, motivated by
the 512 MB raiw.cc worker that OOMs on large decodes. GPU path untouched.

Byte-identical to the old full-frame path (verified: 17 output hashes
across the three engines, inpaint/no-inpaint, detect, and the real
doubao-1.png fixture, unchanged before/after). tests/test_text_mark_memory.py
guards it by reconstructing the old full-frame path inline and asserting
equality, so the proof survives a cv2/asset bump, and pins the O(footprint)
shape so a regression to full-frame fails loudly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-19 10:01:07 -07:00

135 lines
5.9 KiB
Python

"""Byte-identity guards for the text-mark engine memory optimization.
The reverse-alpha text-mark engine used to allocate full-frame arrays where only
the glyph footprint is ever read:
* ``extract_mask`` built a full ``(h, w)`` uint8 mask and every caller cropped
it to the located box;
* ``_fixed_alpha_map`` / ``_aligned_alpha_map`` each built a full ``(h, w)``
float32 alpha map that is non-zero only inside the glyph box, and two were
held at once during removal.
Both now return footprint-sized arrays. These tests prove the new footprint-sized
path is BYTE-IDENTICAL to the old full-frame path by reconstructing the old
behavior inline from the new building blocks (so the proof survives a cv2/asset
version bump, unlike a pinned output hash), and lock in the O(footprint) memory
characteristic so a regression back to a full-frame allocation fails loudly.
"""
from __future__ import annotations
import cv2
import numpy as np
import pytest
import remove_ai_watermarks.doubao_engine as D
import remove_ai_watermarks.jimeng_engine as J
import remove_ai_watermarks.samsung_engine as S
from remove_ai_watermarks.doubao_engine import DoubaoEngine
from remove_ai_watermarks.jimeng_engine import JimengEngine
from remove_ai_watermarks.samsung_engine import SamsungEngine
# One pytest parameter set per reverse-alpha text mark: (engine factory, engine
# module), with the engine's short name as the test id.
ENGINES = [
    pytest.param(engine_cls, engine_module, id=test_id)
    for test_id, engine_cls, engine_module in (
        ("doubao", DoubaoEngine, D),
        ("jimeng", JimengEngine, J),
        ("samsung", SamsungEngine, S),
    )
]
def _watermarked(engine, module) -> np.ndarray:
    """Build a synthetic watermarked frame for ``engine``.

    The module's alpha template is resized to the glyph box derived from the
    engine config and alpha-blended, in the config's logo colour, over a flat
    field of value 100. The canvas is square with side
    ``module._ALPHA_NATIVE_WIDTH`` (the captured native width, so both
    placement candidates fire).

    Returns:
        A ``(side, side, 3)`` uint8 image.
    """
    config = engine.config
    side = module._ALPHA_NATIVE_WIDTH
    template = module._alpha_template()

    # Glyph box size and offsets are config fractions of the canvas side.
    glyph_w = int(config.alpha_width_frac * side)
    glyph_h = int(config.alpha_height_frac * side)
    margin_x = int(config.alpha_margin_x_frac * side)
    if config.corner == "br":
        # The "br" corner measures the horizontal margin from the right edge.
        left = side - margin_x - glyph_w
    else:
        left = margin_x
    top = side - int(config.alpha_margin_bottom_frac * side) - glyph_h

    # Full-canvas alpha that is zero everywhere except the glyph box.
    alpha = np.zeros((side, side), np.float32)
    alpha[top : top + glyph_h, left : left + glyph_w] = cv2.resize(template, (glyph_w, glyph_h))
    alpha3 = alpha[:, :, None]

    background = np.full((side, side, 3), 100.0, np.float32)
    logo_bgr = np.array(config.alpha_logo_bgr, np.float32)
    blended = alpha3 * logo_bgr + (1 - alpha3) * background
    return blended.clip(0, 255).astype(np.uint8)
@pytest.mark.parametrize(("factory", "module"), ENGINES)
class TestExtractMaskFootprint:
    """Checks that ``extract_mask`` yields a box-sized mask for every engine in ``ENGINES``."""

    def test_returns_box_sized_mask(self, factory, module):
        """The mask is uint8, shaped like the located box, and much smaller than the frame."""
        engine = factory()
        frame = _watermarked(engine, module)
        located = engine.locate(frame)
        mask = engine.extract_mask(frame, located)

        frame_h, frame_w = frame.shape[:2]

        assert mask.dtype == np.uint8
        # Same shape as the located box, i.e. what a [y:y+bh, x:x+bw] crop of a
        # full-frame mask would have.
        assert mask.shape == (located.h, located.w)
        # Footprint, not full frame: the mask covers under a quarter of the pixels.
        assert mask.size * 4 < frame_h * frame_w
@pytest.mark.parametrize(("factory", "module"), ENGINES)
class TestAlphaMapFootprint:
    """Guards for the glyph-sized alpha blocks returned by the alpha-map helpers.

    Runs once per ``ENGINES`` entry: ``factory`` builds the engine under test and
    ``module`` is the engine module ``_watermarked`` reads its template from.
    """

    def test_maps_are_footprint_sized_blocks(self, factory, module):
        """Both alpha helpers return a float32 ``(gh, gw)`` block placed inside the frame."""
        eng = factory()
        img = _watermarked(eng, module)
        for placed in (eng._fixed_alpha_map(img), eng._aligned_alpha_map(img)):
            assert placed is not None
            block, (ax, ay, gw, gh) = placed
            assert block.dtype == np.float32
            assert block.shape == (gh, gw)
            # The placement stays fully inside the image (no clipping needed).
            assert ax >= 0
            assert ax + gw <= img.shape[1]
            assert ay >= 0
            assert ay + gh <= img.shape[0]
            # O(footprint): far smaller than the frame.
            assert block.size * 4 < img.shape[0] * img.shape[1]

    def test_apply_reverse_alpha_equals_old_fullframe(self, factory, module):
        """``_apply_reverse_alpha`` with the glyph block is byte-identical to the
        old full-frame path: rebuild the full ``(h, w)`` map, run the old-style
        full-frame reverse-alpha, and compare to the new block-based output."""
        eng = factory()
        img = _watermarked(eng, module)
        h, w = img.shape[:2]
        for placed in (eng._fixed_alpha_map(img), eng._aligned_alpha_map(img)):
            assert placed is not None
            block, region = placed
            ax, ay, gw, gh = region
            new_out = eng._apply_reverse_alpha(img, block, region)
            # Old behavior: a full-frame map, indexed by region inside _apply_reverse_alpha.
            full = np.zeros((h, w), np.float32)
            full[ay : ay + gh, ax : ax + gw] = block
            old_out = img.copy()
            a3 = np.clip(full[ay : ay + gh, ax : ax + gw], 0.0, 1.0)[:, :, None]
            logo = np.array(eng.config.alpha_logo_bgr, np.float32)
            roi = old_out[ay : ay + gh, ax : ax + gw].astype(np.float32)
            # Invert the alpha blend inside the region only; the denominator is
            # clamped to [0.25, 1.0] in this reconstruction of the old path.
            old_out[ay : ay + gh, ax : ax + gw] = np.clip(
                (roi - a3 * logo) / np.clip(1.0 - a3, 0.25, 1.0), 0, 255
            ).astype(np.uint8)
            assert np.array_equal(new_out, old_out)

    def test_residual_mask_equals_old_fullframe(self, factory, module):
        """The residual inpaint mask built from the block embedded in a full-frame
        canvas equals thresholding the old full-frame float32 map (zero outside the
        block), so the dilate + inpaint see the same mask."""
        eng = factory()
        img = _watermarked(eng, module)
        h, w = img.shape[:2]
        cfg = eng.config
        placed = eng._fixed_alpha_map(img)
        # Fail with a clear assertion, as the sibling tests do, instead of a
        # TypeError from unpacking None when the helper yields no placement.
        assert placed is not None
        block, (ax, ay, gw, gh) = placed
        # New: embed the block into a uint8 canvas, then threshold.
        new_mask = np.zeros((h, w), np.uint8)
        new_mask[ay : ay + gh, ax : ax + gw] = (block > cfg.residual_alpha_floor).astype(np.uint8) * 255
        # Old: a full-frame float32 map, thresholded everywhere.
        old_full = np.zeros((h, w), np.float32)
        old_full[ay : ay + gh, ax : ax + gw] = block
        old_mask = (old_full > cfg.residual_alpha_floor).astype(np.uint8) * 255
        assert np.array_equal(new_mask, old_mask)