Detect text at native resolution (capped), fixing small-text recall on large images (#27)

The text-protection detector scaled every image to a fixed 736 px long side, so
small text on large canvases (e.g. ~16 px on 2048) was downscaled below the
detector's resolution, missed, and then deformed by the SDXL pass (issue #14).
Detection now runs at the native long side, capped at 1536 and never upscaled
(_detection_input_size, a pure
unit-tested helper). Detection is script-agnostic (DB segments regions, not
characters), so this is language-agnostic: a new benchmark
(scripts/text_detection_benchmark.py) measures recall across Latin/Cyrillic/CJK/
Hangul/Arabic/digits x sizes x canvas -> overall hit-rate 0.91 -> 1.00, worst
cell (2048/16 px) 0.06 -> 1.00. Docs updated.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Victor Kuznetsov
2026-05-29 12:28:30 -07:00
committed by GitHub
parent 0c7ff1874e
commit ef6fdaeeec
5 changed files with 224 additions and 8 deletions
+44 -1
View File
@@ -11,7 +11,50 @@ from __future__ import annotations
import numpy as np
from remove_ai_watermarks.text_protector import build_change_map
from remove_ai_watermarks.text_protector import _DET_MAX_LONG_SIDE, _detection_input_size, build_change_map
class TestDetectionInputSize:
    """Pin the sizing contract of ``_detection_input_size`` (issue #14).

    The previous fixed 736 px detector input shrank large canvases so much
    that small text dropped below the detector's resolution and was missed.
    These tests assert the replacement behaviour: the input follows the
    native long side, is capped at ``_DET_MAX_LONG_SIDE``, and is never
    upscaled.

    NOTE(review): the helper appears to take ``(height, width)`` and return
    ``(width, height)``, judging by the local names used below -- confirm
    against its definition.
    """

    def test_large_canvas_not_downscaled_to_old_736(self):
        # Regression guard for #14: a 2048 canvas is clamped to the cap, and
        # the cap sits well above the legacy 736 so ~12-16 px text survives.
        legacy_long_side = 736
        det_w, det_h = _detection_input_size(2048, 2048)
        assert det_w == _DET_MAX_LONG_SIDE
        assert det_h == _DET_MAX_LONG_SIDE
        assert det_w > legacy_long_side

    def test_native_resolution_not_upscaled(self):
        # 1024 stays 1024: neither stretched up to the cap nor shrunk to the
        # legacy 736.
        expected = (1024, 1024)
        actual = _detection_input_size(1024, 1024)
        assert actual == expected

    def test_small_image_is_native(self):
        expected = (512, 512)
        actual = _detection_input_size(512, 512)
        assert actual == expected

    def test_dims_are_multiples_of_32(self):
        # Mix of capped, uncapped, square and non-aligned inputs.
        shapes = [(2048, 1024), (1234, 567), (4096, 4096), (1000, 1000)]
        for height, width in shapes:
            det_w, det_h = _detection_input_size(height, width)
            assert det_w % 32 == 0
            assert det_h % 32 == 0

    def test_aspect_ratio_preserved_when_capped(self):
        # Portrait 2048x1024: the long side lands on the cap and the short
        # side shrinks by the same factor, so the 2:1 aspect roughly holds.
        det_w, det_h = _detection_input_size(2048, 1024)
        assert det_h == _DET_MAX_LONG_SIDE
        ratio_error = abs((det_w / det_h) - 0.5)
        assert ratio_error < 0.05

    def test_floor_at_32(self):
        # Tiny inputs must still yield at least 32 px on each side.
        det_w, det_h = _detection_input_size(10, 5)
        assert det_w >= 32
        assert det_h >= 32
class TestBuildChangeMap: