Detect text at native resolution (capped), fixing small-text recall on large images (#27)

The text-protection detector scaled every image to a fixed 736 px long side, so small text on large canvases (e.g. ~16 px on 2048) was downscaled below the detector's resolution, missed, and then deformed by the SDXL pass (issue #14). Detect at the native long side, capped at 1536 and never upscaled (_detection_input_size, a pure unit-tested helper). Detection is script-agnostic (DB segments regions, not characters), so the fix is language-agnostic: a new benchmark (scripts/text_detection_benchmark.py) measures recall across Latin/Cyrillic/CJK/Hangul/Arabic/digits x sizes x canvas; overall hit-rate 0.91 -> 1.00, worst cell (2048/16 px) 0.06 -> 1.00. Docs updated. Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-25 00:50:48 +02:00 · 2026-05-29 12:28:30 -07:00
parent 0c7ff1874e
commit ef6fdaeeec
5 changed files with 224 additions and 8 deletions
@@ -11,7 +11,50 @@ from __future__ import annotations

 import numpy as np

-from remove_ai_watermarks.text_protector import build_change_map
+from remove_ai_watermarks.text_protector import _DET_MAX_LONG_SIDE, _detection_input_size, build_change_map
+
+
+class TestDetectionInputSize:
+    """Pin down the input size handed to the DB detector (issue #14).
+
+    The previous fixed 736 px input shrank large canvases so much that small
+    text dropped under the detector's resolving power and went undetected.
+    The detector now sees the native long side, capped, and never upscaled.
+    """
+
+    def test_large_canvas_not_downscaled_to_old_736(self):
+        # Issue #14: a 2048 px canvas must be detected at the cap, far above
+        # the legacy 736 px input, so ~12-16 px glyphs stay resolvable.
+        det_w, det_h = _detection_input_size(2048, 2048)
+        assert (det_w, det_h) == (_DET_MAX_LONG_SIDE, _DET_MAX_LONG_SIDE)
+        assert det_w > 736  # legacy fixed input size that lost small text
+
+    def test_native_resolution_not_upscaled(self):
+        # 1024 must come back untouched: neither raised to the cap nor cut to 736.
+        native = _detection_input_size(1024, 1024)
+        assert native == (1024, 1024)
+
+    def test_small_image_is_native(self):
+        side = 512
+        assert _detection_input_size(side, side) == (side, side)
+
+    def test_dims_are_multiples_of_32(self):
+        shapes = [(2048, 1024), (1234, 567), (4096, 4096), (1000, 1000)]
+        for height, width in shapes:
+            det_w, det_h = _detection_input_size(height, width)
+            assert (det_w % 32, det_h % 32) == (0, 0)
+
+    def test_aspect_ratio_preserved_when_capped(self):
+        # A 2048 (h) x 1024 (w) portrait: the long side lands on the cap and the
+        # short side shrinks by the same factor, keeping roughly 1:2 (w:h).
+        det_w, det_h = _detection_input_size(2048, 1024)
+        assert det_h == _DET_MAX_LONG_SIDE
+        assert abs(det_w / det_h - 0.5) < 0.05
+
+    def test_floor_at_32(self):
+        # Tiny inputs must still come out at least 32 px on each side.
+        det_w, det_h = _detection_input_size(10, 5)
+        assert min(det_w, det_h) >= 32


 class TestBuildChangeMap: