fix(gemini): rescue small corner sparkle buried by the size weight (#36)

detect_watermark's size-weighted global NCC search lets a larger, mediocre
match (e.g. a bright collar in a portrait) outrank a small, near-perfect
sparkle in the bottom-right corner, so a faint sparkle on a busy background
scored below threshold and the image read as clean -- the regression from
widening the search window 256px->512px between v0.7.2 and v0.8.8.

Add _corner_promote: a bottom-right-corner raw-NCC pass that overrides the
global pick when the corner holds a match with raw NCC >= 0.85 that beats it.
It only ever replaces a lower-fidelity pick (cannot weaken an existing
detection) and keeps the wider window for variant margins. The corner side is
relative-clamped (0.20 of the short side, [96, 384]) so it stays a true corner
at every scale: a fixed 256px covers ~70% of a small portrait, where a real
photo raw-matches the star at ~0.81; relative tightening drops that to ~0.69.
The 0.85 gate sits between the worst real-photo corner match (~0.78) and a
genuine faint sparkle (~0.93): zero false positives across native + downscaled
negatives, headshot rescued from below-threshold to 0.71.

Factor the shared multi-scale matchTemplate loop into _scan_scales.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Victor Kuznetsov
2026-06-03 16:51:03 -07:00
parent 07c96bed53
commit 175609b60a
3 changed files with 160 additions and 16 deletions
+1 -1
View File
File diff suppressed because one or more lines are too long
+94 -15
View File
@@ -29,6 +29,8 @@ import cv2
import numpy as np
if TYPE_CHECKING:
from collections.abc import Iterator
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
@@ -138,6 +140,34 @@ class GeminiEngine:
# gate separates them with a wide margin.
_OVERSUB_FOOTPRINT_FRAC = 0.05
# Corner promotion (issue #36): the size weight that suppresses tiny-patch
# false positives also buries a small, near-perfect sparkle when a larger,
# mediocre match sits elsewhere (e.g. a bright collar in a portrait). A small
# faint sparkle on a busy background therefore loses the global argmax and the
# image reads as clean -- the regression osachub reported when the search
# window widened 256px -> 512px (v0.7.2's tighter window still found it).
# Remedy: if the bottom-right corner holds a very-high-fidelity raw-NCC match,
# trust it regardless of size, without reverting the wider window (which is
# needed for variant margins). The threshold sits midway between the worst
# real-photo corner match (~0.78 across native + downscaled real photos) and a
# genuine faint sparkle (~0.93), so it adds true detections without adding
# false ones; it only ever overrides a lower-fidelity global pick, so it cannot
# weaken an existing detection.
# Compared against the raw (not size-weighted) NCC of the best corner match.
_CORNER_PROMOTE_NCC = 0.85
# Bottom-right corner side for the promotion search, as a fraction of the
# image's short side, clamped to an absolute pixel band. Relative so the corner
# stays a true corner at every scale: a fixed 256 px is a genuine corner on a
# large image but covers ~70% of a small portrait, where a busy real photo can
# then raw-match the star template at ~0.81 (only 0.04 below the promote gate).
# Scaling the side down on small images drops that worst case to ~0.69, while
# the upper clamp stops it ballooning on huge images (more corner area = more
# random texture to false-match -- a real photo reached ~0.83 at 512 px). The
# Gemini sparkle sits ~60-160 px from the corner (fixed margins, not
# proportional), and the [96, 384] band covers that at every measured size.
# With these values the fraction decides the side only for short sides between
# 480 px (96 / 0.20) and 1920 px (384 / 0.20); smaller images get the 96 px
# floor and larger ones the 384 px cap.
_CORNER_PROMOTE_FRAC = 0.20  # fraction of the image's short side
_CORNER_PROMOTE_MIN = 96  # px, lower clamp on the corner side
_CORNER_PROMOTE_MAX = 384  # px, upper clamp on the corner side
def __init__(self, logo_value: float = 255.0) -> None:
"""Initialize the engine with embedded alpha maps.
@@ -183,6 +213,24 @@ class GeminiEngine:
# ── Detection ────────────────────────────────────────────────────
def _scan_scales(self, gray: NDArray[Any]) -> Iterator[tuple[int, float, tuple[int, int]]]:
    """Match the alpha template against ``gray`` once per candidate scale.

    Generator shared by the two detection passes: the size-weighted global
    search in ``detect_watermark`` and the raw-NCC corner pass in
    ``_corner_promote``. It performs no scoring of its own; each caller
    applies its own weighting/argmax to what is yielded.

    Args:
        gray: Single-channel search region, normalized to [0, 1].

    Yields:
        ``(scale, max_ncc, max_loc)`` for each even scale from 16 to 118 that
        fits inside ``gray``: the template side in pixels, the peak
        ``TM_CCOEFF_NORMED`` response, and the ``(x, y)`` of that peak within
        ``gray``.

    The 96x96 ``_alpha_large`` map is the high-quality source that is resized
    to each scale, so the range spans aggressively downscaled through slightly
    upscaled logos.
    """
    # A square template fits only while its side is within the shorter axis.
    largest_fit = min(gray.shape[0], gray.shape[1])
    for side in range(16, 120, 2):
        if side > largest_fit:
            # Scales ascend, so every remaining one is too large as well.
            break
        template = cv2.resize(self._alpha_large, (side, side), interpolation=cv2.INTER_AREA)
        response = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
        # minMaxLoc returns (min_val, max_val, min_loc, max_loc); only the peak matters.
        extrema = cv2.minMaxLoc(response)
        yield side, float(extrema[1]), extrema[3]
def detect_watermark(
self,
image: NDArray[Any],
@@ -198,9 +246,6 @@ class GeminiEngine:
base_size = force_size or get_watermark_size(w, h)
result.size = base_size
# Use large alpha template (96x96) as the high-quality source for downscaling
source_alpha = self._alpha_large
# Dynamically search bottom-right corner. 512 covers up to 512px from the
# corner -- enough for known Gemini margin variations (standard: 64+96=160px;
# observed variants up to ~300px). 256 was too tight and caused misses.
@@ -216,25 +261,15 @@ class GeminiEngine:
gray_sr_f = gray_sr.astype(np.float32) / 255.0
# Phase 1 & 2: Multi-scale spatial NCC search
# Phase 1 & 2: multi-scale spatial NCC search, size-weighted argmax.
best_scale = 0
best_score = -1.0
best_raw_ncc = -1.0
best_loc = (0, 0)
# Search scales from 16 to 120 (covering aggressively downscaled or slightly upscaled logos)
for scale in range(16, 120, 2):
if scale > search_region.shape[0] or scale > search_region.shape[1]:
continue
tmpl = cv2.resize(source_alpha, (scale, scale), interpolation=cv2.INTER_AREA)
match_res = cv2.matchTemplate(gray_sr_f, tmpl, cv2.TM_CCOEFF_NORMED)
_, max_val, _, max_loc = cv2.minMaxLoc(match_res)
for scale, max_val, max_loc in self._scan_scales(gray_sr_f):
# Size-adjusted score to overcome NCC bias toward tiny patches (mimics C++ weight)
weight = min(1.0, (scale / 96.0) ** 0.5)
adj_val = max_val * weight
if adj_val > best_score:
best_score = adj_val
best_scale = scale
@@ -244,6 +279,14 @@ class GeminiEngine:
# Exact dynamic location & size
pos_x = sx1 + best_loc[0]
pos_y = sy1 + best_loc[1]
# Corner promotion: a near-perfect but small sparkle in the bottom-right
# corner is otherwise outranked by a larger, mediocre size-weighted match
# (see _CORNER_PROMOTE_NCC). Override the global pick with it when present.
promoted = self._corner_promote(image, best_raw_ncc)
if promoted is not None:
best_scale, pos_x, pos_y, best_raw_ncc = promoted
result.region = (pos_x, pos_y, best_scale, best_scale)
result.spatial_score = float(best_raw_ncc)
@@ -317,6 +360,42 @@ class GeminiEngine:
return result
def _corner_promote(
    self,
    image: NDArray[Any],
    current_raw_ncc: float,
) -> tuple[int, int, int, float] | None:
    """Look for a very-high-fidelity sparkle match in the bottom-right corner.

    Rescues a small sparkle that the size weight buried under a larger,
    lower-fidelity match elsewhere in the image.

    Args:
        image: Full input image, either 2-D grayscale or with a channel axis.
        current_raw_ncc: Raw NCC of the pick the global search already made.

    Returns:
        ``(scale, x, y, raw_ncc)`` in full-image coordinates when the best
        corner match has raw NCC >= ``_CORNER_PROMOTE_NCC`` and strictly
        exceeds ``current_raw_ncc``; otherwise ``None``.
    """
    img_h, img_w = image.shape[:2]
    short_side = min(img_w, img_h)

    # Corner side: a fraction of the short side, clamped to the absolute
    # [_CORNER_PROMOTE_MIN, _CORNER_PROMOTE_MAX] band, then to the image itself.
    relative_side = round(short_side * self._CORNER_PROMOTE_FRAC)
    banded_side = max(self._CORNER_PROMOTE_MIN, min(self._CORNER_PROMOTE_MAX, relative_side))
    corner_side = int(min(short_side, banded_side))
    left = max(0, img_w - corner_side)
    top = max(0, img_h - corner_side)

    patch = image[top:img_h, left:img_w]
    # Inputs with three or more channels are reduced to gray; others pass through.
    if patch.ndim == 3 and patch.shape[2] >= 3:
        patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
    patch = patch.astype(np.float32) / 255.0

    # Raw-NCC argmax over all scales (no size weighting). The strict '>' keeps
    # the first, i.e. smallest, scale when two scales tie.
    peak_ncc, peak_scale, peak_loc = -1.0, 0, (0, 0)
    for scale, ncc, loc in self._scan_scales(patch):
        if ncc > peak_ncc:
            peak_ncc, peak_scale, peak_loc = ncc, scale, loc

    promotable = peak_ncc >= self._CORNER_PROMOTE_NCC and peak_ncc > current_raw_ncc
    if not promotable:
        return None
    # Translate the patch-local peak back into full-image coordinates.
    return peak_scale, left + peak_loc[0], top + peak_loc[1], float(peak_ncc)
# ── Removal ──────────────────────────────────────────────────────
def remove_watermark(
+65
View File
@@ -284,3 +284,68 @@ class TestOverSubtractionGuard:
dark, dpos = self._composite_sparkle(bg_value=60)
dalpha = self.engine.get_interpolated_alpha(dpos[2])
assert self.engine._reverse_alpha_oversubtracts(dark, dalpha, (dpos[0], dpos[1])) is True
class TestCornerPromotion:
    """Regression tests for issue #36: a corner sparkle must survive a larger decoy.

    The size weight that filters tiny-patch false positives also allows a
    larger, mediocre match elsewhere to outrank a small, near-perfect sparkle
    in the corner, so a faint sparkle on a busy background (e.g. a portrait
    whose bright collar out-scores it) is reported as clean. The
    corner-promotion override is what recovers it.
    """

    _W, _H = 400, 520
    # Small sparkle as (x, y, scale): scale 20, with its far edges 40 px in
    # from the right and bottom borders.
    _CORNER = (_W - 40 - 20, _H - 40 - 20, 20)
    # Large decoy as (x, y, scale): inside the search window, left of the corner.
    _DECOY = (15, 210, 92)

    @pytest.fixture(autouse=True)
    def _setup_engine(self):
        self.engine = GeminiEngine()

    def _paste(self, img: np.ndarray, scale: int, x: int, y: int, alpha_scale: float) -> None:
        """Alpha-blend a white, ``scale``-sized copy of the template into ``img`` at (x, y)."""
        template = cv2.resize(self.engine._alpha_large, (scale, scale), interpolation=cv2.INTER_AREA)
        alpha = (template * alpha_scale)[:, :, None]
        window = (slice(y, y + scale), slice(x, x + scale))
        img[window] = alpha * 255.0 + (1.0 - alpha) * img[window]

    def _scene(self, bg_value: int = 40) -> np.ndarray:
        """Build a dark frame holding the large left-side decoy and the small corner sparkle.

        Without corner promotion the global size-weighted search settles on the
        decoy; with it, the small corner sparkle is the reported match.
        """
        canvas = np.full((self._H, self._W, 3), bg_value, dtype=np.float32)
        for x, y, scale in (self._DECOY, self._CORNER):
            self._paste(canvas, scale, x, y, 0.55)
        return np.clip(canvas, 0, 255).astype(np.uint8)

    def _in_bottom_right(self, region: tuple[int, int, int, int]) -> bool:
        """Tell whether the region's origin lies past 60% of both width and height."""
        x, y = region[:2]
        return x >= self._W * 0.6 and y >= self._H * 0.6

    def test_small_corner_sparkle_is_detected_and_localized(self):
        planted_x, planted_y, _ = self._CORNER
        det = self.engine.detect_watermark(self._scene())
        assert det.detected
        # The hit has to be the planted corner sparkle rather than the left-side decoy.
        assert self._in_bottom_right(det.region), f"localized to decoy, not corner: {det.region}"
        assert abs(det.region[0] - planted_x) < 16
        assert abs(det.region[1] - planted_y) < 16

    def test_promotion_is_what_rescues_it(self, monkeypatch):
        """Pin the mechanism: with the override disabled the decoy is picked instead.

        Shows the scene really depends on the override (so the localization
        test is not passing by accident): once the gate is raised to an
        unreachable value the larger decoy wins.
        """
        scene = self._scene()
        with_override = self.engine.detect_watermark(scene).region
        assert self._in_bottom_right(with_override)

        monkeypatch.setattr(GeminiEngine, "_CORNER_PROMOTE_NCC", 2.0)
        without_override = self.engine.detect_watermark(scene).region
        assert not self._in_bottom_right(without_override), (
            "decoy expected to win without the override"
        )

    def test_no_promotion_on_clean_flat_image(self):
        """A uniform image without any sparkle offers nothing to promote."""
        flat = np.full((self._H, self._W, 3), 40, dtype=np.uint8)
        assert self.engine._corner_promote(flat, -1.0) is None