fix(gemini): self-verify repair for under-removed sparkles

After reverse-alpha, re-detect the sparkle; when one survives at or above the registry fail line (conf >= 0.5) -- an alpha mismatch the per-image gain estimate could not fully correct -- inpaint the footprint and keep the inpainted result only when it lowers the re-detect confidence. The footprint inpaint reconstructs the slot from its darker surroundings, so it physically removes the bright sparkle. The change is purely additive: the common clean removal re-detects below 0.5 and is returned untouched. Measured on the spaces visible-removal audit: gemini removal-audit failures drop 15 -> 11 (4 genuine rescues), doubao 65/65 and jimeng 11/11 unchanged, zero regressions on the 468 already-clean removals. An offset+scale alignment search was prototyped on the remaining 11 fails and rejected: an audit "ceiling" suggested +4 more rescues, but those were NCC-gaming -- the lower-scoring placement left the sparkle as bright or brighter, merely reshaping the residual so that the contrast-invariant shape-NCC scored lower (a5a9: first-pass slot ~76 at background level vs the "aligned win" ~164). A brightness sanity check rejected every one of them, so the search contributed nothing and was removed. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-26 01:11:11 +02:00 · 2026-06-04 16:45:18 -07:00
parent 6d11c11b52
commit 2c0b174dfa
3 changed files with 101 additions and 2 deletions
@@ -171,6 +171,23 @@ class GeminiEngine:
    _SPARKLE_FP_CONF = 0.65
    _SPARKLE_FP_MARGIN = 5.0

+    # Self-verify fallback. The gain estimate corrects most under-subtractions, but
+    # on the spaces corpus a tail of strong sparkles still survived reverse-alpha
+    # (a few px of position jitter or a gain estimate the [1.0, 1.94] clamp could
+    # not fully reach). After the reverse blend, re-detect; if the confidence is still
+    # >= this threshold, inpaint the footprint and keep that ONLY when it lowers the
+    # re-detect confidence. Purely additive: the common clean removal re-detects below
+    # the threshold and is returned untouched. It matches the registry's real fail line
+    # (0.5), so it triggers exactly on the cases that would otherwise read as not-removed
+    # (rescued 4 of 15 corpus fails, 0 regressions). An offset+scale alignment search
+    # was prototyped on the remaining 11 but REJECTED: it only lowered the shape-NCC by
+    # moving the reverse-alpha to a different placement that left the sparkle as bright
+    # or brighter (NCC-gaming, not removal), so a brightness sanity check rejected every
+    # one. The footprint inpaint physically reconstructs the slot from its surroundings,
+    # so its rescues are genuine; the survivors are near-white ill-conditioning or
+    # detector false positives that no reverse-alpha placement fixes.
+    _VERIFY_FALLBACK_CONF = 0.5
+
    # Corner promotion (issue #36): the size weight that suppresses tiny-patch
    # false positives also buries a small, near-perfect sparkle when a larger,
    # mediocre match sits elsewhere (e.g. a bright collar in a portrait). A small
@@ -521,7 +538,7 @@ class GeminiEngine:
            self._inpaint_footprint(result, alpha_map, pos)
        else:
            self._reverse_alpha_blend(result, alpha_map, pos)
-        return result
+        return self._verify_and_repair(result, alpha_map, pos, size)

    def remove_watermark_custom(
        self,
@@ -708,6 +725,33 @@ class GeminiEngine:
        mask = cv2.dilate(mask, kernel, iterations=2)
        image[cy1:cy2, cx1:cx2] = cv2.inpaint(crop, mask, 6, cv2.INPAINT_NS)

+    def _verify_and_repair(
+        self,
+        result: NDArray[Any],
+        alpha_map: NDArray[Any],
+        position: tuple[int, int],
+        size: WatermarkSize,
+    ) -> NDArray[Any]:
+        """Inpaint-repair a sparkle that survived removal, keeping the better result.
+
+        Re-detects on ``result`` with ``force_size=size``; if the confidence is still
+        >= ``_VERIFY_FALLBACK_CONF`` (an alpha mismatch the gain estimate could not
+        fully correct), inpaints the footprint at ``position`` on a copy and returns
+        the copy ONLY when it re-detects strictly lower. The footprint inpaint
+        reconstructs from the (darker) surroundings, so it physically removes the
+        bright sparkle rather than gaming the shape-NCC. Otherwise returns ``result``
+        itself (same object, not copied), so the re-detect confidence never regresses.
+        """
+        residual = self.detect_watermark(result, force_size=size).confidence
+        if residual < self._VERIFY_FALLBACK_CONF:  # common case: removal already reads as clean
+            return result
+        candidate = result.copy()  # inpaint a copy so ``result`` survives a rejected repair
+        self._inpaint_footprint(candidate, alpha_map, position)
+        if self.detect_watermark(candidate, force_size=size).confidence < residual:
+            logger.debug("Sparkle survived reverse-alpha (conf=%.3f); footprint inpaint improved it.", residual)
+            return candidate
+        return result
+
    def _reverse_alpha_blend(
        self,
        image: NDArray[Any],
@@ -346,6 +346,61 @@ class TestUnderSubtractionGain:
        assert abs(float(footprint.mean()) - 80.0) < 20.0


+class TestVerifyAndRepair:
+    """Self-verify fallback: a sparkle that survives reverse-alpha is inpaint-repaired,
+    but only when that lowers the re-detect confidence (so it can never regress).
+
+    The detector NCC is degenerate on flat synthetic backgrounds, so the keep-best
+    control flow is driven through a stubbed ``detect_watermark`` rather than a real
+    re-detect (mirroring the reasoning in TestUnderSubtractionGain).
+    """
+
+    @pytest.fixture(autouse=True)
+    def _setup_engine(self):
+        self.engine = GeminiEngine()
+        self.alpha = self.engine.get_interpolated_alpha(96)  # presumably a 96 px alpha map -- TODO confirm
+        self.pos = (200, 200)  # (x, y): the tests index rows with pos[1] and columns with pos[0]
+
+    def _stub_detect(self, confidences):
+        """detect_watermark stub returning the given confidences in call order, one per call."""
+        seq = iter(confidences)
+
+        def fake(image, force_size=None):
+            return DetectionResult(detected=True, confidence=next(seq))  # StopIteration if over-called
+
+        return fake
+
+    def _repair(self, result):
+        return self.engine._verify_and_repair(result, self.alpha, self.pos, WatermarkSize.LARGE)
+
+    def test_clean_removal_returned_untouched(self, monkeypatch):
+        """Below the fallback threshold, the input is returned byte-identical."""
+        img = np.full((600, 600, 3), 90, dtype=np.uint8)
+        monkeypatch.setattr(self.engine, "detect_watermark", self._stub_detect([0.2]))
+        out = self._repair(img)
+        assert out is img  # no copy, no inpaint
+
+    def test_keeps_footprint_inpaint_when_it_helps(self, monkeypatch):
+        """A surviving sparkle is footprint-inpainted when that re-detects lower; the
+        footprint pixels change."""
+        img = np.full((600, 600, 3), 90, dtype=np.uint8)
+        # Bright residual block over the footprint so the inpaint visibly changes it.
+        img[self.pos[1] : self.pos[1] + 96, self.pos[0] : self.pos[0] + 96] = 240
+        # residual 0.7 (>= 0.5 triggers), candidate re-detects 0.2 (< residual -> keep).
+        monkeypatch.setattr(self.engine, "detect_watermark", self._stub_detect([0.7, 0.2]))
+        out = self._repair(img)
+        assert out is not img
+        assert not np.array_equal(out, img)  # footprint was inpainted from surroundings
+
+    def test_repair_rejected_when_inpaint_does_not_help(self, monkeypatch):
+        """When the inpaint does not lower the re-detect confidence, keep the original."""
+        img = np.full((600, 600, 3), 90, dtype=np.uint8)
+        # residual 0.7, candidate re-detects 0.75 (>= residual -> reject the inpaint).
+        monkeypatch.setattr(self.engine, "detect_watermark", self._stub_detect([0.7, 0.75]))
+        out = self._repair(img)
+        assert out is img
+
 class TestSparkleFalsePositiveGate:
    """False-positive gate: a low-confidence shape match whose core is NOT brighter
    than its surroundings (ornate/flat content, not a white sparkle overlay) is