From d24d8a4b1482cd6a3b802ecda67d8ee079fdda60 Mon Sep 17 00:00:00 2001
From: test-user <kuznetsov.va@gmail.com>
Date: Mon, 25 May 2026 14:09:33 -0700
Subject: [PATCH] Extract _target_size helper + regression-test native
 resolution (v0.5.4)

The native-vs-downscale decision in InvisibleEngine.remove_watermark (the
issue #10/#15 fix: max_resolution=0 must not pre-downscale, since any
downscale both loses quality and lets SynthID survive) had no test. Extract
it into a pure helper invisible_engine._target_size(w, h, max_resolution)
and cover it with tests/test_invisible_engine.py::TestTargetSize so a
re-introduced forced downscale fails CI instead of silently regressing #15.

Also:
- Clamp the short side to >=1 in _target_size: extreme aspect ratios (e.g.
  5000x3 with --max-resolution 1024) truncated it to 0 and crashed
  image.resize(). Pre-existing in the inline math; fixed now that it is a
  named, tested function.
- Consolidate the two duplicated temp-file save blocks into one
  unconditional save (behavior unchanged: the EXIF-transposed image is
  still always persisted before WatermarkRemover reloads it by path), and
  drop the now-redundant `_tmp_path is not None` guard in finally.
- Bump version 0.5.3 -> 0.5.4 (pyproject, __init__, uv.lock); document the
  helper as the regression guard in CLAUDE.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                    |  2 +-
 pyproject.toml                               |  2 +-
 src/remove_ai_watermarks/__init__.py         |  2 +-
 src/remove_ai_watermarks/invisible_engine.py | 58 +++++++++++---------
 tests/test_invisible_engine.py               | 44 ++++++++++++++-
 uv.lock                                      |  2 +-
 6 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index b83a34d..52cbabe 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -44,7 +44,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are
 
 ## Known limitations
 
-- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main quality loss (issue #10) and is gone; at strength ~0.05 SDXL img2img does not need the ~1024 downscale. `--max-resolution N` re-introduces an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs (it reintroduces the lossy round-trip). For huge images that OOM at native, tile-based diffusion is still the proper long-term fix. **Concrete MPS data point (verified 2026-05-25 on a 1254x1254 gpt-image SDXL run, fp32, 20 GB MPS ceiling):** native res OOMs at the *UNet* step (peak ~17 GiB), not only the VAE decode, and the auto-fallback in `img2img_runner` reloads on CPU and finishes (slow, ~13 min) -- the output is still weight-identical and defeats SynthID, so "looks hung/crashed" on Mac is usually this CPU fallback, not a pipeline error. Adding `enable_vae_tiling()` alone does NOT prevent it (the peak is the UNet, not the VAE). The fast Mac workarounds are fp16 on MPS (roughly halves memory) or `--max-resolution` to cap the long side; neither is wired as the default.
+- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main quality loss (issue #10) and is gone; at strength ~0.05 SDXL img2img does not need the ~1024 downscale. `--max-resolution N` re-introduces an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs (it reintroduces the lossy round-trip). For huge images that OOM at native, tile-based diffusion is still the proper long-term fix. **Concrete MPS data point (verified 2026-05-25 on a 1254x1254 gpt-image SDXL run, fp32, 20 GB MPS ceiling):** native res OOMs at the *UNet* step (peak ~17 GiB), not only the VAE decode, and the auto-fallback in `img2img_runner` reloads on CPU and finishes (slow, ~13 min) -- the output is still weight-identical and defeats SynthID, so "looks hung/crashed" on Mac is usually this CPU fallback, not a pipeline error. Adding `enable_vae_tiling()` alone does NOT prevent it (the peak is the UNet, not the VAE). The fast Mac workarounds are fp16 on MPS (roughly halves memory) or `--max-resolution` to cap the long side; neither is wired as the default. The native-vs-downscale decision lives in the pure helper `invisible_engine._target_size(w, h, max_resolution)` (returns `None` for native, a clamped target tuple otherwise) so it is unit-tested (`tests/test_invisible_engine.py::TestTargetSize`, the #10/#15 regression guard) without loading the model -- keep that logic in the helper, don't re-inline it.
 - Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files.
 - `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`.
 - Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding). EXIF/XMP boxes inside those containers are read for detection but not yet **scrubbed** on removal.
diff --git a/pyproject.toml b/pyproject.toml
index 64956d2..3b972b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "remove-ai-watermarks"
-version = "0.5.3"
+version = "0.5.4"
 description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/remove_ai_watermarks/__init__.py b/src/remove_ai_watermarks/__init__.py
index 66a7eed..5555254 100644
--- a/src/remove_ai_watermarks/__init__.py
+++ b/src/remove_ai_watermarks/__init__.py
@@ -1,3 +1,3 @@
 """Remove-AI-Watermarks: Unified tool for removing visible and invisible AI watermarks."""
 
-__version__ = "0.5.3"
+__version__ = "0.5.4"
diff --git a/src/remove_ai_watermarks/invisible_engine.py b/src/remove_ai_watermarks/invisible_engine.py
index fab6855..8b03689 100644
--- a/src/remove_ai_watermarks/invisible_engine.py
+++ b/src/remove_ai_watermarks/invisible_engine.py
@@ -42,6 +42,24 @@ def is_available() -> bool:
         return False
 
 
+def _target_size(width: int, height: int, max_resolution: int) -> tuple[int, int] | None:
+    """Compute the downscaled (width, height) for a long-side cap, or None for native.
+
+    Returns None when ``max_resolution <= 0`` (native resolution, the default --
+    see issue #10) or the long side already fits the cap. Otherwise the long side
+    becomes exactly ``max_resolution`` and the short side is scaled to match,
+    floored and clamped to >= 1. Pure function: testable without the model.
+    """
+    long_side = max(width, height)
+    if max_resolution <= 0 or long_side <= max_resolution:
+        return None
+    # Integer math: int(side * (cap / long_side)) can float-round one pixel short
+    # (3136 capped at 1024 gave 1023). Clamp to >= 1 so e.g. 5000x3 never hits 0.
+    new_w = max(1, width * max_resolution // long_side)
+    new_h = max(1, height * max_resolution // long_side)
+    return (new_w, new_h)
+
+
 class InvisibleEngine:
     """Remove invisible AI watermarks using diffusion model regeneration.
 
@@ -142,37 +160,26 @@ class InvisibleEngine:
         image = Image.open(image_path)
         image = ImageOps.exif_transpose(image)
         orig_size = image.size  # (width, height)
-        _tmp_path = None
 
-        if max_resolution > 0 and max(image.width, image.height) > max_resolution:
-            ratio = max_resolution / max(image.width, image.height)
-            new_size = (int(image.width * ratio), int(image.height * ratio))
+        # Optional long-side downscale; native resolution by default (issue #10).
+        target = _target_size(image.width, image.height, max_resolution)
+        if target is not None:
             if self._progress_callback:
                 self._progress_callback(
                     f"Downscaling {image.width}x{image.height} "
-                    f"to {new_size[0]}x{new_size[1]} "
+                    f"to {target[0]}x{target[1]} "
                     f"(max-resolution cap {max_resolution}px)..."
                 )
-            image = image.resize(new_size, Image.Resampling.LANCZOS)
+            image = image.resize(target, Image.Resampling.LANCZOS)
 
-            # Save to a temp file instead of overwriting the original
-            _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix)
-            _tmp_path = Path(_tmp_str)
-            image.save(_tmp_path)
-            import os as _os
-
-            _os.close(_tmp_fd)
-            image_path = _tmp_path
-        else:
-            # We must save the transposed image back to a tmp file if it was rotated
-            # otherwise WatermarkRemover will reload it without EXIF rotation!
-            _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix)
-            _tmp_path = Path(_tmp_str)
-            image.save(_tmp_path)
-            import os as _os
-
-            _os.close(_tmp_fd)
-            image_path = _tmp_path
+        # Always persist to a temp file, even without downscaling: WatermarkRemover
+        # reloads by path, so the EXIF-transposed pixels must be saved or rotation
+        # is lost. Cleaned up in the finally block via _tmp_path.
+        _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix)
+        _tmp_path = Path(_tmp_str)
+        image.save(_tmp_path)
+        os.close(_tmp_fd)
+        image_path = _tmp_path
 
         try:
             # Optional: Face protection (Phase 1 - Extraction)
@@ -253,7 +260,8 @@ class InvisibleEngine:
 
             return out_path
         finally:
-            if _tmp_path is not None and _tmp_path.exists():
+            # _tmp_path is always set above (we persist the image unconditionally).
+            if _tmp_path.exists():
                 _tmp_path.unlink()
 
     def remove_watermark_batch(
diff --git a/tests/test_invisible_engine.py b/tests/test_invisible_engine.py
index 7f701e1..8b5e244 100644
--- a/tests/test_invisible_engine.py
+++ b/tests/test_invisible_engine.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from remove_ai_watermarks.invisible_engine import InvisibleEngine, is_available
+from remove_ai_watermarks.invisible_engine import InvisibleEngine, _target_size, is_available
 
 
 class TestIsAvailable:
@@ -26,3 +26,45 @@ class TestInvisibleEngineInit:
 
     def test_ctrlregen_model_id(self):
         assert InvisibleEngine.CTRLREGEN_MODEL_ID == "yepengliu/ctrlregen"
+
+
+class TestTargetSize:
+    """Regression guard for the native-resolution decision (issues #10 / #15).
+
+    max_resolution=0 must NOT downscale -- the forced downscale->upscale
+    round-trip was the quality loss in #10, and downscaling at all let SynthID
+    survive in #15 (the native SDXL pass at strength ~0.05 is what defeats it).
+    """
+
+    def test_native_default_no_downscale(self):
+        # The default (0) means native resolution: no resize, regardless of size.
+        assert _target_size(4096, 4096, 0) is None
+        assert _target_size(123, 456, 0) is None
+
+    def test_negative_cap_treated_as_native(self):
+        assert _target_size(4096, 4096, -1) is None
+
+    def test_cap_below_long_side_downscales(self):
+        # 2000x1000, cap 1024 -> long side scaled to 1024, aspect preserved.
+        assert _target_size(2000, 1000, 1024) == (1024, 512)
+
+    def test_cap_uses_long_side_for_portrait(self):
+        # Portrait: height is the long side, so it drives the ratio.
+        assert _target_size(1000, 2000, 1024) == (512, 1024)
+
+    def test_cap_at_or_above_long_side_no_downscale(self):
+        # Already within the cap (and exactly equal) -> no resize.
+        assert _target_size(800, 600, 1024) is None
+        assert _target_size(1024, 768, 1024) is None
+
+    def test_integer_truncation_matches_pil_call_site(self):
+        # 1254x1254 (the gpt-image sample) capped at 1000: int(1254*1000/1254)=1000.
+        assert _target_size(1254, 1254, 1000) == (1000, 1000)
+        # Non-divisible ratio truncates toward zero like int() at the call site.
+        assert _target_size(1000, 333, 500) == (500, 166)
+
+    def test_extreme_aspect_ratio_clamps_short_side_to_one(self):
+        # 5000x3 capped at 1024: int(3 * 1024/5000) = 0 would crash resize();
+        # the short side must clamp to 1, never 0.
+        assert _target_size(5000, 3, 1024) == (1024, 1)
+        assert _target_size(3, 5000, 1024) == (1, 1024)
diff --git a/uv.lock b/uv.lock
index f9e8e16..aa67383 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2150,7 +2150,7 @@ wheels = [
 
 [[package]]
 name = "remove-ai-watermarks"
-version = "0.5.3"
+version = "0.5.4"
 source = { editable = "." }
 dependencies = [
     { name = "click" },