From d24d8a4b1482cd6a3b802ecda67d8ee079fdda60 Mon Sep 17 00:00:00 2001 From: test-user Date: Mon, 25 May 2026 14:09:33 -0700 Subject: [PATCH] Extract _target_size helper + regression-test native resolution (v0.5.4) The native-vs-downscale decision in InvisibleEngine.remove_watermark (the issue #10/#15 fix: max_resolution=0 must not pre-downscale, since any downscale both loses quality and lets SynthID survive) had no test. Extract it into a pure helper invisible_engine._target_size(w, h, max_resolution) and cover it with tests/test_invisible_engine.py::TestTargetSize so a re-introduced forced downscale fails CI instead of silently regressing #15. Also: - Clamp the short side to >=1 in _target_size: extreme aspect ratios (e.g. 5000x3 with --max-resolution 1024) truncated it to 0 and crashed image.resize(). Pre-existing in the inline math; fixed now that it is a named, tested function. - Consolidate the two duplicated temp-file save blocks into one unconditional save (behavior unchanged: the EXIF-transposed image is still always persisted before WatermarkRemover reloads it by path), and drop the now-redundant `_tmp_path is not None` guard in finally. - Bump version 0.5.3 -> 0.5.4 (pyproject, __init__, uv.lock); document the helper as the regression guard in CLAUDE.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 2 +- pyproject.toml | 2 +- src/remove_ai_watermarks/__init__.py | 2 +- src/remove_ai_watermarks/invisible_engine.py | 58 +++++++++++--------- tests/test_invisible_engine.py | 44 ++++++++++++++- uv.lock | 2 +- 6 files changed, 80 insertions(+), 30 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b83a34d..52cbabe 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -44,7 +44,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are ## Known limitations -- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main quality loss (issue #10) and is gone; at strength ~0.05 SDXL img2img does not need the ~1024 downscale. `--max-resolution N` re-introduces an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs (it reintroduces the lossy round-trip). For huge images that OOM at native, tile-based diffusion is still the proper long-term fix. **Concrete MPS data point (verified 2026-05-25 on a 1254x1254 gpt-image SDXL run, fp32, 20 GB MPS ceiling):** native res OOMs at the *UNet* step (peak ~17 GiB), not only the VAE decode, and the auto-fallback in `img2img_runner` reloads on CPU and finishes (slow, ~13 min) -- the output is still weight-identical and defeats SynthID, so "looks hung/crashed" on Mac is usually this CPU fallback, not a pipeline error. Adding `enable_vae_tiling()` alone does NOT prevent it (the peak is the UNet, not the VAE). The fast Mac workarounds are fp16 on MPS (roughly halves memory) or `--max-resolution` to cap the long side; neither is wired as the default. +- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main quality loss (issue #10) and is gone; at strength ~0.05 SDXL img2img does not need the ~1024 downscale. `--max-resolution N` re-introduces an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs (it reintroduces the lossy round-trip). For huge images that OOM at native, tile-based diffusion is still the proper long-term fix. **Concrete MPS data point (verified 2026-05-25 on a 1254x1254 gpt-image SDXL run, fp32, 20 GB MPS ceiling):** native res OOMs at the *UNet* step (peak ~17 GiB), not only the VAE decode, and the auto-fallback in `img2img_runner` reloads on CPU and finishes (slow, ~13 min) -- the output is still weight-identical and defeats SynthID, so "looks hung/crashed" on Mac is usually this CPU fallback, not a pipeline error. Adding `enable_vae_tiling()` alone does NOT prevent it (the peak is the UNet, not the VAE). The fast Mac workarounds are fp16 on MPS (roughly halves memory) or `--max-resolution` to cap the long side; neither is wired as the default. The native-vs-downscale decision lives in the pure helper `invisible_engine._target_size(w, h, max_resolution)` (returns `None` for native, a clamped target tuple otherwise) so it is unit-tested (`tests/test_invisible_engine.py::TestTargetSize`, the #10/#15 regression guard) without loading the model -- keep that logic in the helper, don't re-inline it. - Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files. - `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`. - Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding). EXIF/XMP boxes inside those containers are read for detection but not yet **scrubbed** on removal. diff --git a/pyproject.toml b/pyproject.toml index 64956d2..3b972b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "remove-ai-watermarks" -version = "0.5.3" +version = "0.5.4" description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)" readme = "README.md" requires-python = ">=3.10" diff --git a/src/remove_ai_watermarks/__init__.py b/src/remove_ai_watermarks/__init__.py index 66a7eed..5555254 100644 --- a/src/remove_ai_watermarks/__init__.py +++ b/src/remove_ai_watermarks/__init__.py @@ -1,3 +1,3 @@ """Remove-AI-Watermarks: Unified tool for removing visible and invisible AI watermarks.""" -__version__ = "0.5.3" +__version__ = "0.5.4" diff --git a/src/remove_ai_watermarks/invisible_engine.py b/src/remove_ai_watermarks/invisible_engine.py index fab6855..8b03689 100644 --- a/src/remove_ai_watermarks/invisible_engine.py +++ b/src/remove_ai_watermarks/invisible_engine.py @@ -42,6 +42,24 @@ def is_available() -> bool: return False +def _target_size(width: int, height: int, max_resolution: int) -> tuple[int, int] | None: + """Compute the downscaled (width, height) for a long-side cap, or None for native. + + Returns None when no pre-downscale is needed: ``max_resolution <= 0`` (native + resolution, the default that matches the raiw.cc backend -- see issue #10) or + the long side already fits the cap. Otherwise scales the long side down to + ``max_resolution`` preserving aspect ratio (integer-truncated, matching the + PIL ``resize`` call site). Pure function so the native-vs-downscale decision + is unit-testable without loading the diffusion model. + """ + if max_resolution > 0 and max(width, height) > max_resolution: + ratio = max_resolution / max(width, height) + # Clamp the short side to >=1: extreme aspect ratios (e.g. 5000x3 capped + # at 1024) would otherwise truncate it to 0 and crash image.resize(). + return (max(1, int(width * ratio)), max(1, int(height * ratio))) + return None + + class InvisibleEngine: """Remove invisible AI watermarks using diffusion model regeneration. @@ -142,37 +160,26 @@ class InvisibleEngine: image = Image.open(image_path) image = ImageOps.exif_transpose(image) orig_size = image.size # (width, height) - _tmp_path = None - if max_resolution > 0 and max(image.width, image.height) > max_resolution: - ratio = max_resolution / max(image.width, image.height) - new_size = (int(image.width * ratio), int(image.height * ratio)) + # Optional long-side downscale; native resolution by default (issue #10). + target = _target_size(image.width, image.height, max_resolution) + if target is not None: if self._progress_callback: self._progress_callback( f"Downscaling {image.width}x{image.height} " - f"to {new_size[0]}x{new_size[1]} " + f"to {target[0]}x{target[1]} " f"(max-resolution cap {max_resolution}px)..." ) - image = image.resize(new_size, Image.Resampling.LANCZOS) + image = image.resize(target, Image.Resampling.LANCZOS) - # Save to a temp file instead of overwriting the original - _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix) - _tmp_path = Path(_tmp_str) - image.save(_tmp_path) - import os as _os - - _os.close(_tmp_fd) - image_path = _tmp_path - else: - # We must save the transposed image back to a tmp file if it was rotated - # otherwise WatermarkRemover will reload it without EXIF rotation! - _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix) - _tmp_path = Path(_tmp_str) - image.save(_tmp_path) - import os as _os - - _os.close(_tmp_fd) - image_path = _tmp_path + # Always persist to a temp file, even without downscaling: WatermarkRemover + # reloads by path, so the EXIF-transposed pixels must be saved or rotation + # is lost. Cleaned up in the finally block via _tmp_path. + _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix) + _tmp_path = Path(_tmp_str) + image.save(_tmp_path) + os.close(_tmp_fd) + image_path = _tmp_path try: # Optional: Face protection (Phase 1 - Extraction) @@ -253,7 +260,8 @@ class InvisibleEngine: return out_path finally: - if _tmp_path is not None and _tmp_path.exists(): + # _tmp_path is always set above (we persist the image unconditionally). + if _tmp_path.exists(): _tmp_path.unlink() def remove_watermark_batch( diff --git a/tests/test_invisible_engine.py b/tests/test_invisible_engine.py index 7f701e1..8b5e244 100644 --- a/tests/test_invisible_engine.py +++ b/tests/test_invisible_engine.py @@ -2,7 +2,7 @@ from __future__ import annotations -from remove_ai_watermarks.invisible_engine import InvisibleEngine, is_available +from remove_ai_watermarks.invisible_engine import InvisibleEngine, _target_size, is_available class TestIsAvailable: @@ -26,3 +26,45 @@ class TestInvisibleEngineInit: def test_ctrlregen_model_id(self): assert InvisibleEngine.CTRLREGEN_MODEL_ID == "yepengliu/ctrlregen" + + +class TestTargetSize: + """Regression guard for the native-resolution decision (issues #10 / #15). + + max_resolution=0 must NOT downscale -- the forced downscale->upscale + round-trip was the quality loss in #10, and downscaling at all let SynthID + survive in #15 (the native SDXL pass at strength ~0.05 is what defeats it). + """ + + def test_native_default_no_downscale(self): + # The default (0) means native resolution: no resize, regardless of size. + assert _target_size(4096, 4096, 0) is None + assert _target_size(123, 456, 0) is None + + def test_negative_cap_treated_as_native(self): + assert _target_size(4096, 4096, -1) is None + + def test_cap_below_long_side_downscales(self): + # 2000x1000, cap 1024 -> long side scaled to 1024, aspect preserved. + assert _target_size(2000, 1000, 1024) == (1024, 512) + + def test_cap_uses_long_side_for_portrait(self): + # Portrait: height is the long side, so it drives the ratio. + assert _target_size(1000, 2000, 1024) == (512, 1024) + + def test_cap_at_or_above_long_side_no_downscale(self): + # Already within the cap (and exactly equal) -> no resize. + assert _target_size(800, 600, 1024) is None + assert _target_size(1024, 768, 1024) is None + + def test_integer_truncation_matches_pil_call_site(self): + # 1254x1254 (the gpt-image sample) capped at 1000: int(1254*1000/1254)=1000. + assert _target_size(1254, 1254, 1000) == (1000, 1000) + # Non-divisible ratio truncates toward zero like int() at the call site. + assert _target_size(1000, 333, 500) == (500, 166) + + def test_extreme_aspect_ratio_clamps_short_side_to_one(self): + # 5000x3 capped at 1024: int(3 * 1024/5000) = 0 would crash resize(); + # the short side must clamp to 1, never 0. + assert _target_size(5000, 3, 1024) == (1024, 1) + assert _target_size(3, 5000, 1024) == (1, 1024) diff --git a/uv.lock b/uv.lock index f9e8e16..aa67383 100644 --- a/uv.lock +++ b/uv.lock @@ -2150,7 +2150,7 @@ wheels = [ [[package]] name = "remove-ai-watermarks" -version = "0.5.3" +version = "0.5.4" source = { editable = "." } dependencies = [ { name = "click" },