diff --git a/CLAUDE.md b/CLAUDE.md index b0f9250..c5a1433 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -43,7 +43,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are ## Known limitations -- `invisible` pipeline downscales to model-native resolution (1024 px for SDXL) before diffusion. Degrades fine text in infographics. Tracked; fix is tile-based diffusion. +- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main quality loss (issue #10) and is gone; at strength ~0.05 SDXL img2img does not need the ~1024 downscale. `--max-resolution N` adds an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs; whenever the cap actually shrinks an image, the lossy downscale -> upscale round-trip comes back. For huge images that OOM at native resolution, tile-based diffusion is still the proper long-term fix. - Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files. - `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`. - Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding). EXIF/XMP boxes inside those containers are read for detection but not yet **scrubbed** on removal. 
diff --git a/src/remove_ai_watermarks/cli.py b/src/remove_ai_watermarks/cli.py index 1f70878..21e70a5 100644 --- a/src/remove_ai_watermarks/cli.py +++ b/src/remove_ai_watermarks/cli.py @@ -278,6 +278,12 @@ def cmd_visible( @click.option( "--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)." ) +@click.option( + "--max-resolution", + type=int, + default=0, + help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.", +) @click.pass_context def cmd_invisible( ctx: click.Context, @@ -290,6 +296,7 @@ def cmd_invisible( seed: int | None, hf_token: str | None, humanize: float, + max_resolution: int, ) -> None: """Remove invisible AI watermarks (SynthID, StableSignature, TreeRing). @@ -336,6 +343,7 @@ def cmd_invisible( guidance_scale=None, seed=seed, humanize=humanize, + max_resolution=max_resolution, ) elapsed = time.monotonic() - t0 @@ -476,6 +484,12 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo @click.option( "--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)." ) +@click.option( + "--max-resolution", + type=int, + default=0, + help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.", +) @click.pass_context def cmd_all( ctx: click.Context, @@ -491,6 +505,7 @@ def cmd_all( seed: int | None, hf_token: str | None, humanize: float, + max_resolution: int, ) -> None: """Remove ALL watermarks: visible + invisible + metadata. @@ -582,6 +597,7 @@ def cmd_all( num_inference_steps=steps, seed=seed, humanize=humanize, + max_resolution=max_resolution, ) console.print(" [green]✓[/] Invisible watermark removed") @@ -633,6 +649,7 @@ def _process_batch_image( seed: int | None, hf_token: str | None, humanize: float, + max_resolution: int = 0, ) -> None: """Process a single image for batch mode. 
@@ -695,6 +712,7 @@ def _process_batch_image( num_inference_steps=steps, seed=seed, humanize=humanize, + max_resolution=max_resolution, ) if mode in ("metadata", "all"): @@ -737,6 +755,12 @@ def _process_batch_image( @click.option("--device", type=click.Choice(["auto", "cpu", "mps", "cuda"]), default="auto", help="Inference device.") @click.option("--seed", type=int, default=None, help="Random seed for reproducibility.") @click.option("--hf-token", type=str, default=None, help="HuggingFace API token.") +@click.option( + "--max-resolution", + type=int, + default=0, + help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.", +) @click.pass_context def cmd_batch( ctx: click.Context, @@ -751,6 +775,7 @@ def cmd_batch( hf_token: str | None, inpaint: bool, humanize: float, + max_resolution: int, ) -> None: """Process all images in a directory.""" _banner() @@ -800,6 +825,7 @@ def cmd_batch( seed=seed, hf_token=hf_token, humanize=humanize, + max_resolution=max_resolution, ) processed += 1 diff --git a/src/remove_ai_watermarks/invisible_engine.py b/src/remove_ai_watermarks/invisible_engine.py index 649fb5f..fab6855 100644 --- a/src/remove_ai_watermarks/invisible_engine.py +++ b/src/remove_ai_watermarks/invisible_engine.py @@ -107,6 +107,7 @@ class InvisibleEngine: seed: int | None = None, humanize: float = 0.0, protect_faces: bool = True, + max_resolution: int = 0, ) -> Path: """Remove invisible watermark from an image. @@ -119,6 +120,11 @@ class InvisibleEngine: seed: Random seed for reproducibility. humanize: Intensity of Analog Humanizer film grain (0 = off). protect_faces: Boolean to extract and restore faces intact. + max_resolution: Cap the long side (px) before diffusion. 0 (default) + = native resolution, no pre-downscale -- matches the hosted + raiw.cc backend. Set a positive value only to bound GPU/MPS + memory on very large inputs (it reintroduces a lossy + downscale->upscale round-trip). 
Returns: Path to the cleaned image. @@ -127,22 +133,25 @@ class InvisibleEngine: from PIL import Image, ImageOps - # SDXL is trained at 1024px and degrades both quality and watermark-removal - # efficacy below that. - max_dimension = 1024 + # Process at native resolution by default (max_resolution=0). The hosted + # raiw.cc backend (fal fast-sdxl) does NO pre-downscale either, and at + # strength ~0.05 SDXL img2img does not need the input shrunk to ~1024 -- + # the old forced downscale->upscale round-trip was the main quality loss + # (see issue #10). A positive max_resolution caps the long side only to + # bound GPU/MPS memory on very large inputs. image = Image.open(image_path) image = ImageOps.exif_transpose(image) orig_size = image.size # (width, height) _tmp_path = None - if max(image.width, image.height) > max_dimension: - ratio = max_dimension / max(image.width, image.height) + if max_resolution > 0 and max(image.width, image.height) > max_resolution: + ratio = max_resolution / max(image.width, image.height) new_size = (int(image.width * ratio), int(image.height * ratio)) if self._progress_callback: self._progress_callback( f"Downscaling {image.width}x{image.height} " f"to {new_size[0]}x{new_size[1]} " - f"(model trained at {max_dimension}px)..." + f"(max-resolution cap {max_resolution}px)..." ) image = image.resize(new_size, Image.Resampling.LANCZOS)