fix(invisible): process at native resolution by default

The invisible pipeline force-downscaled inputs >1024px to 1024 before diffusion, then upscaled the result back -- a lossy round-trip that was the main cause of the quality loss reported in #10. The hosted raiw.cc backend (fal fast-sdxl) does no pre-downscale, and at strength ~0.05 SDXL img2img doesn't need it. Default is now native resolution (max_resolution=0). New --max-resolution flag (invisible / all / batch) re-introduces an opt-in long-side cap only to bound GPU/MPS memory on very large inputs. Addresses #10. End-to-end quality/removal not re-verified locally (no GPU here); matches raiw-app's proven production config. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-11 10:43:42 +02:00 · 2026-05-25 09:50:06 -07:00
parent 93c664f7fb
commit 18740969ae
3 changed files with 42 additions and 7 deletions
@@ -43,7 +43,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are

 ## Known limitations

- `invisible` pipeline downscales to model-native resolution (1024 px for SDXL) before diffusion. Degrades fine text in infographics. Tracked; fix is tile-based diffusion.
+- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main cause of quality loss (issue #10) and is gone; at strength ~0.05, SDXL img2img does not need the input downscaled to ~1024 px. `--max-resolution N` adds an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs; note that using it brings back the lossy round-trip. For huge images that run out of memory (OOM) at native resolution, tile-based diffusion is still the proper long-term fix.
 - Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files.
 - `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`.
 - Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding). EXIF/XMP boxes inside those containers are read for detection but not yet **scrubbed** on removal.
@@ -278,6 +278,12 @@ def cmd_visible(
@click.option(
    "--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)."
 )
+@click.option(
+    "--max-resolution",
+    type=int,
+    default=0,
+    help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
+)
@click.pass_context
 def cmd_invisible(
    ctx: click.Context,
@@ -290,6 +296,7 @@ def cmd_invisible(
    seed: int | None,
    hf_token: str | None,
    humanize: float,
+    max_resolution: int,
 ) -> None:
    """Remove invisible AI watermarks (SynthID, StableSignature, TreeRing).

@@ -336,6 +343,7 @@ def cmd_invisible(
        guidance_scale=None,
        seed=seed,
        humanize=humanize,
+        max_resolution=max_resolution,
    )
    elapsed = time.monotonic() - t0

@@ -476,6 +484,12 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo
@click.option(
    "--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)."
 )
+@click.option(
+    "--max-resolution",
+    type=int,
+    default=0,
+    help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
+)
@click.pass_context
 def cmd_all(
    ctx: click.Context,
@@ -491,6 +505,7 @@ def cmd_all(
    seed: int | None,
    hf_token: str | None,
    humanize: float,
+    max_resolution: int,
 ) -> None:
    """Remove ALL watermarks: visible + invisible + metadata.

@@ -582,6 +597,7 @@ def cmd_all(
                num_inference_steps=steps,
                seed=seed,
                humanize=humanize,
+                max_resolution=max_resolution,
            )
            console.print("    [green]✓[/] Invisible watermark removed")

@@ -633,6 +649,7 @@ def _process_batch_image(
    seed: int | None,
    hf_token: str | None,
    humanize: float,
+    max_resolution: int = 0,
 ) -> None:
    """Process a single image for batch mode.

@@ -695,6 +712,7 @@ def _process_batch_image(
                num_inference_steps=steps,
                seed=seed,
                humanize=humanize,
+                max_resolution=max_resolution,
            )

    if mode in ("metadata", "all"):
@@ -737,6 +755,12 @@ def _process_batch_image(
@click.option("--device", type=click.Choice(["auto", "cpu", "mps", "cuda"]), default="auto", help="Inference device.")
@click.option("--seed", type=int, default=None, help="Random seed for reproducibility.")
@click.option("--hf-token", type=str, default=None, help="HuggingFace API token.")
+@click.option(
+    "--max-resolution",
+    type=int,
+    default=0,
+    help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
+)
@click.pass_context
 def cmd_batch(
    ctx: click.Context,
@@ -751,6 +775,7 @@ def cmd_batch(
    hf_token: str | None,
    inpaint: bool,
    humanize: float,
+    max_resolution: int,
 ) -> None:
    """Process all images in a directory."""
    _banner()
@@ -800,6 +825,7 @@ def cmd_batch(
                    seed=seed,
                    hf_token=hf_token,
                    humanize=humanize,
+                    max_resolution=max_resolution,
                )
                processed += 1

@@ -107,6 +107,7 @@ class InvisibleEngine:
        seed: int | None = None,
        humanize: float = 0.0,
        protect_faces: bool = True,
+        max_resolution: int = 0,
    ) -> Path:
        """Remove invisible watermark from an image.

@@ -119,6 +120,11 @@ class InvisibleEngine:
            seed: Random seed for reproducibility.
            humanize: Intensity of Analog Humanizer film grain (0 = off).
            protect_faces: Boolean to extract and restore faces intact.
+            max_resolution: Cap the long side (px) before diffusion. 0 (default)
+                = native resolution, no pre-downscale -- matches the hosted
+                raiw.cc backend. Set a positive value only to bound GPU/MPS
+                memory on very large inputs (it reintroduces a lossy
+                downscale->upscale round-trip).

        Returns:
            Path to the cleaned image.
@@ -127,22 +133,25 @@ class InvisibleEngine:

        from PIL import Image, ImageOps

-        # SDXL is trained at 1024px and degrades both quality and watermark-removal
-        # efficacy below that.
-        max_dimension = 1024
+        # Process at native resolution by default (max_resolution=0). The hosted
+        # raiw.cc backend (fal fast-sdxl) does NO pre-downscale either, and at
+        # strength ~0.05 SDXL img2img does not need the input shrunk to ~1024 --
+        # the old forced downscale->upscale round-trip was the main quality loss
+        # (see issue #10). A positive max_resolution caps the long side only to
+        # bound GPU/MPS memory on very large inputs.
        image = Image.open(image_path)
        image = ImageOps.exif_transpose(image)
        orig_size = image.size  # (width, height)
        _tmp_path = None

-        if max(image.width, image.height) > max_dimension:
-            ratio = max_dimension / max(image.width, image.height)
+        if max_resolution > 0 and max(image.width, image.height) > max_resolution:
+            ratio = max_resolution / max(image.width, image.height)
            new_size = (int(image.width * ratio), int(image.height * ratio))
            if self._progress_callback:
                self._progress_callback(
                    f"Downscaling {image.width}x{image.height} "
                    f"to {new_size[0]}x{new_size[1]} "
-                    f"(model trained at {max_dimension}px)..."
+                    f"(max-resolution cap {max_resolution}px)..."
                )
            image = image.resize(new_size, Image.Resampling.LANCZOS)