fix(invisible): process at native resolution by default

The invisible pipeline force-downscaled inputs >1024px to 1024 before
diffusion, then upscaled the result back -- a lossy round-trip that was
the main cause of the quality loss reported in #10. The hosted raiw.cc
backend (fal fast-sdxl) does no pre-downscale, and at strength ~0.05
SDXL img2img doesn't need it.

Default is now native resolution (max_resolution=0). New --max-resolution
flag (invisible / all / batch) re-introduces an opt-in long-side cap only
to bound GPU/MPS memory on very large inputs.

Addresses #10. End-to-end quality/removal not re-verified locally (no GPU
here); matches raiw-app's proven production config.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
test-user
2026-05-25 09:50:06 -07:00
parent 93c664f7fb
commit 18740969ae
3 changed files with 42 additions and 7 deletions
+1 -1
View File
@@ -43,7 +43,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are
## Known limitations
- `invisible` pipeline downscales to model-native resolution (1024 px for SDXL) before diffusion. Degrades fine text in infographics. Tracked; fix is tile-based diffusion.
- `invisible` pipeline processes at **native resolution by default** (`max_resolution=0`), matching the hosted raiw.cc backend (fal fast-sdxl, no pre-downscale). The old forced downscale-to-1024 -> upscale-back round-trip was the main cause of quality loss (issue #10) and is gone; at strength ~0.05, SDXL img2img does not need the input downscaled to ~1024 px. `--max-resolution N` adds an opt-in long-side cap purely to bound GPU/MPS memory on very large inputs; any positive cap reintroduces the lossy round-trip for images whose long side exceeds N. For huge images that OOM at native resolution, tile-based diffusion is still the proper long-term fix.
- Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files.
- `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`.
- Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding). EXIF/XMP boxes inside those containers are read for detection but not yet **scrubbed** on removal.
+26
View File
@@ -278,6 +278,12 @@ def cmd_visible(
@click.option(
"--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)."
)
@click.option(
"--max-resolution",
type=int,
default=0,
help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
)
@click.pass_context
def cmd_invisible(
ctx: click.Context,
@@ -290,6 +296,7 @@ def cmd_invisible(
seed: int | None,
hf_token: str | None,
humanize: float,
max_resolution: int,
) -> None:
"""Remove invisible AI watermarks (SynthID, StableSignature, TreeRing).
@@ -336,6 +343,7 @@ def cmd_invisible(
guidance_scale=None,
seed=seed,
humanize=humanize,
max_resolution=max_resolution,
)
elapsed = time.monotonic() - t0
@@ -476,6 +484,12 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo
@click.option(
"--humanize", type=float, default=0.0, help="Analog Humanizer film grain intensity (0 = off, typical: 2.0-6.0)."
)
@click.option(
"--max-resolution",
type=int,
default=0,
help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
)
@click.pass_context
def cmd_all(
ctx: click.Context,
@@ -491,6 +505,7 @@ def cmd_all(
seed: int | None,
hf_token: str | None,
humanize: float,
max_resolution: int,
) -> None:
"""Remove ALL watermarks: visible + invisible + metadata.
@@ -582,6 +597,7 @@ def cmd_all(
num_inference_steps=steps,
seed=seed,
humanize=humanize,
max_resolution=max_resolution,
)
console.print(" [green]✓[/] Invisible watermark removed")
@@ -633,6 +649,7 @@ def _process_batch_image(
seed: int | None,
hf_token: str | None,
humanize: float,
max_resolution: int = 0,
) -> None:
"""Process a single image for batch mode.
@@ -695,6 +712,7 @@ def _process_batch_image(
num_inference_steps=steps,
seed=seed,
humanize=humanize,
max_resolution=max_resolution,
)
if mode in ("metadata", "all"):
@@ -737,6 +755,12 @@ def _process_batch_image(
@click.option("--device", type=click.Choice(["auto", "cpu", "mps", "cuda"]), default="auto", help="Inference device.")
@click.option("--seed", type=int, default=None, help="Random seed for reproducibility.")
@click.option("--hf-token", type=str, default=None, help="HuggingFace API token.")
@click.option(
"--max-resolution",
type=int,
default=0,
help="Cap long side (px) before diffusion; 0 = native (best quality, like raiw.cc). Raise only on GPU/MPS OOM.",
)
@click.pass_context
def cmd_batch(
ctx: click.Context,
@@ -751,6 +775,7 @@ def cmd_batch(
hf_token: str | None,
inpaint: bool,
humanize: float,
max_resolution: int,
) -> None:
"""Process all images in a directory."""
_banner()
@@ -800,6 +825,7 @@ def cmd_batch(
seed=seed,
hf_token=hf_token,
humanize=humanize,
max_resolution=max_resolution,
)
processed += 1
+15 -6
View File
@@ -107,6 +107,7 @@ class InvisibleEngine:
seed: int | None = None,
humanize: float = 0.0,
protect_faces: bool = True,
max_resolution: int = 0,
) -> Path:
"""Remove invisible watermark from an image.
@@ -119,6 +120,11 @@ class InvisibleEngine:
seed: Random seed for reproducibility.
humanize: Intensity of Analog Humanizer film grain (0 = off).
protect_faces: Boolean to extract and restore faces intact.
max_resolution: Cap the long side (px) before diffusion. 0 (default)
= native resolution, no pre-downscale -- matches the hosted
raiw.cc backend. Set a positive value only to bound GPU/MPS
memory on very large inputs (it reintroduces a lossy
downscale->upscale round-trip).
Returns:
Path to the cleaned image.
@@ -127,22 +133,25 @@ class InvisibleEngine:
from PIL import Image, ImageOps
# SDXL is trained at 1024px and degrades both quality and watermark-removal
# efficacy below that.
max_dimension = 1024
# Process at native resolution by default (max_resolution=0). The hosted
# raiw.cc backend (fal fast-sdxl) does NO pre-downscale either, and at
# strength ~0.05 SDXL img2img does not need the input shrunk to ~1024 --
# the old forced downscale->upscale round-trip was the main quality loss
# (see issue #10). A positive max_resolution caps the long side only to
# bound GPU/MPS memory on very large inputs.
image = Image.open(image_path)
image = ImageOps.exif_transpose(image)
orig_size = image.size # (width, height)
_tmp_path = None
if max(image.width, image.height) > max_dimension:
ratio = max_dimension / max(image.width, image.height)
if max_resolution > 0 and max(image.width, image.height) > max_resolution:
ratio = max_resolution / max(image.width, image.height)
new_size = (int(image.width * ratio), int(image.height * ratio))
if self._progress_callback:
self._progress_callback(
f"Downscaling {image.width}x{image.height} "
f"to {new_size[0]}x{new_size[1]} "
f"(model trained at {max_dimension}px)..."
f"(max-resolution cap {max_resolution}px)..."
)
image = image.resize(new_size, Image.Resampling.LANCZOS)