mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-10 04:43:54 +02:00
01fe98bf54
After 7 cascading upstream-compat fixes (insightface dep, peft dep, pm_version, device, etc.), the PhotoMaker V1 cert sweep still hit a CFG batch-dim mismatch inside the denoising loop. The upstream PhotoMaker `pipeline.py` is forked from diffusers v0.29.1 and our env runs 0.38; SDXL prompt-encoder handling changed significantly between those versions, so making PhotoMaker work end-to-end needs a proper fork or a diffusers downgrade — both expensive. Not worth shipping today. Pivot: restore `face_restore.py` (GFPGAN) with a single-line fix that makes it SynthID-safe by construction. The previous design ran GFPGAN.enhance on the ORIGINAL watermarked image and was oracle-confirmed to re-add SynthID via the weight-0.5 pixel blend. The fix is to run GFPGAN on the diffusion-CLEANED image — whatever pixels GFPGAN derives from are already SynthID-free, so the partial blend cannot transport the watermark. Identity fidelity is lower than a true identity-as-embedding stack would deliver, but it ships and works. Changes: - `src/remove_ai_watermarks/face_restore.py` restored from pre-wipe state with one line changed: `restorer.enhance(cleaned_bgr, ...)` instead of `restorer.enhance(original_bgr, ...)`. `original_bgr` is kept as an unused positional argument for API stability. - `src/remove_ai_watermarks/photomaker_restore.py` and its tests REMOVED. The research note (`docs/synthid-robust-identity-research.md`) keeps a "status notice" documenting why PhotoMaker is parked for now and what the path back in would look like. - `pyproject.toml` `restore` extra restored (gfpgan/facexlib/basicsr + scipy<1.18 + numba<0.60 pins + the basicsr setuptools<69 build pin), plus `photomaker` extra (with its einops/insightface/peft pile) and the `[tool.hatch.metadata] allow-direct-references = true` block REMOVED. - `InvisibleEngine._restore_faces_photomaker` removed; `_restore_faces` restored. The `--restore-faces` CLI flag and its plumbing through cmd_* signatures are unchanged. 
- CLAUDE.md, README.md, docs/synthid.md, docs/controlnet-removal-pipeline-research.md updated to describe the shipped GFPGAN-on-cleaned design and to reference PhotoMaker only as the parked alternative. ruff + strict pyright(src/) clean; 578 tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
409 lines
20 KiB
Python
409 lines
20 KiB
Python
"""Invisible watermark removal engine.
|
|
|
|
Wraps the vendored noai-watermark code for removing invisible AI watermarks
|
|
(SynthID, StableSignature, TreeRing) via diffusion-based regeneration.
|
|
|
|
This module requires the 'gpu' extra dependencies:
|
|
uv pip install 'remove-ai-watermarks[gpu]'
|
|
"""
|
|
|
|
# cv2/torch boundary: this engine wraps cv2 (resize/imwrite/cvtColor) and the
|
|
# humanizer, none of which carry usable element types; relax the unknown-type
|
|
# rules for this file only.
|
|
# pyright: reportUnknownMemberType=false, reportUnknownArgumentType=false, reportUnknownVariableType=false, reportUnknownParameterType=false, reportMissingTypeArgument=false, reportMissingTypeStubs=false, reportMissingImports=false, reportArgumentType=false, reportAssignmentType=false, reportReturnType=false, reportCallIssue=false, reportIndexIssue=false, reportOperatorIssue=false, reportOptionalMemberAccess=false, reportOptionalCall=false, reportOptionalSubscript=false, reportOptionalOperand=false, reportAttributeAccessIssue=false, reportPrivateImportUsage=false, reportPrivateUsage=false, reportInvalidTypeForm=false, reportConstantRedefinition=false, reportUnnecessaryComparison=false
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import warnings
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Callable
|
|
|
|
# Suppress verbose deprecation warnings from diffusers/transformers/huggingface_hub.
# The filters are registered in the same order as before: FutureWarning first,
# then the per-module UserWarning filters, then everything from transformers.
warnings.filterwarnings("ignore", category=FutureWarning)
for _quiet_module in ("huggingface_hub", "diffusers"):
    warnings.filterwarnings("ignore", category=UserWarning, module=_quiet_module)
del _quiet_module  # keep the loop variable out of the module namespace
warnings.filterwarnings("ignore", module="transformers")

# Suppress HuggingFace internal logging (set via the libraries' verbosity env vars).
os.environ.update({"TRANSFORMERS_VERBOSITY": "error", "DIFFUSERS_VERBOSITY": "error"})

logger = logging.getLogger(__name__)
|
|
|
|
|
|
def is_available() -> bool:
    """Report whether the invisible-watermark removal dependencies are importable.

    Only looks up the import specs for ``diffusers`` and ``torch`` (in that
    order, stopping at the first one that is missing); neither package is
    actually imported.
    """
    from importlib.util import find_spec

    return all(find_spec(package) is not None for package in ("diffusers", "torch"))
|
|
|
|
|
|
def _target_size(width: int, height: int, max_resolution: int, min_resolution: int = 0) -> tuple[int, int] | None:
|
|
"""Compute the (width, height) to process at, or None for native.
|
|
|
|
Two opposite long-side adjustments, in precedence order:
|
|
|
|
- ``max_resolution`` (cap): if the long side exceeds it, scale DOWN to it
|
|
(integer-truncated, matching the PIL ``resize`` call site). 0/negative = no
|
|
cap. Set only to bound GPU/MPS memory on very large inputs (issue #10).
|
|
- ``min_resolution`` (floor): else if the long side is below it, scale UP to it
|
|
(rounded) so SDXL img2img runs near its ~1024 training resolution instead of
|
|
degrading on a tiny latent (a 381x512 portrait distorts badly at native).
|
|
The output is restored to the original size by the caller, so the floor is a
|
|
transparent quality boost. 0 = no floor. Skipped on a ``min > max`` misconfig.
|
|
|
|
Returns None when neither applies (native resolution). Pure function so the
|
|
resolution decision is unit-testable without loading the diffusion model.
|
|
"""
|
|
long_side = max(width, height)
|
|
if max_resolution > 0 and long_side > max_resolution:
|
|
ratio = max_resolution / long_side
|
|
# Clamp the short side to >=1: extreme aspect ratios (e.g. 5000x3 capped
|
|
# at 1024) would otherwise truncate it to 0 and crash image.resize().
|
|
return (max(1, int(width * ratio)), max(1, int(height * ratio)))
|
|
if min_resolution > 0 and long_side < min_resolution and (max_resolution <= 0 or min_resolution <= max_resolution):
|
|
ratio = min_resolution / long_side
|
|
return (max(1, round(width * ratio)), max(1, round(height * ratio)))
|
|
return None
|
|
|
|
|
|
class InvisibleEngine:
    """Remove invisible AI watermarks using diffusion model regeneration.

    Based on noai-watermark by mertizci:
    https://github.com/mertizci/noai-watermark

    The approach encodes the image into latent space, injects controlled noise
    to break watermark patterns, and reconstructs via reverse diffusion.
    """

    # SDXL base is the default since May 2026; the vendor-adaptive strength
    # removes the current SynthID (see watermark_profiles + docs/synthid.md).
    DEFAULT_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"

    def __init__(
        self,
        model_id: str | None = None,
        device: str | None = None,
        pipeline: str = "default",
        hf_token: str | None = None,
        progress_callback: Callable[[str], None] | None = None,
        controlnet_conditioning_scale: float = 1.0,
    ) -> None:
        """Initialize the invisible watermark removal engine.

        Args:
            model_id: HuggingFace model ID. None = use the SDXL base default.
            device: Device for inference (auto/cpu/mps/cuda/xpu). None = auto.
            pipeline: Pipeline profile. "default" (plain SDXL img2img) or
                "controlnet" (SDXL + canny ControlNet that preserves text/face
                structure via edge conditioning while removing SynthID).
            hf_token: HuggingFace API token.
            progress_callback: Optional callback for progress messages.
            controlnet_conditioning_scale: ControlNet structure-preservation
                strength (controlnet pipeline only).
        """
        # Imported lazily so the module can be imported without the 'gpu' extra.
        from remove_ai_watermarks.noai.watermark_remover import WatermarkRemover

        effective_model = model_id or self.DEFAULT_MODEL_ID

        self._remover = WatermarkRemover(
            model_id=effective_model,
            device=device,
            progress_callback=progress_callback,
            hf_token=hf_token,
            pipeline=pipeline,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
        )
        self._progress_callback = progress_callback

    def preload(self) -> None:
        """Eagerly load the pipeline so download progress is visible."""
        self._remover.preload()

    def _esrgan_upscale(self, image: Any, target: tuple[int, int]) -> Any:
        """Upscale a PIL image to ``target`` with Real-ESRGAN, else Lanczos.

        Runs Real-ESRGAN at its native factor (on the remover's device, CPU fallback),
        then resizes to the exact ``target`` with Lanczos. Falls back to a plain Lanczos
        resize when the ``esrgan`` extra is absent or the model errors.

        Args:
            image: PIL image to upscale.
            target: Exact (width, height) the returned image must have.

        Returns:
            A PIL image of size ``target``.
        """
        import cv2
        import numpy as np
        from PIL import Image

        from remove_ai_watermarks import upscaler

        if not upscaler.is_available():
            logger.debug("esrgan upscaler requested but the extra is absent; using Lanczos")
            return image.resize(target, Image.Resampling.LANCZOS)
        try:
            bgr = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
            big = upscaler.upscale(bgr, device=self._remover.device)
            # numpy shape is (height, width, ...); target is (width, height).
            if (big.shape[1], big.shape[0]) != target:
                big = cv2.resize(big, target, interpolation=cv2.INTER_LANCZOS4)
            return Image.fromarray(cv2.cvtColor(big, cv2.COLOR_BGR2RGB))
        except Exception as e:  # never let an optional upscaler break removal
            logger.warning("Real-ESRGAN upscale failed (%s); using Lanczos", e)
            return image.resize(target, Image.Resampling.LANCZOS)

    def remove_watermark(
        self,
        image_path: Path,
        output_path: Path | None = None,
        strength: float | None = None,
        num_inference_steps: int = 100,
        guidance_scale: float | None = None,
        seed: int | None = None,
        humanize: float = 0.0,
        max_resolution: int = 0,
        min_resolution: int = 1024,
        vendor: str | None = None,
        restore_faces: bool = False,
        unsharp: float = 0.0,
        adaptive_polish: bool = False,
        upscaler: str = "lanczos",
    ) -> Path:
        """Remove invisible watermark from an image.

        Args:
            image_path: Path to the watermarked image.
            output_path: Output path (None = overwrite source).
            strength: Denoising strength (0.0-1.0). None -> the vendor-adaptive
                default.
            num_inference_steps: Number of denoising steps.
            guidance_scale: Classifier-free guidance scale.
            seed: Random seed for reproducibility.
            humanize: Intensity of Analog Humanizer film grain (0 = off).
            max_resolution: Cap the long side (px) before diffusion. 0 (default)
                = no cap. Set a positive value only to bound GPU/MPS memory on
                very large inputs (it reintroduces a lossy downscale->upscale
                round-trip).
            min_resolution: Upscale the long side UP to this (px) before diffusion
                when the input is smaller, so SDXL runs near its ~1024 training
                resolution (small inputs degrade/distort badly at native). 1024
                (default) = on; 0 = off. The output is restored to the original
                input size, so this is a transparent quality boost; it adds time
                and memory on small inputs. Ignored on a min > max misconfig.
            vendor: Forwarded unchanged to the underlying remover (presumably
                the watermark-vendor hint behind the vendor-adaptive strength
                -- confirm against WatermarkRemover).
            restore_faces: EXPERIMENTAL, opt-in (default False). Run the GFPGAN
                face-polish post-pass when faces are present (needs the ``restore``
                extra). Runs on the diffusion-CLEANED image (not the original), so
                SynthID is not re-introduced. Auto-skips with a debug log when the
                extra is absent or no face is detected.
            unsharp: Final unsharp-mask sharpening strength (0 = off, default).
                Applied after face restoration to counter the soft,
                over-smoothed look of the diffusion + restoration; ~0.5-0.8 is a
                safe range, higher risks edge halos.
            adaptive_polish: When True (the --auto mode default), restore the input's
                detail level in the softened output instead of fixed unsharp/humanize:
                a capped unsharp + edge-masked grain targeting the input's Laplacian
                variance (self-limiting on text/graphics). Runs LAST, after face
                restoration. The fixed ``humanize``/``unsharp`` knobs are normally 0
                when this is on.
            upscaler: How to upscale a small input to the ``min_resolution`` floor:
                ``"lanczos"`` (default, no deps) or ``"esrgan"`` (Real-ESRGAN
                via the ``esrgan`` extra). Only applies when UPscaling (the floor
                case); a ``max_resolution`` downscale always uses Lanczos. Falls back
                to Lanczos if the extra is absent.

        Returns:
            Path to the cleaned image.
        """
        import tempfile

        from PIL import Image, ImageOps

        # Resolution policy: a max_resolution cap (0 = none) bounds memory on huge
        # inputs, and a min_resolution floor (1024 = default) upscales tiny inputs so
        # SDXL img2img runs near its ~1024 training size instead of distorting on a
        # tiny latent (a 381x512 portrait wrecks at native -- issue #36 follow-up).
        # The output is restored to orig_size below, so the floor is transparent.
        image = Image.open(image_path)
        image = ImageOps.exif_transpose(image)
        orig_size = image.size  # (width, height)
        # Full-res original, kept for the adaptive-polish detail target (image is
        # reassigned to the resized copy below; PIL resize returns a new object).
        reference_pil = image

        target = _target_size(image.width, image.height, max_resolution, min_resolution)
        if target is not None:
            upscaling = max(target) > max(image.width, image.height)
            if self._progress_callback:
                reason = (
                    f"min-resolution floor {min_resolution}px"
                    if upscaling
                    else f"max-resolution cap {max_resolution}px"
                )
                verb = "Upscaling" if upscaling else "Downscaling"
                self._progress_callback(f"{verb} {image.width}x{image.height} to {target[0]}x{target[1]} ({reason})...")
            # Real-ESRGAN only helps when UPscaling (the floor case); a downscale cap
            # always uses Lanczos. _esrgan_upscale falls back to Lanczos if the extra is absent.
            if upscaling and upscaler == "esrgan":
                image = self._esrgan_upscale(image, target)
            else:
                image = image.resize(target, Image.Resampling.LANCZOS)

        # Always persist to a temp file, even without resizing: WatermarkRemover
        # reloads by path, so the EXIF-transposed pixels must be saved or rotation
        # is lost. mkstemp hands back an open descriptor; close it right away and
        # do the save inside the try so a failing save can leak neither the
        # descriptor nor the temp file.
        _tmp_fd, _tmp_str = tempfile.mkstemp(suffix=image_path.suffix)
        os.close(_tmp_fd)
        _tmp_path = Path(_tmp_str)

        try:
            image.save(_tmp_path)

            # NOTE(review): the remover only ever sees the temp path. With
            # output_path=None, "overwrite source" therefore depends on how
            # WatermarkRemover resolves None -- confirm the result is not written
            # to (and then deleted together with) the temp file.
            out_path = self._remover.remove_watermark(
                image_path=_tmp_path,
                output_path=output_path,
                strength=strength,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                seed=seed,
                vendor=vendor,
            )

            import cv2

            from remove_ai_watermarks import image_io

            # Post-processing: optional Humanizer, then restore original resolution.
            out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)
            if out_cv is None:
                # Every later pass re-reads out_path and would skip as well, so
                # stop here and hand back whatever the remover produced.
                logger.warning("could not read cleaned output %s; skipping post-processing", out_path)
                return out_path

            needs_write = False
            if humanize > 0.0:
                if self._progress_callback:
                    self._progress_callback(f"Applying Analog Humanizer (grain: {humanize})...")
                from remove_ai_watermarks.humanizer import apply_analog_humanizer

                out_cv = apply_analog_humanizer(out_cv, grain_intensity=humanize, chromatic_shift=1)
                needs_write = True

            # Restore original resolution (undoes the cap or the floor; the
            # progress text says "Upscaling" in both directions).
            if (out_cv.shape[1], out_cv.shape[0]) != orig_size:
                if self._progress_callback:
                    self._progress_callback(
                        f"Upscaling result back to original resolution {orig_size[0]}x{orig_size[1]}..."
                    )
                # Using INTER_LANCZOS4 for high-quality resampling back to original
                out_cv = cv2.resize(out_cv, orig_size, interpolation=cv2.INTER_LANCZOS4)
                needs_write = True

            # Only rewrite the file when a step above actually changed the pixels.
            if needs_write:
                image_io.imwrite(out_path, out_cv)

            # Optional GFPGAN face-polish post-pass: sharpens and re-synthesizes each
            # face from GFPGAN's StyleGAN2 prior, running on the DIFFUSION-CLEANED image
            # (not the original) -- so SynthID is not re-introduced (the input pixels
            # GFPGAN derives from are already SynthID-free). Auto-skips when faces are
            # absent or the optional `restore` extra is not installed.
            if restore_faces:
                self._restore_faces(out_path)

            # Sharpening runs after face restoration so it crisps the face-restored
            # result too (a pre-restore sharpen would be smoothed back over by the
            # face pass). Re-read from disk: _restore_faces may have rewritten it.
            if unsharp > 0.0:
                from remove_ai_watermarks.humanizer import unsharp_mask

                out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)
                if out_cv is not None:
                    if self._progress_callback:
                        self._progress_callback(f"Sharpening (unsharp mask: {unsharp})...")
                    image_io.imwrite(out_path, unsharp_mask(out_cv, amount=unsharp))

            # Adaptive polish (--auto): restore the input's detail level in the softened
            # output, sparing text/edges. Replaces the fixed unsharp/humanize knobs.
            if adaptive_polish:
                import numpy as np

                from remove_ai_watermarks import humanizer

                out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)
                if out_cv is not None:
                    ref = cv2.cvtColor(np.array(reference_pil.convert("RGB")), cv2.COLOR_RGB2BGR)
                    # The reference must match the output's size before it is
                    # handed to adaptive_polish.
                    if (ref.shape[1], ref.shape[0]) != (out_cv.shape[1], out_cv.shape[0]):
                        ref = cv2.resize(ref, (out_cv.shape[1], out_cv.shape[0]), interpolation=cv2.INTER_LANCZOS4)
                    if self._progress_callback:
                        self._progress_callback("Adaptive polish (sharpen + grain to the input's detail level)...")
                    image_io.imwrite(out_path, humanizer.adaptive_polish(out_cv, ref, seed=seed))

            return out_path
        finally:
            # The temp file exists from mkstemp onwards; remove it on every exit path.
            _tmp_path.unlink(missing_ok=True)

    def _restore_faces(self, out_path: Path) -> None:
        """Run the GFPGAN face-polish post-pass on the cleaned ``out_path``.

        SynthID-safe: GFPGAN is run on the diffusion-CLEANED image (not the original),
        so the partial pixel-blend it does at fidelity weight 0.5 cannot re-introduce
        the watermark -- the input pixels GFPGAN derives from are already SynthID-free.
        Best-effort: any failure logs a warning and leaves the un-restored cleaned
        output in place; a missing ``restore`` extra is logged at debug and skipped
        (the flag must never error when the extra is absent or no face is present).

        Args:
            out_path: Cleaned image on disk; overwritten in place with the
                face-restored result when the pass succeeds.
        """
        from remove_ai_watermarks import face_restore

        if not face_restore.is_available():
            logger.debug("restore_faces requested but the 'restore' extra is not installed; skipping")
            return

        try:
            import cv2

            from remove_ai_watermarks import image_io

            cleaned_bgr = image_io.imread(out_path, cv2.IMREAD_COLOR)
            if cleaned_bgr is None:
                logger.warning("restore_faces: could not read cleaned output %s; skipping", out_path)
                return

            if self._progress_callback:
                self._progress_callback("Polishing face identity (GFPGAN on cleaned image)...")
            # original_bgr is unused (GFPGAN runs on cleaned_bgr); pass an empty array
            # for positional API stability with the legacy signature.
            import numpy as np

            restored = face_restore.restore_faces(np.empty((0, 0, 3), dtype=np.uint8), cleaned_bgr)
            image_io.imwrite(out_path, restored)
        except Exception as e:  # deliberate best-effort: never fail the removal
            logger.warning("restore_faces post-pass failed (%s); keeping un-restored output", e)

    def remove_watermark_batch(
        self,
        input_dir: Path,
        output_dir: Path,
        strength: float | None = None,
        steps: int = 50,
    ) -> list[Path]:
        """Remove invisible watermarks from all images in a directory.

        Thin delegation to the underlying remover's batch method; none of the
        post-processing done by ``remove_watermark`` (resolution floor/cap,
        humanizer, face restore, sharpening) is applied here.

        Args:
            input_dir: Directory containing the watermarked images.
            output_dir: Directory the cleaned images are written to.
            strength: Denoising strength, forwarded unchanged (None is passed
                through as-is).
            steps: Number of denoising steps, forwarded as
                ``num_inference_steps``.

        Returns:
            Whatever the underlying remover returns (annotated as the list of
            output paths).
        """
        return self._remover.remove_watermark_batch(
            input_dir=input_dir,
            output_dir=output_dir,
            strength=strength,
            num_inference_steps=steps,
        )
|