mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-10 12:53:56 +02:00
feat(auto): adaptive detail-targeting polish + --adaptive-polish flag
The fixed mild auto polish (unsharp 0.5 / grain 2.0) under-corrected soft photo/face output (gemini_3 stayed at lap-var 84 vs its 592 original) and its grain speckled small text. Replace it with humanizer.adaptive_polish: target the input's Laplacian variance with a capped unsharp scaled to the deficit + edge-masked grain (smooth regions only), calibrated by a short sigma search. Self-limiting on text/graphics -- already high-frequency, so almost no polish lands and text edges are masked out. Validated on the spaces corpus (gemini_3 84 -> 334 end-to-end; openai_1 text near-untouched). Interface: every --auto decision is now independently overridable -- add --adaptive-polish/--no-adaptive-polish (matching --restore-faces; works without --auto too) so the polish can be disabled or used manually. _apply_auto overrides exactly the three content-adaptive modes (pipeline, restore-faces, adaptive-polish); --unsharp/--humanize stay independent fixed filters. cv2-only, no new deps. Threaded through invisible/all (not batch). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -9,16 +9,18 @@ host (image work there OOM-crashes the container).
|
||||
Routing is **quality-priority**: ControlNet (text/face-structure preservation) is the
|
||||
default; it is only skipped for a clearly structure-less image (no face, no text,
|
||||
near-zero edges), where plain SDXL is cheaper and just as good. GFPGAN face
|
||||
restoration is enabled when a face is present. A mild sharpen + grain polish is added
|
||||
when a smoothing pass (controlnet or face restore) ran, to counter the over-smoothed
|
||||
"AI look".
|
||||
restoration is enabled when a face is present. When a smoothing pass (controlnet or
|
||||
face restore) ran, the **adaptive polish** (``humanizer.adaptive_polish``) restores
|
||||
the input's detail level -- a capped unsharp + edge-masked grain targeting the input's
|
||||
Laplacian variance -- to counter the over-smoothed "AI look". It is self-limiting on
|
||||
text/graphics (already high-frequency, so almost no polish) and spares text/edges by
|
||||
masking the grain.
|
||||
|
||||
Detection is **cv2-only and torch-free**: OpenCV YuNet (``cv2.FaceDetectorYN``) for
|
||||
faces -- a 232 KB MIT-licensed model bundled in ``assets/`` -- plus a Canny
|
||||
edge-density + MSER region heuristic for text/structure. The whole planner peaks
|
||||
~100 MB RSS in a few ms, so it adds nothing meaningful to a GPU run and runs anywhere
|
||||
the pipeline runs. (Phase 1 applies a fixed mild polish; an adaptive Laplacian-variance
|
||||
polish that measures the OUTPUT is a later phase.)
|
||||
the pipeline runs.
|
||||
|
||||
The text heuristic is a deliberately rough Phase-1 placeholder (DBNet via cv2.dnn is
|
||||
the planned precision upgrade); it only ever ADDS controlnet, so a miss is backstopped
|
||||
@@ -54,10 +56,9 @@ _FACE_SCORE = 0.6 # YuNet confidence for a face to count
|
||||
# ~10px, and this bounds YuNet/MSER cost on huge inputs). Removal runs at full res.
|
||||
_DETECT_MAX_SIDE = 1024
|
||||
|
||||
# Auto polish applied only when a smoothing pass ran (controlnet or face restore),
|
||||
# to counter the soft "AI look". Conservative defaults; the user can override.
|
||||
_AUTO_UNSHARP = 0.5
|
||||
_AUTO_HUMANIZE = 2.0
|
||||
# When a smoothing pass ran (controlnet or face restore), the adaptive polish
|
||||
# (humanizer.adaptive_polish) restores the input's detail level, sparing text --
|
||||
# replacing the old fixed unsharp/grain which over-/under-corrected and speckled text.
|
||||
_UPSCALE_FLOOR = 1024
|
||||
|
||||
_YUNET_ASSET = "face_detection_yunet_2023mar.onnx" # MIT (Shiqi Yu), OpenCV Zoo
|
||||
@@ -70,7 +71,8 @@ class AutoConfig:
|
||||
|
||||
pipeline: str # "default" | "controlnet"
|
||||
restore_faces: bool
|
||||
unsharp: float
|
||||
adaptive_polish: bool # restore the input's detail level (sharpen + masked grain), sparing text
|
||||
unsharp: float # fixed-polish knobs, 0 in auto (the adaptive polish replaces them)
|
||||
humanize: float
|
||||
min_resolution: int
|
||||
# signals retained for logging / debugging a bad pick
|
||||
@@ -88,7 +90,12 @@ class AutoConfig:
|
||||
bits.append("text")
|
||||
bits.append(f"edges={self.edge_density:.3f}")
|
||||
rf = ", face-restore on" if self.restore_faces else ""
|
||||
polish = f", unsharp {self.unsharp}/grain {self.humanize}" if (self.unsharp or self.humanize) else ""
|
||||
if self.adaptive_polish:
|
||||
polish = ", adaptive polish"
|
||||
elif self.unsharp or self.humanize:
|
||||
polish = f", unsharp {self.unsharp}/grain {self.humanize}"
|
||||
else:
|
||||
polish = ""
|
||||
return f"{'+'.join(bits)} -> {self.pipeline} pipeline{rf}{polish}"
|
||||
|
||||
|
||||
@@ -196,8 +203,9 @@ def plan(image_path: Path) -> AutoConfig | None:
|
||||
cfg = AutoConfig(
|
||||
pipeline=pipeline,
|
||||
restore_faces=restore_faces,
|
||||
unsharp=_AUTO_UNSHARP if smoothing else 0.0,
|
||||
humanize=_AUTO_HUMANIZE if smoothing else 0.0,
|
||||
adaptive_polish=smoothing, # adaptive (detail-targeted) polish when a smoothing pass ran
|
||||
unsharp=0.0,
|
||||
humanize=0.0,
|
||||
min_resolution=_UPSCALE_FLOOR,
|
||||
has_face=has_face,
|
||||
has_text=has_text,
|
||||
|
||||
@@ -163,8 +163,18 @@ _auto_option = click.option(
|
||||
"--auto",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Auto-pick quality modes (pipeline, face restore, sharpen/grain) from image content. "
|
||||
"Explicit flags override. EXPERIMENTAL.",
|
||||
help="Auto-pick the pipeline, face restore, and adaptive polish from image content. "
|
||||
"Every choice is overridable -- an explicit --pipeline / --restore-faces / --adaptive-polish "
|
||||
"always wins. EXPERIMENTAL.",
|
||||
)
|
||||
|
||||
_adaptive_polish_option = click.option(
|
||||
"--adaptive-polish/--no-adaptive-polish",
|
||||
default=False,
|
||||
help="Restore the input's detail level after removal (capped unsharp + edge-masked grain "
|
||||
"targeting the input's sharpness, sparing text). On by default under --auto; pass "
|
||||
"--no-adaptive-polish to disable it there, or --adaptive-polish to use it without --auto. "
|
||||
"Independent of the fixed --unsharp/--humanize. EXPERIMENTAL.",
|
||||
)
|
||||
|
||||
|
||||
@@ -173,19 +183,19 @@ def _apply_auto(
|
||||
source: Path,
|
||||
pipeline: str,
|
||||
restore_faces: bool,
|
||||
unsharp: float,
|
||||
humanize: float,
|
||||
) -> tuple[str, bool, float, float]:
|
||||
"""Resolve ``--auto``: plan modes from the image, overriding only the flags the
|
||||
user left at their default (an explicit flag always wins). Returns the resolved
|
||||
``(pipeline, restore_faces, unsharp, humanize)`` and prints the chosen plan.
|
||||
adaptive_polish: bool,
|
||||
) -> tuple[str, bool, bool]:
|
||||
"""Resolve ``--auto``: plan the three content-adaptive modes (pipeline, face
|
||||
restore, adaptive polish) from the image, overriding only the ones the user left
|
||||
at their default (an explicit flag always wins). The fixed ``--unsharp``/
|
||||
``--humanize`` filters are independent and untouched. Prints the chosen plan.
|
||||
"""
|
||||
from remove_ai_watermarks import auto_config
|
||||
|
||||
cfg = auto_config.plan(source)
|
||||
if cfg is None:
|
||||
console.print(" Auto: could not read image; using defaults")
|
||||
return pipeline, restore_faces, unsharp, humanize
|
||||
return pipeline, restore_faces, adaptive_polish
|
||||
|
||||
def _is_default(name: str) -> bool:
|
||||
return ctx.get_parameter_source(name) == click.core.ParameterSource.DEFAULT
|
||||
@@ -194,12 +204,10 @@ def _apply_auto(
|
||||
pipeline = cfg.pipeline
|
||||
if _is_default("restore_faces"):
|
||||
restore_faces = cfg.restore_faces
|
||||
if _is_default("unsharp"):
|
||||
unsharp = cfg.unsharp
|
||||
if _is_default("humanize"):
|
||||
humanize = cfg.humanize
|
||||
if _is_default("adaptive_polish"):
|
||||
adaptive_polish = cfg.adaptive_polish
|
||||
console.print(f" Auto: {cfg.reason}")
|
||||
return pipeline, restore_faces, unsharp, humanize
|
||||
return pipeline, restore_faces, adaptive_polish
|
||||
|
||||
|
||||
def _restore_faces_options(f: Any) -> Any:
|
||||
@@ -550,6 +558,7 @@ def cmd_erase(
|
||||
@_min_resolution_option
|
||||
@_unsharp_option
|
||||
@_auto_option
|
||||
@_adaptive_polish_option
|
||||
@click.pass_context
|
||||
def cmd_invisible(
|
||||
ctx: click.Context,
|
||||
@@ -569,6 +578,7 @@ def cmd_invisible(
|
||||
restore_faces: bool,
|
||||
restore_faces_weight: float,
|
||||
auto: bool,
|
||||
adaptive_polish: bool,
|
||||
) -> None:
|
||||
"""Remove invisible AI watermarks (SynthID, StableSignature, TreeRing).
|
||||
|
||||
@@ -587,9 +597,7 @@ def cmd_invisible(
|
||||
|
||||
source = _validate_image(source)
|
||||
if auto:
|
||||
pipeline, restore_faces, unsharp, humanize = _apply_auto(
|
||||
ctx, source, pipeline, restore_faces, unsharp, humanize
|
||||
)
|
||||
pipeline, restore_faces, adaptive_polish = _apply_auto(ctx, source, pipeline, restore_faces, adaptive_polish)
|
||||
if output is None:
|
||||
output = source.with_stem(source.stem + "_clean")
|
||||
|
||||
@@ -623,6 +631,7 @@ def cmd_invisible(
|
||||
seed=seed,
|
||||
humanize=humanize,
|
||||
unsharp=unsharp,
|
||||
adaptive_polish=adaptive_polish,
|
||||
max_resolution=max_resolution,
|
||||
min_resolution=min_resolution,
|
||||
vendor=vendor,
|
||||
@@ -807,6 +816,7 @@ def cmd_identify(ctx: click.Context, source: Path, no_visible: bool, as_json: bo
|
||||
@_min_resolution_option
|
||||
@_unsharp_option
|
||||
@_auto_option
|
||||
@_adaptive_polish_option
|
||||
@click.pass_context
|
||||
def cmd_all(
|
||||
ctx: click.Context,
|
||||
@@ -829,6 +839,7 @@ def cmd_all(
|
||||
restore_faces: bool,
|
||||
restore_faces_weight: float,
|
||||
auto: bool,
|
||||
adaptive_polish: bool,
|
||||
) -> None:
|
||||
"""Remove ALL watermarks: visible + invisible + metadata.
|
||||
|
||||
@@ -844,9 +855,7 @@ def cmd_all(
|
||||
_banner()
|
||||
source = _validate_image(source)
|
||||
if auto:
|
||||
pipeline, restore_faces, unsharp, humanize = _apply_auto(
|
||||
ctx, source, pipeline, restore_faces, unsharp, humanize
|
||||
)
|
||||
pipeline, restore_faces, adaptive_polish = _apply_auto(ctx, source, pipeline, restore_faces, adaptive_polish)
|
||||
|
||||
if output is None:
|
||||
output = source.with_stem(source.stem + "_clean")
|
||||
@@ -929,6 +938,7 @@ def cmd_all(
|
||||
seed=seed,
|
||||
humanize=humanize,
|
||||
unsharp=unsharp,
|
||||
adaptive_polish=adaptive_polish,
|
||||
max_resolution=max_resolution,
|
||||
min_resolution=min_resolution,
|
||||
vendor=vendor,
|
||||
|
||||
@@ -82,3 +82,87 @@ def unsharp_mask(image: NDArray, amount: float = 0.5, sigma: float = 1.0) -> NDA
|
||||
blurred = cv2.GaussianBlur(img_f, (0, 0), sigmaX=sigma, sigmaY=sigma)
|
||||
sharpened = cv2.addWeighted(img_f, 1.0 + amount, blurred, -amount, 0.0)
|
||||
return np.clip(sharpened, 0, 255).astype(np.uint8)
|
||||
|
||||
|
||||
# ── Adaptive polish (target the input's detail level; spare text) ──────────────
|
||||
# A capped unsharp scaled to the sharpness deficit, then edge-masked grain to close
|
||||
# the rest -- tunable constants. Validated 2026-06-03 on the spaces corpus: a soft
|
||||
# gemini_3 face/photo (lap-var 84 vs the 592 of its original) is pulled up to ~327
|
||||
# with full polish, while a sharp openai_1 text card (1175 vs 1644) gets near-zero
|
||||
# (the deficit is tiny) so text is left alone -- the polish self-limits on text.
|
||||
_ADAPTIVE_MAX_UNSHARP = 1.0
|
||||
_ADAPTIVE_UNSHARP_GAIN = 0.4 # unsharp amount per unit of (deficit - 1), before the cap
|
||||
_ADAPTIVE_MAX_GRAIN = 8.0
|
||||
_MASK_EDGE_PERCENTILE = 85.0 # local-energy percentile above which a pixel is an "edge/text"
|
||||
_MASK_EDGE_DILATE = 5 # grow the edge mask so grain is suppressed in a margin around text
|
||||
_MASK_GAMMA = 2.0 # push the smooth weight toward 0 except in genuinely flat areas
|
||||
|
||||
|
||||
def _to_gray(image: NDArray) -> NDArray:
    """Return a single-channel view of ``image``.

    A 2D array is treated as already-grayscale and handed back as the very same
    object (no copy); anything else is converted with ``cv2.COLOR_BGR2GRAY``.
    """
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
|
||||
def _laplacian_variance(image: NDArray) -> float:
    """Return the variance of the image's Laplacian as a plain ``float``.

    Used as a cheap proxy for how much high-frequency detail (sharpness) the
    image carries. The Laplacian is taken on the grayscale image at 64-bit float
    depth.
    """
    laplacian = cv2.Laplacian(_to_gray(image), cv2.CV_64F)
    return float(laplacian.var())
|
||||
|
||||
|
||||
def _smooth_grain_mask(image: NDArray) -> NDArray:
    """Build a per-pixel grain weight: ~1 in flat regions, ~0 over text and hard edges.

    Grain in smooth ("AI-plastic") regions reads as natural sensor noise, whereas
    grain over text/edges only speckles them -- so the weight confines grain to
    the smooth regions.
    """
    # Local edge energy: blurred magnitude of the grayscale Laplacian.
    gray_f = _to_gray(image).astype(np.float32)
    lap_magnitude = np.abs(cv2.Laplacian(gray_f, cv2.CV_32F))
    energy = cv2.GaussianBlur(lap_magnitude, (0, 0), sigmaX=2.0)

    # Pixels above the percentile threshold count as edge/text; dilate them so a
    # margin around each edge is excluded as well.
    thr = float(np.percentile(energy, _MASK_EDGE_PERCENTILE))
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (_MASK_EDGE_DILATE, _MASK_EDGE_DILATE))
    edge_pixels = cv2.dilate((energy > thr).astype(np.uint8), kernel) > 0

    # Soft weight falling from 1 (no energy) to 0 (at the threshold); the gamma
    # pushes it toward 0 everywhere except genuinely flat areas.
    weight = np.clip(1.0 - energy / (thr + 1e-6), 0.0, 1.0) ** _MASK_GAMMA
    weight[edge_pixels] = 0.0

    # Feather the hard cut-outs so the grain fades in rather than stopping abruptly.
    return cv2.GaussianBlur(weight, (0, 0), sigmaX=1.5)
|
||||
|
||||
|
||||
def adaptive_polish(image: NDArray, reference: NDArray, seed: int | None = None) -> NDArray:
    """Restore the detail level of ``reference`` in a softened ``image``, sparing text.

    Diffusion + face restoration leave an over-smoothed "AI-plastic" look, worst on
    photo/face regions. This targets the reference's Laplacian variance (the input's
    detail level): a capped unsharp scaled to the deficit, then edge-masked grain
    (smooth regions only) calibrated to close the remaining gap. **Self-limiting on
    text/graphics** -- they are already high-frequency, so the deficit is small and
    almost no polish is applied (text legibility is a generation-side concern, not a
    filter one). No-op when the image already meets the reference's detail level.

    Args:
        image: the cleaned output (uint8). Normally BGR; a 2D grayscale array is
            also accepted by the grain step (the same luminance grain is added to
            every channel of a colour image, or to the single plane of a gray one).
        reference: the original input at the same resolution (the detail target).
            Only its Laplacian variance is read.
        seed: optional RNG seed for reproducible grain.

    Returns:
        Polished image (uint8) with the same shape as ``image``.
    """
    target = _laplacian_variance(reference)
    current = _laplacian_variance(image)
    # Nothing to restore: a featureless reference, or the output is already as
    # detailed as the input.
    if target <= 0.0 or current >= target:
        return image.copy()

    # Ratio of wanted to present detail; the floor of 1.0 on the denominator keeps
    # a near-flat output from producing an enormous ratio.
    deficit = target / max(current, 1.0)
    amount = min(_ADAPTIVE_MAX_UNSHARP, _ADAPTIVE_UNSHARP_GAIN * (deficit - 1.0))
    work = unsharp_mask(image, amount=amount, sigma=1.2) if amount > 0.0 else image.copy()
    if _laplacian_variance(work) >= target:
        return work

    # Calibrate the grain sigma by a short search: its lap-var contribution depends on
    # the per-pixel mask (no closed form), so step it up until the target is met. A few
    # full-image Laplacians here are negligible against the diffusion pass that precedes.
    mask = _smooth_grain_mask(work)
    noise = np.random.default_rng(seed).normal(0.0, 1.0, work.shape[:2]).astype(np.float32) * mask
    if work.ndim == 3:
        # Colour image: add a channel axis so one grain plane broadcasts over all
        # channels. A 2D grayscale image takes the plane as-is -- unconditionally
        # adding the axis would broadcast (H, W) + (H, W, 1) into the wrong shape.
        noise = noise[:, :, np.newaxis]
    base = work.astype(np.float32)  # loop-invariant; converted once
    best = work
    sigma = 2.0
    while sigma <= _ADAPTIVE_MAX_GRAIN:
        best = np.clip(base + noise * sigma, 0.0, 255.0).astype(np.uint8)
        if _laplacian_variance(best) >= target:
            break
        sigma += 1.0
    # If the cap is reached without meeting the target, the strongest-grain result
    # is returned as the best effort.
    return best
|
||||
|
||||
@@ -141,6 +141,7 @@ class InvisibleEngine:
|
||||
restore_faces: bool = False,
|
||||
restore_faces_weight: float = 0.5,
|
||||
unsharp: float = 0.0,
|
||||
adaptive_polish: bool = False,
|
||||
) -> Path:
|
||||
"""Remove invisible watermark from an image.
|
||||
|
||||
@@ -163,6 +164,12 @@ class InvisibleEngine:
|
||||
Applied last (after face restoration) to counter the soft,
|
||||
over-smoothed look of the diffusion/GFPGAN passes; ~0.5-0.8 is a
|
||||
safe range, higher risks edge halos.
|
||||
adaptive_polish: When True (the --auto mode default), restore the input's
|
||||
detail level in the softened output instead of fixed unsharp/humanize:
|
||||
a capped unsharp + edge-masked grain targeting the input's Laplacian
|
||||
variance (self-limiting on text/graphics). Runs LAST, after face
|
||||
restoration. The fixed ``humanize``/``unsharp`` knobs are normally 0
|
||||
when this is on.
|
||||
max_resolution: Cap the long side (px) before diffusion. 0 (default)
|
||||
= no cap. Set a positive value only to bound GPU/MPS memory on
|
||||
very large inputs (it reintroduces a lossy downscale->upscale
|
||||
@@ -189,6 +196,9 @@ class InvisibleEngine:
|
||||
image = Image.open(image_path)
|
||||
image = ImageOps.exif_transpose(image)
|
||||
orig_size = image.size # (width, height)
|
||||
# Full-res original, kept for the adaptive-polish detail target (image is
|
||||
# reassigned to the resized copy below; PIL resize returns a new object).
|
||||
reference_pil = image
|
||||
|
||||
target = _target_size(image.width, image.height, max_resolution, min_resolution)
|
||||
if target is not None:
|
||||
@@ -287,6 +297,23 @@ class InvisibleEngine:
|
||||
self._progress_callback(f"Sharpening (unsharp mask: {unsharp})...")
|
||||
image_io.imwrite(out_path, unsharp_mask(out_cv, amount=unsharp))
|
||||
|
||||
# Adaptive polish (--auto): restore the input's detail level in the softened
|
||||
# output, sparing text/edges. Replaces the fixed unsharp/humanize knobs.
|
||||
if adaptive_polish:
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from remove_ai_watermarks import humanizer, image_io
|
||||
|
||||
out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)
|
||||
if out_cv is not None:
|
||||
ref = cv2.cvtColor(np.array(reference_pil.convert("RGB")), cv2.COLOR_RGB2BGR)
|
||||
if (ref.shape[1], ref.shape[0]) != (out_cv.shape[1], out_cv.shape[0]):
|
||||
ref = cv2.resize(ref, (out_cv.shape[1], out_cv.shape[0]), interpolation=cv2.INTER_LANCZOS4)
|
||||
if self._progress_callback:
|
||||
self._progress_callback("Adaptive polish (sharpen + grain to the input's detail level)...")
|
||||
image_io.imwrite(out_path, humanizer.adaptive_polish(out_cv, ref, seed=seed))
|
||||
|
||||
return out_path
|
||||
finally:
|
||||
# _tmp_path is always set above (we persist the image unconditionally).
|
||||
|
||||
Reference in New Issue
Block a user