mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-10 20:57:47 +02:00
2fcd00ced0
Nine findings from a high-effort project-wide review, fixed and verified (571 passed, ruff/pyright clean): Correctness: - all/batch now remove Doubao/Jimeng/Samsung visible text marks: the visible step routes through the registry (new cli._remove_visible_auto) instead of a hardcoded GeminiEngine, so they no longer leave the wordmark intact. - batch always reads the original source (dropped the out_path-reuse that re-processed already-cleaned outputs on a re-run). - img2img_runner only retries the diffusion call on the deprecated-callback TypeError; any other TypeError now propagates instead of double-running. - gemini detect/remove and the reverse-alpha engines normalize channels via a new image_io.to_bgr, fixing a grayscale/BGRA crash in the FP-gate path. - _png_late_metadata advances its cursor by the clamped length, so a malformed chunk length no longer aborts the late AI-label scan. Cleanup / efficiency: - Consolidate the ~90%-identical Doubao/Jimeng/Samsung engines into a shared config-driven _text_mark_engine.TextMarkEngine base; each engine is now a thin subclass (TextMarkConfig + test shims). Behavior is byte-exact (the three engine test suites pass unchanged). Registry adapters collapse to one _text_mark(...) row each. Gemini stays a separate engine. - scan_head is memoized per (path, size, mtime), so identify() reads the file head once instead of ~8 times. - invisible_engine post-processing decodes/encodes the output once (chained in memory) instead of 2-4 times across stages. - Remove the orphaned get_model_id_for_profile (+ CONTROLNET_PROFILE); derive the --strength help from the strength constants (strength_default_help) so it cannot drift; share the --pipeline/--strength click options; simplify the retired --auto resolver. Net -835 lines. Tests added for the registry-routed visible pass, to_bgr, the polish/model/guidance wiring, and strength_default_help. 
CLAUDE.md updated for the new base module, the engine/registry changes, image_io.to_bgr, and the scan_head cache. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
110 lines
4.2 KiB
Python
110 lines
4.2 KiB
Python
"""Jimeng / Dreamina visible watermark removal engine.
|
|
|
|
Jimeng (即梦AI, ByteDance) stamps generated images with a visible "★ 即梦AI" wordmark
|
|
in the bottom-right corner -- a near-white semi-transparent overlay, the same overlay
|
|
class as the Doubao text strip.
|
|
|
|
Removal is **reverse-alpha blending** against a captured alpha map
|
|
(``original = (wm - a*logo)/(1-a)``), always NCC-aligned to the actual mark plus a thin
|
|
residual inpaint over the glyph footprint. This is one of the three text-mark engines
|
|
that share :class:`remove_ai_watermarks._text_mark_engine.TextMarkEngine`; this module
|
|
supplies only Jimeng's tuned :class:`TextMarkConfig` (bottom-right corner,
|
|
``assets/jimeng_alpha.png`` rebuilt by ``scripts/visible_alpha_solve.py`` from the gray
|
|
capture). Jimeng images are also caught by the China TC260 AIGC metadata label, so this
|
|
is the visible-mark *removal* path, not a new ``identify`` signal.
|
|
"""
|
|
# The module-level _alpha_template / _glyph_silhouette / _template_match_score below
|
|
# are thin test-facing shims (imported by tests/), so pyright's src-only pass sees them
|
|
# as unused; the use is cross-module.
|
|
# pyright: reportUnusedFunction=false
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
from remove_ai_watermarks import _text_mark_engine
|
|
from remove_ai_watermarks._text_mark_engine import TextMarkConfig, TextMarkDetection, TextMarkEngine
|
|
|
|
if TYPE_CHECKING:
|
|
from numpy.typing import NDArray
|
|
|
|
# Geometry used to locate the mark, every value expressed as a fraction of the image
# WIDTH: the mark scales with width and sits in the bottom-right corner.
WM_WIDTH_FRAC: float = 0.27
WM_HEIGHT_FRAC: float = 0.092
MARGIN_RIGHT_FRAC: float = 0.008
MARGIN_BOTTOM_FRAC: float = 0.010
|
|
|
|
# How the glyphs are expected to look: a light, weakly saturated gray that is
# brighter than the background immediately around it.
MAX_SATURATION: int = 55
LOGO_MIN_LUMA: int = 150
TOPHAT_DELTA: int = 12
|
|
|
|
# Shape-consistent detection gates. A threshold of 0.45 sits well clear of both real
# Jimeng marks (>=0.81) and the Doubao strip (0.21), which keeps the two ByteDance
# marks from cross-firing.
DETECT_MIN_COVERAGE: float = 0.02
DETECT_NCC_THRESHOLD: float = 0.45
|
|
|
|
# Reverse-alpha geometry. These values are emitted by scripts/visible_alpha_solve.py
# from the gray capture, measured at the captured width.
_ALPHA_NATIVE_WIDTH: int = 2048
_ALPHA_LOGO_BGR: tuple[float, float, float] = (255.0, 255.0, 255.0)
_ALPHA_WIDTH_FRAC: float = 0.2021  # asset width / image width -- the alignment scale seed
_ALPHA_HEIGHT_FRAC: float = 0.0576
_ALPHA_MARGIN_RIGHT_FRAC: float = 0.0288
_ALPHA_MARGIN_BOTTOM_FRAC: float = 0.0288
_ALPHA_ALIGN_SEARCH: tuple[float, float, int] = (0.90, 1.12, 23)
_RESIDUAL_ALPHA_FLOOR: float = 0.05
_RESIDUAL_DILATE: int = 5
_RESIDUAL_INPAINT_RADIUS: int = 2
|
|
|
|
# The single Jimeng-specific input to the shared text-mark engine. Values come from
# the module-level constants of this module; the few bare literals (margin_floor,
# morph_open_size, min_gw) have no named constant here.
_CONFIG = TextMarkConfig(
    # Identity, bundled alpha asset and anchor corner ("br" -- the mark is bottom-right).
    name="Jimeng",
    asset_name="jimeng_alpha.png",
    corner="br",
    margin_floor=4,
    # Locate geometry (fractions of image width).
    width_frac=WM_WIDTH_FRAC,
    height_frac=WM_HEIGHT_FRAC,
    margin_x_frac=MARGIN_RIGHT_FRAC,
    margin_bottom_frac=MARGIN_BOTTOM_FRAC,
    # Glyph appearance.
    max_saturation=MAX_SATURATION,
    logo_min_luma=LOGO_MIN_LUMA,
    tophat_delta=TOPHAT_DELTA,
    morph_open_size=5,
    # Detection gates.
    detect_min_coverage=DETECT_MIN_COVERAGE,
    detect_ncc_threshold=DETECT_NCC_THRESHOLD,
    # Reverse-alpha geometry and alignment search.
    alpha_width_frac=_ALPHA_WIDTH_FRAC,
    alpha_height_frac=_ALPHA_HEIGHT_FRAC,
    alpha_margin_x_frac=_ALPHA_MARGIN_RIGHT_FRAC,
    alpha_margin_bottom_frac=_ALPHA_MARGIN_BOTTOM_FRAC,
    alpha_align_search=_ALPHA_ALIGN_SEARCH,
    min_gw=8,
    alpha_logo_bgr=_ALPHA_LOGO_BGR,
    # Residual clean-up after the reverse-alpha step.
    residual_alpha_floor=_RESIDUAL_ALPHA_FLOOR,
    residual_dilate=_RESIDUAL_DILATE,
    residual_inpaint_radius=_RESIDUAL_INPAINT_RADIUS,
)

# Jimeng-named alias of the shared detection result type.
JimengDetection = TextMarkDetection
|
|
|
|
|
|
def _alpha_template() -> NDArray[Any] | None:
    """Return Jimeng's bundled alpha template (float [0,1]), or None.

    Test-facing shim: hands the asset name from ``_CONFIG`` to
    ``_text_mark_engine.load_alpha_template`` and returns its result unchanged.
    """
    asset = _CONFIG.asset_name
    return _text_mark_engine.load_alpha_template(asset)
|
|
|
|
|
|
def _glyph_silhouette() -> NDArray[Any] | None:
    """Return the binary "即梦AI" silhouette (255 = glyph) built from the alpha map, or None.

    Test-facing shim: hands the asset name from ``_CONFIG`` to
    ``_text_mark_engine.glyph_silhouette`` and returns its result unchanged.
    """
    asset = _CONFIG.asset_name
    return _text_mark_engine.glyph_silhouette(asset)
|
|
|
|
|
|
def _template_match_score(box_mask: NDArray[Any], image_width: int) -> float:
    """Score ``box_mask`` against the Jimeng glyph silhouette (TM_CCOEFF_NORMED).

    Test-facing shim: forwards both arguments, together with the module-level
    ``_CONFIG``, to ``_text_mark_engine.template_match_score``.

    Args:
        box_mask: Mask to compare against the silhouette.
        image_width: Width of the source image.

    Returns:
        The score produced by the shared helper.
    """
    score = _text_mark_engine.template_match_score(box_mask, image_width, _CONFIG)
    return score
|
|
|
|
|
|
class JimengEngine(TextMarkEngine):
    """Engine for the visible Jimeng "★ 即梦AI" watermark (locate -> mask -> reverse-alpha).

    A thin subclass: all behaviour comes from ``TextMarkEngine``; this class only
    supplies Jimeng's module-level ``_CONFIG``.
    """

    def __init__(self) -> None:
        """Initialise the shared engine with Jimeng's configuration."""
        super().__init__(_CONFIG)
|