mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-05 02:28:00 +02:00
e572767555
Visible-watermark work across all three corner-mark engines plus a committed,
reproducible alpha-build pipeline (scripts/visible_alpha_solve.py) fed by committed
solid black/gray/white captures.
- jimeng: new "即梦AI" wordmark remover (reverse-alpha + thin residual inpaint,
always NCC-aligned -- the mark re-rasterizes/jitters per image). Detect via glyph
silhouette NCC (0.45 threshold; does not cross-fire with Doubao). Registered in the
visible-mark catalog; `visible --mark jimeng` / `--mark auto`.
- doubao: fix a real production defect -- the shipped remover left a READABLE
"豆包AI生成" outline on real samples while detect() returned conf 0.0 (fooled by a
thin outline), so the test passed and the "56/56 clean" claim was detector-measured,
not visual. Root cause: under-estimated alpha + fixed-geometry-no-inpaint + tight
locate box. Rebuilt alpha (careful gray-self solve), always-align, thin inpaint,
widened locate box -> readable outline becomes faint texture-level traces.
- gemini: rebuild gemini_bg_{96,48} from our own controlled captures (validated NCC
0.9998 vs the prior third-party asset); removal re-verified clean, no behaviour change.
- tests: add textured-shift regression to both engines (guards the align-on-shift path
the Doubao defect exposed; lesson: a detector-only removal test is insufficient,
assert visual residual).
- docs: CLAUDE.md, README, capture READMEs and docstrings synced; stale
"exact/pixel-exact/56-clean" claims removed.
Also includes a SynthID label-wording clarification in identify.py/cli.py
("SynthID pixel watermark" -> "SynthID watermark, inferred from C2PA metadata").
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
248 lines
11 KiB
Python
248 lines
11 KiB
Python
"""Rebuild the visible-watermark alpha assets from controlled captures.
|
|
|
|
The committed, reproducible build of the bundled visible-mark assets -- the inputs
|
|
live in ``data/<engine>_capture/captures/`` (committed solid-colour captures run
|
|
through the generator). Re-run after re-capturing.
|
|
|
|
**Doubao "豆包AI生成" strip and Jimeng "★ 即梦AI" wordmark** are fixed
|
|
semi-transparent white overlays; the asset is their recovered per-pixel alpha map
|
|
(``assets/<engine>_alpha.png``). It is built with the "careful" solve (issue #13) below; a naive build
|
|
(max-over-channels, coarse background, blur, truncated halo, or a black-dominated
|
|
least-squares fit) leaves a visible outline because the alpha is wrong at the glyph
|
|
edges:
|
|
|
|
1. Locate the mark on the BLACK capture (bright pixels in the bottom-right).
|
|
2. Fit a smooth CUBIC background per channel over the GRAY capture's non-glyph
|
|
pixels (a cubic captures the gentle gradient without bleeding glyph values).
|
|
3. Solve ``a = (I - B) / (255 - B)`` on the gray capture, AVERAGED over channels,
|
|
at FULL halo extent (down to a~0.02) and UNBLURRED. Gray (background ~130-200)
|
|
is the reference because the mark sits on bright photo content in real use, not
|
|
on black; the white capture only confirms the logo is white.
|
|
|
|
**Gemini sparkle** is a different type: a single icon stamped on PURE BLACK, so the
|
|
engine reads ``alpha = max(R,G,B)/255`` directly (no background fit). Its assets are
|
|
the sparkle-on-black capture cropped to two fixed logo sizes (``gemini_bg_{96,48}.png``).
|
|
|
|
Usage::
|
|
|
|
uv run python scripts/visible_alpha_solve.py doubao
|
|
uv run python scripts/visible_alpha_solve.py jimeng
|
|
uv run python scripts/visible_alpha_solve.py gemini
|
|
uv run python scripts/visible_alpha_solve.py all
|
|
"""
|
|
|
|
# cv2/numpy boundary: third-party libs ship no usable element types; relax the
|
|
# unknown-type rules for this file only (mirrors the engine modules).
|
|
# pyright: reportUnknownMemberType=false, reportUnknownArgumentType=false, reportUnknownVariableType=false, reportUnknownParameterType=false, reportMissingTypeArgument=false, reportMissingTypeStubs=false, reportMissingImports=false, reportArgumentType=false, reportAssignmentType=false, reportReturnType=false, reportCallIssue=false, reportIndexIssue=false, reportOperatorIssue=false
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
import click
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# Make the in-repo package importable when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))
# The package sits under <repo>/src (the asset paths in this file point at
# src/remove_ai_watermarks/assets), so the repo root alone does not expose it;
# add the src directory as well, ahead of any installed copy.
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
|
|
|
from remove_ai_watermarks import image_io
|
|
|
|
if TYPE_CHECKING:
|
|
from numpy.typing import NDArray
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
_ROOT = Path(__file__).resolve().parents[1]
|
|
|
|
|
|
@dataclass(frozen=True)
class EngineSpec:
    """Immutable description of one alpha rebuild: which captures to read and
    which asset file the solved alpha map is written to."""

    # Engine identifier; also used as the label in log lines.
    name: str
    # Directory that holds the capture images named by ``black`` and ``gray``.
    capture_dir: Path
    # File name (inside ``capture_dir``) of the capture on a black background.
    black: str
    # File name (inside ``capture_dir``) of the capture on a gray background.
    gray: str
    # Destination path of the rebuilt alpha asset.
    asset: Path
    # Divisor used for the geometry fractions that ``solve_alpha`` logs.
    native_width: int = 2048
|
|
|
|
|
|
# Engines whose asset is a solved alpha map, keyed by the CLI engine name.
_SPECS: dict[str, EngineSpec] = {
    "doubao": EngineSpec(
        name="doubao",
        capture_dir=_ROOT / "data" / "doubao_capture" / "captures",
        black="doubao_black_1x1_1.png",
        gray="doubao_gray_1x1_1.png",
        asset=_ROOT / "src" / "remove_ai_watermarks" / "assets" / "doubao_alpha.png",
    ),
    "jimeng": EngineSpec(
        name="jimeng",
        capture_dir=_ROOT / "data" / "jimeng_capture" / "captures",
        black="jimeng_cap_A.png",  # black seed
        gray="jimeng_cap_C.png",  # gray seed
        asset=_ROOT / "src" / "remove_ai_watermarks" / "assets" / "jimeng_alpha.png",
    ),
}
|
|
|
|
# Background margin (px) kept around the located mark for the cubic fit.
_CUBIC_BG_PAD = 30
# Alpha above this counts as solid glyph body when computing the bbox.
_GLYPH_BODY = 0.08
# Connected glyph-mask blobs smaller than this area are dropped (cubic-fit specks).
_MIN_PART_AREA = 25
# Halo (px) kept around the glyph body in the saved asset.
_HALO_PAD = 7
|
|
|
|
# Gemini is a different watermark TYPE: one sparkle icon stamped on a PURE-BLACK
# background, so the engine reads alpha = max(R,G,B)/255 directly and needs no
# background fit. Its assets are the sparkle-on-black CAPTURE at two fixed logo
# sizes (the engine interpolates between them), not an alpha map.
_GEMINI_CAPTURE = _ROOT.joinpath("data", "gemini_capture", "captures", "gemini_black_2048.png")
# Output path per bundled logo size, in the order the assets are written.
_GEMINI_ASSETS: dict[int, Path] = {
    size: _ROOT / "src" / "remove_ai_watermarks" / "assets" / f"gemini_bg_{size}.png" for size in (96, 48)
}
|
|
|
|
|
|
def _union_bbox(mask: NDArray[np.uint8], err: str) -> tuple[int, int, int, int]:
    """Return the box ``(x0, x1, y0, y1)`` enclosing every sizeable component of ``mask``.

    Components are found with 8-connectivity and only those whose area is at
    least ``_MIN_PART_AREA`` contribute. The mark is several separate glyphs, so
    the union spans the whole word while small stray specks are ignored.
    ``x1`` / ``y1`` are ``left + width`` / ``top + height`` (exclusive ends).

    Raises ``ValueError(err)`` when no component passes the area filter.
    """
    _count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)
    # Label 0 (first stats row) is skipped; only labels >= 1 are candidates.
    candidates = stats[1:]
    kept = candidates[candidates[:, cv2.CC_STAT_AREA] >= _MIN_PART_AREA]
    if len(kept) == 0:
        raise ValueError(err)
    left = kept[:, cv2.CC_STAT_LEFT]
    top = kept[:, cv2.CC_STAT_TOP]
    right = left + kept[:, cv2.CC_STAT_WIDTH]
    bottom = top + kept[:, cv2.CC_STAT_HEIGHT]
    return int(left.min()), int(right.max()), int(top.min()), int(bottom.max())
|
|
|
|
|
|
def _locate_on_black(black: NDArray[np.float32]) -> tuple[int, int, int, int]:
    """Find the white mark's bounding box ``(x0, x1, y0, y1)`` on the black capture.

    Only the bottom-right corner (last quarter of the rows and of the columns)
    is searched. Pixels whose channel-mean exceeds 40 are taken as mark, the
    mask is morphologically closed with a 9x9 kernel, and the box is the union
    of the sufficiently large components, so it spans the whole word.

    Raises ``ValueError`` (via ``_union_bbox``) when the corner holds no mark.
    """
    rows, cols = black.shape[:2]
    luminance = black.mean(axis=2)

    # Search window: bottom quarter AND right quarter of the image.
    in_corner = np.zeros((rows, cols), dtype=bool)
    in_corner[rows * 3 // 4 :, cols * 3 // 4 :] = True

    # 40 sits comfortably above the ~5-30 background blotches.
    mask = np.zeros((rows, cols), np.uint8)
    mask[in_corner & (luminance > 40)] = 255

    closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((9, 9), np.uint8))
    return _union_bbox(closed, "no mark found on the black capture (bottom-right is empty)")
|
|
|
|
|
|
def _cubic_background(crop: NDArray[np.float32], glyph: NDArray[np.bool_]) -> NDArray[np.float32]:
    """Fit a bivariate cubic surface to each of the first three channels of ``crop``.

    The least-squares fit uses only pixels where ``glyph`` is False; the fitted
    surface is then evaluated at every pixel, glyph pixels included. Pixel
    coordinates are scaled by the crop height / width before the polynomial
    terms are formed.

    Returns an array shaped and typed like ``crop`` holding the fitted surface.
    """
    rows, cols = crop.shape[:2]
    grid_y, grid_x = np.meshgrid(np.arange(rows), np.arange(cols), indexing="ij")
    v = grid_y.astype(np.float32) / rows
    u = grid_x.astype(np.float32) / cols

    # All ten monomials of total degree <= 3 in (u, v).
    monomials = (
        np.ones_like(u),
        u,
        v,
        u * u,
        u * v,
        v * v,
        u**3,
        u * u * v,
        u * v * v,
        v**3,
    )
    design = np.column_stack([m.ravel() for m in monomials])

    background_px = ~glyph
    fit_rows = design[background_px.ravel()]

    surface = np.zeros_like(crop)
    for channel in range(3):
        # Boolean indexing walks the plane in row-major order, matching fit_rows.
        samples = crop[..., channel][background_px]
        weights, *_ = np.linalg.lstsq(fit_rows, samples, rcond=None)
        surface[..., channel] = (design @ weights).reshape(rows, cols)
    return surface
|
|
|
|
|
|
def solve_alpha(spec: EngineSpec) -> NDArray[np.uint8]:
    """Solve the careful gray-self alpha map for one engine (uint8, a*255).

    The mark is located on the black capture, a cubic background is fitted to
    the gray capture around it (glyph pixels excluded), and
    ``a = (I - B) / (255 - B)`` is solved on the gray capture. The result is
    cropped to the glyph body plus ``_HALO_PAD`` pixels of halo. The asset size
    and margins, divided by ``spec.native_width``, are logged at INFO level for
    the engine's geometry constants.

    Args:
        spec: Capture directory / file names for the engine.

    Returns:
        The cropped alpha map scaled to 0..255 as ``uint8``.

    Raises:
        FileNotFoundError: If either capture could not be read.
        ValueError: If the two captures differ in size, if no mark is found on
            the black capture, or if the solved alpha has no glyph body.
    """
    black = image_io.imread(str(spec.capture_dir / spec.black), cv2.IMREAD_COLOR)
    gray = image_io.imread(str(spec.capture_dir / spec.gray), cv2.IMREAD_COLOR)
    if black is None or gray is None:
        raise FileNotFoundError(f"missing captures in {spec.capture_dir} (expected {spec.black}, {spec.gray})")
    # The box located on the black capture is applied to the gray capture, so
    # the two must share one pixel grid; otherwise the crop is misaligned.
    if black.shape[:2] != gray.shape[:2]:
        raise ValueError(
            f"{spec.name}: capture size mismatch -- {spec.black} is "
            f"{black.shape[1]}x{black.shape[0]} but {spec.gray} is "
            f"{gray.shape[1]}x{gray.shape[0]}"
        )
    black_f = black.astype(np.float32)
    gray_f = gray.astype(np.float32)

    img_h, img_w = black_f.shape[:2]
    mx0, mx1, my0, my1 = _locate_on_black(black_f)
    # Working region: the mark plus a background margin for the cubic fit,
    # clamped to the image.
    pad = _CUBIC_BG_PAD
    rx0, rx1 = max(0, mx0 - pad), min(img_w, mx1 + pad)
    ry0, ry1 = max(0, my0 - pad), min(img_h, my1 + pad)
    cg = gray_f[ry0:ry1, rx0:rx1]
    cb = black_f[ry0:ry1, rx0:rx1]

    # Glyph mask from the black capture (anything brighter than 8), dilated
    # 9x9 so the pixels next to the glyphs are kept out of the background fit.
    glyph = cv2.dilate((cb.mean(axis=2) > 8).astype(np.uint8), np.ones((9, 9), np.uint8)) > 0
    bg = _cubic_background(cg, glyph)
    # Channel-averaged numerator over (255 - channel-averaged background); the
    # denominator is floored at 1e-3 to avoid dividing by zero.
    alpha = np.clip((cg - bg).mean(axis=2) / np.clip(255.0 - bg.mean(axis=2), 1e-3, None), 0.0, 1.0)

    # Crop to the UNION of the glyph parts (the mark is several disconnected
    # glyphs), padded by _HALO_PAD -- this keeps the real anti-aliased halo while
    # dropping the small cubic-fit specks at the crop edges (< _MIN_PART_AREA) that
    # a bare a>floor box would otherwise inflate the asset with.
    body = (alpha > _GLYPH_BODY).astype(np.uint8)
    bx, bex, by, bey = _union_bbox(body, "solved alpha has no glyph body -- check the gray capture background")
    cx0 = max(0, bx - _HALO_PAD)
    cy0 = max(0, by - _HALO_PAD)
    cx1 = min(alpha.shape[1], bex + _HALO_PAD)
    cy1 = min(alpha.shape[0], bey + _HALO_PAD)
    tight = alpha[cy0:cy1, cx0:cx1]
    aw, ah = tight.shape[1], tight.shape[0]
    # Absolute asset position in the capture, for the engine's geometry constants.
    abs_x0, abs_y0 = rx0 + cx0, ry0 + cy0
    log.info(
        "%s: alpha %dx%d max %.3f | WIDTH_FRAC %.4f HEIGHT_FRAC %.4f "
        "MARGIN_RIGHT_FRAC %.4f MARGIN_BOTTOM_FRAC %.4f (native_width %d)",
        spec.name,
        aw,
        ah,
        float(tight.max()),
        aw / spec.native_width,
        ah / spec.native_width,
        (img_w - (abs_x0 + aw)) / spec.native_width,
        (img_h - (abs_y0 + ah)) / spec.native_width,
        spec.native_width,
    )
    # ``alpha`` is already clipped to [0, 1] above, so no second clip is needed.
    return (tight * 255.0).astype(np.uint8)
|
|
|
|
|
|
def solve_gemini() -> dict[int, NDArray[np.uint8]]:
    """Cut the Gemini sparkle out of the black capture and resize it to each
    bundled logo size (the bg-capture asset format; the engine derives the alpha).

    The sparkle is searched for in the bottom-right corner only (last quarter
    of the rows and of the columns), using a channel-mean threshold of 60 and a
    7x7 morphological close.

    Returns ``{size: bgr_image}`` with one entry per key of ``_GEMINI_ASSETS``.
    Raises ``FileNotFoundError`` when the capture cannot be read and
    ``ValueError`` (via ``_union_bbox``) when no sparkle is found.
    """
    capture = image_io.imread(str(_GEMINI_CAPTURE), cv2.IMREAD_COLOR)
    if capture is None:
        raise FileNotFoundError(f"missing Gemini capture {_GEMINI_CAPTURE}")

    height, width = capture.shape[:2]
    top, left = height * 3 // 4, width * 3 // 4
    luminance = capture.astype(np.float32).mean(axis=2)

    # Sparkle is ~0.5*255 on black, so 60 separates it from the background.
    mask = np.zeros((height, width), np.uint8)
    mask[top:, left:] = np.where(luminance[top:, left:] > 60, np.uint8(255), np.uint8(0))
    closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((7, 7), np.uint8))

    x0, x1, y0, y1 = _union_bbox(closed, "no sparkle found on the Gemini black capture")
    sparkle = capture[y0:y1, x0:x1]
    log.info("gemini: sparkle %dx%d at margin_frac %.4f", x1 - x0, y1 - y0, (width - x1) / width)

    resized: dict[int, NDArray[np.uint8]] = {}
    for size in _GEMINI_ASSETS:
        resized[size] = cv2.resize(sparkle, (size, size), interpolation=cv2.INTER_AREA)
    return resized
|
|
|
|
|
|
@click.command()
@click.argument("engine", type=click.Choice([*_SPECS, "gemini", "all"]))
def main(engine: str) -> None:
    """Rebuild the alpha asset(s) for ENGINE (doubao / jimeng / gemini / all)."""
    # NOTE: the docstring above doubles as the click help text; keep it short.
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    def _write(path: Path, img: NDArray[np.uint8], label: str) -> None:
        # Create the asset directory if needed, write the image, and fail
        # loudly when the writer reports failure instead of leaving a stale file.
        path.parent.mkdir(parents=True, exist_ok=True)
        if not image_io.imwrite(str(path), img):
            raise OSError(f"failed to write {path}")
        log.info("%s: wrote %s", label, path.relative_to(_ROOT))

    # Select the alpha engines from _SPECS -- the same table that feeds
    # click.Choice above -- so an engine added there is actually built rather
    # than accepted on the command line and then silently skipped.
    if engine == "all":
        specs = list(_SPECS.values())
    elif engine in _SPECS:
        specs = [_SPECS[engine]]
    else:
        specs = []
    for spec in specs:
        _write(spec.asset, solve_alpha(spec), spec.name)

    if engine in ("gemini", "all"):
        for size, img in solve_gemini().items():
            _write(_GEMINI_ASSETS[size], img, f"gemini-{size}")


if __name__ == "__main__":
    main()
|