From bc3228d38731241d5439e088cc802a3b96a9c448 Mon Sep 17 00:00:00 2001 From: test-user Date: Tue, 26 May 2026 21:31:51 -0700 Subject: [PATCH] feat(visible): Doubao text-mark removal + universal region eraser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add deterministic, CPU-only removal of the visible Doubao "豆包AI生成" mark and a position-agnostic region eraser for any other visible watermark/logo. - doubao_engine.py: locate (geometry, scales with width) + polarity-aware white-top-hat glyph mask + cv2 inpaint; coverage-gated detection and a dense-text safety guard. No GPU, ~30ms. - region_eraser.py + `erase` command: inpaint arbitrary --region box(es). Default cv2 backend (no deps); optional big-LaMa via onnxruntime (`lama` extra, Carve/LaMa-ONNX, model downloaded on first use, never bundled). - cli `visible --mark auto|gemini|doubao`: auto routes by detector confidence. - tests for both engines; seed previously-unseeded CLI image fixtures to stop the Doubao detector flaking on random corners. - .gitignore: doubao_capture/{seeds,captures} scratch (alpha-map calibration). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 7 + README.md | 25 ++- data/doubao_capture/README.md | 78 +++++++ src/remove_ai_watermarks/cli.py | 181 +++++++++++++++- src/remove_ai_watermarks/doubao_engine.py | 245 ++++++++++++++++++++++ src/remove_ai_watermarks/region_eraser.py | 179 ++++++++++++++++ tests/test_cli.py | 10 +- tests/test_doubao_engine.py | 98 +++++++++ tests/test_region_eraser.py | 75 +++++++ 9 files changed, 887 insertions(+), 11 deletions(-) create mode 100644 data/doubao_capture/README.md create mode 100644 src/remove_ai_watermarks/doubao_engine.py create mode 100644 src/remove_ai_watermarks/region_eraser.py create mode 100644 tests/test_doubao_engine.py create mode 100644 tests/test_region_eraser.py diff --git a/.gitignore b/.gitignore index a277e61..7fdaba1 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,10 @@ yolov8n.pt # Claude Code local settings .claude/settings.local.json + +# Doubao watermark calibration (local only; ship only the derived alpha-map asset). +# Synthetic seeds + raw Doubao captures are regenerable and not committed. +# Non-ours reference artifacts go in any _refs/ dir (already ignored above): usable +# locally for bootstrap/validation, never redistributed in the repo. +data/doubao_capture/seeds/ +data/doubao_capture/captures/ diff --git a/README.md b/README.md index 155378d..cb2baf0 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ If this tool saves you time, consider [sponsoring its development](https://githu ## Features -- **Visible watermark removal** — Gemini / Nano Banana sparkle logo via reverse alpha blending (fast, offline, deterministic) +- **Visible watermark removal** — Gemini / Nano Banana sparkle logo (reverse alpha blending) and the Doubao "豆包AI生成" text strip (locate + mask + inpaint); fast, offline, deterministic, no GPU. 
`visible --mark auto` picks the right one +- **Universal region eraser (`erase`)** — remove any logo / watermark / object inside boxes you specify, regardless of position or colour. Default cv2 inpainting (CPU, instant); optional big-LaMa via onnxruntime (`lama` extra) for higher quality - **Invisible watermark removal** — SynthID, StableSignature, TreeRing via diffusion-based regeneration (needs a local GPU, or run it with no setup on [raiw.cc](https://raiw.cc)) - **AI metadata stripping** — EXIF, PNG text chunks, C2PA provenance manifests (PNG / JPEG / AVIF / HEIF / JPEG-XL, and **MP4 / MOV / M4V video** at the container level), XMP DigitalSourceType - **"Made with AI" label removal** — removes the metadata that triggers AI labels on Instagram, Facebook, X (Twitter) @@ -45,11 +46,11 @@ If this tool saves you time, consider [sponsoring its development](https://githu | **xAI Grok (Aurora)** | — | — | ✅ EXIF signature scheme (no C2PA): `Signature:` blob + UUID `Artist` | Detected (`identify`); metadata strip | | **Midjourney** | — | — | ✅ EXIF + XMP (prompt, model, seed) | Metadata strip | | **Meta AI** | — | — | ✅ IPTC "Made with AI" (digitalSourceType) | Metadata strip (removes the label) | -| **Doubao** (ByteDance) / China AIGC generators | — | — | ✅ TC260 `` XMP label (China's mandatory AI labeling) | Metadata strip | +| **Doubao** (ByteDance) / China AIGC generators | ✅ "豆包AI生成" text strip (bottom-right) | — | ✅ TC260 `` XMP label (China's mandatory AI labeling) | Locate + mask + inpaint (cv2, CPU) + metadata strip | | **StableSignature** (Meta) | — | ✅ In-model watermark | — | Diffusion regeneration | | **TreeRing** | — | ✅ Latent space watermark | — | Diffusion regeneration | -> Visible watermarks (logo overlays) are currently used only by Google Gemini / Nano Banana. Other services rely on invisible watermarks and/or metadata. Our diffusion-based regeneration works against any invisible watermark in pixel or frequency domain. 
+> Visible overlays are used by Google Gemini / Nano Banana (sparkle logo) and by Doubao / China AIGC generators (the mandated "...AI生成" corner text). Both are removed deterministically on CPU. Other services rely on invisible watermarks and/or metadata; our diffusion-based regeneration works against any invisible watermark in pixel or frequency domain. For a visible mark from any other source (any position, any colour), use the universal `erase --region` command. > **Detection:** `remove-ai-watermarks identify ` reports the origin platform and watermark inventory for all the signals above — C2PA issuer, the C2PA soft-binding forensic-watermark vendor (TrustMark / Digimarc / Imatag / ...), IPTC "Made with AI" plus the IPTC 2025.1 `AISystemUsed` field, the China TC260 AIGC label, embedded generation params, EXIF/XMP generator tags, the xAI/Grok EXIF signature, the SynthID metadata proxy, the visible sparkle, and (with the `[detect]` / `[trustmark]` extras) the open SD/SDXL/FLUX and Adobe TrustMark invisible watermarks. SynthID and the proprietary soft-binding watermarks (Digimarc etc.) have no local decoder, so they are reported by metadata proxy / vendor name only. @@ -73,6 +74,16 @@ A three-stage NCC (Normalized Cross-Correlation) detector finds the watermark po **Speed**: ~0.05s per image. No GPU needed. +### Removing the Doubao "豆包AI生成" text watermark + +Doubao (ByteDance) stamps every output with a light, semi-transparent "豆包AI生成" text strip in the bottom-right corner — the visible AIGC label mandated by China's TC260 standard. Unlike the fixed-size Gemini sparkle, it is a text strip that scales with image width, so we anchor a generous bottom-right box by geometry, extract the light low-saturation glyph pixels with a polarity-aware white top-hat mask, and inpaint them (cv2 Telea/NS). The mask is background-relative, so it leaves white-paper documents untouched instead of smearing their text. 
On dense-text backgrounds where the mask would explode, removal is skipped rather than guessed. + +**Speed**: ~0.03s per image. No GPU needed. Best on photo / illustration backgrounds; on high-contrast edges a faint residue can remain (use `erase --backend lama` for neural-quality fill). + +### Universal region eraser + +For any visible mark the dedicated engines do not cover — a logo anywhere, any colour — `erase --region x,y,w,h` inpaints the box you specify. The default `cv2` backend is instant and dependency-free; the optional `lama` backend (big-LaMa via onnxruntime, `lama` extra, ~200 MB model downloaded on first use) gives much cleaner fills on textured regions at the cost of ~3-4 GB RAM per call. + ### Removing SynthID and other invisible watermarks Google embeds **SynthID** into every image generated by Gemini / Nano Banana. Other AI services use StableSignature, TreeRing, and similar schemes. These imperceptible frequency-domain patterns survive cropping, resizing, and JPEG compression. @@ -221,9 +232,15 @@ remove-ai-watermarks batch ./images/ --mode all # of a clean origin. Add --json for machine-readable output. remove-ai-watermarks identify image.png -# Visible watermark only (Gemini / Nano Banana sparkle) — fast, offline +# Visible watermark only — fast, offline, CPU. --mark auto (default) picks +# between the Gemini sparkle and the Doubao "豆包AI生成" text strip; force one +# with --mark gemini / --mark doubao. remove-ai-watermarks visible image.png -o clean.png +# Erase arbitrary region(s) — universal, any logo/watermark/object, any position. +# Default cv2 inpainting (CPU). --backend lama uses big-LaMa (extra 'lama'). +remove-ai-watermarks erase image.png --region 1640,1930,400,100 -o clean.png + # Invisible watermark only (SynthID etc.) — requires GPU remove-ai-watermarks invisible image.png -o clean.png --humanize 4.0 # Runs at native resolution by default. 
On a very large image that OOMs the diff --git a/data/doubao_capture/README.md b/data/doubao_capture/README.md new file mode 100644 index 0000000..ec3a8d1 --- /dev/null +++ b/data/doubao_capture/README.md @@ -0,0 +1,78 @@ +# Doubao visible watermark capture + +Goal: capture the Doubao "豆包AI生成" visible watermark over known flat backgrounds so we can +build a per-pixel alpha map and a reverse-alpha-blend remover, the same way the Gemini sparkle +engine works (`src/remove_ai_watermarks/gemini_engine.py`). + +## What we already know (verified from prior art, 2026-05-26) + +- Blend model: **alpha compositing with a white logo** `watermarked = a*logo + (1-a)*original`, + `logo = (255,255,255)`. Inversion: `original = (watermarked - a*logo) / (1-a)`. + Confirmed by two independent sources (an open-source remover's algorithm doc + aiwatermarkremover.dev, + both say "alpha map"). One commercial blog (pixelcleanai) claims "screen blend" instead; the gray + capture below settles it empirically. +- Position: **bottom-right corner**, small margins (right ~8-20px, bottom ~5px), scales with image size. + Confirmed by our sample `data/samples/doubao-1.png` (2048x2048) plus three sources. +- Size **scales with resolution**. Third-party numbers (~90x18 at <=1024, ~180x40 at >1024) are + approximate and calibrated for ~1024-1280 outputs; at 2048 the strip is much larger. A shipped + third-party alpha map is only 120x20, too small for our 2K/4K target -> capture fresh. +- In practice clean inversion leaves residue on textured backgrounds, so the remover pairs the alpha + map with inpainting (our Gemini engine already does gradient-masked inpainting for residual edges). + +## Use doubao.com specifically + +The "豆包AI生成" mark is Doubao's. Jimeng / Dreamina use a different mark. Generate on doubao.com so +the captured template matches our target. + +## How to capture (image-edit path, most reliable) + +For each seed in `seeds/`: + +1. 
Open Doubao image generation, use the image-edit / reference mode, upload the seed. +2. Prompt (Chinese preferred): + `请完全按照原图重新生成这张图片,保持完全一致,不要添加或修改任何内容` + (English: `Recreate this image exactly as it is, keep it identical, do not add or change anything`) +3. Download the ORIGINAL output file (not a screenshot). Do not crop / edit / re-save. + +Prior art confirms uploading a pure-black image and letting Doubao stamp it works. + +If edit mode is unavailable and text-to-image refuses a solid color, fall back to generating 10-12 +normal-content images at one fixed resolution; the mark is the only constant across them and can be +extracted by per-pixel min/median. + +## What to capture (priority top to bottom) + +| Aspect | black | white | gray128 | why | +|--------|-------|-------|---------|-----| +| 1:1 | 3 | 1 | 1 | primary alpha map + confirm the stamp is pixel-identical across runs + settle blend mode | +| 16:9 | 2 | 1 | 1 | anchor rule in landscape | +| 9:16 | 2 | 1 | 1 | anchor rule in portrait | +| 4:3, 3:4 | 1 each | - | - | optional, refines anchor rule | + +- 3 blacks on 1:1: if the first two are byte-identical in the watermark region, the third is optional. +- gray128 is the blend-mode test: predict the gray result from the black capture under alpha vs screen; + whichever matches the real gray output is the true blend. +- If the UI offers multiple output resolutions (1K / 2K / 4K), capture one black per resolution on 1:1 - + needed to learn how the watermark scales. +- Also grab 3-5 normal-content images on 1:1 for end-to-end removal validation. + +## Hygiene + +- Original download, never a screenshot. PNG preferred; if Doubao only gives JPEG, note it. +- No crop / edit / re-save. Default settings, watermark left ON. + +## Naming, drop into `captures/` + +``` +doubao_black_1x1_1.png +doubao_white_1x1_1.png +doubao_gray128_1x1_1.png +doubao_black_16x9_1.png +doubao_content_1x1_1.png +``` + +## Also report back + +1. 
Which resolutions and aspect ratios the Doubao UI actually offers. +2. Whether there is a watermark on/off toggle in the UI. +3. Download format (PNG or JPEG). diff --git a/src/remove_ai_watermarks/cli.py b/src/remove_ai_watermarks/cli.py index 21e70a5..e4506dd 100644 --- a/src/remove_ai_watermarks/cli.py +++ b/src/remove_ai_watermarks/cli.py @@ -12,7 +12,7 @@ import json import logging import time from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import click from rich.console import Console @@ -25,7 +25,7 @@ from remove_ai_watermarks import __version__ if TYPE_CHECKING: import numpy as np - from remove_ai_watermarks.gemini_engine import DetectionResult + from remove_ai_watermarks.gemini_engine import DetectionResult, GeminiEngine console = Console() @@ -130,6 +130,72 @@ def _write_bgr_with_alpha( cv2.imwrite(str(path), bgra) +def _run_doubao_if_selected( + ctx: click.Context, + image: np.ndarray, + alpha: np.ndarray | None, + output: Path, + mark: str, + gemini_engine: GeminiEngine, + detect: bool, + detect_threshold: float, + inpaint_method: str, + strip_metadata: bool, +) -> bool: + """Run the Doubao text-strip removal path when it is the selected mark. + + Returns True when this path handled the image (caller should stop). In + ``auto`` mode the Doubao detector competes with the Gemini detector and wins + only when it is both positive and at least as confident. 
+ """ + from remove_ai_watermarks.doubao_engine import DoubaoEngine + + doubao = DoubaoEngine() + d_det = doubao.detect(image) + + if mark == "auto": + g_det = gemini_engine.detect_watermark(image) + use_doubao = d_det.detected and d_det.confidence >= g_det.confidence + console.print( + f" [dim]Mark auto:[/] gemini={g_det.confidence:.2f} doubao={d_det.confidence:.2f} " + f"-> {'doubao' if use_doubao else 'gemini'}" + ) + else: + use_doubao = mark == "doubao" + + if not use_doubao: + return False + + if detect and not d_det.detected and d_det.confidence < detect_threshold: + console.print( + f" [yellow]⚠[/] Doubao mark not detected [dim](coverage {d_det.coverage:.1%}). " + f"Use --no-detect to force.[/]" + ) + raise SystemExit(0) + + method: Literal["telea", "ns"] = "ns" if inpaint_method == "ns" else "telea" + t0 = time.monotonic() + with console.status("[cyan]Removing Doubao watermark…[/]"): + result = doubao.remove_watermark(image, inpaint_method=method) + elapsed = time.monotonic() - t0 + + output.parent.mkdir(parents=True, exist_ok=True) + _write_bgr_with_alpha(output, result, alpha, clear_region=d_det.region) + + if strip_metadata: + try: + from remove_ai_watermarks.metadata import remove_ai_metadata + + remove_ai_metadata(output, output) + except Exception as e: + if ctx.obj.get("verbose"): + console.print(f" [yellow]⚠[/] Failed to strip metadata: {e}") + + size_kb = output.stat().st_size / 1024 + console.print(f" [green]✓[/] Doubao mark removed → {output} [dim]({size_kb:.0f} KB, {elapsed:.2f}s)[/]") + return True + + # ── Main group ─────────────────────────────────────────────────────── @@ -167,6 +233,12 @@ def main(ctx: click.Context, verbose: bool) -> None: @click.option("--inpaint-strength", type=float, default=0.85, help="Inpainting blend strength (0.0-1.0).") @click.option("--detect/--no-detect", default=True, help="Detect watermark before removal.") @click.option("--detect-threshold", type=float, default=0.25, help="Detection confidence threshold.") 
+@click.option( + "--mark", + type=click.Choice(["auto", "gemini", "doubao"]), + default="auto", + help="Which visible mark to target. auto picks the stronger of the two detectors.", +) @click.option("--strip-metadata/--keep-metadata", default=True, help="Strip AI metadata from output.") @click.pass_context def cmd_visible( @@ -178,11 +250,14 @@ def cmd_visible( inpaint_strength: float, detect: bool, detect_threshold: float, + mark: str, strip_metadata: bool, ) -> None: - """Remove visible Gemini watermark (sparkle logo) from an image. + """Remove a visible AI watermark from an image. - Uses reverse alpha blending — fast, deterministic, offline. + Targets the Gemini sparkle logo (reverse alpha blending) or the Doubao + "豆包AI生成" text strip (locate -> mask -> inpaint). Fast, deterministic, + offline. ``--mark auto`` picks whichever detector fires stronger. """ from remove_ai_watermarks.gemini_engine import GeminiEngine @@ -203,6 +278,12 @@ def cmd_visible( h, w = image.shape[:2] console.print(f" [dim]Input:[/] {source.name} ({w}x{h})") + # Resolve which visible mark to target, then run the Doubao path if chosen. 
+ if _run_doubao_if_selected( + ctx, image, alpha, output, mark, engine, detect, detect_threshold, inpaint_method, strip_metadata + ): + return + # Detection (we always detect softly, to find dynamic region for inpainting) with console.status("[cyan]Detecting watermark…[/]"): det = engine.detect_watermark(image) @@ -256,6 +337,98 @@ def cmd_visible( console.print(f" [green]✓[/] Saved: {output} [dim]({size_kb:.0f} KB, {elapsed:.2f}s)[/]") +# ── Universal region eraser ───────────────────────────────────────── + + +def _parse_region(spec: str) -> tuple[int, int, int, int]: + """Parse an ``x,y,w,h`` region string into a 4-int tuple.""" + parts = spec.replace(" ", "").split(",") + if len(parts) != 4: + raise click.BadParameter(f"region must be 'x,y,w,h', got: {spec!r}") + try: + x, y, w, h = (int(p) for p in parts) + except ValueError as e: + raise click.BadParameter(f"region values must be integers: {spec!r}") from e + if w <= 0 or h <= 0: + raise click.BadParameter(f"region width/height must be positive: {spec!r}") + return x, y, w, h + + +@main.command("erase") +@click.argument("source", type=click.Path(exists=True, path_type=Path)) +@click.option("--region", "regions", multiple=True, required=True, help="x,y,w,h box to erase (repeatable).") +@click.option( + "-o", "--output", type=click.Path(path_type=Path), default=None, help="Output path (default: _clean.)." +) +@click.option( + "--backend", + type=click.Choice(["cv2", "lama"]), + default="cv2", + help="Inpaint backend. cv2: instant, no deps. 
lama: onnxruntime big-LaMa, better quality (extra 'lama').", +) +@click.option("--inpaint-method", type=click.Choice(["telea", "ns"]), default="telea", help="cv2 inpaint method.") +@click.option("--dilate", type=int, default=3, help="Grow the box by this many px before inpainting.") +@click.option("--strip-metadata/--keep-metadata", default=True, help="Strip AI metadata from output.") +@click.pass_context +def cmd_erase( + ctx: click.Context, + source: Path, + regions: tuple[str, ...], + output: Path | None, + backend: str, + inpaint_method: str, + dilate: int, + strip_metadata: bool, +) -> None: + """Erase arbitrary region(s) from an image via inpainting. + + Universal and position-agnostic: removes any logo / watermark / object inside + the boxes you pass, regardless of colour or location. Runs on CPU. Use this + for marks the dedicated ``visible`` engines (Gemini, Doubao) do not cover. + """ + from remove_ai_watermarks.region_eraser import erase + + _banner() + source = _validate_image(source) + if output is None: + output = source.with_stem(source.stem + "_clean") + + boxes = [_parse_region(r) for r in regions] + + image, alpha = _read_bgr_and_alpha(source) + if image is None: + console.print(f"[red]Error:[/] Failed to read image: {source}") + raise SystemExit(1) + h, w = image.shape[:2] + console.print(f" [dim]Input:[/] {source.name} ({w}x{h}) [dim]{len(boxes)} region(s), backend={backend}[/]") + + t0 = time.monotonic() + method: Literal["telea", "ns"] = "ns" if inpaint_method == "ns" else "telea" + try: + with console.status(f"[cyan]Erasing ({backend})…[/]"): + result = erase(image, boxes=boxes, backend=backend, dilate=dilate, cv2_method=method) + except RuntimeError as e: + console.print(f" [red]Error:[/] {e}") + raise SystemExit(1) from e + elapsed = time.monotonic() - t0 + + output.parent.mkdir(parents=True, exist_ok=True) + clear = boxes[0] if len(boxes) == 1 else None + _write_bgr_with_alpha(output, result, alpha, clear_region=clear) + + if 
strip_metadata: + try: + from remove_ai_watermarks.metadata import remove_ai_metadata + + remove_ai_metadata(output, output) + except Exception as e: + if ctx.obj.get("verbose"): + console.print(f" [yellow]⚠[/] Failed to strip metadata: {e}") + + size_kb = output.stat().st_size / 1024 + console.print(f" [green]✓[/] Erased {len(boxes)} region(s) → {output} [dim]({size_kb:.0f} KB, {elapsed:.2f}s)[/]") + + # ── Invisible watermark removal ───────────────────────────────────── diff --git a/src/remove_ai_watermarks/doubao_engine.py b/src/remove_ai_watermarks/doubao_engine.py new file mode 100644 index 0000000..19a27a7 --- /dev/null +++ b/src/remove_ai_watermarks/doubao_engine.py @@ -0,0 +1,245 @@ +"""Doubao visible watermark removal engine. + +Doubao (ByteDance) stamps every generated image with a visible "豆包AI生成" +(Doubao AI generated) text strip in the bottom-right corner. This is the +explicit AIGC label mandated by China's TC260 standard, rendered as a +near-white / light-gray, low-saturation text overlay. + +Unlike the Gemini sparkle (a fixed square logo removed by reverse alpha +blending against a captured alpha map), the Doubao mark is a text strip whose +exact alpha map we do not yet have. This engine therefore removes it by: + + locate -> mask -> inpaint + +1. Locate: the mark scales with image WIDTH and sits in the bottom-right at a + width-relative margin, so we anchor a generous box there (geometry only -- no + bundled template). Constants below are derived from measured Doubao output. +2. Mask: within the box, extract the light, low-saturation glyph pixels with a + white top-hat rule (brighter than a blurred local background). Where the mark + is not locally brighter (e.g. white-paper documents) nothing is masked. +3. Inpaint: cv2 inpainting (TELEA / NS) reconstructs the covered pixels. + +This is fast, offline, deterministic, and needs no GPU.
A future upgrade path +is per-pixel reverse alpha blending once a Doubao alpha map is captured on a +controlled black background (see data/doubao_capture/), which would recover the +true pixels instead of hallucinating them -- the same approach as the Gemini +engine. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal + +import cv2 +import numpy as np + +if TYPE_CHECKING: + from pathlib import Path + + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + + +# Geometry as a fraction of image WIDTH. The Doubao mark scales with width and +# is anchored bottom-right. The box is intentionally generous (the glyph mask +# tightens it); values cover measured outputs across resolutions and aspect +# ratios (square 2048, portrait, ultra-wide). Margins are width-relative too. +WM_WIDTH_FRAC = 0.185 +WM_HEIGHT_FRAC = 0.065 +MARGIN_RIGHT_FRAC = 0.012 +MARGIN_BOTTOM_FRAC = 0.014 + +# Glyph appearance: the label is a low-saturation light gray, rendered brighter +# than the surrounding content (the common case: a generated photo/illustration). +# We detect it as a local bright feature (white top-hat: brighter than a blurred +# local background) intersected with the grayish + minimum-brightness tests. +# This is polarity-correct for bright-on-darker backgrounds and, crucially, +# leaves white-paper documents untouched (there the mark is not brighter than +# its surroundings, so nothing is masked rather than damaging the document text). +MAX_SATURATION = 55 # max channel spread to count a pixel as "grayish" +LOGO_MIN_LUMA = 150 # glyphs are at least this bright in absolute terms +TOPHAT_DELTA = 12 # glyph must exceed the local background by this many levels + +# Detection: a genuine label fills a meaningful fraction of the box. 
Measured +# coverage is >=0.20 on real Doubao outputs; random/textured corners stay <=0.06 +# on large images but can spike to ~0.15 on tiny ones (small box -> high variance), +# so the threshold sits above that spike and below the real-mark floor. +DETECT_MIN_COVERAGE = 0.16 + +# Safety: a text strip fills a modest slice of the (generous) box. When the box +# is over a dense-text / document background the mask explodes and cv2 inpainting +# would smear the real content. Above this coverage we refuse to inpaint and +# leave the image untouched -- that hard case needs the neural path, not a guess. +MAX_INPAINT_COVERAGE = 0.50 + + +@dataclass(frozen=True) +class DoubaoLocation: + """Located watermark box (bottom-right), in absolute pixel coordinates.""" + + x: int + y: int + w: int + h: int + is_fallback: bool = True # geometry anchor (no template match) -> always True for now + + @property + def bbox(self) -> tuple[int, int, int, int]: + return self.x, self.y, self.w, self.h + + +@dataclass +class DoubaoDetection: + """Result of visible Doubao watermark detection.""" + + detected: bool = False + confidence: float = 0.0 + region: tuple[int, int, int, int] = (0, 0, 0, 0) + coverage: float = 0.0 # fraction of the box occupied by glyph pixels + + +class DoubaoEngine: + """Remove the visible Doubao "豆包AI生成" watermark (locate -> mask -> inpaint).""" + + def __init__( + self, + *, + width_frac: float = WM_WIDTH_FRAC, + height_frac: float = WM_HEIGHT_FRAC, + margin_right_frac: float = MARGIN_RIGHT_FRAC, + margin_bottom_frac: float = MARGIN_BOTTOM_FRAC, + ) -> None: + self.width_frac = width_frac + self.height_frac = height_frac + self.margin_right_frac = margin_right_frac + self.margin_bottom_frac = margin_bottom_frac + + # ── Locate ──────────────────────────────────────────────────────── + + def locate(self, image: NDArray) -> DoubaoLocation: + """Anchor the watermark box in the bottom-right corner by geometry.""" + h, w = image.shape[:2] + wm_w = max(40, int(w * 
self.width_frac)) + wm_h = max(16, int(w * self.height_frac)) + margin_r = max(4, int(w * self.margin_right_frac)) + margin_b = max(4, int(w * self.margin_bottom_frac)) + x = max(0, w - margin_r - wm_w) + y = max(0, h - margin_b - wm_h) + wm_w = min(wm_w, w - x) + wm_h = min(wm_h, h - y) + return DoubaoLocation(x=x, y=y, w=wm_w, h=wm_h, is_fallback=True) + + # ── Mask ────────────────────────────────────────────────────────── + + def extract_mask(self, image: NDArray, loc: DoubaoLocation) -> NDArray: + """Build a full-image uint8 mask (255 = watermark glyph) for the box. + + Polarity-aware: the mark is a light, low-saturation gray. On a dark + background it is the bright region; on a light background it is the + off-white gray below paper-white. Both cases are captured by the logo + luminance band intersected with the grayish constraint, plus a + brighter-than-local-background test on dark backgrounds. + """ + h, w = image.shape[:2] + x, y, bw, bh = loc.bbox + roi = image[y : y + bh, x : x + bw].astype(np.float32) + + luma = roi.mean(axis=2) + sat = roi.max(axis=2) - roi.min(axis=2) + grayish = sat < MAX_SATURATION + + # Local background model: a strong Gaussian blur (sigma ~ box height) + # approximates the content under the glyphs. The white top-hat + # (luma - local_bg) lights up bright thin strokes regardless of the + # absolute background level. + sigma = max(4.0, bh * 0.4) + local_bg = cv2.GaussianBlur(luma, (0, 0), sigmaX=sigma, sigmaY=sigma) + tophat = luma - local_bg + + cand = grayish & (tophat > TOPHAT_DELTA) & (luma > LOGO_MIN_LUMA) + glyph = cand.astype(np.uint8) * 255 + # Connect glyph parts, then drop isolated specks (5x5 open clears the + # scattered grayish pixels that random/textured corners produce). 
+ glyph = cv2.morphologyEx(glyph, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8)) + glyph = cv2.morphologyEx(glyph, cv2.MORPH_OPEN, np.ones((5, 5), np.uint8)) + + mask = np.zeros((h, w), np.uint8) + mask[y : y + bh, x : x + bw] = glyph + return mask + + # ── Detect ──────────────────────────────────────────────────────── + + def detect(self, image: NDArray) -> DoubaoDetection: + """Detect the visible Doubao mark by glyph coverage in the corner box. + + Heuristic: a genuine label fills a meaningful fraction of the box with + text-like glyph pixels. Coverage maps to a confidence score. + """ + det = DoubaoDetection() + if image is None or image.size == 0: + return det + loc = self.locate(image) + mask = self.extract_mask(image, loc) + x, y, bw, bh = loc.bbox + box = mask[y : y + bh, x : x + bw] + coverage = float((box > 0).sum()) / float(max(1, bw * bh)) + det.region = loc.bbox + det.coverage = coverage + # Map coverage to a 0-1 confidence: ~0.06 (noise floor) -> 0, ~0.26 -> 1. + det.confidence = float(max(0.0, min(1.0, (coverage - 0.06) / 0.20))) + det.detected = coverage >= DETECT_MIN_COVERAGE + logger.debug("Doubao detect: coverage=%.3f conf=%.3f", coverage, det.confidence) + return det + + # ── Remove ──────────────────────────────────────────────────────── + + def remove_watermark( + self, + image: NDArray, + *, + inpaint_method: Literal["telea", "ns"] = "telea", + inpaint_radius: int = 6, + dilate: int = 3, + ) -> NDArray: + """Remove the visible Doubao watermark by inpainting the glyph mask. + + Returns an unmodified copy when no glyph pixels are found (so we never + smear a clean corner). ``dilate`` grows the mask to cover anti-aliased + glyph edges before inpainting. 
+ """ + if image is None or image.size == 0: + return image + loc = self.locate(image) + mask = self.extract_mask(image, loc) + if not mask.any(): + logger.debug("Doubao remove: no glyph pixels found; returning copy") + return image.copy() + + x, y, bw, bh = loc.bbox + coverage = float((mask[y : y + bh, x : x + bw] > 0).sum()) / float(max(1, bw * bh)) + if coverage > MAX_INPAINT_COVERAGE: + logger.warning( + "Doubao remove: box coverage %.2f exceeds %.2f (dense-text/document " + "background); leaving image untouched to avoid smearing content", + coverage, + MAX_INPAINT_COVERAGE, + ) + return image.copy() + + if dilate > 0: + k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * dilate + 1, 2 * dilate + 1)) + mask = cv2.dilate(mask, k) + + flag = cv2.INPAINT_TELEA if inpaint_method == "telea" else cv2.INPAINT_NS + return cv2.inpaint(image, mask, inpaint_radius, flag) + + +def load_image_bgr(path: str | Path) -> NDArray: + """Read an image as BGR ndarray (helper for scripts/tests).""" + img = cv2.imread(str(path), cv2.IMREAD_COLOR) + if img is None: + raise FileNotFoundError(f"Failed to read image: {path}") + return img diff --git a/src/remove_ai_watermarks/region_eraser.py b/src/remove_ai_watermarks/region_eraser.py new file mode 100644 index 0000000..2c60489 --- /dev/null +++ b/src/remove_ai_watermarks/region_eraser.py @@ -0,0 +1,179 @@ +"""Universal region eraser: remove anything inside user-given boxes via inpainting. + +Position- and content-agnostic. You supply the rectangle(s); the eraser inpaints +whatever is inside, so it removes any visible logo / watermark / object regardless +of colour, style, or location. Localisation is the user's responsibility (pass the +box); restoration runs on CPU. This is the universal fallback for marks the +deterministic per-generator engines (Gemini sparkle, Doubao) do not cover. + +Backends: + - ``cv2`` (default): ``cv2.inpaint`` (Telea / Navier-Stokes). 
logger = logging.getLogger(__name__)

Backend = Literal["cv2", "lama"]

_LAMA_REPO = "Carve/LaMa-ONNX"
_LAMA_FILE = "lama_fp32.onnx"

# Cached onnxruntime session (loading is expensive; reuse across calls).
_lama_session: object | None = None


def boxes_to_mask(
    shape: tuple[int, int],
    boxes: list[tuple[int, int, int, int]],
    dilate: int = 3,
) -> NDArray:
    """Build a uint8 mask (255 inside boxes) from ``(x, y, w, h)`` rectangles.

    Args:
        shape: Image size as ``(height, width)``. A full ``image.shape`` is
            accepted as well; only its first two entries are read.
        boxes: Rectangles in pixel coordinates. Each one is clipped to the
            image, and rectangles that end up empty are ignored.
        dilate: Radius in pixels of the elliptical dilation applied to the
            finished mask; ``0`` or less disables it.

    Returns:
        A ``(height, width)`` uint8 array holding 255 where pixels are selected.
    """
    h, w = (int(v) for v in shape[:2])
    mask = np.zeros((h, w), np.uint8)
    for x, y, bw, bh in boxes:
        # Coerce to int so float coordinates can be used as slice bounds.
        x, y, bw, bh = int(x), int(y), int(bw), int(bh)
        x0, y0 = max(0, x), max(0, y)
        x1, y1 = min(w, x + bw), min(h, y + bh)
        if x1 > x0 and y1 > y0:
            mask[y0:y1, x0:x1] = 255
    if dilate > 0 and mask.any():
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * dilate + 1, 2 * dilate + 1))
        mask = cv2.dilate(mask, k)
    return mask


def erase_cv2(
    image_bgr: NDArray,
    mask: NDArray,
    *,
    method: Literal["telea", "ns"] = "telea",
    radius: int = 6,
) -> NDArray:
    """Inpaint ``mask`` with classical cv2 inpainting (CPU, no extra deps).

    Args:
        image_bgr: Image handed to ``cv2.inpaint``.
        mask: Inpainting mask handed to ``cv2.inpaint``.
        method: ``"telea"`` selects ``cv2.INPAINT_TELEA``; any other value
            selects ``cv2.INPAINT_NS``.
        radius: Inpainting radius passed through to ``cv2.inpaint``.

    Returns:
        The inpainted image returned by ``cv2.inpaint``.
    """
    flag = cv2.INPAINT_TELEA if method == "telea" else cv2.INPAINT_NS
    return cv2.inpaint(image_bgr, mask, radius, flag)


def lama_available() -> bool:
    """True when the optional LaMa-ONNX backend can run.

    Both packages imported by ``_get_lama_session`` are probed: onnxruntime
    (inference) and huggingface_hub (model download). Checking only the first
    would let a missing hub package surface later as a bare ``ImportError``.
    """
    try:
        import huggingface_hub  # noqa: F401
        import onnxruntime  # noqa: F401
    except ImportError:
        return False
    return True


def _get_lama_session() -> object:
    """Load (once) the big-LaMa ONNX session, downloading the model on first use.

    The session is stored in the module-level ``_lama_session`` and returned
    directly on later calls.
    """
    global _lama_session
    if _lama_session is not None:
        return _lama_session

    import onnxruntime as ort
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(repo_id=_LAMA_REPO, filename=_LAMA_FILE)
    logger.info("Loading LaMa-ONNX model: %s", model_path)
    _lama_session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    return _lama_session


def erase_lama(image_bgr: NDArray, mask: NDArray) -> NDArray:
    """Inpaint ``mask`` with big-LaMa via onnxruntime (CPU).

    LaMa runs at a fixed square input size. To preserve full-image resolution we
    crop a padded region around the mask, inpaint that crop at the model size,
    and paste only the masked pixels back -- so untouched areas stay pixel-exact.

    Every non-zero mask pixel counts as part of the hole, for the crop extent,
    the model input and the paste-back alike, which matches how ``cv2.inpaint``
    reads its mask.

    Args:
        image_bgr: 3-channel BGR image (it is converted with ``COLOR_BGR2RGB``).
        mask: 2-D array with the image's height and width; non-zero = erase.

    Returns:
        A new array; ``image_bgr`` itself is never modified. When the mask is
        empty the result is a plain copy.
    """
    session = _get_lama_session()
    inputs = session.get_inputs()  # type: ignore[attr-defined]
    img_name = inputs[0].name
    mask_name = inputs[1].name
    # Model declares a fixed square spatial size (e.g. 512); fall back to 512.
    dims = inputs[0].shape
    size = next((d for d in reversed(dims) if isinstance(d, int) and d > 1), 512)

    hole = mask > 0
    if not hole.any():
        return image_bgr.copy()

    h, w = image_bgr.shape[:2]
    ys, xs = np.nonzero(hole)
    x_min, x_max = int(xs.min()), int(xs.max())
    y_min, y_max = int(ys.min()), int(ys.max())

    # Padded crop around the mask (context for the inpainter).
    pad = max(16, int(0.4 * max(x_max - x_min + 1, y_max - y_min + 1)))
    cx0, cy0 = max(0, x_min - pad), max(0, y_min - pad)
    cx1, cy1 = min(w, x_max + 1 + pad), min(h, y_max + 1 + pad)
    crop = image_bgr[cy0:cy1, cx0:cx1]
    crop_hole = hole[cy0:cy1, cx0:cx1]
    ch, cw = crop.shape[:2]

    # Resize crop + mask to the model size, normalise to [0,1] RGB CHW.
    crop_rs = cv2.resize(crop, (size, size), interpolation=cv2.INTER_AREA)
    # Area-average the mask as float and keep anything > 0: nearest-neighbour
    # sampling can skip thin strokes when the crop is larger than the model size,
    # and those pixels would then be pasted back without having been inpainted.
    hole_rs = cv2.resize(crop_hole.astype(np.float32), (size, size), interpolation=cv2.INTER_AREA)
    img_in = cv2.cvtColor(crop_rs, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    img_in = np.transpose(img_in, (2, 0, 1))[None]  # (1,3,size,size)
    mask_in = (hole_rs > 0).astype(np.float32)[None, None]  # (1,1,size,size), 1=hole

    out = session.run(None, {img_name: img_in, mask_name: mask_in})[0]  # type: ignore[attr-defined]
    out = np.asarray(out)[0]  # (3,size,size)
    out = np.transpose(out, (1, 2, 0))
    if float(out.max()) <= 1.5:  # model emits [0,1]; otherwise already [0,255]
        out = out * 255.0
    out = np.clip(out, 0, 255).astype(np.uint8)
    out_bgr = cv2.cvtColor(out, cv2.COLOR_RGB2BGR)

    # Resize back to crop size and paste only the masked pixels.
    out_crop = cv2.resize(out_bgr, (cw, ch), interpolation=cv2.INTER_LINEAR)
    result = image_bgr.copy()
    # The slice is a view, so the boolean assignment writes straight into result.
    result[cy0:cy1, cx0:cx1][crop_hole] = out_crop[crop_hole]
    return result


def _as_uint8_mask(mask: NDArray, shape: tuple[int, int]) -> NDArray:
    """Return ``mask`` as a 2-D uint8 array of size ``shape`` (non-zero = erase)."""
    mask = np.asarray(mask)
    if mask.ndim == 3 and mask.shape[2] == 1:
        mask = np.ascontiguousarray(mask[:, :, 0])
    if mask.shape != tuple(shape):
        raise ValueError(f"mask shape {mask.shape} does not match image size {tuple(shape)}")
    if mask.dtype != np.uint8:
        # bool / int / float masks: anything above zero becomes 255.
        mask = np.where(mask > 0, 255, 0).astype(np.uint8)
    return mask


def erase(
    image_bgr: NDArray,
    *,
    boxes: list[tuple[int, int, int, int]] | None = None,
    mask: NDArray | None = None,
    backend: Backend = "cv2",
    dilate: int = 3,
    cv2_method: Literal["telea", "ns"] = "telea",
    cv2_radius: int = 6,
) -> NDArray:
    """Erase the given boxes (or mask) via the chosen inpainting backend.

    Provide either ``boxes`` (list of ``(x, y, w, h)``) or a precomputed ``mask``
    (uint8, 255 = erase). Returns an unmodified copy when nothing is selected.

    Args:
        image_bgr: Image to clean. ``None`` or an empty array is returned as is.
        boxes: Rectangles to erase; only used when ``mask`` is ``None``.
        mask: Precomputed mask with the image's height and width. It takes
            precedence over ``boxes``; ``dilate`` is not applied to it. Non-uint8
            masks are converted (values above zero become 255).
        backend: ``"lama"`` selects the LaMa-ONNX backend; any other value uses
            cv2 inpainting.
        dilate: Dilation radius used when the mask is built from ``boxes``.
        cv2_method: Forwarded to ``erase_cv2`` as ``method``.
        cv2_radius: Forwarded to ``erase_cv2`` as ``radius``.

    Returns:
        The inpainted image, or a copy of the input when the selection is empty.

    Raises:
        ValueError: A supplied ``mask`` does not match the image size.
        RuntimeError: ``backend="lama"`` was requested but its optional
            dependencies are not installed.
    """
    if image_bgr is None or image_bgr.size == 0:
        return image_bgr
    if mask is None:
        if not boxes:
            return image_bgr.copy()
        mask = boxes_to_mask(image_bgr.shape[:2], boxes, dilate=dilate)
    else:
        mask = _as_uint8_mask(mask, image_bgr.shape[:2])
    if not mask.any():
        return image_bgr.copy()

    if backend == "lama":
        if not lama_available():
            raise RuntimeError(
                "LaMa backend requires onnxruntime and huggingface_hub. "
                "Install the extra: pip install 'remove-ai-watermarks[lama]'"
            )
        return erase_lama(image_bgr, mask)
    return erase_cv2(image_bgr, mask, method=cv2_method, radius=cv2_radius)
from pathlib import Path

import cv2
import numpy as np
import pytest

from remove_ai_watermarks.doubao_engine import DoubaoEngine, load_image_bgr
from remove_ai_watermarks.region_eraser import boxes_to_mask, erase, lama_available

SAMPLE = Path(__file__).resolve().parents[1] / "data" / "samples" / "doubao-1.png"


def _gradient_bgr(side: int = 1024) -> np.ndarray:
    """Smooth horizontal 0..255 ramp as a BGR image (contains no watermark)."""
    row = np.linspace(0, 255, side, dtype=np.uint8)
    return cv2.cvtColor(np.tile(row, (side, 1)), cv2.COLOR_GRAY2BGR)


# ── Doubao engine: locate ───────────────────────────────────────────


class TestLocate:
    def test_box_anchored_bottom_right(self):
        side = 2048
        margin = int(side * 0.03)
        loc = DoubaoEngine().locate(np.zeros((side, side, 3), np.uint8))
        # right and bottom edges sit close to the image corner (within margins)
        assert side - (loc.x + loc.w) < margin
        assert side - (loc.y + loc.h) < margin
        assert loc.is_fallback  # geometry anchor, no bundled template yet

    def test_box_scales_with_width(self):
        engine = DoubaoEngine()
        small, large = (engine.locate(np.zeros((s, s, 3), np.uint8)) for s in (1024, 2048))
        # width-relative geometry: 2x wider image -> ~2x wider box
        assert large.w == pytest.approx(small.w * 2, rel=0.1)


# ── Doubao engine: detect + remove on the real sample ───────────────


@pytest.mark.skipif(not SAMPLE.exists(), reason="sample image not present")
class TestRealSample:
    def test_detects_watermark(self):
        result = DoubaoEngine().detect(load_image_bgr(SAMPLE))
        assert result.detected
        assert result.confidence > 0.0
        assert result.coverage > 0.04

    def test_remove_reduces_glyph_coverage(self):
        engine = DoubaoEngine()
        source = load_image_bgr(SAMPLE)
        coverage_before = engine.detect(source).coverage
        cleaned = engine.remove_watermark(source)
        coverage_after = engine.detect(cleaned).coverage
        # the inpaint should clear most glyph pixels from the corner box
        assert coverage_after < coverage_before * 0.5

    def test_pixels_outside_box_untouched(self):
        source = load_image_bgr(SAMPLE)
        cleaned = DoubaoEngine().remove_watermark(source)
        # top-left quadrant is far from the bottom-right mark: must be identical
        half_h, half_w = source.shape[0] // 2, source.shape[1] // 2
        assert np.array_equal(source[:half_h, :half_w], cleaned[:half_h, :half_w])


# ── Doubao engine: negative + safety guard ──────────────────────────


class TestNegativeAndGuard:
    def test_clean_image_not_detected(self):
        # smooth gradient, no watermark
        assert not DoubaoEngine().detect(_gradient_bgr()).detected

    def test_clean_image_returned_unchanged(self):
        source = _gradient_bgr()
        assert np.array_equal(source, DoubaoEngine().remove_watermark(source))

    def test_document_background_guard(self):
        """A dense high-frequency corner (document-like) trips the coverage
        guard, so the image is left untouched rather than smeared."""
        engine = DoubaoEngine()
        rng = np.random.default_rng(0)
        page = np.full((1024, 1024, 3), 255, np.uint8)
        # fill the bottom-right box area with random grayish text-like noise
        x, y, bw, bh = engine.locate(page).bbox
        noise = rng.integers(150, 246, size=(bh, bw), dtype=np.uint8)
        page[y : y + bh, x : x + bw] = noise[:, :, None]
        assert np.array_equal(page, engine.remove_watermark(page))


# ── Region eraser ───────────────────────────────────────────────────


class TestBoxesToMask:
    def test_mask_set_inside_box(self):
        mask = boxes_to_mask((100, 100), [(10, 20, 30, 40)], dilate=0)
        assert mask[25, 15] == 255  # inside
        assert mask[0, 0] == 0  # outside
        assert mask.shape == (100, 100)

    def test_multiple_boxes(self):
        corners = [(0, 0, 10, 10), (90, 90, 10, 10)]
        mask = boxes_to_mask((100, 100), corners, dilate=0)
        assert mask[5, 5] == 255
        assert mask[95, 95] == 255
        assert mask[50, 50] == 0

    def test_dilate_grows_mask(self):
        box = [(40, 40, 10, 10)]
        plain = boxes_to_mask((100, 100), box, dilate=0)
        grown = boxes_to_mask((100, 100), box, dilate=5)
        assert grown.sum() > plain.sum()

    def test_box_clipped_to_bounds(self):
        # box partly outside the image must not raise and stays in-bounds
        mask = boxes_to_mask((50, 50), [(40, 40, 100, 100)], dilate=0)
        assert mask[45, 45] == 255


class TestEraseCv2:
    def _image_with_logo(self) -> tuple[np.ndarray, tuple[int, int, int, int]]:
        """Flat gray canvas with a bright rectangular "logo" and its box."""
        canvas = np.full((200, 200, 3), 120, np.uint8)
        box = (140, 160, 50, 30)
        left, top, width, height = box
        canvas[top : top + height, left : left + width] = (255, 255, 255)
        return canvas, box

    def test_erase_changes_region(self):
        canvas, box = self._image_with_logo()
        cleaned = erase(canvas, boxes=[box], backend="cv2")
        left, top, width, height = box
        # on a flat background the logo region should be repainted near gray
        patch = cleaned[top : top + height, left : left + width]
        assert abs(float(patch.mean()) - 120) < 20
        assert not np.array_equal(cleaned, canvas)

    def test_pixels_outside_box_untouched(self):
        canvas, box = self._image_with_logo()
        cleaned = erase(canvas, boxes=[box], backend="cv2", dilate=0)
        # a far corner must be identical
        assert np.array_equal(canvas[:50, :50], cleaned[:50, :50])

    def test_no_boxes_returns_copy(self):
        canvas = np.full((100, 100, 3), 50, np.uint8)
        assert np.array_equal(canvas, erase(canvas, boxes=[], backend="cv2"))

    def test_empty_mask_returns_copy(self):
        canvas = np.full((100, 100, 3), 50, np.uint8)
        nothing = np.zeros((100, 100), np.uint8)
        assert np.array_equal(canvas, erase(canvas, mask=nothing, backend="cv2"))


class TestLamaBackend:
    def test_lama_raises_when_unavailable(self):
        if lama_available():
            pytest.skip("onnxruntime installed; cannot test the unavailable path")
        canvas = np.full((100, 100, 3), 50, np.uint8)
        with pytest.raises(RuntimeError, match="onnxruntime"):
            erase(canvas, boxes=[(10, 10, 20, 20)], backend="lama")