From b0aad476fbb6047a6610bba99f00f89844cb8bf2 Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Sun, 31 May 2026 15:41:50 -0700 Subject: [PATCH] fix(scripts): drop rich import from analysis scripts (red CI after rich removal) The cli refactor dropped rich from dependencies, but four scripts still did `from rich.console import Console` / `rich.table import Table`. Their test modules import the scripts, so a clean `uv sync --frozen` (CI: core+dev, no rich) failed at collection with ModuleNotFoundError on macOS/Windows/Linux. Add a shared plain-text shim `scripts/_plain_console.py` (Console/Table via click.echo, markup stripped) and switch all four scripts to it. Verified: all four import with rich blocked, and tests/test_synthid_corpus.py + tests/test_synthid_pixel_probe.py pass. Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 2 +- scripts/_plain_console.py | 61 +++++++++++++++++++++++++++++ scripts/corpus_gap_scan.py | 3 +- scripts/synthid_corpus.py | 3 +- scripts/synthid_pixel_probe.py | 2 +- scripts/text_detection_benchmark.py | 2 +- 6 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 scripts/_plain_console.py diff --git a/CLAUDE.md b/CLAUDE.md index 1a1b1cc..13e644c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,7 +80,7 @@ Who embeds what, and whether it is locally detectable (so we know which gaps are - **fp16 VAE black-output fix (issue #29, 2026-05-30):** on a **CUDA/XPU fp16** backend the stock SDXL VAE overflows to NaN and the *plain* img2img path decodes to an **all-black** image (reproduced on the raiw.cc result: a 1086x1448 input -> a uniformly black 4.6 KB PNG, mean 0). `watermark_remover._load_pipeline` now swaps in the fp16-fixed SDXL VAE (`madebyollin/sdxl-vae-fp16-fix` = `_SDXL_FP16_VAE_ID`) when `_needs_fp16_vae_fix(model_id, DEFAULT_MODEL_ID, is_fp16)` is true -- only the default SDXL checkpoint on fp16. 
**cpu/mps run fp32** (the stock VAE is fine there, which is why the bug never reproduces on Mac), and the **differential / region-hires** pipeline already upcasts the VAE itself (see the `text_protector` bullet). A custom non-SDXL `model_id` keeps its own VAE (the fp16-fix VAE is SDXL-architecture-specific). The decision is a pure helper, unit-tested without a download (`tests/test_platform.py::TestFp16VaeFix`); the actual black->clean recovery needs a CUDA GPU and was NOT verifiable on this MPS machine -- confirm on the backend / an NVIDIA box. - Pyright first run is slow (2-3 min) due to ML deps (torch/diffusers/transformers stubs); full-project `uv run pyright` can stall for many minutes — scope it to changed files. - `ultralytics` monkey-patches `PIL.Image.open` and tries to autoload `pi_heif`. When `pi_heif` is missing, opening files raises `ModuleNotFoundError`, not `UnidentifiedImageError`. Code that opens user-supplied or unknown-format files should `except Exception`, not just `OSError`/`UnidentifiedImageError`. -- **rich `console.print` parses `[word]` as a style tag and silently drops unknown ones.** A literal bracketed token in a print string disappears: `pip install 'remove-ai-watermarks[gpu]'` rendered as `...remove-ai-watermarks'` (the `[gpu]` extra eaten), which sent users a broken install command (surfaced via #19). Escape the literal bracket as `\[gpu]` (in a normal Python string that is `"\\[gpu]"`) in any rich string carrying user-facing brackets. Regression-guarded by `tests/test_cli.py::TestGpuHintMarkup`. +- **rich was dropped (CLI + scripts print plain text via `click.echo`).** `cli.py` renders through small `_Console`/`_Table`/`_Progress` shims; the analysis scripts (`scripts/synthid_corpus.py`, `synthid_pixel_probe.py`, `text_detection_benchmark.py`, `corpus_gap_scan.py`) import `Console`/`Table` from the shared `scripts/_plain_console.py` shim (markup like `[bold]`/`[/]` is stripped, tables render aligned). 
Consequences: (1) `rich` is NOT a dependency, so anything that imports it breaks a clean `uv sync --frozen` (CI installs core+dev only) — this exact gap red-failed CI after the refactor when those 4 scripts still imported rich; if you add a script, use the `_plain_console` shim, not rich. (2) The old `[gpu]`-bracket-eaten bug (#19) is gone — plain `click.echo` prints `pip install 'remove-ai-watermarks[gpu]'` verbatim, no escaping needed (regression-guarded by `tests/test_cli.py::TestGpuHintMarkup`). Caveat: the `scripts/_plain_console.py` shim still strips any all-lowercase `[word]` token as markup (its regex cannot tell `[gpu]` from `[bold]`), so a literal lowercase bracketed word printed through that shim's `Console.print` is dropped — print such text with `click.echo` directly. (3) No Unicode glyphs / colors / progress bars in CLI output by design. - Metadata detection for AVIF/HEIF/JPEG-XL relies on a binary scan for `C2PA_UUID` + `IPTC_AI_MARKERS`, plus EXIF `Software` / XMP `CreatorTool` generator tags via `metadata.exif_generator` (validated with synthesized AVIF/JPEG fixtures + an XMP raw-scan fixture). C2PA removal in those containers is implemented via `noai/isobmff.py` (top-level ``uuid`` / ``jumb`` box stripper, no re-encoding), which now also drops a top-level XMP ``uuid`` box that carries an AI label (matched by AI-marker content, not by the XMP UUID, so byte-order-robust) and covers MP4/MOV/M4V/M4A by content sniff. **Non-ISOBMFF audio/video removal is via ffmpeg** (`_FFMPEG_STRIP_EXTS` -> `_strip_with_ffmpeg`): WebM/Matroska (EBML), MP3 (ID3), WAV/FLAC/OGG (RIFF/Vorbis) are stripped losslessly with `ffmpeg -map_metadata -1 -map_chapters -1 -c copy` (codec data untouched). Requires ffmpeg on PATH; raises `RuntimeError` if absent or if ffmpeg can't parse the file. Verified end-to-end (a real ffmpeg-made WAV/MP3 with a `title=Suno AI` tag -> tag gone, audio bytes preserved). **Meta-box XMP now handled (`isobmff.blank_ai_xmp_packets`, v0.6.9):** an AI-label XMP packet stored as a meta-box `mime` item (AVIF/HEIF) is blanked in place (overwritten with spaces of the same length, so `iloc` offsets and the coded image stay valid). 
**Still NOT built:** an `Exif` *item* inside the `meta` box (rare -- AI labels are XMP) needs full `iinf`/`iloc` surgery (offset rewrite) with corruption risk -- exiftool (R/W/C for HEIC/AVIF EXIF+XMP, verified on exiftool.org 2026-05-27) would do it but is a non-installed binary dep, so it stays a documented gap. **Audio watermark DETECTION (Resemble PerTh) was evaluated and NOT built (2026-05-26):** `resemble-perth`'s `PerthImplicitWatermarker.get_watermark()` returns a raw bit-array with **no presence/confidence flag** (clean audio decodes to arbitrary bits too), so reliably distinguishing watermarked-from-clean needs either Resemble's fixed payload or a confidence API -- neither is public, and there's no real Resemble sample to calibrate against. Same wall-class as the SynthID pixel detector: the decode exists, reliable presence-detection does not. (perth's top-level `PerthImplicitWatermarker` is also gated to None unless `librosa` is importable.) - **SynthID detection is metadata-only.** There is no reliable *local* detector of the SynthID *pixel* watermark — Google's decoder is proprietary, no public spec or API (only a waitlisted portal). Authoritative confirmation: Google DeepMind's own paper "SynthID-Image: Image watermarking at internet scale" (Gowal et al., arXiv:2510.09263) states the verification service is restricted to "trusted testers" and does not release detector weights or a reproducible algorithm — so a local pixel detector is infeasible by design, not just unbuilt. https://arxiv.org/abs/2510.09263 We detect SynthID by its C2PA companion (`synthid_source` / `SYNTHID_C2PA_ISSUERS`), which is reliable while the manifest is intact but says nothing once C2PA is stripped. 
**Surface-dependent blind spot (verified 2026-05-24):** the same Google model emits different metadata per surface -- the Gemini *app* wraps outputs in Google C2PA, but the *API/playground* (AI Studio, Nano Banana / gemini-2.5-flash-image) emits the SynthID *pixel* watermark (confirmed via the Gemini-app oracle) + the visible sparkle but **no C2PA/IPTC at all**, so `synthid_source` returns None despite SynthID being present. Only the pixel oracle or the visible-sparkle detector catches those. (Meta AI is another surface mismatch: it writes the IPTC `digitalSourceType=trainedAlgorithmicMedia` marker, not C2PA and not SynthID.) Google→SynthID is long-standing; OpenAI→SynthID is confirmed by OpenAI's Help Center (ChatGPT/Codex/API "include both C2PA metadata and SynthID watermarks", updated 2026-05-21) but time-gated (pre-rollout OpenAI images carry C2PA without SynthID), so the OpenAI verdict is hedged "likely". Oracles: Gemini app "Verify with SynthID" (Google), openai.com/verify (OpenAI). **Each vendor's oracle detects only its OWN content (verified on the page 2026-05-31):** `openai.com/research/verify` states verbatim "OpenAI generation signals will only be detected if the image was generated with our tools" and "Content could also still be AI-generated by another company's model, which the tool currently does not detect" -- SynthID is shared tech but the verifier is keyed to its own vendor's payload, so a Google-SynthID image reads clean on OpenAI's verifier and vice-versa. **This explains the recurring "oracle says clean but `identify` still flags SynthID" report (#14):** the oracle reads the *pixel* watermark (gone after our SDXL pass), while `identify` reads the *C2PA-metadata proxy* (still present if the manifest survived). Different signals, not a contradiction -- strip the metadata too (`metadata --remove` / `all`) and the proxy goes quiet, but a quiet proxy is not proof the pixel watermark is gone. 
The spectral phase-coherence approach from `github.com/aloshdenny/reverse-SynthID` was evaluated (May 2026) and **does not work for real-content detection**: on its own shipped codebook + validation set, watermarked and cleaned images were indistinguishable (conf within noise, cleaned often higher); it only fires on pure-black 1024x1024 reference images at exact resolution (the controlled case it was calibrated on). The README's "90% / conf=0.91" reproduces only in that lab condition. Do not build a production detector on it; if revisited, it is experimental/diagnostic only and needs a per-resolution, per-model reference corpus. A from-scratch gpt-image pilot (2026-05-24) confirmed this independently: 5 independent solid-black gpt-image outputs share a near-identical fixed signature (pairwise residual correlation **0.92**, avg-template retains 97% energy), so the watermark/carrier IS strongly present and consistent on flat content — but the carrier frequencies extracted from it do NOT discriminate real content (carrier-to-random ratio: cleaned 1.86 > watermarked 1.53; a non-gpt-image image scored highest at 3.67). The signature drowns in content texture. Net: a perfectly consistent solid-color signature still yields no real-content pixel detector with magnitude/carrier methods. A corpus discrimination test (2026-05-24, `scripts/synthid_pixel_probe.py`, raw zero-mean residual NCC) independently re-confirms this: at matched resolution, SynthID positives do NOT cluster apart from negatives (within-Gemini 0.07; at 1024 px pos-vs-neg >= pos-vs-pos). The only high correlations were near-duplicate *content* (5 ChatGPT renders of one prompt at ~0.92, while a distinct ChatGPT image scored ~0 against them) — content, not a carrier. The probe is solid-fills-only and EXPERIMENTAL/DIAGNOSTIC; do not use it on real content. 
**Correction (deeper re-examination 2026-05-25):** the carrier IS real on solid fills — the earlier "no carrier" was a *method* artifact of using spatial / FFT-magnitude NCC, which can't see it. The carrier is a fixed *phase* at specific low frequencies, so the right metric is **per-bin phase coherence**. On 8 white `gemini-2.5-flash-image` fills (generated via the reverse-SynthID trick: identity-edit prompt "Recreate this image exactly as it is" on a synthetic pure-white PNG — this bypasses the recitation block that rejects text prompts for pure colors), phase coherence at the white carriers `(0,±7..±12,±20..±23)` = **0.86** vs **0.31** random; single-image leave-one-out phase-match **+0.83** vs real photos **-0.24**. (Black `2.5-flash` fills clip to std≈0 — SynthID can't push values below 0, so no carrier in black; the repo's dark carriers come from nano-banana-pro.) **But it does not generalize:** (a) carriers are model-version + resolution + color specific — the repo's v4 codebook (built for `gemini-3.1-flash-image-preview` + `nano-banana-pro-preview`) scores ~0.527 on my 2.5-flash white fills, indistinguishable from negatives (~0.50), i.e. carriers shift across model versions and need a per-model codebook; (b) on real content (30 `2.5-flash` images) the carrier collapses — set phase coherence at carriers 0.37 ≈ random 0.42, and the repo's v4 detector gives content 0.518 ≈ negatives 0.504 (no separation; a faint +0.24 single-image lean is likely a brightness confound). Net: the spectral/phase approach is a real *controlled-fill* characterizer, NOT an arbitrary-real-content detector, and is brittle to model version. Metadata proxy + visible sparkle + online oracles remain the ceiling for real content. 
- **External AI-vs-real classifier models are out of scope (decided 2026-05-24).** Generic HuggingFace detectors (`Organika/sdxl-detector` Swin Transformer, `umm-maybe/AI-image-detector`, and fine-tunes) exist and report ~0.98 on their *own* SDXL-vs-real validation sets, but they are per-generator and the model cards themselves note degraded accuracy off-distribution; they are untested on gpt-image / Gemini Nano Banana (the metadata-stripped surfaces we care about), and our own light SDXL pass would likely defeat them the same way it defeats SynthID. Detection here stays local + signal-based (metadata + visible sparkle); do not add a bundled classifier dependency. diff --git a/scripts/_plain_console.py b/scripts/_plain_console.py new file mode 100644 index 0000000..fe740ce --- /dev/null +++ b/scripts/_plain_console.py @@ -0,0 +1,61 @@ +"""Minimal plain-text stand-ins for the rich Console/Table API. + +rich was dropped as a project dependency (see the CLI plain-text refactor), but +the analysis scripts still printed through it. These shims keep the scripts +runnable without rich: ``[bold]``/``[/]``-style markup is stripped and tables +render as aligned plain text. Output goes through ``click.echo`` to match the +package CLI (no bare ``print`` in tooling). +""" + +from __future__ import annotations + +import re +from typing import Any + +import click + +# Matches rich style tags: the bare close ``[/]`` and named open/close tags such +# as ``[yellow]``, ``[bold yellow]``, ``[/green]``. Anchored to lowercase-letter +# starts so numeric/data brackets (``[1024]``, ``[file.png]``) are left intact. 
+_MARKUP = re.compile(r"\[(?:/|/?[a-z][a-z ]*)\]") + + +def _strip(obj: Any) -> str: + return _MARKUP.sub("", str(obj)) + + +class Table: + """Drop-in for ``rich.table.Table`` covering add_column/add_row + render.""" + + def __init__(self, *args: Any, title: str | None = None, **kwargs: Any) -> None: + self.title = title + self._headers: list[str] = [] + self._rows: list[list[str]] = [] + + def add_column(self, header: str = "", *args: Any, **kwargs: Any) -> None: + self._headers.append(_strip(header)) + + def add_row(self, *cells: Any) -> None: + self._rows.append([_strip(c) for c in cells]) + + def render(self) -> str: + all_rows = ([self._headers] if self._headers else []) + self._rows + cols = max((len(r) for r in all_rows), default=0) + widths = [0] * cols + for row in all_rows: + for i, cell in enumerate(row): + widths[i] = max(widths[i], len(cell)) + lines: list[str] = [] + if self.title: + lines.append(_strip(self.title)) + if self._headers: + lines.append(" ".join(h.ljust(widths[i]) for i, h in enumerate(self._headers))) + lines.extend(" ".join(c.ljust(widths[i]) for i, c in enumerate(row)) for row in self._rows) + return "\n".join(lines) + + +class Console: + """Drop-in for ``rich.console.Console`` covering ``print`` (with Table).""" + + def print(self, *objects: Any, **kwargs: Any) -> None: + click.echo(" ".join(o.render() if isinstance(o, Table) else _strip(o) for o in objects)) diff --git a/scripts/corpus_gap_scan.py b/scripts/corpus_gap_scan.py index 7e96097..d9fd635 100644 --- a/scripts/corpus_gap_scan.py +++ b/scripts/corpus_gap_scan.py @@ -30,8 +30,7 @@ from collections import Counter from pathlib import Path import click -from rich.console import Console -from rich.table import Table +from _plain_console import Console, Table from remove_ai_watermarks.identify import identify from remove_ai_watermarks.metadata import _png_late_metadata diff --git a/scripts/synthid_corpus.py b/scripts/synthid_corpus.py index dce4fb2..5ef26cd 100644 --- 
a/scripts/synthid_corpus.py +++ b/scripts/synthid_corpus.py @@ -26,9 +26,8 @@ from datetime import datetime, timezone from pathlib import Path import click +from _plain_console import Console, Table from PIL import Image -from rich.console import Console -from rich.table import Table from remove_ai_watermarks.noai.c2pa import extract_c2pa_info diff --git a/scripts/synthid_pixel_probe.py b/scripts/synthid_pixel_probe.py index 1cefa6f..a55b110 100644 --- a/scripts/synthid_pixel_probe.py +++ b/scripts/synthid_pixel_probe.py @@ -30,8 +30,8 @@ from typing import TYPE_CHECKING import click import numpy as np +from _plain_console import Console from PIL import Image -from rich.console import Console if TYPE_CHECKING: from numpy.typing import NDArray diff --git a/scripts/text_detection_benchmark.py b/scripts/text_detection_benchmark.py index c39c0c9..0af2c40 100644 --- a/scripts/text_detection_benchmark.py +++ b/scripts/text_detection_benchmark.py @@ -33,8 +33,8 @@ from typing import Any import cv2 import numpy as np +from _plain_console import Console from PIL import Image, ImageDraw, ImageFont -from rich.console import Console from remove_ai_watermarks import text_protector as tp