Files
remove-ai-watermarks/scripts/synthid_pixel_probe.py
T
test-user 7dcc922617 feat(probe): solid-fill SynthID carrier probe; corpus reconfirms no pixel detector
scripts/synthid_pixel_probe.py is an experimental/diagnostic tool for the
one pixel-domain question that isn't a dead-end: on solid-color fills the
zero-mean residual IS essentially the watermark carrier. Two modes:
'consistency' (mean pairwise NCC of carriers across fills vs random
baseline) and 'removal' (does the pipeline drop the carrier toward
baseline?). Logic validated synthetically (injected carrier correlates,
random noise doesn't, simulated removal collapses it) -- no real fills or
GPU needed.

Running its metric on the corpus independently re-confirms the documented
dead-end for real content: at matched resolution SynthID positives do not
cluster apart from negatives (within-Gemini 0.07; at 1024 px pos-vs-neg
>= pos-vs-pos). An apparent 0.62 among 1254px ChatGPT positives turned out
to be near-duplicate content (5 renders of one prompt at ~0.92; a distinct
ChatGPT image scored ~0 against them), not a shared carrier. The probe is
solid-fills-only; do not use on real content.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 16:35:39 -07:00

150 lines
6.1 KiB
Python

"""SynthID pixel-carrier probe -- EXPERIMENTAL / DIAGNOSTIC ONLY.
There is no local detector of the SynthID pixel watermark on real content: the
carrier drowns in scene texture (see CLAUDE.md, confirmed repeatedly). This
probe is meaningful ONLY on **solid-color fills**, where the per-pixel deviation
from the image mean is essentially the watermark carrier (almost all the
variance). It answers two controlled questions, neither of which is a
real-content detector:
consistency IMAGES...
Mean pairwise normalized cross-correlation (NCC) of the carriers across
independent solid fills from one model, vs a random baseline. Genuine
SynthID positives share a fixed carrier, so they correlate well above
random (the pilot saw ~0.92 on gpt-image black fills); clean fills don't.
removal --pos P... --cleaned C...
Build a carrier template from the positive fills, then compare how the
positives and the pipeline-cleaned fills correlate to it. If removal
worked, the cleaned correlation collapses toward the random baseline --
pixel-domain evidence that the pipeline destroys the carrier, not just the
C2PA metadata.
Do NOT run this on real-content images; the numbers are uninformative there.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
import click
import numpy as np
from PIL import Image
from rich.console import Console
if TYPE_CHECKING:
from numpy.typing import NDArray
log = logging.getLogger(__name__)
console = Console()
def load_gray(path: str) -> NDArray[np.float64]:
"""Load an image as a float64 grayscale array (mean of RGB channels)."""
with Image.open(path) as img:
return np.asarray(img.convert("RGB"), dtype=np.float64).mean(axis=2)
def carrier(gray: NDArray[np.float64]) -> NDArray[np.float64]:
"""Zero-mean, unit-norm residual of a solid-fill image -- its carrier.
Returns a flattened unit-norm vector for NCC comparison. A perfectly flat
image (std 0, e.g. a synthetic #000000 reference) has no carrier and yields
an all-zero vector, which correlates to 0 with everything.
"""
residual = gray - float(gray.mean())
norm = float(np.linalg.norm(residual))
if norm == 0.0:
return residual.ravel()
return (residual / norm).ravel()
def ncc(a: NDArray[np.float64], b: NDArray[np.float64]) -> float:
"""Normalized cross-correlation of two carriers (unit-norm zero-mean vectors)."""
if a.shape != b.shape or a.size == 0:
return 0.0
return float(np.dot(a, b))
def mean_pairwise_ncc(carriers: list[NDArray[np.float64]]) -> float:
"""Average NCC over all distinct carrier pairs; 0.0 if fewer than two."""
scores = [ncc(carriers[i], carriers[j]) for i in range(len(carriers)) for j in range(i + 1, len(carriers))]
return float(np.mean(scores)) if scores else 0.0
def template(carriers: list[NDArray[np.float64]]) -> NDArray[np.float64]:
"""Average carrier, renormalized to unit norm (the shared-pattern estimate)."""
avg = np.mean(carriers, axis=0)
norm = float(np.linalg.norm(avg))
return avg / norm if norm else avg
def random_baseline(shape: tuple[int, ...], n: int, *, seed: int = 0) -> float:
"""Mean pairwise NCC of ``n`` random-noise carriers of ``shape`` (~0)."""
rng = np.random.default_rng(seed)
noise = [carrier(rng.standard_normal(shape)) for _ in range(max(n, 2))]
return mean_pairwise_ncc(noise)
def _load_carriers(paths: tuple[str, ...]) -> list[NDArray[np.float64]]:
"""Load carriers for same-shaped images; warn and skip mismatched shapes."""
grays = [(p, load_gray(p)) for p in paths]
shape = grays[0][1].shape
carriers: list[NDArray[np.float64]] = []
for p, g in grays:
if g.shape != shape:
console.print(f" [yellow]skip[/] {p}: shape {g.shape} != {shape}")
continue
carriers.append(carrier(g))
return carriers
@click.group()
def cli() -> None:
"""SynthID pixel-carrier probe (solid-color fills only)."""
@cli.command()
@click.argument("images", nargs=-1, required=True, type=click.Path(exists=True))
def consistency(images: tuple[str, ...]) -> None:
"""Mean pairwise carrier NCC across solid fills, vs the random baseline."""
carriers = _load_carriers(images)
if len(carriers) < 2:
console.print("[red]Need at least two same-shaped images.[/]")
raise SystemExit(1)
observed = mean_pairwise_ncc(carriers)
baseline = random_baseline(carriers[0].shape, len(carriers))
console.print(f" carriers: {len(carriers)}")
console.print(f" mean pairwise NCC: [bold]{observed:.3f}[/]")
console.print(f" random baseline: {baseline:.3f}")
verdict = "shared carrier present" if observed > 0.3 else "no shared carrier (within noise)"
console.print(f" verdict: [bold]{verdict}[/]")
@cli.command()
@click.option("--pos", "pos", multiple=True, required=True, type=click.Path(exists=True), help="Positive solid fills.")
@click.option(
"--cleaned", "cleaned", multiple=True, required=True, type=click.Path(exists=True), help="Pipeline-cleaned fills."
)
def removal(pos: tuple[str, ...], cleaned: tuple[str, ...]) -> None:
"""Does the pipeline drop the carrier correlation toward the random baseline?"""
pos_carriers = _load_carriers(pos)
cleaned_carriers = _load_carriers(cleaned)
if not pos_carriers or not cleaned_carriers:
console.print("[red]Need at least one positive and one cleaned fill of matching shape.[/]")
raise SystemExit(1)
tmpl = template(pos_carriers)
pos_corr = float(np.mean([ncc(c, tmpl) for c in pos_carriers]))
cleaned_corr = float(np.mean([ncc(c, tmpl) for c in cleaned_carriers]))
baseline = random_baseline(tmpl.shape, max(len(cleaned_carriers), 2))
console.print(f" positive->template NCC: [bold]{pos_corr:.3f}[/]")
console.print(f" cleaned->template NCC: [bold]{cleaned_corr:.3f}[/]")
console.print(f" random baseline: {baseline:.3f}")
effective = cleaned_corr < pos_corr / 2
console.print(f" verdict: [bold]{'carrier attenuated' if effective else 'carrier survives'}[/]")
if __name__ == "__main__":
cli()