Files
reverse-SynthID/scripts/dissolve_batch.py
T
Alosh Denny 083a5eec6a feat(scripts): add V4 codebook build, batch dissolve, and calibration scripts
build_codebook_v4.py  — builds SpectralCodebookV4 from the hierarchical
  reverse-synthid-dataset (model × color × resolution).
dissolve_batch.py     — runs all bypass presets (gentle … nuke) over an
  input directory. Supports Round-06 'final' and 'nuke' strengths.
calibrate_from_feedback.py — updates carrier_weights from detection
  feedback, closing the human-in-the-loop calibration loop.

Made-with: Cursor
2026-04-24 02:08:56 +05:30

256 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Phase-2 driver for the reverse-SynthID V4 manual-validation loop.
Takes an input folder of watermarked images and emits one or more strength
variants per image (``A``, ``B``, ``C``, ... by default). Writes a
``manifest.csv`` that pairs each variant with:
- source image path
- output path
- strength preset
- profile key used
- PSNR / SSIM achieved
You then paste the variants into the Gemini app, run SynthID detection, and
fill in a small ``tally.csv`` (columns: ``source,variant,still_watermarked``,
values ``y/n``). Feed both files into ``calibrate_from_feedback.py`` to
update the codebook's per-carrier weights and iterate.
Usage::
python scripts/dissolve_batch.py \\
--input /path/to/input_images \\
--output /path/to/out_dir \\
--codebook artifacts/spectral_codebook_v4.npz \\
--model nano-banana-pro-preview \\
--strengths gentle moderate aggressive
Strengths map to filesystem-safe single-letter variants (A,B,C,D) in
manifest order, which makes the tally CSV trivial to fill by hand.
"""
from __future__ import annotations
import argparse
import csv
import glob
import os
import sys
import time
from typing import List, Optional
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(REPO_ROOT, "src", "extraction"))
import cv2 # noqa: E402
import numpy as np # noqa: E402
from synthid_bypass_v4 import SpectralCodebookV4, SynthIDBypassV4 # noqa: E402
# Extensions recognised when scanning a directory. Lower-case here; the
# upper-case twin of each is globbed explicitly so case-sensitive
# filesystems are covered too.
IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp")
DEFAULT_STRENGTHS = ("gentle", "moderate", "aggressive")
VARIANT_LETTERS = "ABCDEFGH"


def iter_input_images(input_path: str) -> List[str]:
    """Resolve ``--input`` (file, directory, or glob) to a sorted list."""
    if os.path.isfile(input_path):
        return [input_path]
    if os.path.isdir(input_path):
        # Collect every match for each extension in both cases; the set
        # de-duplicates hits on case-insensitive filesystems.
        hits = {
            path
            for ext in IMAGE_EXTS
            for pattern in (f"*{ext}", f"*{ext.upper()}")
            for path in glob.glob(os.path.join(input_path, pattern))
        }
        return sorted(hits)
    # Neither a file nor a directory: treat the argument as a glob pattern.
    return sorted(glob.glob(input_path))
def dissolve_one(
    bypass: SynthIDBypassV4,
    codebook: SpectralCodebookV4,
    src: str,
    out_dir: str,
    variant_letter: str,
    strength: str,
    model: Optional[str],
) -> dict:
    """Dissolve one image at one strength; return a manifest row.

    On any failure the row is still produced, with the exception text in
    ``notes`` (prefixed ``ERROR:``) so the batch keeps going.
    """
    stem = os.path.splitext(os.path.basename(src))[0]
    out_path = os.path.join(out_dir, f"{stem}__{variant_letter}_{strength}.png")
    started = time.time()

    # Fields common to both the success and the failure row.
    row = {
        "source": os.path.abspath(src),
        "variant": variant_letter,
        "strength": strength,
        "still_watermarked": "",  # filled by you during validation
    }
    try:
        # Row construction stays inside the try: a malformed result (e.g.
        # a missing details key) is reported the same way as a bypass error.
        result = bypass.bypass_v4_file(
            src, out_path, codebook,
            strength=strength, model=model, verify=False,
        )
        row.update(
            output=os.path.abspath(out_path),
            profile_key=result.details["profile_key"],
            exact_match=int(bool(result.details["exact_match"])),
            psnr=round(result.psnr, 3),
            ssim=round(result.ssim, 5),
            n_passes_applied=result.details["n_passes_applied"],
            n_passes_rolled_back=result.details["n_passes_rolled_back"],
            elapsed_sec=round(time.time() - started, 3),
            notes="",
        )
    except Exception as e:
        row.update(
            output="",
            profile_key="",
            exact_match=0,
            psnr="",
            ssim="",
            n_passes_applied=0,
            n_passes_rolled_back=0,
            elapsed_sec=round(time.time() - started, 3),
            notes=f"ERROR: {e}",
        )
    return row
def run(
    input_path: str,
    out_dir: str,
    codebook_path: str,
    strengths: List[str],
    model: Optional[str] = None,
    limit: Optional[int] = None,
    manifest_name: str = "manifest.csv",
) -> str:
    """Dissolve every input image at every strength; write a manifest CSV.

    Args:
        input_path: File, directory, or glob of watermarked source images.
        out_dir: Directory that receives the variant PNGs and the manifest.
        codebook_path: Path to the V4 spectral codebook ``.npz``.
        strengths: Preset names; one variant letter (A, B, ...) per preset.
        model: Optional model hint; a warning is printed when it is absent
            from the codebook (best-effort fallback is assumed — see the
            warning text below).
        limit: Process at most this many source images (quick tests).
        manifest_name: CSV filename written inside ``out_dir``.

    Returns:
        Path of the manifest CSV that was written.

    Raises:
        SystemExit: exit code 2 when no input images are found.
        ValueError: when more strengths are requested than variant letters.
    """
    sources = iter_input_images(input_path)
    if limit is not None:
        sources = sources[:limit]
    if not sources:
        print(f"No images found in {input_path}")
        sys.exit(2)
    os.makedirs(out_dir, exist_ok=True)

    codebook = SpectralCodebookV4()
    codebook.load(codebook_path)
    if model is not None and model not in codebook.models:
        print(f"WARNING: --model {model} not found in codebook. "
              f"Available: {codebook.models}. Proceeding anyway "
              "(best-effort fallback across models).")
    bypass = SynthIDBypassV4()

    # One filesystem-safe letter per strength, in manifest order.
    if len(strengths) > len(VARIANT_LETTERS):
        raise ValueError(
            f"Too many strengths ({len(strengths)}); "
            f"max supported: {len(VARIANT_LETTERS)}"
        )
    letters = list(VARIANT_LETTERS[:len(strengths)])

    manifest_path = os.path.join(out_dir, manifest_name)
    fieldnames = [
        "source", "variant", "strength", "output", "profile_key",
        "exact_match", "psnr", "ssim",
        "n_passes_applied", "n_passes_rolled_back",
        "elapsed_sec", "still_watermarked", "notes",
    ]

    # FIX: the original message ran the counts straight into the output path
    # ("... variant(s) /path/out"); add an arrow so the destination reads.
    print(f"Dissolving {len(sources)} image(s) × {len(strengths)} variant(s) "
          f"→ {out_dir}")
    if model:
        print(f"Model hint: {model}")

    rows = []
    for i, src in enumerate(sources):
        print(f"[{i + 1}/{len(sources)}] {os.path.basename(src)}")
        for letter, strength in zip(letters, strengths):
            row = dissolve_one(
                bypass=bypass,
                codebook=codebook,
                src=src,
                out_dir=out_dir,
                variant_letter=letter,
                strength=strength,
                model=model,
            )
            rows.append(row)
            # dissolve_one prefixes notes with "ERROR:" on failure.
            if row["notes"].startswith("ERROR"):
                print(f" {letter}/{strength:12s} {row['notes']}")
            else:
                print(f" {letter}/{strength:12s} "
                      f"psnr={row['psnr']:>6} ssim={row['ssim']:>7} "
                      f"profile={row['profile_key']} "
                      f"exact={row['exact_match']}")

    with open(manifest_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"\nManifest: {manifest_path}")
    print("\nNext steps:")
    print(" 1. Upload each ABS-path output to the Gemini app and run "
          "SynthID detection.")
    print(" 2. For each row, fill the `still_watermarked` column with "
          "`y` or `n` (leave blank to skip).")
    # FIX: stray f-prefix removed from a literal with no placeholders.
    print(" 3. Save the filled file as tally.csv and run:")
    print(f" python scripts/calibrate_from_feedback.py "
          f"--manifest {manifest_path} --tally <your_tally.csv> "
          f"--codebook {codebook_path}")
    return manifest_path
def main() -> None:
    """CLI entry point: parse arguments and delegate to :func:`run`."""
    parser = argparse.ArgumentParser(
        description="Emit bypass variants for manual Gemini validation.",
    )
    # Every bypass preset the V4 pipeline understands, including the
    # Round-06 'final' and 'nuke' strengths.
    strength_choices = [
        "gentle", "moderate", "aggressive", "maximum",
        "demolish", "annihilate", "combo",
        "blog_pure", "blog_plus", "blog_combo",
        "residual_pure", "residual_plus", "residual_combo",
        "regen_pure", "regen_plus", "regen_combo",
        "final", "nuke",
    ]
    parser.add_argument("--input", required=True,
                        help="Path to an image, a directory, or a glob pattern.")
    parser.add_argument("--output", required=True,
                        help="Directory to write variants and manifest.csv into.")
    parser.add_argument("--codebook", required=True,
                        help="Path to the V4 codebook .npz.")
    parser.add_argument("--strengths", nargs="+",
                        default=list(DEFAULT_STRENGTHS),
                        choices=strength_choices,
                        help=f"Strengths to emit (default: {DEFAULT_STRENGTHS}).")
    parser.add_argument("--model", default=None,
                        help=(
                            "Optional model hint (e.g. nano-banana-pro-preview). "
                            "Omit to let the codebook auto-select by resolution."
                        ))
    parser.add_argument("--limit", type=int, default=None,
                        help="Stop after this many input images (for quick tests).")
    parser.add_argument("--manifest-name", default="manifest.csv",
                        help="Manifest filename inside --output (default: manifest.csv).")
    args = parser.parse_args()
    run(
        input_path=args.input,
        out_dir=args.output,
        codebook_path=args.codebook,
        strengths=args.strengths,
        model=args.model,
        limit=args.limit,
        manifest_name=args.manifest_name,
    )


if __name__ == "__main__":
    main()