# remove-ai-watermarks/pyproject.toml

[project]
# Identity and summary of the distribution.
name = "remove-ai-watermarks"
version = "0.8.9"
description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)"
readme = "README.md"
license = { text = "MIT" }
# Oldest supported interpreter; the Python classifiers below start at the same version.
requires-python = ">=3.10"
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Multimedia :: Graphics",
    "Topic :: Scientific/Engineering :: Image Processing",
]
# Base requirements installed for every user. Heavier, feature-specific stacks
# are opt-in extras under [project.optional-dependencies].
dependencies = [
    "pillow>=10.0.0",
    "piexif>=1.1.3",
    "numpy>=1.24.0",
    "opencv-python-headless>=4.8.0",
    "click>=8.0.0",
    "python-dotenv>=1.0.0",
]

[project.optional-dependencies]
gpu = [
    "torch>=2.0.0",
    # The default PyPI torch wheel is a CPU/CUDA build. To drive an Intel GPU
    # (Arc / Data Center) via ``--device xpu`` you need an XPU-enabled torch
    # from PyTorch's XPU wheel index (Linux/Windows only -- there is no macOS
    # XPU build). Install that build first, then this extra (torch is then
    # already satisfied and won't be re-pulled):
    #   pip install torch --index-url https://download.pytorch.org/whl/xpu
    #   pip install 'remove-ai-watermarks[gpu]'
    # uv users can target the ``pytorch-xpu`` index declared under [tool.uv]:
    #   uv pip install torch --index-url https://download.pytorch.org/whl/xpu
    "diffusers>=0.38.0",
    # diffusers 0.38's auto-pipeline registry imports
    # ``Qwen3VLForConditionalGeneration`` (its ``nucleusmoe_image`` pipeline),
    # which only exists in transformers 5.x -- so
    # ``from diffusers import AutoPipelineForImage2Image`` fails on transformers
    # 4.x. The real SDXL-loading break was NOT transformers 5.x but the
    # tokenizers *release candidate* (0.23.0rc0) that the global
    # ``prerelease = "allow"`` drags in: its CLIP tokenizer raises
    # ``RobertaProcessing.__new__() got an unexpected keyword argument 'cls'``.
    # Cap tokenizers to the stable 0.22 line (transformers 5.x accepts
    # >=0.22,<=0.23.0) so the rc is excluded while SDXL still loads.
    "transformers>=5,<6",
    "tokenizers>=0.22,<0.23",
    "accelerate>=0.25.0",
    # Left unpinned here: the effective floor comes from diffusers' own
    # safetensors requirement (see the note above [tool.uv]).
    "safetensors",
]
# Open invisible-watermark (imwatermark) decoder for detecting the DWT-DCT
# watermarks embedded by Stable Diffusion / SDXL / FLUX. Optional because it
# pulls non-headless opencv AND torch (invisible-watermark declares torch a hard
# dependency, and WatermarkDecoder eagerly imports rivaGan -> torch at import
# time, so the dwtDct-only detect path still needs torch present even though it
# never runs on GPU). So `detect` alone pulls torch -- no need to add `gpu` for
# detection. identify() guards the import and skips the signal when absent.
# Part of the `all` bundle.
detect = [
    "invisible-watermark>=0.2.0",
]
# Adobe TrustMark decoder -- the open, keyless watermark behind Adobe Durable
# Content Credentials (soft-binding alg ``com.adobe.trustmark.P``). Optional
# because it pulls torch and downloads model weights on first use. identify()
# guards the import and skips the TrustMark signal when absent.
# Part of the `all` bundle.
trustmark = [
    "trustmark>=0.8.0",
]
# Universal region eraser backend -- big-LaMa via onnxruntime (Carve/LaMa-ONNX,
# Apache-2.0). CPU, no torch. Model (~200 MB) is downloaded on first use and
# cached by huggingface_hub; it is never bundled in this repo. The default cv2
# eraser backend needs none of this.
lama = [
    "onnxruntime>=1.16.0",  # runs the big-LaMa ONNX model
    "huggingface-hub>=0.20.0",  # downloads and caches the model on first use
]
# Optional GFPGAN face-restoration post-pass (commercial-safe Apache-2.0 GFPGAN +
# MIT RetinaFace). Re-synthesizes each face from a StyleGAN2 prior after the
# diffusion removal pass, so it restores identity while still scrubbing the pixel
# watermark. The GFPGANv1.4 weights + RetinaFace detector download on first use;
# they are never bundled. gfpgan/basicsr/facexlib are an OLD ecosystem and must
# stay on numpy < 2.0 to match the pinned gpu diffusion stack -- scipy is capped
# < 1.18 (>= 1.18 uses np.long, gone in numpy 1.24-1.26) and numba < 0.60 to keep
# the whole env on one numpy 1.26 resolution (same trap class as the removed
# faceid/insightface extra). Kept OUT of `all` (heavy + model download).
restore = [
    "gfpgan>=1.3.8",
    "facexlib>=0.3.0",
    # sdist-only; its build-time setuptools is pinned under
    # [tool.uv.extra-build-dependencies].
    "basicsr>=1.4.2",
    "scipy<1.18",  # cap only: >= 1.18 uses np.long (see the note above)
    "numba<0.60",  # cap only: keeps the env on a single numpy 1.26 resolution
]
# Optional PhotoMaker-V2 face-identity restoration (commercial-safe end-to-end:
# PhotoMaker-V2 weights Apache-2.0 + OpenCLIP-ViT-H/14 MIT, NO InsightFace). Unlike
# the `restore` extra above (which runs GFPGAN on the watermarked ORIGINAL and was
# oracle-confirmed to re-introduce SynthID), PhotoMaker carries identity in a
# SEMANTIC EMBEDDING and generates fresh face pixels conditioned on it -- so the
# pixel watermark is not transported. Empirically validated 2026-06-04: the OpenCLIP
# embedding changes by cosine 0.002 under SynthID-magnitude pixel noise (an order of
# magnitude less than JPEG90 drift, which SynthID survives). See
# docs/synthid-robust-identity-research.md and
# src/remove_ai_watermarks/photomaker_restore.py. Weights (~3 GB SDXL + ~1 GB
# PhotoMaker-V2 adapter) download on first use; never bundled. Kept OUT of `all`
# (heavy + model download), same as `restore`/`esrgan`.
#
# NOTE(review): the git URL below is not pinned to a tag or commit, so every fresh
# install resolves whatever upstream's default branch holds at that moment;
# consider appending ``@<commit>`` for reproducible installs. Also, PEP 440 says
# public index servers SHOULD NOT allow direct references in uploaded
# distributions -- confirm the PyPI upload still accepts metadata that carries
# this extra.
photomaker = [
    "photomaker @ git+https://github.com/TencentARC/PhotoMaker.git",
    "huggingface-hub>=0.20.0",
]
# Optional pre-diffusion super-resolution for small inputs (Real-ESRGAN). Loaded via
# spandrel (MIT) -- a pure model-loader with NO basicsr dependency (it pulls only
# torch / torchvision / safetensors / numpy / einops), which sidesteps the
# basicsr / torchvision.functional_tensor breakage that the `restore` extra fights.
# The Real-ESRGAN weights (BSD-3-Clause) download on first use and are cached; they
# are never bundled. CPU works but is slow on large inputs -- it is meant for the
# pre-diffusion upscale of SMALL inputs (and the GPU worker). Guarded by
# upscaler.is_available(); the default upscaler stays Lanczos (cv2, no deps). The
# weights are fetched with torch.hub (bundled with spandrel's torch), so no extra
# download dependency is needed. Kept OUT of `all` (heavy + model download), same
# as `restore`/`photomaker`.
esrgan = [
    "spandrel>=0.3.0",
]
# Contributor tooling: test runner + coverage, lint/format, and type checking.
# The watermark-detection backend is pulled in through the `detect` extra (a
# self-reference, the same mechanism the `all` extra uses) instead of repeating
# its version specifier here, so the two requirements cannot drift apart.
dev = [
    "pytest>=8.0.0",
    "pytest-cov>=4.1.0",
    "ruff>=0.4.0",
    "pyright>=1.1.0",
    "remove-ai-watermarks[detect]",
]
# Convenience bundle: installs the gpu, detect, trustmark, lama and dev extras
# through a self-reference. `restore`, `photomaker` and `esrgan` are deliberately
# left out (heavy + model download).
all = ["remove-ai-watermarks[gpu,detect,trustmark,lama,dev]"]

# diffusers 0.38.0 (security fix for GHSA-98h9-4798-4q5v) declares a dependency
# on safetensors>=0.8.0rc0 — a pre-release. Allow pre-releases globally so the
# resolver can satisfy that. Drop once diffusers publishes a release with a
# stable safetensors pin (or once safetensors 0.8.0 stable is out).
# Side effect: the setting is global, so release candidates of unrelated
# packages become eligible too -- this is why the `gpu` extra caps tokenizers
# below 0.23.
[tool.uv]
prerelease = "allow"

# basicsr 1.4.2 (pulled by the `restore` GFPGAN extra) ships sdist-only and its
# setup.py get_version() reads basicsr/version.py in a way that newer setuptools
# (>= 69) breaks with ``KeyError: '__version__'`` under isolated PEP 517 builds.
# Pin an old setuptools as its build dependency so the sdist builds; this is
# scoped to basicsr and does not affect the rest of the resolution.
[tool.uv.extra-build-dependencies]
basicsr = ["setuptools<69"]

# PyTorch Intel-GPU (XPU) wheel index. ``explicit = true`` keeps it inert for
# the default CPU/CUDA install: uv consults it only when a torch install
# explicitly targets it (see the ``gpu`` extra comment), so it does not alter
# the locked CPU/CUDA resolution. Linux/Windows only -- no macOS XPU build.
[[tool.uv.index]]
name = "pytorch-xpu"
url = "https://download.pytorch.org/whl/xpu"
explicit = true

# Console entry point: installing the package creates a `remove-ai-watermarks`
# command that calls `main` in the `remove_ai_watermarks.cli` module.
[project.scripts]
remove-ai-watermarks = "remove_ai_watermarks.cli:main"

# Project links published with the package metadata.
[project.urls]
Repository = "https://github.com/wiltodelta/remove-ai-watermarks"

[build-system]
# Summary: build with hatchling, temporarily capped below 1.31; the full
# rationale and the condition for removing the cap follow.
#
# Pin hatchling < 1.31. hatchling 1.30.0 made Metadata-Version 2.5 (PEP 794) the
# default, which the twine bundled in pypa/gh-action-pypi-publish@release/v1 rejects
# ("'2.5' is not a valid Metadata-Version"), failing the v0.8.3 PyPI upload
# (2026-06-01) when unpinned requires = ["hatchling"] pulled 1.30.0. hatchling 1.30.1
# reverted the default to 2.4 ("kept at 2.4 until more tools support 2.5"), and
# 1.27-1.29 were always 2.4 -- so < 1.31 keeps `uv build` on a 2.4-emitting hatchling
# (it resolves to the latest allowed, 1.30.1). The publish workflow now uses
# `uv publish`, whose uploader accepts 2.5, so this pin is belt-and-suspenders, not
# load-bearing: keeping it makes the first uv-publish release ship 2.4 metadata
# (isolating the uploader swap from the metadata-version bump). Drop to
# `requires = ["hatchling"]` once that release confirms the path.
requires = ["hatchling<1.31"]
build-backend = "hatchling.build"

# Allow the `photomaker` extra to reference its upstream git URL directly (the
# TencentARC/PhotoMaker package is not on PyPI). Apache-2.0; weights download on
# first use, so this only adds the Python wrapper.
[tool.hatch.metadata]
allow-direct-references = true

# src layout: the wheel contains only the `remove_ai_watermarks` package that
# lives under src/.
[tool.hatch.build.targets.wheel]
packages = ["src/remove_ai_watermarks"]

[tool.hatch.build.targets.sdist]
# Keep the source distribution small: ship the package + metadata, not the
# committed test corpora / calibration captures under data/ (tens of MB --
# synthid_corpus images + the visible-mark captures), which pushed the 0.8.0
# sdist past PyPI's per-project file-size limit (the wheel ships only src/).
exclude = ["/data"]

[tool.pytest.ini_options]
# Collect tests from tests/ only.
testpaths = ["tests"]
# Put src/ on sys.path so tests can import `remove_ai_watermarks` straight from
# the source tree (src layout).
pythonpath = ["src"]
# Verbose test names, short tracebacks.
addopts = "-v --tb=short"

[tool.ruff]
# Target the oldest supported interpreter (same floor as `requires-python`).
target-version = "py310"
line-length = 120
exclude = ["_refs"]

[tool.ruff.lint]
# Enabled rule families, one per line so additions and removals diff cleanly.
select = [
    "E",    # pycodestyle errors
    "F",    # Pyflakes
    "B",    # flake8-bugbear
    "I",    # isort
    "S",    # flake8-bandit
    "UP",   # pyupgrade
    "SIM",  # flake8-simplify
    "RET",  # flake8-return
    "COM",  # flake8-commas
    "C4",   # flake8-comprehensions
    "G",    # flake8-logging-format
    "PT",   # flake8-pytest-style
    "PIE",  # flake8-pie
    "T20",  # flake8-print
    "DTZ",  # flake8-datetimez
    "ICN",  # flake8-import-conventions
    "TCH",  # flake8-type-checking
    "RUF",  # Ruff-specific rules
    "ANN",  # flake8-annotations
]
ignore = [
    "COM812",  # missing trailing comma (conflicts with ruff formatter)
    "ANN401",  # typing.Any — sometimes unavoidable with third-party libs
]

[tool.ruff.lint.per-file-ignores]
# Tests: no annotation requirements; asserts, dummy credentials and temp paths are fine.
"tests/*.py" = ["ANN", "S101", "S105", "S106", "S108"]
# subprocess calls for auto-install/CUDA fix
"src/remove_ai_watermarks/noai/watermark_remover.py" = ["S603", "S606", "S607", "T201"]
# try-except-pass for corrupt file handling
"src/remove_ai_watermarks/noai/c2pa.py" = ["S110"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# Strict type checking for the project, targeting the oldest supported Python.
[tool.pyright]
pythonVersion = "3.10"
typeCheckingMode = "strict"
exclude = ["_refs"]

# Separate execution environment for the test suite: the listed strict-mode
# diagnostics are switched off for code under tests/ only.
[[tool.pyright.executionEnvironments]]
root = "tests"
# NOTE(review): presumably added so tests resolve imports from the project
# root -- confirm this is still required.
extraPaths = ["."]
reportAttributeAccessIssue = false
reportOptionalSubscript = false
reportOptionalMemberAccess = false
reportArgumentType = false
reportUnknownMemberType = false
reportUnknownArgumentType = false
reportUnknownVariableType = false
reportMissingTypeArgument = false