mirror of
https://github.com/wiltodelta/remove-ai-watermarks.git
synced 2026-06-05 02:28:00 +02:00
31f0a82906
Provenance detection no longer relies on a fixed first-MB read. In a streaming / non-faststart MP4 the C2PA manifest sits AFTER a multi-megabyte mdat, beyond the 1 MB scan window, so it was missed. - isobmff.scan_c2pa_region(path): a file-seeking top-level box walker that returns the payloads of uuid/jumb (provenance) boxes, seeking past mdat by size without reading it -- works on multi-GB files. Returns b"" for non-ISOBMFF or on read error. Mirrors the box-size encoding of the existing in-memory _iter_top_level_boxes (largesize / size==0). - metadata.scan_head(path, size): the shared input for every C2PA/AIGC/IPTC byte scan -- first `size` bytes plus, for ISOBMFF, the late provenance-box payloads. Behavior-neutral (f.read(size)) for non-ISOBMFF inputs. - Routed all six metadata scan sites (has_ai_metadata, aigc_label, iptc_ai_system, synthid_source, exif_generator XMP, get_ai_metadata soft-binding) and identify's head read through scan_head. 6 new tests: late box found by scan_c2pa_region / scan_head, the fixed window provably misses it, non-ISOBMFF -> b"", front-placed (faststart) regression. The remaining gap stays documented: EXIF/XMP stored as items inside the meta box (AVIF/HEIF stills) still needs meta-box surgery or exiftool. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
126 lines
4.1 KiB
TOML
126 lines
4.1 KiB
TOML
# Core package metadata (PEP 621).
[project]
name = "remove-ai-watermarks"
version = "0.6.8"
description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}
# Runtime dependencies installed with the base package. Heavier, optional
# stacks are declared as extras under [project.optional-dependencies].
dependencies = [
    "pillow>=10.0.0",
    "piexif>=1.1.3",
    "numpy>=1.24.0",
    "opencv-python-headless>=4.8.0",
    "click>=8.0.0",
    "rich>=13.0.0",
    "python-dotenv>=1.0.0",
]

# Optional extras, installed as `remove-ai-watermarks[<extra>]`.
[project.optional-dependencies]
# torch / diffusers stack. NOTE(review): presumably backs the diffusion-based
# removal path -- confirm against the code that imports these packages.
gpu = [
    "torch>=2.0.0",
    "diffusers>=0.38.0",
    "transformers>=4.35.0",
    "accelerate>=0.25.0",
    "controlnet-aux>=0.0.9",
    "safetensors",
    "ultralytics>=8.0.0",
    "color-matcher>=0.5.0",
]
# Open invisible-watermark (imwatermark) decoder for detecting the DWT-DCT
# watermarks embedded by Stable Diffusion / SDXL / FLUX. Optional because it
# pulls non-headless opencv AND torch (invisible-watermark declares torch a hard
# dependency, and WatermarkDecoder eagerly imports rivaGan -> torch at import
# time, so the dwtDct-only detect path still needs torch present even though it
# never runs on GPU). So `detect` alone pulls torch -- no need to add `gpu` for
# detection. identify() guards the import and skips the signal when absent.
detect = [
    "invisible-watermark>=0.2.0",
]
# Adobe TrustMark decoder -- the open, keyless watermark behind Adobe Durable
# Content Credentials (soft-binding alg ``com.adobe.trustmark.P``). Optional
# because it pulls torch and downloads model weights on first use. identify()
# guards the import and skips the TrustMark signal when absent.
trustmark = [
    "trustmark>=0.8.0",
]
# Universal region eraser backend -- big-LaMa via onnxruntime (Carve/LaMa-ONNX,
# Apache-2.0). CPU, no torch. Model (~200 MB) is downloaded on first use and
# cached by huggingface_hub; it is never bundled in this repo. The default cv2
# eraser backend needs none of this.
lama = [
    "onnxruntime>=1.16.0",
    "huggingface-hub>=0.20.0",
]
# Development tooling: test runner, coverage, linter, type checker.
# invisible-watermark is repeated from `detect`; NOTE(review): presumably so the
# test suite can exercise the detection path -- confirm.
dev = [
    "pytest>=8.0.0",
    "pytest-cov>=4.1.0",
    "ruff>=0.4.0",
    "pyright>=1.1.0",
    "invisible-watermark>=0.2.0",
]
# Convenience extra: a self-reference that pulls in every extra listed above.
all = ["remove-ai-watermarks[gpu,detect,trustmark,lama,dev]"]

# diffusers 0.38.0 (security fix for GHSA-98h9-4798-4q5v) declares a dependency
# on safetensors>=0.8.0rc0 — a pre-release. Allow pre-releases globally so the
# resolver can satisfy that. Drop once diffusers publishes a release with a
# stable safetensors pin (or once safetensors 0.8.0 stable is out).
[tool.uv]
prerelease = "allow"

# Console entry point: the `remove-ai-watermarks` command invokes `main` from
# the `remove_ai_watermarks.cli` module.
[project.scripts]
remove-ai-watermarks = "remove_ai_watermarks.cli:main"

# Project links published with the package metadata.
[project.urls]
Repository = "https://github.com/wiltodelta/remove-ai-watermarks"

# Build backend (PEP 517): hatchling builds the sdist and wheel.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# src-layout: the importable package lives under src/, so hatch is pointed at
# it explicitly when building the wheel.
[tool.hatch.build.targets.wheel]
packages = ["src/remove_ai_watermarks"]

# pytest: collect from tests/, add src/ to sys.path so the package imports
# without installation, and run verbosely with short tracebacks.
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["src"]
addopts = "-v --tb=short"

# Ruff base settings. target-version matches the requires-python floor (3.10);
# the _refs directory is excluded from linting and formatting.
[tool.ruff]
target-version = "py310"
line-length = 120
exclude = ["_refs"]

# Enabled lint rule families, followed by the individual rules switched off
# project-wide (each with its reason).
[tool.ruff.lint]
select = ["E", "F", "B", "I", "S", "UP", "SIM", "RET", "COM", "C4", "G", "PT", "PIE", "T20", "DTZ", "ICN", "TCH", "RUF", "ANN"]
ignore = [
    "COM812",  # missing trailing comma (conflicts with ruff formatter)
    "ANN401",  # typing.Any — sometimes unavoidable with third-party libs
]

# Per-file rule exemptions. Tests skip annotation and assert/hard-coded-secret
# checks; the source-file entries carry their own justification.
[tool.ruff.lint.per-file-ignores]
"tests/*.py" = ["ANN", "S101", "S105", "S106", "S108"]
"src/remove_ai_watermarks/noai/watermark_remover.py" = ["S603", "S606", "S607", "T201"]  # subprocess calls for auto-install/CUDA fix
"src/remove_ai_watermarks/noai/c2pa.py" = ["S110"]  # try-except-pass for corrupt file handling
"src/remove_ai_watermarks/noai/ctrlregen/engine.py" = ["S101", "S603"]  # assert for loaded state, subprocess for auto-install

# Formatter style: double-quoted strings, space indentation.
[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# Pyright: strict type checking against Python 3.10; _refs is excluded.
[tool.pyright]
pythonVersion = "3.10"
typeCheckingMode = "strict"
exclude = ["_refs"]

# Execution environment for the tests/ root: the report* rules below are
# switched off for test code only, while strict mode stays on elsewhere.
[[tool.pyright.executionEnvironments]]
root = "tests"
extraPaths = ["."]
reportAttributeAccessIssue = false
reportOptionalSubscript = false
reportOptionalMemberAccess = false
reportArgumentType = false
reportUnknownMemberType = false
reportUnknownArgumentType = false
reportUnknownVariableType = false
reportMissingTypeArgument = false