From 9aaa53fe3221df04a9332f570bb843f0461409ff Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Thu, 28 May 2026 18:46:26 -0700 Subject: [PATCH] fix(metadata): preserve upload format and quality on strip remove_ai_metadata now writes JPEG at quality 95 with 4:4:4 (no chroma subsampling) instead of the lossy PIL defaults (q75, 4:2:0), and preserves WebP losslessly instead of silently rewriting it as PNG. Co-Authored-By: Claude Opus 4.7 --- src/remove_ai_watermarks/metadata.py | 12 +++++++++ tests/test_metadata.py | 38 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/remove_ai_watermarks/metadata.py b/src/remove_ai_watermarks/metadata.py index 4ea3c1c..9f678c3 100644 --- a/src/remove_ai_watermarks/metadata.py +++ b/src/remove_ai_watermarks/metadata.py @@ -695,8 +695,20 @@ def remove_ai_metadata( save_kwargs: dict[str, Any] = {} if fmt in (".jpg", ".jpeg"): save_kwargs["format"] = "JPEG" + # JPEG output is unavoidably lossy, so minimize the loss: high quality + # and no chroma subsampling (4:4:4). Without these PIL defaults to + # quality 75 + 4:2:0, which visibly degrades a re-saved image. + save_kwargs["quality"] = 95 + save_kwargs["subsampling"] = 0 if img.mode in ("RGBA", "P"): img = img.convert("RGB") + elif fmt == ".webp": + # Preserve the WebP container losslessly instead of silently rewriting + # it as PNG (which changes the format and bloats the file). + save_kwargs["format"] = "WEBP" + save_kwargs["lossless"] = True + if img.mode == "P": # WebP cannot encode palette mode + img = img.convert("RGBA" if "transparency" in img.info else "RGB") else: save_kwargs["format"] = "PNG" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index b93fe0a..3e3c8b7 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -322,6 +322,44 @@ class TestRemoveAiMetadata: assert result == jpg_path assert jpg_path.exists() + def test_jpeg_output_is_high_quality(self, tmp_path): + """JPEG output uses high quality + 4:4:4 (no chroma subsampling), not the + lossy PIL defaults (quality 75, 4:2:0) that visibly degrade the image.""" + from PIL.JpegImagePlugin import get_sampling + + img = Image.new("RGB", (64, 64), color=(100, 150, 200)) + png_path = tmp_path / "source.png" + img.save(png_path) + + jpg_path = tmp_path / "output.jpg" + remove_ai_metadata(png_path, jpg_path) + + with Image.open(jpg_path) as out: + assert get_sampling(out) == 0 # 4:4:4, no chroma subsampling + # quality 95 quantization tables stay well below the q75 defaults + # (whose max quant value is ~40+); q95 tops out around 12. + assert max(max(t) for t in out.quantization.values()) <= 15 + + def test_webp_output_preserves_format_losslessly(self, tmp_path): + """A .webp output keeps the WebP format (not silently rewritten to PNG) + and is pixel-identical to the source (lossless).""" + import numpy as np + + rng = np.random.default_rng(0) + arr = rng.integers(0, 255, (48, 48, 3), dtype=np.uint8) + src = Image.fromarray(arr, "RGB") + pnginfo = PngInfo() + pnginfo.add_text("parameters", "ai stuff") + png_path = tmp_path / "source.png" + src.save(png_path, pnginfo=pnginfo) + + webp_path = tmp_path / "output.webp" + remove_ai_metadata(png_path, webp_path) + + with Image.open(webp_path) as out: + assert out.format == "WEBP" + assert np.array_equal(np.asarray(out.convert("RGB")), arr) + def test_creates_parent_directories(self, tmp_path): img = Image.new("RGB", (32, 32)) pnginfo = PngInfo()