fix(metadata): preserve upload format and quality on strip

remove_ai_metadata now writes JPEG at quality 95 with 4:4:4 (no chroma
subsampling) instead of PIL's lower-quality defaults (q75, 4:2:0), and
preserves WebP losslessly instead of silently rewriting it as PNG.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Victor Kuznetsov
2026-05-28 18:46:26 -07:00
parent 41e4365cd4
commit 9aaa53fe32
2 changed files with 50 additions and 0 deletions
+12
View File
@@ -695,8 +695,20 @@ def remove_ai_metadata(
save_kwargs: dict[str, Any] = {}
if fmt in (".jpg", ".jpeg"):
save_kwargs["format"] = "JPEG"
# JPEG output is unavoidably lossy, so minimize the loss: high quality
# and no chroma subsampling (4:4:4). Without these, PIL defaults to
# quality 75 + 4:2:0, which visibly degrades a re-saved image.
save_kwargs["quality"] = 95
save_kwargs["subsampling"] = 0
if img.mode in ("RGBA", "P"):
img = img.convert("RGB")
elif fmt == ".webp":
# Preserve the WebP container losslessly instead of silently rewriting
# it as PNG (which changes the format and bloats the file).
save_kwargs["format"] = "WEBP"
save_kwargs["lossless"] = True
if img.mode == "P": # WebP cannot encode palette mode
img = img.convert("RGBA" if "transparency" in img.info else "RGB")
else:
save_kwargs["format"] = "PNG"
+38
View File
@@ -322,6 +322,44 @@ class TestRemoveAiMetadata:
assert result == jpg_path
assert jpg_path.exists()
def test_jpeg_output_is_high_quality(self, tmp_path):
    """Check that a stripped JPEG is written at high quality with 4:4:4 chroma.

    A solid-colour PNG is run through ``remove_ai_metadata`` with a ``.jpg``
    destination. The resulting file must report no chroma subsampling and
    carry small quantization values, i.e. it must not have been saved with
    PIL's defaults (quality 75, 4:2:0).
    """
    from PIL.JpegImagePlugin import get_sampling

    source = tmp_path / "source.png"
    Image.new("RGB", (64, 64), color=(100, 150, 200)).save(source)
    destination = tmp_path / "output.jpg"

    remove_ai_metadata(source, destination)

    with Image.open(destination) as written:
        # get_sampling() reports 0 for 4:4:4, i.e. no chroma subsampling.
        assert get_sampling(written) == 0
        # At quality 95 the largest quantization entry is around 12, while
        # the quality-75 tables contain considerably larger values, so a
        # ceiling of 15 separates the two settings.
        table_peaks = [max(table) for table in written.quantization.values()]
        assert max(table_peaks) <= 15
def test_webp_output_preserves_format_losslessly(self, tmp_path):
    """Check that a ``.webp`` destination stays WebP and is pixel-identical.

    A PNG built from seeded random RGB data and tagged with a ``parameters``
    text chunk is run through ``remove_ai_metadata`` with a ``.webp``
    destination. The output must be a real WebP file (not PNG data under a
    ``.webp`` name) and must decode to exactly the source pixels.
    """
    import numpy as np

    rng = np.random.default_rng(0)
    # ``high`` is exclusive in Generator.integers, so 256 is required for the
    # samples to span the full uint8 range, including 255.
    arr = rng.integers(0, 256, (48, 48, 3), dtype=np.uint8)
    # The mode is inferred as RGB from a (H, W, 3) uint8 array, so the
    # explicit ``mode`` argument is not needed.
    src = Image.fromarray(arr)

    pnginfo = PngInfo()
    pnginfo.add_text("parameters", "ai stuff")
    png_path = tmp_path / "source.png"
    src.save(png_path, pnginfo=pnginfo)

    webp_path = tmp_path / "output.webp"
    remove_ai_metadata(png_path, webp_path)

    with Image.open(webp_path) as out:
        assert out.format == "WEBP"
        # Exact equality only holds if the WebP was encoded losslessly.
        assert np.array_equal(np.asarray(out.convert("RGB")), arr)
def test_creates_parent_directories(self, tmp_path):
img = Image.new("RGB", (32, 32))
pnginfo = PngInfo()