fix(io): Unicode-safe cv2 image IO + un-eat the [gpu] install hint (v0.6.6)

Two CLI/IO robustness bugs surfaced by issues #17 and #19. #17 -- non-ASCII image paths (Chinese/Cyrillic/accented) failed on Windows: cv2.imread/imwrite use the platform ANSI code-page API, so the decode came back empty with a "can't open/read file" warning. New image_io.imread/imwrite route through np.fromfile+cv2.imdecode / cv2.imencode+tofile (Unicode-safe, byte- identical output, cv2.imread None-semantics preserved); all 8 cv2 read/write call sites now go through it. Behavior-neutral on macOS/Linux (already accept UTF-8 paths), so the fix is correct-by-construction for the Windows-only bug. #19 (incidental) -- rich parsed the "[gpu]" in the GPU-extra install hint as a style tag and dropped it, so the printed command was the un-installable "pip install 'remove-ai-watermarks'". Escaped as \[gpu] at both call sites. Tests: test_image_io.py (non-ASCII round-trip, alpha, missing/empty/garbage semantics); test_cli.py::TestGpuHintMarkup (install hint keeps the extra). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-24 00:20:51 +02:00 · 2026-05-27 11:52:48 -07:00
parent d847b39292
commit 7b47fa9f6a
12 changed files with 189 additions and 17 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "remove-ai-watermarks"
-version = "0.6.5"
+version = "0.6.6"
 description = "Remove visible and invisible AI watermarks from images (Gemini / Nano Banana, ChatGPT, Stable Diffusion)"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -1,3 +1,3 @@
 """Remove-AI-Watermarks: Unified tool for removing visible and invisible AI watermarks."""

-__version__ = "0.6.5"
+__version__ = "0.6.6"
@@ -85,7 +85,9 @@ def _read_bgr_and_alpha(path: Path) -> tuple[np.ndarray | None, np.ndarray | Non
    """
    import cv2

-    image = cv2.imread(str(path), cv2.IMREAD_UNCHANGED)
+    from remove_ai_watermarks import image_io
+
+    image = image_io.imread(path, cv2.IMREAD_UNCHANGED)
    if image is None:
        return None, None
    if image.ndim == 2:
@@ -109,11 +111,12 @@ def _write_bgr_with_alpha(
    forced to 0 inside that bbox (expanded by ``pad`` px) so the watermark area
    becomes fully transparent in the saved file.
    """
-    import cv2
    import numpy as np

+    from remove_ai_watermarks import image_io
+
    if alpha is None or path.suffix.lower() not in _ALPHA_FORMATS:
-        cv2.imwrite(str(path), bgr)
+        image_io.imwrite(path, bgr)
        return

    alpha_out = alpha
@@ -127,7 +130,7 @@ def _write_bgr_with_alpha(
            alpha_out[y0:y1, x0:x1] = 0

    bgra = np.dstack([bgr, alpha_out])
-    cv2.imwrite(str(path), bgra)
+    image_io.imwrite(path, bgra)


 def _run_doubao_if_selected(
@@ -481,7 +484,7 @@ def cmd_invisible(
    if not invisible_available():
        console.print(
            "[red]Error:[/] GPU dependencies not installed.\n"
-            "  Install them with: [bold]pip install 'remove-ai-watermarks[gpu]'[/]"
+            "  Install them with: [bold]pip install 'remove-ai-watermarks\\[gpu]'[/]"
        )
        raise SystemExit(1)

@@ -744,7 +747,7 @@ def cmd_all(
        if not invisible_available():
            console.print(
                "    [yellow]⚠[/] Skipped — GPU dependencies not installed.\n"
-                "    Install them with: [bold]pip install 'remove-ai-watermarks[gpu]'[/]"
+                "    Install them with: [bold]pip install 'remove-ai-watermarks\\[gpu]'[/]"
            )
        else:
            from remove_ai_watermarks.invisible_engine import InvisibleEngine
@@ -239,7 +239,9 @@ class DoubaoEngine:

 def load_image_bgr(path: str | Path) -> NDArray:
    """Read an image as BGR ndarray (helper for scripts/tests)."""
-    img = cv2.imread(str(path), cv2.IMREAD_COLOR)
+    from remove_ai_watermarks import image_io
+
+    img = image_io.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        raise FileNotFoundError(f"Failed to read image: {path}")
    return img
@@ -556,7 +556,9 @@ def detect_sparkle_confidence(image_path: Path) -> float | None:
    (cv2 returns None for unsupported containers such as HEIC). Kept here so the
    cv2 dependency stays in this module; callers apply their own threshold.
    """
-    img = cv2.imread(str(image_path))
+    from remove_ai_watermarks import image_io
+
+    img = image_io.imread(image_path)
    if img is None:
        return None
    return float(GeminiEngine().detect_watermark(img).confidence)
@@ -0,0 +1,66 @@
+"""Unicode-safe cv2 image IO (issue #17).
+
+``cv2.imread`` / ``cv2.imwrite`` pass the path to the platform C runtime, which
+on Windows uses the narrow (ANSI) code-page API and therefore fails on paths
+containing non-ASCII characters (Chinese, Cyrillic, ...). The symptom is a
+``can't open/read file`` warning and a ``None`` decode even though the file
+exists.
+
+These wrappers route through numpy buffers instead: ``np.fromfile`` /
+``ndarray.tofile`` open the path in Python (full Unicode), and
+``cv2.imdecode`` / ``cv2.imencode`` do the codec work. The decoded/encoded
+bytes are byte-for-byte identical to ``imread`` / ``imwrite``. On macOS/Linux
+cv2 already accepts UTF-8 paths, so the wrappers are behavior-neutral there.
+
+cv2/numpy are imported lazily inside the functions so importing this module
+stays cheap in a bare environment (matching the rest of the package).
+"""
+
+# cv2 ships no type stubs; mirror the pragma used by the other cv2-using modules.
+# pyright: reportMissingTypeStubs=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
+
+def imread(path: str | Path, flags: int | None = None) -> NDArray[Any] | None:
+    """Unicode-safe ``cv2.imread``.
+
+    ``flags`` defaults to ``cv2.IMREAD_COLOR`` (same as ``cv2.imread``). Returns
+    ``None`` when the file is missing or cannot be decoded, matching
+    ``cv2.imread`` semantics so existing ``if img is None`` checks keep working.
+    """
+    import cv2
+    import numpy as np
+
+    if flags is None:
+        flags = cv2.IMREAD_COLOR
+    try:
+        data = np.fromfile(str(path), dtype=np.uint8)
+    except OSError:
+        return None
+    if data.size == 0:
+        return None
+    return cv2.imdecode(data, flags)
+
+
+def imwrite(path: str | Path, img: NDArray[Any]) -> bool:
+    """Unicode-safe ``cv2.imwrite``.
+
+    The output format is taken from the path extension (e.g. ``.png``), exactly
+    like ``cv2.imwrite``. Returns ``True`` on success, ``False`` if the codec
+    rejects the image.
+    """
+    import cv2
+
+    ext = Path(path).suffix or ".png"
+    ok, buf = cv2.imencode(ext, img)
+    if not ok:
+        return False
+    buf.tofile(str(path))
+    return True
@@ -217,7 +217,9 @@ class InvisibleEngine:
                import cv2
                import numpy as np

-                out_cv = cv2.imread(str(out_path), cv2.IMREAD_COLOR)
+                from remove_ai_watermarks import image_io
+
+                out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)

                if protect_faces and original_faces:
                    if self._progress_callback:
@@ -243,20 +245,22 @@ class InvisibleEngine:
                    # Using INTER_LANCZOS4 for high-quality upscaling back to original
                    out_cv = cv2.resize(out_cv, orig_size, interpolation=cv2.INTER_LANCZOS4)

-                cv2.imwrite(str(out_path), out_cv)
+                image_io.imwrite(out_path, out_cv)

            else:
                # Even if no protect_faces or humanize, we must restore original size if needed
                import cv2

-                out_cv = cv2.imread(str(out_path), cv2.IMREAD_COLOR)
+                from remove_ai_watermarks import image_io
+
+                out_cv = image_io.imread(out_path, cv2.IMREAD_COLOR)
                if out_cv is not None and (out_cv.shape[1], out_cv.shape[0]) != orig_size:
                    if self._progress_callback:
                        self._progress_callback(
                            f"Upscaling result back to original resolution {orig_size[0]}x{orig_size[1]}..."
                        )
                    out_cv = cv2.resize(out_cv, orig_size, interpolation=cv2.INTER_LANCZOS4)
-                    cv2.imwrite(str(out_path), out_cv)
+                    image_io.imwrite(out_path, out_cv)

            return out_path
        finally:
@@ -78,10 +78,11 @@ def detect_invisible_watermark(image_path: Path) -> str | None:
    """
    if not is_available():
        return None
-    import cv2
    from imwatermark import WatermarkDecoder

-    img = cv2.imread(str(image_path))
+    from remove_ai_watermarks import image_io
+
+    img = image_io.imread(image_path)
    if img is None:
        return None

@@ -509,3 +509,21 @@ class TestBatchCommand:
        assert result.exit_code == 0
        expected_dir = tmp_path / "input_clean"
        assert expected_dir.exists()
+
+
+class TestGpuHintMarkup:
+    """The GPU-extra install hint must survive rich markup (the ``[gpu]`` token
+    is otherwise parsed as a style tag and silently dropped)."""
+
+    def test_invisible_install_hint_keeps_gpu_extra(self, runner, sample_png):
+        with patch("remove_ai_watermarks.invisible_engine.is_available", return_value=False):
+            result = runner.invoke(main, ["invisible", str(sample_png)])
+        assert result.exit_code != 0
+        assert "remove-ai-watermarks[gpu]" in result.output
+
+    def test_all_install_hint_keeps_gpu_extra(self, runner, sample_png):
+        # The `all` pipeline skips the invisible step with a warning that carries
+        # the same hint; it must keep the [gpu] extra too.
+        with patch("remove_ai_watermarks.invisible_engine.is_available", return_value=False):
+            result = runner.invoke(main, ["all", str(sample_png)])
+        assert "remove-ai-watermarks[gpu]" in result.output
@@ -0,0 +1,74 @@
+"""Tests for Unicode-safe cv2 image IO (issue #17)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import cv2
+import numpy as np
+
+from remove_ai_watermarks import image_io
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+# Non-ASCII filenames that break cv2.imread/imwrite on Windows (issue #17).
+_UNICODE_NAMES = [
+    "jimeng-2026-05-27-一面白色的墙.png",  # Chinese
+    "тест-изображение.png",  # Cyrillic
+    "café-señor.png",  # accented Latin
+]
+
+
+def _make_bgr() -> np.ndarray:
+    img = np.zeros((8, 8, 3), dtype=np.uint8)
+    img[2:6, 2:6] = (10, 120, 240)  # a BGR block so the round-trip is checkable
+    return img
+
+
+class TestUnicodeRoundTrip:
+    def test_write_then_read_preserves_pixels(self, tmp_path: Path) -> None:
+        for name in _UNICODE_NAMES:
+            path = tmp_path / name
+            src = _make_bgr()
+            assert image_io.imwrite(path, src) is True
+            assert path.exists()
+            out = image_io.imread(path)
+            assert out is not None
+            # PNG is lossless: pixels must match exactly.
+            assert np.array_equal(out, src)
+
+    def test_alpha_round_trip_with_unchanged_flag(self, tmp_path: Path) -> None:
+        path = tmp_path / "豆包-alpha.png"
+        bgra = np.zeros((8, 8, 4), dtype=np.uint8)
+        bgra[..., 3] = 128
+        assert image_io.imwrite(path, bgra) is True
+        out = image_io.imread(path, cv2.IMREAD_UNCHANGED)
+        assert out is not None
+        assert out.shape[2] == 4
+        assert np.array_equal(out, bgra)
+
+    def test_reads_file_written_by_raw_cv2(self, tmp_path: Path) -> None:
+        # An ASCII file written by plain cv2 must read back identically through
+        # the wrapper (decode path is byte-compatible with cv2.imread).
+        path = tmp_path / "ascii.png"
+        src = _make_bgr()
+        cv2.imwrite(str(path), src)
+        out = image_io.imread(path)
+        assert out is not None
+        assert np.array_equal(out, src)
+
+
+class TestFailureSemantics:
+    def test_missing_file_returns_none(self, tmp_path: Path) -> None:
+        assert image_io.imread(tmp_path / "does-not-exist-不存在.png") is None
+
+    def test_empty_file_returns_none(self, tmp_path: Path) -> None:
+        path = tmp_path / "empty.png"
+        path.write_bytes(b"")
+        assert image_io.imread(path) is None
+
+    def test_undecodable_file_returns_none(self, tmp_path: Path) -> None:
+        path = tmp_path / "garbage.png"
+        path.write_bytes(b"not an image")
+        assert image_io.imread(path) is None
@@ -2865,7 +2865,7 @@ wheels = [

 [[package]]
 name = "remove-ai-watermarks"
-version = "0.6.5"
+version = "0.6.6"
 source = { editable = "." }
 dependencies = [
    { name = "click" },