From 3d00fed00cc1961af574335d288c895336abf8ed Mon Sep 17 00:00:00 2001
From: Victor Kuznetsov <kuznetsov.va@gmail.com>
Date: Mon, 8 Jun 2026 18:49:10 -0700
Subject: [PATCH] fix(photomaker-v2): compute id_embeds via FaceAnalysis2
 before pipeline call

The Modal cert sweep against V2 hit the next layer of the API:

  PhotoMakerIDEncoder_CLIPInsightfaceExtendtoken.forward() missing 1 required
  positional argument: 'id_embeds'

V2 forward takes BOTH the CLIP image embedding (computed inside the pipeline from
input_id_images) AND an ArcFace identity embedding (id_embeds) that the caller
must compute. The upstream pipeline does NOT auto-compute it -- inference_pmv2.py
shows the caller using FaceAnalysis2 + analyze_faces to extract the ArcFace
vector from each input ID image and passing id_embeds=torch.stack([...]) into
pipe(...).

Wired the same flow here:
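- New _get_face_analyser() singleton (double-checked lock) builds
  FaceAnalysis2(providers=[...], allowed_modules=['detection', 'recognition']),
  selecting CUDAExecutionProvider when torch.cuda.is_available() and
  CPUExecutionProvider otherwise, then calls
  .prepare(ctx_id=0, det_size=(640, 640)).
  This is the non-commercial step (antelopev2/buffalo_l auto-download on first
  use). Module docstring already calls it out.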
- Per face: analyze_faces() -> torch.from_numpy(embedding) -> .unsqueeze(0) to
  match the pipeline's expected (B, D) shape, casting to pipeline.device/dtype.
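  Only the first detection in each crop is used. Crops in which InsightFace
  detects no face are skipped with a debug log. The ID crop is taken from
  original_bgr (not the diffusion-cleaned image), so the most likely cause
  would be a face that is too small or too stylised for InsightFace's
  detector; YuNet already gated us into having a face per crop, so this
  should be rare.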
- id_embeds= keyword threaded into the pipeline call site alongside the existing
  input_id_images=.

Tests untouched (the V1-only safety guard was already removed in the previous
commit when we swapped V1->V2; the existing 11 tests still pass).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../photomaker_restore.py                     | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/remove_ai_watermarks/photomaker_restore.py b/src/remove_ai_watermarks/photomaker_restore.py
index a5ff379..d8c4bbd 100644
--- a/src/remove_ai_watermarks/photomaker_restore.py
+++ b/src/remove_ai_watermarks/photomaker_restore.py
@@ -85,6 +85,8 @@ _PHOTOMAKER_FACE_SIZE = 512
 
 _pipeline: Any | None = None
 _pipeline_lock = threading.Lock()
+_face_analyser: Any | None = None
+_face_analyser_lock = threading.Lock()
 
 
 def is_available() -> bool:
@@ -110,6 +112,29 @@ def _select_device() -> str:
     return "cpu"
 
 
+def _get_face_analyser() -> Any:
+    """Return the InsightFace FaceAnalysis2 singleton (downloads model packs on first use).
+
+    **This is the non-commercial step.** Instantiating ``FaceAnalysis2()`` triggers
+    InsightFace's auto-download of the antelopev2/buffalo_l model packs, which are
+    released under a research-only license. See the module docstring NON-COMMERCIAL
+    notice. PhotoMaker-V2 requires this for the ArcFace identity branch.
+    """
+    global _face_analyser
+    if _face_analyser is not None:
+        return _face_analyser
+    with _face_analyser_lock:
+        if _face_analyser is None:
+            import torch
+            from photomaker import FaceAnalysis2
+
+            providers = ["CUDAExecutionProvider"] if torch.cuda.is_available() else ["CPUExecutionProvider"]
+            fa = FaceAnalysis2(providers=providers, allowed_modules=["detection", "recognition"])
+            fa.prepare(ctx_id=0, det_size=(640, 640))
+            _face_analyser = fa
+    return _face_analyser
+
+
 def _get_pipeline() -> Any:
     """Return the lazily-built PhotoMaker pipeline singleton (downloads weights on first use)."""
     global _pipeline
@@ -280,6 +305,9 @@ def restore_faces_photomaker(
         return cleaned_bgr
 
     pipeline = _get_pipeline()
+    face_analyser = _get_face_analyser()  # NON-COMMERCIAL: triggers InsightFace model packs
+    from photomaker import analyze_faces
+
     generator = None
     if seed is not None:
         generator = torch.Generator(device=pipeline.device).manual_seed(seed)
@@ -289,6 +317,14 @@ def restore_faces_photomaker(
         id_crop_bgr, square_box = _face_crop_square(original_bgr, box)
         if id_crop_bgr.size == 0:
             continue
+        # Get the ArcFace embedding for THIS face (V2's required ID branch). InsightFace
+        # expects BGR; analyze_faces returns a list, take the first detection.
+        faces = analyze_faces(face_analyser, id_crop_bgr)
+        if not faces:
+            logger.debug("photomaker_restore: InsightFace did not detect a face in the crop; skipping")
+            continue
+        id_embed = torch.from_numpy(faces[0]["embedding"]).unsqueeze(0).to(pipeline.device, dtype=pipeline.dtype)
+
         id_crop_rgb = cv2.cvtColor(id_crop_bgr, cv2.COLOR_BGR2RGB)
         id_image_pil = Image.fromarray(id_crop_rgb)
 
@@ -302,6 +338,7 @@ def restore_faces_photomaker(
         out = pipeline(
             prompt=_PHOTOMAKER_PROMPT,
             input_id_images=[id_image_pil],
+            id_embeds=id_embed,
             num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
             start_merge_step=style_strength,