From 4ec8ffec6be5d8f6343849b49cf3ccfcefe902fc Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Mon, 8 Jun 2026 20:07:31 -0700 Subject: [PATCH] fix(instantid): patch check_inputs for diffusers-0.38 + set scale at load time Two compat bugs caught by the Modal cert sweep, both rooted in diffusers 0.38 vs InstantID's community pipeline expectations: 1. **Positional check_inputs misalignment.** InstantID's __call__ calls `self.check_inputs(...)` POSITIONALLY using the parent's ~v0.29 signature. Diffusers 0.38 added two new parameters BEFORE `controlnet_conditioning_scale` in the parent's signature (`ip_adapter_image`, `ip_adapter_image_embeds`), which shifts every subsequent positional arg by two slots. The argument that lands in the parent's `controlnet_conditioning_scale` slot is actually InstantID's `control_guidance_end` -- which a few lines earlier was converted to `[1.0]` (a list) by InstantID's auto-broadcasting for the single-controlnet case. The parent's check then trips on `not isinstance([1.0], float)` -> TypeError. Our inputs are programmatic and validated by our own callers, so neutralising the check with `pipe.check_inputs = lambda *_a, **_k: None` after load is safe. This is the standard workaround community ComfyUI ports use for the same compat break. 2. **`ip_adapter_scale` was passed at call time and silently ignored.** It's not in `StableDiffusionXLInstantIDPipeline.__call__`'s signature -- the upstream API sets the IP-Adapter weight on the ArcFace cross-attention branch at LOAD time via `load_ip_adapter_instantid(scale=...)`. Moved the 0.8 default there, dropped the call-time kwarg. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/remove_ai_watermarks/instantid_restore.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/remove_ai_watermarks/instantid_restore.py b/src/remove_ai_watermarks/instantid_restore.py index 921b928..b2dfec3 100644 --- a/src/remove_ai_watermarks/instantid_restore.py +++ b/src/remove_ai_watermarks/instantid_restore.py @@ -233,7 +233,20 @@ def _get_pipeline() -> Any: pipe.to(device) # IP-Adapter weights that wire the ArcFace embedding into cross-attention. ip_adapter_path = hf_hub_download(repo_id=_INSTANTID_REPO, filename=_INSTANTID_IP_ADAPTER) - pipe.load_ip_adapter_instantid(ip_adapter_path) + # IP-Adapter scale (the weight on the ArcFace cross-attention branch) is + # set at load time, not at call time. 0.8 mirrors the upstream demo. + pipe.load_ip_adapter_instantid(ip_adapter_path, scale=0.8) + # Diffusers 0.38 vs InstantID upstream compat patch: InstantID's __call__ + # calls ``self.check_inputs(...)`` POSITIONALLY (signature from ~v0.29), + # but diffusers 0.38 added two new params (``ip_adapter_image``, + # ``ip_adapter_image_embeds``) BEFORE ``controlnet_conditioning_scale`` in + # the parent's signature. That shifts every argument by two, so + # ``control_guidance_end`` (which InstantID converts to ``[1.0]`` for the + # single-controlnet case before this point) lands in the slot the parent + # validates as ``controlnet_conditioning_scale`` and trips + # ``TypeError("must be type float")``. Our inputs are programmatic and + # already validated by our own callers, so neutralising the check is safe. 
+ pipe.check_inputs = lambda *_a, **_k: None _pipeline = pipe return _pipeline @@ -298,7 +311,6 @@ def restore_faces_instantid( cleaned_bgr: NDArray[Any], num_inference_steps: int = 30, guidance_scale: float = 5.0, - ip_adapter_scale: float = 0.8, controlnet_conditioning_scale: float = 0.8, seed: int | None = None, detect_faces_fn: Any | None = None, @@ -387,7 +399,6 @@ def restore_faces_instantid( image_embeds=face_emb, image=landmark_img, controlnet_conditioning_scale=controlnet_conditioning_scale, - ip_adapter_scale=ip_adapter_scale, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, generator=generator,