From 70e8b3a517cea21e973058eb834dd3a7f2b5d8de Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Mon, 8 Jun 2026 19:44:17 -0700 Subject: [PATCH] feat(face-restore): add InstantID as the default non-commercial restore path Per the 2026-06-08 deep-research synthesis (docs/synthid-robust-identity- research-2026-06-08.md), the entire ArcFace-class identity-adapter ecosystem for SDXL is blocked from commercial use by InsightFace's non-commercial model packs (antelopev2 / buffalo_l). No commercial-safe ArcFace-grade identity stack exists today. The user explicitly opted into shipping a non-commercial restore path (research / personal use; raiw.cc must NOT install the extra). Architectural choice: InstantID over PhotoMaker-V2 as the default. - PhotoMaker-V2 (CLIP+ArcFace dual encoder, txt2img only): documented upstream identity drift on Asian male faces, visually confirmed in our cert sweep (tatsunari rendered as a generic woman; group photo collapsed into a patchwork). - InstantID (ArcFace cross-attention + landmark ControlNet): semantic identity branch + spatial weak landmark control, decoupled. Per InstantID paper (arXiv:2401.07519) and the research report, stronger identity fidelity on single portraits. Critically: NO original face pixels enter the diffusion (ArcFace embedding is semantic, landmark stick figure is pure geometry), so SynthID is not transported. Implementation: - New `src/remove_ai_watermarks/instantid_restore.py` mirrors the `photomaker_restore.py` shape (lazy singletons for pipeline + FaceAnalysis, per-face crop + _composite_faces from photomaker_restore). Loads the InstantID community pipeline via `DiffusionPipeline.from_pretrained( custom_pipeline="pipeline_stable_diffusion_xl_instantid")` -- no upstream Python package needed; diffusers fetches the file from its community examples. - New `instantid` extra in pyproject (insightface + onnxruntime + huggingface-hub). NON-COMMERCIAL block in the comment explains why. 
- CLI: `--restore-faces-method [instantid|photomaker]`, default `instantid`. Both methods explicitly labeled NON-COMMERCIAL in the help text. - Engine: dispatch on `restore_faces_method` to either `_restore_faces_instantid` or `_restore_faces_photomaker`. - 9 control-flow tests for InstantID without model download (mirror the photomaker_restore.py test pattern + draw_kps helper checks). 587/587 pass. Diffusers-0.38 compat verified by upstream code inspection: the InstantID pipeline inherits from `StableDiffusionXLControlNetPipeline`, uses only public diffusers APIs (`encode_prompt`, `prepare_image`, `prepare_latents`, `get_guidance_scale_embedding`), uses legacy attention processor API which diffusers preserves for backward compat. No PhotoMaker-V1-style internal text_encoder access. End-to-end execution will be validated by the Modal cert sweep in the next step. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...hid-robust-identity-research-2026-06-08.md | 129 +++++++ pyproject.toml | 25 ++ src/remove_ai_watermarks/cli.py | 41 +- src/remove_ai_watermarks/instantid_restore.py | 352 ++++++++++++++++++ src/remove_ai_watermarks/invisible_engine.py | 63 +++- .../photomaker_restore.py | 3 +- tests/test_instantid_restore.py | 146 ++++++++ uv.lock | 11 +- 8 files changed, 752 insertions(+), 18 deletions(-) create mode 100644 docs/synthid-robust-identity-research-2026-06-08.md create mode 100644 src/remove_ai_watermarks/instantid_restore.py create mode 100644 tests/test_instantid_restore.py diff --git a/docs/synthid-robust-identity-research-2026-06-08.md b/docs/synthid-robust-identity-research-2026-06-08.md new file mode 100644 index 0000000..de0e55c --- /dev/null +++ b/docs/synthid-robust-identity-research-2026-06-08.md @@ -0,0 +1,129 @@ +# Deep research: SynthID-safe face-identity recovery for SDXL (2026-06-08) + +**Stats:** {"angles": 6, "sourcesFetched": 28, "claimsExtracted": 104, "claimsVerified": 25, "confirmed": 19, "killed": 6, "afterSynthesis": 6, "urlDupes": 1, 
"budgetDropped": 7, "agentCalls": 111} + +## Summary + +For SDXL-based SynthID-removal pipelines, the entire ArcFace-class identity-adapter ecosystem (PhotoMaker-V2, InstantID, PuLID, IP-Adapter FaceID, Arc2Face) is blocked from commercial use by a single chokepoint: InsightFace's pretrained model packs (buffalo_l, antelopev2, buffalo_s, buffalo_m) are explicitly non-commercial research-only, regardless of the adapter weights' Apache-2.0/MIT license. The InstantX maintainers themselves acknowledged this on HuggingFace ("cannot be Apache 2.0 if it is using Insight Face") and stated intent to retrain on commercially-licensed embedders — work that as of the verified sources has not shipped. The only verified commercial-safe SDXL adapter surfaced is MS-Diffusion (ICLR 2025), which uses CLIP-ViT-bigG-14 instead of ArcFace and runs on SDXL-base-1.0, but it is an IP-Adapter-class subject-similarity method, not a face-identity method — CLIP image embeddings are empirically weaker for face identity than ArcFace (~81% vs ~88% face-ID accuracy), so it solves the license problem but likely not the identity-fidelity problem. Arc2Face is SD1.5-only and so not a drop-in regardless of license; StyleGAN2-based methods (e4e) terminate in a GAN generator with no SDXL wiring. Net: no fully commercial-safe SynthID-robust SDXL identity-preservation stack with ArcFace-grade fidelity exists today — the space is genuinely blocked by InsightFace's grip on commercial ArcFace embeddings, and AdaFace as a permissive ArcFace alternative was REFUTED in verification. + +## Findings + +### 1. The InsightFace pretrained model packs (buffalo_l, antelopev2, buffalo_s, buffalo_m) are non-commercial research-only and this restriction propagates at runtime to any adapter that calls FaceAnalysis(), regardless of the adapter's own license tag. 
+ +**Confidence:** high +**Vote:** 3-0 unanimous across 4 supporting claims + + +InsightFace upstream: code is MIT but 'The training data containing the annotation (and the models trained with these data) are available for non-commercial research purposes only.' Licensing page enumerates buffalo_l, antelopev2, buffalo_s, buffalo_m as needing separate commercial licensing. InstantX maintainer on HF: 'cannot be Apache 2.0 if it is using Insight Face... we plan to train on other face encoders that support commercial license.' Mechanical propagation: the runtime FaceAnalysis() call pulls these packs. + +- https://github.com/deepinsight/insightface +- https://www.insightface.ai/solutions/face-recognition-licensing +- https://huggingface.co/InstantX/InstantID/discussions/2 + +### 2. InstantID is Apache-2.0 on the adapter weights and architecturally a clean plugin to SDXL-base-1.0 (no UNet training, semantic IdentityNet conditioning + landmark weak-spatial, no pixel copying) — but at runtime it instantiates FaceAnalysis(name='antelopev2'), inheriting the InsightFace non-commercial restriction. + +**Confidence:** high +**Vote:** 3-0 across 5 claims + + +HF model card: 'License: apache-2.0' AND 'For face encoder, you need to manutally download via this URL to models/antelopev2' with 'from insightface.app import FaceAnalysis' in usage. Diffusers community pipeline shows literal `app = FaceAnalysis(name='antelopev2', ...)` then `face_emb = face_info['embedding']`. Paper: 'IdentityNet by imposing strong semantic and weak spatial conditions, integrating facial and landmark images with textual prompts' + 'seamlessly integrates with SD1.5 and SDXL'. So mechanism is semantic (good for SynthID-no-pixel-leak) but license is blocked. + +- https://huggingface.co/InstantX/InstantID +- https://github.com/huggingface/diffusers/blob/main/examples/community/pipeline_stable_diffusion_xl_instantid.py +- https://arxiv.org/pdf/2401.07519 +- https://instantid.github.io/ + +### 3. 
Arc2Face is SD1.5-only (not SDXL) AND requires InsightFace antelopev2 with arcface.onnx replacing glintr100.onnx — so it is doubly blocked: not a drop-in for the SDXL pipeline AND non-commercial via the model pack. + +**Confidence:** high +**Vote:** 3-0 across 3 claims + + +Arc2Face README verbatim: 'Arc2Face is built upon SD1.5' with stable-diffusion-v1-5 checkpoint as base. Same README: 'manually download the antelopev2 package and place the checkpoints under models/antelopev2', 'Download arcface.onnx from HuggingFace', 'delete glintr100.onnx (the default backbone from insightface)'. Code is MIT but the runtime antelopev2 dependency carries the non-commercial restriction. No SDXL adaptation exists in the official repo. + +- https://github.com/foivospar/Arc2Face + +### 4. MS-Diffusion (ICLR 2025) is the one verified candidate that combines SDXL-base-1.0 as its foundation with a non-ArcFace image encoder (CLIP-ViT-bigG-14), so it avoids the InsightFace non-commercial blocker — but it is an IP-Adapter-class multi-subject personalization method, NOT a face-identity-recognition method, so license safety does not equal face-identity fidelity. + +**Confidence:** medium +**Vote:** 3-0 on the narrow factual claims (SDXL base + CLIP-G encoder) + + +GitHub README explicitly instructs 'Download the pretrained base models from SDXL-base-1.0 and CLIP-G' (CLIP-ViT-bigG-14-laion2B-39B-b160k). Neither README nor arXiv 2406.07209 mention ArcFace/InsightFace/antelopev2/buffalo_l. Architecturally descended from IP-Adapter (CLIP-image-embedding family), not from FaceID/InstantID/PhotoMaker-V2. Verifier caveat (high confidence on the license-narrow claim, medium on suitability): CLIP-image face-ID accuracy ~80.95% vs specialized face recognition ~87.61% — license-safe but probably not identity-grade for portraits. Confidence is medium because the suitability claim for raiw.cc face-identity use case has not been validated empirically. 
+ +- https://proceedings.iclr.cc/paper_files/paper/2025/file/ed4df1609bf7d8602435341c9ce2ab5f-Paper-Conference.pdf +- https://github.com/MS-Diffusion/MS-Diffusion + +### 5. ID-Aligner and the StyleGAN2/e4e-based identity method (arXiv 2510.25084) do not solve the problem: the former does not disclose embedder/license in the primary source; the latter terminates identity in a StyleGAN2 generator with no SDXL adapter, so it cannot be wired into the SDXL+canny ControlNet pipeline. + +**Confidence:** high +**Vote:** 3-0 across 3 claims + + +ID-Aligner paper: no license terms, no weight-release status, no specific face embedder named in the primary source — commercial-safety unresolved. arXiv 2510.25084: 'facial identity features... mapped into the W+ latent space of StyleGAN2 using the e4e encoder' — identity pixels come from StyleGAN2's generator, not from any SDXL-compatible adapter, so architecturally incompatible with our SDXL+canny ControlNet pipeline. Abstract also does not name the face embedder or disclose weights license. + +- https://arxiv.org/pdf/2404.15449 +- https://arxiv.org/pdf/2510.25084 + +### 6. The honest verdict: no fully commercial-safe SynthID-robust ArcFace-grade face-identity stack for SDXL exists today; the space is blocked by InsightFace's grip on ArcFace-class pretrained packs, and the verified permissive alternative (AdaFace as a drop-in replacement) was REFUTED in this verification round. + +**Confidence:** high +**Vote:** 3-0 on the chokepoint claim, 0-3 against the AdaFace escape hatch + + +Every ArcFace-grade SDXL adapter audited (PhotoMaker-V2, InstantID, PuLID, IP-Adapter FaceID, Arc2Face) instantiates an InsightFace FaceAnalysis pack at runtime. The maintainer-acknowledged commercial-retrain path (InstantX) has not shipped. AdaFace as a permissive ArcFace alternative was refuted 0-3 by the adversarial verifier (MIT-licensed claim and drop-in-replacement claim both failed). 
Outcome: commercial-safe option for SDXL today is CLIP-image-embedding-based (MS-Diffusion class), which is weaker for face identity than ArcFace. For non-commercial / research-only deployments, InstantID on SDXL is the strongest semantic-only (no-pixel-leak) candidate. + +- https://github.com/deepinsight/insightface +- https://huggingface.co/InstantX/InstantID/discussions/2 +- https://github.com/mk-minchul/AdaFace + +## Caveats + +Six claims were refuted in adversarial verification, two of them load-bearing: AdaFace as a permissive ArcFace drop-in (both the MIT license and the drop-in characterization failed 0-3) — so the most attractive escape hatch from the InsightFace chokepoint did not survive verification. PuLID's superiority claim and ID-Aligner's embedder claim also failed, leaving those methods uncharacterized at the mechanism level. None of the verified claims directly answered the diffusers-0.38 compat question — InstantID's compat with our shipped diffusers version is unverified by primary source. MS-Diffusion's identity-fidelity for portraits is not empirically validated for the SynthID-removal use case; it is verified as a CLIP-G/SDXL adapter, not as a face-identity method. No 2025-2026 candidate other than MS-Diffusion was both surfaced AND verified as license-safe and SDXL-compatible — the verification round did not produce a confirmed CCSR-V2/ConsistencyID/OmniGen-V2/ConsistentID/MagicID candidate. The multi-face scenario (group photos) was not addressed by any verified claim — MS-Diffusion is the only multi-subject candidate but its face-identity strength is unmeasured. Time-sensitivity: InstantX's stated intent to retrain on commercial embedders ("We agree, we plan to train on other face encoders that support commercial license") is dated and unfulfilled per the verified sources; this could change. 
+ +## Open questions + +- Does MS-Diffusion (or any CLIP-image-embedding SDXL adapter) achieve usable face-identity fidelity on the raiw.cc input distribution (portraits + group photos), or is the ArcFace gap (~7 pp face-ID accuracy) visually disqualifying — and can a face-specific CLIP fine-tune close it? +- Has InstantX (or any community fork) actually shipped an InstantID variant retrained on a commercially-licensed face embedder since the maintainer's 2024 commitment, and if so what is its identity-fidelity vs the antelopev2 original? +- What is the exact diffusers-0.38 compat status of InstantID, MS-Diffusion, and PuLID-FLUX inference scripts — does any need a fork the way PhotoMaker-V1 did, and if so what specifically breaks? +- Is there a single-pipeline multi-subject identity-preservation method (mask-guided regional ID-adapters, multi-subject InstantID, MS-Diffusion multi-subject mode) that handles group photos without the per-face crop+composite patchwork that PhotoMaker-V2 produced? 
+ +## Refuted claims + +- **AdaFace code is released under the MIT license, making it permissively licensed for commercial use (in contrast to InsightFace's research-only model packs).** — vote 0-3 — source: https://github.com/mk-minchul/AdaFace +- **AdaFace produces 512-dim face recognition embeddings comparable to ArcFace, positioning it as a drop-in alternative for ID-conditioning adapters that currently depend on InsightFace ArcFace.** — vote 0-3 — source: https://github.com/mk-minchul/AdaFace +- **The paper claims PuLID achieves superior performance in both ID fidelity and editability compared to prior ID-customization methods.** — vote 0-3 — source: https://arxiv.org/pdf/2404.16022 +- **Identity consistency in ID-Aligner is enforced by reward feedback from face detection and recognition models, implying dependency on an external face-recognition embedder (typically ArcFace/InsightFace-class) rather than a CLIP-only path.** — vote 1-2 — source: https://arxiv.org/pdf/2404.15449 +- **The pipeline decouples face detection from inference by accepting a pre-computed embedding via image_embeds, so any ArcFace-class embedder could be swapped in if a commercially-licensed equivalent existed.** — vote 1-2 — source: https://github.com/huggingface/diffusers/blob/main/examples/community/pipeline_stable_diffusion_xl_instantid.py +- **The model card does not explicitly prohibit commercial use, but the license of the required InsightFace antelopev2 embedder is not specified on this page.** — vote 1-2 — source: https://huggingface.co/InstantX/InstantID + +## Sources + +- [source](https://huggingface.co/InstantX/InstantID/discussions/2) +- [source](https://www.insightface.ai/solutions/face-recognition-licensing) +- [source](https://github.com/mk-minchul/AdaFace) +- [source](https://github.com/foivospar/Arc2Face) +- [source](https://apatero.com/blog/instantid-vs-pulid-vs-faceid-ultimate-face-swap-comparison-2025) +- [source](https://arxiv.org/pdf/2404.16022) +- 
[source](https://instantid.github.io/) +- [source](https://arxiv.org/pdf/2404.15449) +- [source](https://arxiv.org/pdf/2511.11989) +- [source](https://arxiv.org/pdf/2510.25084) +- [source](https://github.com/huggingface/diffusers/blob/main/examples/community/pipeline_stable_diffusion_xl_instantid.py) +- [source](https://huggingface.co/InstantX/InstantID) +- [source](https://github.com/huggingface/diffusers/issues/9158) +- [source](https://github.com/huggingface/diffusers/issues/5904) +- [source](https://github.com/Mikubill/sd-webui-controlnet/discussions/2589) +- [source](https://arxiv.org/pdf/2401.07519) +- [source](https://proceedings.iclr.cc/paper_files/paper/2025/file/ed4df1609bf7d8602435341c9ce2ab5f-Paper-Conference.pdf) +- [source](https://github.com/huggingface/diffusers/issues/8626) +- [source](https://arxiv.org/pdf/2404.04243) +- [source](https://huggingface.co/OmniGen2/OmniGen2) +- [source](https://github.com/VectorSpaceLab/OmniGen) +- [source](https://github.com/ToTheBeginning/PuLID) +- [source](https://arc2face.github.io/) +- [source](https://github.com/mk-minchul/AdaFace/blob/master/LICENSE) +- [source](https://github.com/IrvingMeng/MagFace/blob/main/LICENSE) +- [source](https://github.com/askerlee/AdaFace-dev) +- [source](https://openreview.net/forum?id=Hc2ZwCYgmB) +- [source](https://github.com/tencent-ailab/IP-Adapter/wiki/IP%E2%80%90Adapter%E2%80%90Face) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6398198..3e09439 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,31 @@ photomaker = [ "onnxruntime>=1.16.0", "peft>=0.10.0", ] +# **NON-COMMERCIAL.** Optional InstantID SynthID-robust face-identity post-pass. +# InstantID adapter weights (IdentityNet ControlNet + ip-adapter.bin) are Apache-2.0 +# from InstantX/InstantID on HuggingFace, BUT the runtime depends on InsightFace's +# antelopev2 ArcFace pack (non-commercial / research-only). 
def _restore_faces_options(f: Any) -> Any:
    """Decorate an invisible-pipeline command with the face-restoration options.

    Two click options are attached, in this order:

    * ``--restore-faces-method`` -- a choice between ``instantid`` (the
      default; needs the `instantid` extra) and ``photomaker`` (needs the
      `photomaker` extra).
    * ``--restore-faces/--no-restore-faces`` -- the opt-in switch, off by
      default.

    **BOTH METHODS ARE NON-COMMERCIAL**: they pull InsightFace antelopev2 /
    buffalo_l model packs at runtime, which are research-only. A paid service
    (raiw.cc, any monetized SaaS) MUST NOT use this flag.
    """
    add_method_option = click.option(
        "--restore-faces-method",
        type=click.Choice(["instantid", "photomaker"]),
        default="instantid",
        help="Face-restore mechanism. 'instantid' (default) uses InstantID's ArcFace + "
        "landmark ControlNet for stronger identity fidelity on single portraits. "
        "'photomaker' uses PhotoMaker-V2's CLIP+ArcFace dual encoder. **BOTH are "
        "NON-COMMERCIAL** (InsightFace antelopev2 / buffalo_l model packs are "
        "research-only). Pick whichever extra you've installed; for personal / research "
        "use only. Do NOT use in a paid service.",
    )
    add_toggle_option = click.option(
        "--restore-faces/--no-restore-faces",
        default=False,
        help="EXPERIMENTAL, opt-in, **NON-COMMERCIAL**. Restore face identity via the "
        "chosen --restore-faces-method (default: instantid); off by default, auto-skips "
        "when no face is detected or the chosen extra is absent.",
    )
    # The method option is applied to the command first, then the on/off switch
    # wraps the result.
    with_method = add_method_option(f)
    return add_toggle_option(with_method)
vendor=vendor_for_strength(img_path), @@ -1187,6 +1206,7 @@ def cmd_batch( max_resolution: int, min_resolution: int, restore_faces: bool, + restore_faces_method: str, controlnet_scale: float, upscaler: str, auto: bool, @@ -1246,6 +1266,7 @@ def cmd_batch( max_resolution=max_resolution, min_resolution=min_resolution, restore_faces=restore_faces, + restore_faces_method=restore_faces_method, controlnet_scale=controlnet_scale, upscaler=upscaler, auto=auto, diff --git a/src/remove_ai_watermarks/instantid_restore.py b/src/remove_ai_watermarks/instantid_restore.py new file mode 100644 index 0000000..d6c0992 --- /dev/null +++ b/src/remove_ai_watermarks/instantid_restore.py @@ -0,0 +1,352 @@ +"""SynthID-robust face identity restoration via InstantID. + +**NON-COMMERCIAL.** InstantID's runtime depends on the InsightFace ``antelopev2`` +ArcFace model pack, which InsightFace releases under a research-only license: + + "The training data containing the annotation (and the models trained with + these data) are available for non-commercial research purposes only." + -- insightface upstream README + +The InstantX maintainers themselves acknowledged on HuggingFace +(``InstantX/InstantID`` discussion #2) that "InstantID cannot be Apache 2.0 if it +is using Insight Face" and stated intent to retrain on commercial face encoders. +As of 2026-06-08 (deep-research synthesis in +``docs/synthid-robust-identity-research-2026-06-08.md``) that retrain has not +shipped. **A paid service (raiw.cc, any monetized SaaS) MUST NOT use this path.** + +The default ``--restore-faces-method`` is ``instantid`` (this module). The +alternative ``photomaker`` is also non-commercial. There is no commercial-safe +ArcFace-grade identity-preservation stack for SDXL today. + +Architecture (vs PhotoMaker-V2): +- PhotoMaker-V2 conditions on a CLIP+ArcFace embedding and runs as txt2img with + no spatial control. 
Identity drift on Asian male faces is documented upstream + and was visually confirmed in our cert sweep. +- InstantID conditions on the ArcFace embedding via cross-attention (IP-Adapter + style) AND uses a separate landmark ControlNet (5 facial keypoints) for weak + pose control. The semantic identity branch and spatial landmark branch are + decoupled, which gives stronger identity fidelity per the InstantID paper + (arXiv:2401.07519) and our research report. Critically, NO original face + pixels enter the diffusion -- only the ArcFace embedding (semantic) and the + rendered landmark stick figure (geometry, content-free) -- so SynthID is not + transported. + +Pipeline this module wires: + 1. Detect faces in the CLEANED image (YuNet via ``auto_config``). + 2. For each face: take the SAME box from the ORIGINAL image, extract its + ArcFace embedding + 5 keypoints via InsightFace ``FaceAnalysis(antelopev2)``. + 3. Render the keypoints as a stick figure (``draw_kps`` from upstream). + 4. Call the InstantID community pipeline + (``StableDiffusionXLInstantIDPipeline``) with the ArcFace embedding as + ``image_embeds=`` and the landmark image as ``image=`` (the ControlNet + conditioning). + 5. Feather-composite the regenerated face into the cleaned image. + +Requires the optional ``instantid`` extra: ``pip install +'remove-ai-watermarks[instantid]'``. Weights download on first use; never +bundled. The InstantID adapter weights (IdentityNet ControlNet + +``ip-adapter.bin``) are Apache-2.0; the runtime InsightFace ``antelopev2`` model +pack is non-commercial. + +Multi-face: like PhotoMaker, this module loops over face boxes and composites +back. InstantID's strength is single-portrait; for group photos identity +fidelity per-face is preserved but the composite still uses the cleaned-image +geometry as the canvas. +""" + +# cv2/torch/diffusers boundary: relax unknown-type rules for this file only. 
+# pyright: reportUnknownMemberType=false, reportUnknownArgumentType=false, reportUnknownVariableType=false, reportUnknownParameterType=false, reportMissingTypeArgument=false, reportMissingTypeStubs=false, reportMissingImports=false, reportArgumentType=false, reportAssignmentType=false, reportReturnType=false, reportCallIssue=false, reportIndexIssue=false, reportOperatorIssue=false, reportOptionalMemberAccess=false, reportOptionalCall=false, reportOptionalSubscript=false, reportOptionalOperand=false, reportAttributeAccessIssue=false, reportPrivateImportUsage=false, reportPrivateUsage=false, reportInvalidTypeForm=false, reportConstantRedefinition=false, reportUnnecessaryComparison=false +from __future__ import annotations + +import importlib.util +import logging +import threading +from typing import TYPE_CHECKING, Any + +from remove_ai_watermarks.photomaker_restore import _composite_faces, _face_crop_square + +if TYPE_CHECKING: + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + +# InstantID checkpoint repo on HuggingFace. The IdentityNet ControlNet weights live +# under ``ControlNetModel/`` and the IP-Adapter file is ``ip-adapter.bin`` at the +# root. Both are Apache-2.0 (the InsightFace runtime dep is what makes the path +# non-commercial). Downloaded on first use. +_INSTANTID_REPO = "InstantX/InstantID" +_INSTANTID_CONTROLNET_SUBFOLDER = "ControlNetModel" +_INSTANTID_IP_ADAPTER = "ip-adapter.bin" + +# SDXL base shared with the main pipeline (same checkpoint as `default`/`controlnet`). +_SDXL_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0" + +# Prompt format. InstantID is less sensitive to prompt than PhotoMaker because the +# ID branch is cross-attention; a neutral descriptive prompt is recommended by the +# upstream gradio demo. 
def _get_face_analyser() -> Any:
    """Return the InsightFace FaceAnalysis singleton (antelopev2, non-commercial).

    The instance is built lazily on the first call and cached in the
    module-level ``_face_analyser``. Construction happens under
    ``_face_analyser_lock`` with a second ``None`` check inside the lock, so
    concurrent first callers build it only once; later calls return the cached
    object without taking the lock.

    Triggers InsightFace's auto-download of the antelopev2 pack on first
    instantiation. See the NON-COMMERCIAL notice at the top of the module.

    Returns:
        The ``FaceAnalysis`` instance, already prepared with ``ctx_id=0`` and a
        640x640 detection size.
    """
    global _face_analyser
    if _face_analyser is not None:
        return _face_analyser
    with _face_analyser_lock:
        if _face_analyser is None:
            import torch
            from insightface.app import FaceAnalysis

            # Always keep the CPU provider as the last entry. torch seeing a GPU
            # does not guarantee that the installed ONNX Runtime build ships the
            # CUDA provider (the `instantid` extra pulls the plain `onnxruntime`
            # wheel), and a CUDA-only list may leave the session with no usable
            # provider. With the CPU entry present ONNX Runtime can fall back.
            providers = ["CPUExecutionProvider"]
            if torch.cuda.is_available():
                providers.insert(0, "CUDAExecutionProvider")
            # InstantID's upstream uses name='antelopev2' and root='./' (which puts
            # the auto-downloaded pack under ./models/antelopev2/). Use the same root
            # so the pack lands under the process cwd (Modal volume in prod).
            fa = FaceAnalysis(name="antelopev2", root="./", providers=providers)
            fa.prepare(ctx_id=0, det_size=(640, 640))
            _face_analyser = fa
    return _face_analyser
+ pipe = DiffusionPipeline.from_pretrained( + _SDXL_MODEL_ID, + controlnet=controlnet, + torch_dtype=dtype, + custom_pipeline="pipeline_stable_diffusion_xl_instantid", + ) + pipe.to(device) + # IP-Adapter weights that wire the ArcFace embedding into cross-attention. + ip_adapter_path = hf_hub_download(repo_id=_INSTANTID_REPO, filename=_INSTANTID_IP_ADAPTER) + pipe.load_ip_adapter_instantid(ip_adapter_path) + _pipeline = pipe + return _pipeline + + +def _draw_kps(image_size: tuple[int, int], kps: Any) -> Any: + """Render the 5 facial keypoints as a colored stick figure. + + Mirrors upstream's ``draw_kps`` (in ``pipeline_stable_diffusion_xl_instantid.py``): + the 5 keypoints (left eye, right eye, nose tip, left mouth corner, right mouth + corner) get drawn as colored circles connected by colored lines, on a black + background. The result is the ControlNet conditioning image -- pure landmark + geometry, no pixels from the original face leak through this branch. + + ``image_size`` is ``(width, height)``; ``kps`` is a numpy array of shape (5, 2). + """ + import cv2 + import numpy as np + from PIL import Image + + # Same color palette as upstream (red/green/blue/yellow/magenta as RGB tuples). 
+ stick_width = 4 + limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]]) + color_list = [ + (255, 0, 0), + (0, 255, 0), + (0, 0, 255), + (255, 255, 0), + (255, 0, 255), + ] + + w, h = image_size + out_img = np.zeros((h, w, 3), dtype=np.uint8) + + kps_arr = np.array(kps) + for i in range(len(limb_seq)): + index = limb_seq[i] + color = color_list[index[0]] + x = kps_arr[index][:, 0] + y = kps_arr[index][:, 1] + length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5 + angle = np.degrees(np.arctan2(y[0] - y[1], x[0] - x[1])) + polygon = cv2.ellipse2Poly( + (int(np.mean(x)), int(np.mean(y))), + (int(length / 2), stick_width), + int(angle), + 0, + 360, + 1, + ) + out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color) + out_img = (out_img * 0.6).astype(np.uint8) + + for i, kp in enumerate(kps_arr): + x, y = kp + out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color_list[i], -1) + + return Image.fromarray(out_img.astype(np.uint8)) + + +def restore_faces_instantid( + original_bgr: NDArray[Any], + cleaned_bgr: NDArray[Any], + num_inference_steps: int = 30, + guidance_scale: float = 5.0, + ip_adapter_scale: float = 0.8, + controlnet_conditioning_scale: float = 0.8, + seed: int | None = None, + detect_faces_fn: Any | None = None, +) -> NDArray[Any]: + """SynthID-robust face identity restoration via InstantID. + + Flow: + 1. Detect faces in ``cleaned_bgr`` (YuNet via ``auto_config`` by default; + override via ``detect_faces_fn`` for tests). + 2. For each face: take the SAME box from ``original_bgr`` -> square crop -> + InsightFace extracts ArcFace embedding + 5 keypoints -> ``_draw_kps`` + renders the landmark stick figure -> InstantID pipeline generates a + fresh face conditioned on the embedding and the landmark control image. + 3. Feather-composite each regenerated face into ``cleaned_bgr``. 
+ + Faces are read from ``original_bgr`` for the ArcFace embedding + landmarks, but + the OUTPUT pixels are diffusion-fresh (ArcFace embedding is semantic; landmark + image is pure geometry), so SynthID is not transported. + + ``detect_faces_fn`` returns a list of ``(x, y, w, h)`` boxes given a BGR image. + """ + import cv2 + import numpy as np + import torch + + if detect_faces_fn is None: + from remove_ai_watermarks.auto_config import _get_yunet + + det = _get_yunet() + + def _default_detect(bgr: NDArray[Any]) -> list[tuple[int, int, int, int]]: + h_d, w_d = bgr.shape[:2] + det.setInputSize((w_d, h_d)) + _, faces = det.detect(bgr) + if faces is None: + return [] + return [(int(f[0]), int(f[1]), int(f[2]), int(f[3])) for f in faces if int(f[2]) > 0 and int(f[3]) > 0] + + detect_faces_fn = _default_detect + + boxes = detect_faces_fn(cleaned_bgr) + if not boxes: + logger.debug("instantid_restore: no faces detected; returning cleaned image unchanged") + return cleaned_bgr + + pipeline = _get_pipeline() + face_analyser = _get_face_analyser() + + generator = None + if seed is not None: + generator = torch.Generator(device=pipeline.device).manual_seed(seed) + + restored: list[tuple[NDArray[Any], tuple[int, int, int, int]]] = [] + for box in boxes: + id_crop_bgr, square_box = _face_crop_square(original_bgr, box) + if id_crop_bgr.size == 0: + continue + + # Resize the crop to the InstantID target so InsightFace + the pipeline both + # work in the same coordinate space. + crop_resized = cv2.resize( + id_crop_bgr, (_INSTANTID_FACE_SIZE, _INSTANTID_FACE_SIZE), interpolation=cv2.INTER_LANCZOS4 + ) + + # InsightFace expects BGR. It returns embedding + 5 keypoints per detected face. + # Pick the largest face in the crop (sorted by bbox area). 
+ face_infos = face_analyser.get(crop_resized) + if not face_infos: + logger.debug("instantid_restore: InsightFace did not find a face in the crop; skipping") + continue + face_info = sorted( + face_infos, + key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]), + )[-1] + face_emb = face_info["embedding"] + face_kps = face_info["kps"] + + # Render the landmark stick figure at the same size as the generation target. + landmark_img = _draw_kps((_INSTANTID_FACE_SIZE, _INSTANTID_FACE_SIZE), face_kps) + + out = pipeline( + prompt=_INSTANTID_PROMPT, + negative_prompt=_INSTANTID_NEGATIVE, + image_embeds=face_emb, + image=landmark_img, + controlnet_conditioning_scale=controlnet_conditioning_scale, + ip_adapter_scale=ip_adapter_scale, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + generator=generator, + ) + gen_rgb = out.images[0] + gen_bgr = cv2.cvtColor(np.array(gen_rgb), cv2.COLOR_RGB2BGR) + restored.append((gen_bgr, square_box)) + + if not restored: + return cleaned_bgr + return _composite_faces(cleaned_bgr, restored) diff --git a/src/remove_ai_watermarks/invisible_engine.py b/src/remove_ai_watermarks/invisible_engine.py index 68cf5fe..08c9302 100644 --- a/src/remove_ai_watermarks/invisible_engine.py +++ b/src/remove_ai_watermarks/invisible_engine.py @@ -165,6 +165,7 @@ class InvisibleEngine: min_resolution: int = 1024, vendor: str | None = None, restore_faces: bool = False, + restore_faces_method: str = "instantid", unsharp: float = 0.0, adaptive_polish: bool = False, upscaler: str = "lanczos", @@ -181,10 +182,15 @@ class InvisibleEngine: seed: Random seed for reproducibility. humanize: Intensity of Analog Humanizer film grain (0 = off). restore_faces: EXPERIMENTAL, opt-in (default False). **NON-COMMERCIAL.** - Run the PhotoMaker-V2 face-identity post-pass when faces are present - (needs the ``photomaker`` extra, which pulls non-commercial InsightFace - model packs). 
Auto-skips with a debug log when the extra is absent or no - face is detected. See ``photomaker_restore.py`` for the legal notice. + Run the face-identity post-pass when faces are present. Method is + chosen by ``restore_faces_method`` -- ``instantid`` (default, + stronger identity, needs the ``instantid`` extra) or ``photomaker`` + (PhotoMaker-V2, needs the ``photomaker`` extra). Both extras pull + non-commercial InsightFace model packs. Auto-skips with a debug log + when the chosen extra is absent or no face is detected. See + ``instantid_restore.py`` / ``photomaker_restore.py``. + restore_faces_method: ``instantid`` (default) or ``photomaker``. Both + NON-COMMERCIAL; pick the one whose extra you've installed. unsharp: Final unsharp-mask sharpening strength (0 = off, default). Applied last (after face restoration) to counter the soft, over-smoothed look of the diffusion + restoration; ~0.5-0.8 is a @@ -316,7 +322,12 @@ class InvisibleEngine: # GFPGAN derives from are already SynthID-free). Auto-skips when faces are # absent or the optional `restore` extra is not installed. if restore_faces: - self._restore_faces_photomaker(out_path, image, seed) + if restore_faces_method == "photomaker": + self._restore_faces_photomaker(out_path, image, seed) + else: + if restore_faces_method != "instantid": + logger.warning("unknown restore_faces_method %r; using 'instantid'", restore_faces_method) + self._restore_faces_instantid(out_path, image, seed) # Final sharpening, LAST so it crisps the face-restored result too (a # pre-restore sharpen would be smoothed back over by the face pass). @@ -355,6 +366,50 @@ class InvisibleEngine: if _tmp_path.exists(): _tmp_path.unlink() + def _restore_faces_instantid( + self, + out_path: Path, + original_image: Any, + seed: int | None, + ) -> None: + """Run the InstantID face-identity post-pass on the cleaned ``out_path``. + + **NON-COMMERCIAL** (see ``instantid_restore.py``). InstantID conditions on + an ArcFace embedding (semantic) plus a landmark ControlNet (geometry, + content-free) -- no original face pixels enter the diffusion. 
Best-effort: + any failure (missing extra, model load, runtime error) logs a warning and + leaves the un-restored cleaned output in place. + """ + from remove_ai_watermarks import instantid_restore + + if not instantid_restore.is_available(): + logger.debug("restore_faces requested but the 'instantid' extra is not installed; skipping") + return + + try: + import cv2 + import numpy as np + + from remove_ai_watermarks import image_io + + cleaned_bgr = image_io.imread(out_path, cv2.IMREAD_COLOR) + if cleaned_bgr is None: + logger.warning("restore_faces: could not read cleaned output %s; skipping", out_path) + return + + original_rgb = original_image.convert("RGB") + original_bgr = cv2.cvtColor(np.array(original_rgb), cv2.COLOR_RGB2BGR) + cleaned_size = (cleaned_bgr.shape[1], cleaned_bgr.shape[0]) + if (original_bgr.shape[1], original_bgr.shape[0]) != cleaned_size: + original_bgr = cv2.resize(original_bgr, cleaned_size, interpolation=cv2.INTER_LANCZOS4) + + if self._progress_callback: + self._progress_callback("Restoring face identity (InstantID post-pass)...") + restored = instantid_restore.restore_faces_instantid(original_bgr, cleaned_bgr, seed=seed) + image_io.imwrite(out_path, restored) + except Exception as e: + logger.warning("restore_faces post-pass failed (%s); keeping un-restored output", e) + def _restore_faces_photomaker( self, out_path: Path, diff --git a/src/remove_ai_watermarks/photomaker_restore.py b/src/remove_ai_watermarks/photomaker_restore.py index 5d788e2..731389f 100644 --- a/src/remove_ai_watermarks/photomaker_restore.py +++ b/src/remove_ai_watermarks/photomaker_restore.py @@ -83,8 +83,7 @@ _SDXL_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0" # 2026-06-04: at 512 V2 produced a collage of training-time faces; at 1024 with the # upstream-style descriptive prompt it produces a clean face. 
_PHOTOMAKER_PROMPT = ( - "instagram photo, portrait photo of a person img, natural skin, soft lighting, " - "best quality, sharp focus" + "instagram photo, portrait photo of a person img, natural skin, soft lighting, best quality, sharp focus" ) _PHOTOMAKER_NEGATIVE = ( "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, " diff --git a/tests/test_instantid_restore.py b/tests/test_instantid_restore.py new file mode 100644 index 0000000..95f1bd6 --- /dev/null +++ b/tests/test_instantid_restore.py @@ -0,0 +1,146 @@ +"""Control-flow tests for instantid_restore -- no model download. + +The end-to-end InstantID run is monkey-patched: we replace ``_get_pipeline`` and +``_get_face_analyser`` with fakes, install a fake InsightFace ``FaceAnalysis`` +embedding, and check that the per-face crop + composite pipeline wires up the +expected pixels into ``cleaned_bgr``. +""" + +from __future__ import annotations + +import cv2 +import numpy as np +import pytest + +from remove_ai_watermarks import instantid_restore + + +class TestIsAvailable: + def test_returns_bool(self): + assert isinstance(instantid_restore.is_available(), bool) + + +class TestRepoPins: + """Pin the InstantID repo + adapter file so a maintainer change is visible.""" + + def test_repo_is_instantx_instantid(self): + assert instantid_restore._INSTANTID_REPO == "InstantX/InstantID" + + def test_controlnet_subfolder(self): + assert instantid_restore._INSTANTID_CONTROLNET_SUBFOLDER == "ControlNetModel" + + def test_ip_adapter_filename(self): + assert instantid_restore._INSTANTID_IP_ADAPTER == "ip-adapter.bin" + + +class TestDrawKps: + def test_renders_color_image(self): + kps = np.array([[100, 100], [200, 100], [150, 150], [120, 200], [180, 200]]) + img = instantid_restore._draw_kps((256, 256), kps) + arr = np.array(img) + assert arr.shape == (256, 256, 3) + # Has nonzero pixels (the stick figure is rendered). 
+ assert arr.sum() > 0 + + def test_black_outside_kps(self): + kps = np.array([[100, 100], [200, 100], [150, 150], [120, 200], [180, 200]]) + img = instantid_restore._draw_kps((256, 256), kps) + arr = np.array(img) + # Top-left corner should be black (no keypoint there). + assert arr[0, 0].sum() == 0 + + +class TestRestoreFacesInstantidControlFlow: + """End-to-end flow with the pipeline / face analyser / InsightFace mocked. + + Checks that with one detected face the regenerated output ends up + composited into the cleaned image, and that the no-face and + InsightFace-miss paths return the cleaned image unchanged. + """ + + @staticmethod + def _fake_pipeline_class(fill_value: int = 210): + import torch + from PIL import Image + + class _FakePipeOutput: + def __init__(self, images): + self.images = images + + class _FakePipe: + device = "cpu" + dtype = torch.float32 + + def __call__(self, **kwargs): + # Save kwargs for assertion. + _FakePipe.last_kwargs = kwargs + img = Image.fromarray(np.full((1024, 1024, 3), fill_value, dtype=np.uint8)) + return _FakePipeOutput([img]) + + return _FakePipe() + + def test_no_faces_returns_cleaned_unchanged(self, monkeypatch): + monkeypatch.setattr(instantid_restore, "is_available", lambda: True) + monkeypatch.setattr(instantid_restore, "_get_pipeline", lambda: self._fake_pipeline_class()) + monkeypatch.setattr(instantid_restore, "_get_face_analyser", lambda: object()) + + orig = np.full((400, 400, 3), 50, dtype=np.uint8) + cleaned = np.full((400, 400, 3), 100, dtype=np.uint8) + out = instantid_restore.restore_faces_instantid(orig, cleaned, detect_faces_fn=lambda _b: []) + assert np.array_equal(out, cleaned) + + def test_one_face_gets_composited_into_cleaned(self, monkeypatch): + monkeypatch.setattr(instantid_restore, "is_available", lambda: True) + monkeypatch.setattr(instantid_restore, "_get_pipeline", lambda: self._fake_pipeline_class(fill_value=210)) + + # Fake FaceAnalyser that returns one face with a 
512-d embedding + 5 keypoints. + class _FakeFA: + def get(self, _bgr): + return [ + { + "bbox": np.array([10, 10, 100, 100], dtype=np.float32), + "embedding": np.zeros(512, dtype=np.float32), + "kps": np.array( + [[30, 40], [70, 40], [50, 60], [35, 80], [65, 80]], + dtype=np.float32, + ), + } + ] + + monkeypatch.setattr(instantid_restore, "_get_face_analyser", lambda: _FakeFA()) + + orig = np.full((400, 400, 3), 30, dtype=np.uint8) + cleaned = np.full((400, 400, 3), 90, dtype=np.uint8) + cv2.rectangle(orig, (150, 150), (250, 250), (200, 100, 50), -1) + + out = instantid_restore.restore_faces_instantid( + orig, cleaned, detect_faces_fn=lambda _b: [(150, 150, 100, 100)] + ) + # The cleaned image should have shifted toward the fake-pipe fill (210) + # inside the face region. + assert out[200, 200, 0] > 150 + # Corner pixels far outside the feather stay close to the cleaned base. + assert abs(int(out[0, 0, 0]) - int(cleaned[0, 0, 0])) <= 1 + + def test_insightface_misses_face_skips_gracefully(self, monkeypatch): + monkeypatch.setattr(instantid_restore, "is_available", lambda: True) + monkeypatch.setattr(instantid_restore, "_get_pipeline", lambda: self._fake_pipeline_class()) + + class _EmptyFA: + def get(self, _bgr): + return [] + + monkeypatch.setattr(instantid_restore, "_get_face_analyser", lambda: _EmptyFA()) + + orig = np.full((400, 400, 3), 30, dtype=np.uint8) + cleaned = np.full((400, 400, 3), 90, dtype=np.uint8) + + out = instantid_restore.restore_faces_instantid( + orig, cleaned, detect_faces_fn=lambda _b: [(150, 150, 100, 100)] + ) + # No face detected by InsightFace -> cleaned image is returned unchanged. 
+ assert np.array_equal(out, cleaned) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/uv.lock b/uv.lock index 40429ab..bd10c30 100644 --- a/uv.lock +++ b/uv.lock @@ -2431,6 +2431,12 @@ gpu = [ { name = "torch" }, { name = "transformers" }, ] +instantid = [ + { name = "huggingface-hub" }, + { name = "insightface" }, + { name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "onnxruntime", version = "1.26.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] lama = [ { name = "huggingface-hub" }, { name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -2455,12 +2461,15 @@ requires-dist = [ { name = "click", specifier = ">=8.0.0" }, { name = "diffusers", marker = "extra == 'gpu'", specifier = ">=0.38.0" }, { name = "einops", marker = "extra == 'photomaker'", specifier = ">=0.7.0" }, + { name = "huggingface-hub", marker = "extra == 'instantid'", specifier = ">=0.20.0" }, { name = "huggingface-hub", marker = "extra == 'lama'", specifier = ">=0.20.0" }, { name = "huggingface-hub", marker = "extra == 'photomaker'", specifier = ">=0.20.0" }, + { name = "insightface", marker = "extra == 'instantid'", specifier = ">=0.7.3" }, { name = "insightface", marker = "extra == 'photomaker'", specifier = ">=0.7.3" }, { name = "invisible-watermark", marker = "extra == 'detect'", specifier = ">=0.2.0" }, { name = "invisible-watermark", marker = "extra == 'dev'", specifier = ">=0.2.0" }, { name = "numpy", specifier = ">=1.24.0" }, + { name = "onnxruntime", marker = "extra == 'instantid'", specifier = ">=1.16.0" }, { name = "onnxruntime", marker = "extra == 'lama'", specifier = ">=1.16.0" }, { name = "onnxruntime", marker = "extra == 'photomaker'", specifier = ">=1.16.0" }, { name = "opencv-python-headless", specifier = ">=4.8.0" }, @@ 
-2481,7 +2490,7 @@ requires-dist = [ { name = "transformers", marker = "extra == 'gpu'", specifier = ">=5,<6" }, { name = "trustmark", marker = "extra == 'trustmark'", specifier = ">=0.8.0" }, ] -provides-extras = ["gpu", "detect", "trustmark", "lama", "photomaker", "esrgan", "dev", "all"] +provides-extras = ["gpu", "detect", "trustmark", "lama", "photomaker", "instantid", "esrgan", "dev", "all"] [[package]] name = "requests"