mirror of
https://github.com/hacksider/Deep-Live-Cam.git
synced 2026-05-14 02:42:09 +02:00
f65aeae5db
Bundles CoreML graph rewrites, GPU-accelerated pipeline work, Windows CUDA fixes, and Mac/Windows runtime routing into a single drop. CoreML (Apple Silicon): - Decompose Pad(reflect) → Slice+Concat in inswapper_128 so the model runs in one CoreML partition instead of 14 (TEMPORARY: fixed upstream in microsoft/onnxruntime#28073, drop when ORT >= 1.26.0). - Fold Shape/Gather chains to constants in det_10g (21ms → 4ms). - Decompose Split(axis=1) → Slice pairs in GFPGAN (155ms → 89ms). - Route detection model to GPU so the ANE is free for the swap model. - Centralize provider/config selection in create_onnx_session. Pipeline (all platforms): - Parallelize face landmark + recognition post-detection; skip landmark_2d_106 when only face_swapper is active. - Pipeline face detection with swap for ANE overlap. - GPU-accelerated paste_back, MJPEG capture, zero-copy display path. - Standalone pipeline benchmark script. Windows / CUDA: - CUDA graphs + FP16 model + all-GPU pipeline for 1080p 60 FPS. - Auto-detect GPU provider and fix DLL discovery for Windows CUDA execution. Cross-platform: - platform_info helper for Mac/Windows runtime routing. - GFPGAN 30 fps + MSMF camera 60 fps with adaptive pipeline tuning. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
81 lines
2.5 KiB
Python
81 lines
2.5 KiB
Python
"""Centralized platform + accelerator detection.

Imported once at startup to expose typed flags the rest of the codebase
can branch on without re-querying `platform`, `torch.cuda`, or
`onnxruntime.get_available_providers()` repeatedly.

The banner printed by :func:`print_banner` is the single user-facing
report of which code path the app will take.
"""
|
|
from __future__ import annotations
|
|
|
|
import platform as _platform
|
|
import sys
|
|
from typing import List, Tuple
|
|
|
|
# OS-family flags, evaluated exactly once at import time so the rest of
# the codebase can branch without re-querying `platform`.
IS_WINDOWS: bool = _platform.system() == "Windows"
IS_MACOS: bool = _platform.system() == "Darwin"
IS_LINUX: bool = _platform.system() == "Linux"
# Apple Silicon (M-series) reports machine() == "arm64"; Intel Macs report "x86_64".
IS_APPLE_SILICON: bool = IS_MACOS and _platform.machine() == "arm64"
|
|
|
|
|
|
def _detect_torch_cuda() -> bool:
|
|
try:
|
|
import torch # noqa: WPS433 — local import, avoid hard dep at module load
|
|
return bool(torch.cuda.is_available())
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def _detect_onnx_providers() -> List[str]:
|
|
try:
|
|
import onnxruntime
|
|
return list(onnxruntime.get_available_providers())
|
|
except Exception:
|
|
return []
|
|
|
|
|
|
# Accelerator capabilities, probed exactly once at import time.
HAS_TORCH_CUDA: bool = _detect_torch_cuda()
# Execution providers reported by onnxruntime ([] when it is not installed).
ONNX_PROVIDERS: List[str] = _detect_onnx_providers()
HAS_CUDA_PROVIDER: bool = "CUDAExecutionProvider" in ONNX_PROVIDERS
HAS_COREML_PROVIDER: bool = "CoreMLExecutionProvider" in ONNX_PROVIDERS
HAS_DML_PROVIDER: bool = "DmlExecutionProvider" in ONNX_PROVIDERS
|
|
|
|
|
|
def camera_backends(device_index: int = 0) -> List[Tuple[int, int]]:
    """Return an ordered list of ``(device_index, cv2_backend)`` attempts.

    Windows prefers MSMF (60fps capable) with DirectShow as a fallback and
    ``CAP_ANY`` as the last resort. macOS/Linux use the default backend
    (AVFoundation / V4L2 respectively).

    Args:
        device_index: Camera index to open. Defaults to 0 (the primary
            camera), preserving the previous hard-coded behavior.

    Returns:
        Ordered ``(device_index, backend)`` pairs to try with
        ``cv2.VideoCapture``.
    """
    import cv2  # local import: cv2 is heavy and only needed by capture code

    if IS_WINDOWS:
        return [
            (device_index, cv2.CAP_MSMF),
            (device_index, cv2.CAP_DSHOW),
            (device_index, cv2.CAP_ANY),
        ]
    return [(device_index, cv2.CAP_ANY)]
|
|
|
|
|
|
def accelerator_label() -> str:
    """Return a human-readable name of the accelerator path the app will take.

    Priority order: NVIDIA CUDA (torch + ORT provider both present), then
    CoreML (annotated with the Neural Engine on Apple Silicon), then
    DirectML, falling back to plain CPU.
    """
    if HAS_TORCH_CUDA and HAS_CUDA_PROVIDER:
        return "CUDA (NVIDIA)"
    if HAS_COREML_PROVIDER:
        return "CoreML (Apple Neural Engine)" if IS_APPLE_SILICON else "CoreML"
    if HAS_DML_PROVIDER:
        return "DirectML"
    return "CPU"
|
|
|
|
|
|
def print_banner() -> None:
    """Print a one-line summary of the platform + accelerator selection."""
    system = _platform.system()
    machine = _platform.machine()
    py_version = sys.version.split()[0]
    summary = (
        f"[platform] {system} {machine} | python {py_version} | "
        f"accelerator: {accelerator_label()} | providers: {ONNX_PROVIDERS}"
    )
    # flush so the banner appears immediately even under buffered stdout
    print(summary, flush=True)
|