diff --git a/modules/core.py b/modules/core.py index b6ef9b8..4f8d1b6 100644 --- a/modules/core.py +++ b/modules/core.py @@ -129,11 +129,22 @@ def suggest_execution_providers() -> List[str]: def suggest_execution_threads() -> int: + """Suggest optimal thread count based on hardware and execution provider.""" + import os + + # Get CPU count + cpu_count = os.cpu_count() or 4 + if 'DmlExecutionProvider' in modules.globals.execution_providers: return 1 if 'ROCMExecutionProvider' in modules.globals.execution_providers: return 1 - return 8 + if 'CUDAExecutionProvider' in modules.globals.execution_providers: + # For CUDA, use more threads for parallel frame processing + return min(cpu_count, 16) + + # For CPU execution, use most cores but leave some for system + return max(4, min(cpu_count - 2, 16)) def limit_resources() -> None: @@ -176,10 +187,16 @@ def update_status(message: str, scope: str = 'DLC.CORE') -> None: ui.update_status(message) def start() -> None: + """Start processing with performance monitoring.""" + import time + + start_time = time.time() + for frame_processor in get_frame_processors_modules(modules.globals.frame_processors): if not frame_processor.pre_start(): return update_status('Processing...') + # process image to image if has_image_extension(modules.globals.target_path): if modules.globals.nsfw_filter and ui.check_and_ignore_nsfw(modules.globals.target_path, destroy): @@ -193,26 +210,40 @@ def start() -> None: frame_processor.process_image(modules.globals.source_path, modules.globals.output_path, modules.globals.output_path) release_resources() if is_image(modules.globals.target_path): - update_status('Processing to image succeed!') + elapsed = time.time() - start_time + update_status(f'Processing to image succeed! (Time: {elapsed:.2f}s)') else: update_status('Processing to image failed!') return + # process image to videos if modules.globals.nsfw_filter and ui.check_and_ignore_nsfw(modules.globals.target_path, destroy): return + extraction_start = time.time() if not modules.globals.map_faces: update_status('Creating temp resources...') create_temp(modules.globals.target_path) update_status('Extracting frames...') extract_frames(modules.globals.target_path) + extraction_time = time.time() - extraction_start + update_status(f'Frame extraction completed in {extraction_time:.2f}s') temp_frame_paths = get_temp_frame_paths(modules.globals.target_path) + total_frames = len(temp_frame_paths) + update_status(f'Processing {total_frames} frames with {modules.globals.execution_threads} threads...') + + processing_start = time.time() for frame_processor in get_frame_processors_modules(modules.globals.frame_processors): update_status('Progressing...', frame_processor.NAME) frame_processor.process_video(modules.globals.source_path, temp_frame_paths) release_resources() + processing_time = time.time() - processing_start + fps_processing = total_frames / processing_time if processing_time > 0 else 0 + update_status(f'Frame processing completed in {processing_time:.2f}s ({fps_processing:.2f} fps)') + # handles fps + encoding_start = time.time() if modules.globals.keep_fps: update_status('Detecting fps...') fps = detect_fps(modules.globals.target_path) @@ -221,6 +252,9 @@ def start() -> None: else: update_status('Creating video with 30.0 fps...') create_video(modules.globals.target_path) + encoding_time = time.time() - encoding_start + update_status(f'Video encoding completed in {encoding_time:.2f}s') + # handle audio if modules.globals.keep_audio: if modules.globals.keep_fps: @@ -230,10 +264,13 @@ def start() -> None: restore_audio(modules.globals.target_path, modules.globals.output_path) else: move_temp(modules.globals.target_path, modules.globals.output_path) + # clean and validate clean_temp(modules.globals.target_path) + + total_time = time.time() - start_time if is_video(modules.globals.target_path): - update_status('Processing to video succeed!') + update_status(f'Processing to video succeed! Total time: {total_time:.2f}s') else: update_status('Processing to video failed!') diff --git a/modules/face_analyser.py b/modules/face_analyser.py index ef124d5..f150d35 100644 --- a/modules/face_analyser.py +++ b/modules/face_analyser.py @@ -2,6 +2,7 @@ import os import shutil from typing import Any import insightface +import threading import cv2 import numpy as np @@ -13,14 +14,22 @@ from modules.utilities import get_temp_directory_path, create_temp, extract_fram from pathlib import Path FACE_ANALYSER = None +FACE_ANALYSER_LOCK = threading.Lock() def get_face_analyser() -> Any: + """Get face analyser with thread-safe initialization.""" global FACE_ANALYSER if FACE_ANALYSER is None: - FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=modules.globals.execution_providers) - FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640)) + with FACE_ANALYSER_LOCK: + # Double-check after acquiring lock + if FACE_ANALYSER is None: + FACE_ANALYSER = insightface.app.FaceAnalysis( + name='buffalo_l', + providers=modules.globals.execution_providers + ) + FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640)) return FACE_ANALYSER diff --git a/modules/metadata.py b/modules/metadata.py index e566d42..e538c7b 100644 --- a/modules/metadata.py +++ b/modules/metadata.py @@ -1,3 +1,3 @@ name = 'Deep-Live-Cam' -version = '2.0.1c' -edition = 'GitHub Edition' +version = '2.0.2c' +edition = 'GitHub Edition' \ No newline at end of file diff --git a/modules/processors/frame/core.py b/modules/processors/frame/core.py index 6d99fd1..2208109 100644 --- a/modules/processors/frame/core.py +++ b/modules/processors/frame/core.py @@ -67,13 +67,29 @@ def set_frame_processors_modules_from_ui(frame_processors: List[str]) -> None: print(f"Warning: Error removing frame processor {frame_processor}: {e}") def multi_process_frame(source_path: str, temp_frame_paths: List[str], process_frames: Callable[[str, List[str], Any], None], progress: Any = None) -> None: - with ThreadPoolExecutor(max_workers=modules.globals.execution_threads) as executor: - futures = [] - for path in temp_frame_paths: - future = executor.submit(process_frames, source_path, [path], progress) - futures.append(future) - for future in futures: - future.result() + """Process frames in parallel with optimized batching and memory management.""" + max_workers = modules.globals.execution_threads + + # Determine optimal batch size based on available memory and thread count + # Process frames in batches to avoid memory overflow + batch_size = max(1, min(32, len(temp_frame_paths) // max(1, max_workers))) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Process in batches to manage memory better + for i in range(0, len(temp_frame_paths), batch_size): + batch = temp_frame_paths[i:i + batch_size] + futures = [] + + for path in batch: + future = executor.submit(process_frames, source_path, [path], progress) + futures.append(future) + + # Wait for batch to complete before starting next batch + for future in futures: + try: + future.result() + except Exception as e: + print(f"Error processing frame: {e}") def process_video(source_path: str, frame_paths: list[str], process_frames: Callable[[str, List[str], Any], None]) -> None: diff --git a/modules/processors/frame/face_swapper.py b/modules/processors/frame/face_swapper.py index bf8c787..427507f 100644 --- a/modules/processors/frame/face_swapper.py +++ b/modules/processors/frame/face_swapper.py @@ -113,6 +113,7 @@ def get_face_swapper() -> Any: def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame: + """Optimized face swapping with better memory management and performance.""" face_swapper = get_face_swapper() if face_swapper is None: update_status("Face swapper model not loaded or failed to load. Skipping swap.", NAME) @@ -127,9 +128,8 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame: # Apply the face swap with optimized memory handling try: - # For Apple Silicon, use optimized inference - if IS_APPLE_SILICON: - # Ensure contiguous memory layout for better performance + # Ensure contiguous memory layout for better performance on all platforms + if not temp_frame.flags['C_CONTIGUOUS']: temp_frame = np.ascontiguousarray(temp_frame) swapped_frame_raw = face_swapper.get( @@ -532,6 +532,7 @@ def process_frames( ) -> None: """ Processes a list of frame paths (typically for video). + Optimized with better memory management and caching. Iterates through frames, applies the appropriate swapping logic based on globals, and saves the result back to the frame path. Handles multi-threading via caller. """ @@ -555,6 +556,8 @@ def process_frames( if source_face is None: # Specific message for no face detected after successful read update_status(f"Warning: Successfully read source image {source_path}, but no face was detected. Swaps will be skipped.", NAME) + # Free memory immediately after extracting face + del source_img except Exception as e: # Print the specific exception caught import traceback @@ -582,6 +585,7 @@ def process_frames( # update_status(f"Processing frame {i+1}/{total_frames}: {os.path.basename(temp_frame_path)}", NAME) # Optional Debug # Read the target frame + temp_frame = None try: temp_frame = cv2.imread(temp_frame_path) if temp_frame is None: @@ -616,13 +620,19 @@ def process_frames( # traceback.print_exc() result_frame = temp_frame # Use original frame on processing error - # Write the result back to the same frame path + # Write the result back to the same frame path with optimized compression try: - write_success = cv2.imwrite(temp_frame_path, result_frame) + # Use PNG compression level 3 (faster) instead of default 9 + write_success = cv2.imwrite(temp_frame_path, result_frame, [cv2.IMWRITE_PNG_COMPRESSION, 3]) if not write_success: print(f"{NAME}: Error: Failed to write processed frame to {temp_frame_path}") except Exception as write_e: print(f"{NAME}: Error writing frame {temp_frame_path}: {write_e}") + + # Free memory immediately after processing + del temp_frame + if result_frame is not None: + del result_frame # Update progress bar if progress: diff --git a/modules/utilities.py b/modules/utilities.py index fe17997..dbb58c7 100644 --- a/modules/utilities.py +++ b/modules/utilities.py @@ -21,13 +21,14 @@ if platform.system().lower() == "darwin": def run_ffmpeg(args: List[str]) -> bool: + """Run ffmpeg with hardware acceleration and optimized settings.""" commands = [ "ffmpeg", "-hide_banner", - "-hwaccel", - "auto", - "-loglevel", - modules.globals.log_level, + "-hwaccel", "auto", # Auto-detect hardware acceleration + "-hwaccel_output_format", "auto", # Use hardware format when possible + "-threads", str(modules.globals.execution_threads or 0), # 0 = auto-detect optimal thread count + "-loglevel", modules.globals.log_level, ] commands.extend(args) try: @@ -61,39 +62,131 @@ def detect_fps(target_path: str) -> float: def extract_frames(target_path: str) -> None: + """Extract frames with hardware acceleration and optimized settings.""" temp_directory_path = get_temp_directory_path(target_path) + + # Use hardware-accelerated decoding and optimized pixel format run_ffmpeg( [ - "-i", - target_path, - "-pix_fmt", - "rgb24", + "-i", target_path, + "-vf", "format=rgb24", # Use video filter for format conversion (faster) + "-vsync", "0", # Prevent frame duplication + "-frame_pts", "1", # Preserve frame timing os.path.join(temp_directory_path, "%04d.png"), ] ) def create_video(target_path: str, fps: float = 30.0) -> None: + """Create video with hardware-accelerated encoding and optimized settings.""" temp_output_path = get_temp_output_path(target_path) temp_directory_path = get_temp_directory_path(target_path) - run_ffmpeg( - [ - "-r", - str(fps), - "-i", - os.path.join(temp_directory_path, "%04d.png"), - "-c:v", - modules.globals.video_encoder, - "-crf", - str(modules.globals.video_quality), - "-pix_fmt", - "yuv420p", - "-vf", - "colorspace=bt709:iall=bt601-6-625:fast=1", + + # Determine optimal encoder based on available hardware + encoder = modules.globals.video_encoder + encoder_options = [] + + # GPU-accelerated encoding options + if 'CUDAExecutionProvider' in modules.globals.execution_providers: + # NVIDIA GPU encoding + if encoder == 'libx264': + encoder = 'h264_nvenc' + encoder_options = [ + "-preset", "p7", # Highest quality preset for NVENC + "-tune", "hq", # High quality tuning + "-rc", "vbr", # Variable bitrate + "-cq", str(modules.globals.video_quality), # Quality level + "-b:v", "0", # Let CQ control bitrate + "-multipass", "fullres", # Two-pass encoding for better quality + ] + elif encoder == 'libx265': + encoder = 'hevc_nvenc' + encoder_options = [ + "-preset", "p7", + "-tune", "hq", + "-rc", "vbr", + "-cq", str(modules.globals.video_quality), + "-b:v", "0", + ] + elif 'DmlExecutionProvider' in modules.globals.execution_providers: + # AMD/Intel GPU encoding (DirectML on Windows) + if encoder == 'libx264': + # Try AMD AMF encoder + encoder = 'h264_amf' + encoder_options = [ + "-quality", "quality", # Quality mode + "-rc", "vbr_latency", + "-qp_i", str(modules.globals.video_quality), + "-qp_p", str(modules.globals.video_quality), + ] + elif encoder == 'libx265': + encoder = 'hevc_amf' + encoder_options = [ + "-quality", "quality", + "-rc", "vbr_latency", + "-qp_i", str(modules.globals.video_quality), + "-qp_p", str(modules.globals.video_quality), + ] + else: + # CPU encoding with optimized settings + if encoder == 'libx264': + encoder_options = [ + "-preset", "medium", # Balance speed/quality + "-crf", str(modules.globals.video_quality), + "-tune", "film", # Optimize for film content + ] + elif encoder == 'libx265': + encoder_options = [ + "-preset", "medium", + "-crf", str(modules.globals.video_quality), + "-x265-params", "log-level=error", + ] + elif encoder == 'libvpx-vp9': + encoder_options = [ + "-crf", str(modules.globals.video_quality), + "-b:v", "0", # Constant quality mode + "-cpu-used", "2", # Speed vs quality (0-5, lower=slower/better) + ] + + # Build ffmpeg command + ffmpeg_args = [ + "-r", str(fps), + "-i", os.path.join(temp_directory_path, "%04d.png"), + "-c:v", encoder, + ] + + # Add encoder-specific options + ffmpeg_args.extend(encoder_options) + + # Add common options + ffmpeg_args.extend([ + "-pix_fmt", "yuv420p", + "-movflags", "+faststart", # Enable fast start for web playback + "-vf", "colorspace=bt709:iall=bt601-6-625:fast=1", + "-y", + temp_output_path, + ]) + + # Try with hardware encoder first, fallback to software if it fails + success = run_ffmpeg(ffmpeg_args) + + if not success and encoder in ['h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf']: + # Fallback to software encoding + print(f"Hardware encoding with {encoder} failed, falling back to software encoding...") + fallback_encoder = 'libx264' if 'h264' in encoder else 'libx265' + ffmpeg_args_fallback = [ + "-r", str(fps), + "-i", os.path.join(temp_directory_path, "%04d.png"), + "-c:v", fallback_encoder, + "-preset", "medium", + "-crf", str(modules.globals.video_quality), + "-pix_fmt", "yuv420p", + "-movflags", "+faststart", + "-vf", "colorspace=bt709:iall=bt601-6-625:fast=1", "-y", temp_output_path, ] - ) + run_ffmpeg(ffmpeg_args_fallback) def restore_audio(target_path: str, output_path: str) -> None: