From eac2ad23077e697d798f97342deaf912bcdaab37 Mon Sep 17 00:00:00 2001 From: ozp3 Date: Sat, 28 Mar 2026 13:09:20 +0300 Subject: [PATCH] feat: AMD DML optimization - GPU face detection, detection throttle, pre-load fix --- DeepLiveCam.lnk | Bin 0 -> 1110 bytes modules/core.py | 7 +++-- modules/face_analyser.py | 10 +++++-- modules/globals.py | 3 ++ modules/processors/frame/face_swapper.py | 10 +++---- modules/ui.py | 34 ++++++++++++++++++----- run-dml.bat | 5 ++++ 7 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 DeepLiveCam.lnk create mode 100644 run-dml.bat diff --git a/DeepLiveCam.lnk b/DeepLiveCam.lnk new file mode 100644 index 0000000000000000000000000000000000000000..f89232025d1cbdbf4ad98bdc64529fa8744bf813 GIT binary patch literal 1110 zcmbtTPe@a79RED?Fhgr2jasv25Lxub7Ia9E2Aer((!fTeUg(=!xZJ7wxK0SpX z8c>h5c{9Le3hQz|;(W4D`CbAelvVhkBgJS3qBuwyxTINHoX8EQ)mSR&it2d;DpkZI zkSaeWrNoMmsxwu%f*gjCX66|4CgEaU9eKvs35{?NGNo{pkaL=_5gUjZ-|9UpIeZ;j z*ofy0E#%YuabqZ-$&y!%uT?jRCf3qwRBz*)9rT7_aOxbXh7fHU None: for frame_processor in get_frame_processors_modules(modules.globals.frame_processors): if not frame_processor.pre_check(): return + # Pre-load face analyser in main thread before GUI starts + #from modules.face_analyser import get_face_analyser + #get_face_analyser() limit_resources() if modules.globals.headless: start() else: window = ui.init(start, destroy, modules.globals.lang) - window.mainloop() + window.mainloop() \ No newline at end of file diff --git a/modules/face_analyser.py b/modules/face_analyser.py index 696398b..5860880 100644 --- a/modules/face_analyser.py +++ b/modules/face_analyser.py @@ -35,7 +35,9 @@ def get_face_analyser() -> Any: def get_one_face(frame: Frame) -> Any: - face = get_face_analyser().get(frame) + import modules.globals as g + with g.dml_lock: + face = get_face_analyser().get(frame) try: return min(face, key=lambda x: x.bbox[0]) except ValueError: @@ -43,8 +45,10 @@ def get_one_face(frame: Frame) -> Any: def get_many_faces(frame: Frame) -> Any: + import modules.globals as g try: - return get_face_analyser().get(frame) + with g.dml_lock: + return get_face_analyser().get(frame) except IndexError: return None @@ -196,4 +200,4 @@ def dump_faces(centroids: Any, frame_face_embeddings: list): if temp_frame[int(y_min):int(y_max), int(x_min):int(x_max)].size > 0: cv2.imwrite(temp_directory_path + f"/{i}/{frame['frame']}_{j}.png", temp_frame[int(y_min):int(y_max), int(x_min):int(x_max)]) - j += 1 \ No newline at end of file + j += 1 diff --git a/modules/globals.py b/modules/globals.py index aabc19a..3d88931 100644 --- a/modules/globals.py +++ b/modules/globals.py @@ -71,3 +71,6 @@ interpolation_weight: float = 0 # Blend weight for current frame (0.0-1.0). Low # --- END: Added for Frame Interpolation --- # --- END OF FILE globals.py --- + +import threading +dml_lock = threading.Lock() diff --git a/modules/processors/frame/face_swapper.py b/modules/processors/frame/face_swapper.py index 04f846b..57bf508 100644 --- a/modules/processors/frame/face_swapper.py +++ b/modules/processors/frame/face_swapper.py @@ -110,7 +110,6 @@ def get_face_swapper() -> Any: )) else: providers_config.append(p) - FACE_SWAPPER = insightface.model_zoo.get_model( model_path, providers=providers_config, @@ -153,9 +152,10 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame: if not temp_frame.flags['C_CONTIGUOUS']: temp_frame = np.ascontiguousarray(temp_frame) - swapped_frame_raw = face_swapper.get( - temp_frame, target_face, source_face, paste_back=True - ) + with modules.globals.dml_lock: + swapped_frame_raw = face_swapper.get( + temp_frame, target_face, source_face, paste_back=True + ) # --- START: CRITICAL FIX FOR ORT 1.17 --- # Check the output type and range from the model @@ -1183,4 +1183,4 @@ def apply_color_transfer(source, target): # traceback.print_exc() return source - return result_bgr \ No newline at end of file + return result_bgr diff --git a/modules/ui.py b/modules/ui.py index 7143076..6041fc4 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -72,8 +72,8 @@ ROOT_WIDTH = 600 PREVIEW = None PREVIEW_MAX_HEIGHT = 700 PREVIEW_MAX_WIDTH = 1200 -PREVIEW_DEFAULT_WIDTH = 960 -PREVIEW_DEFAULT_HEIGHT = 540 +PREVIEW_DEFAULT_WIDTH = 640 +PREVIEW_DEFAULT_HEIGHT = 360 POPUP_WIDTH = 750 POPUP_HEIGHT = 810 @@ -1000,6 +1000,10 @@ def webcam_preview(root: ctk.CTk, camera_index: int): if modules.globals.source_path is None: update_status("Please select a source image first") return + from modules.processors.frame.face_swapper import get_face_swapper + from modules.face_analyser import get_face_analyser + get_face_analyser() + get_face_swapper() create_webcam_preview(camera_index) else: modules.globals.source_target_map = [] @@ -1105,7 +1109,7 @@ def _detection_thread_func(latest_frame_holder, detection_result, detection_lock frame = latest_frame_holder[0] if frame is None: - time.sleep(0.005) + time.sleep(0.2) continue if modules.globals.many_faces: @@ -1157,7 +1161,22 @@ def _processing_thread_func(capture_queue, processed_queue, stop_event, source_image = get_one_face(cv2.imread(modules.globals.source_path)) # Read latest detection results (brief lock to avoid blocking detection thread) - with detection_lock: + # Run detection inline since detection thread is disabled + # Run detection every 3 frames, reuse cached result otherwise + if not hasattr(_processing_thread_func, '_det_count'): + _processing_thread_func._det_count = 0 + _processing_thread_func._det_count += 1 + + if _processing_thread_func._det_count % 3 == 0: + if modules.globals.many_faces: + cached_target_face = None + cached_many_faces = get_many_faces(temp_frame) + detection_result['many_faces'] = cached_many_faces + else: + cached_target_face = get_one_face(temp_frame) + cached_many_faces = None + detection_result['target_face'] = cached_target_face + else: cached_target_face = detection_result.get('target_face') cached_many_faces = detection_result.get('many_faces') @@ -1275,7 +1294,7 @@ def create_webcam_preview(camera_index: int): args=(latest_frame_holder, detection_result, detection_lock, stop_event), daemon=True, ) - det_thread.start() + # det_thread.start() # Start processing thread proc_thread = threading.Thread( @@ -1316,7 +1335,7 @@ def create_webcam_preview(camera_index: int): temp_frame = fit_image_to_size( temp_frame, PREVIEW.winfo_width(), PREVIEW.winfo_height() ) - + temp_frame = temp_frame.copy() image = gpu_cvt_color(temp_frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(image) image = ImageOps.contain( @@ -1574,4 +1593,5 @@ def update_webcam_target( target_label_dict_live[button_num] = target_image else: update_pop_live_status("Face could not be detected in last upload!") - return map \ No newline at end of file + return map + diff --git a/run-dml.bat b/run-dml.bat new file mode 100644 index 0000000..68a67be --- /dev/null +++ b/run-dml.bat @@ -0,0 +1,5 @@ +@echo off +cd /d "%~dp0" +call venv\Scripts\activate +python run.py --execution-provider dml +pause