From ca6cba9311078cb6a32aa69f133a1be6c4e92aab Mon Sep 17 00:00:00 2001 From: Lauri Gates Date: Sun, 22 Feb 2026 18:41:47 +0200 Subject: [PATCH] perf(ui): decouple face detection from swap in live webcam pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a dedicated detection thread that runs face detection continuously on the latest captured frame and publishes results to a shared dict. The processing/swap thread reads cached detection results instead of running detection inline, so it never blocks on the 15-30ms detection cost. Architecture change: 2 threads → 3 threads Before: capture → [detect + swap] → display After: capture → swap (uses cached detections) → display ↘ detect (async, writes to shared cache) ↗ Also replaces the blocking while/ROOT.update() display loop with ROOT.after()-based scheduling, which avoids Tk event loop re-entrancy issues and UI freezes. Closes #1664 --- modules/ui.py | 127 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 74681bc..e776608 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -997,28 +997,48 @@ def _capture_thread_func(cap, capture_queue, stop_event): pass -# How often to run full face detection. On intermediate frames the last -# detected face positions are reused, which significantly reduces the -# per-frame cost of the processing thread. -DETECT_EVERY_N = 2 +def _detection_thread_func(latest_frame_holder, detection_result, detection_lock, stop_event): + """Detection thread: continuously runs face detection on the latest + captured frame and stores results in detection_result under detection_lock. 
+ + This decouples face detection (~15-30ms) from face swapping (~5-10ms) + so the swap loop never blocks on detection, significantly improving + live mode FPS.""" + while not stop_event.is_set(): + with detection_lock: + frame = latest_frame_holder[0] + + if frame is None: + time.sleep(0.005) + continue + + if modules.globals.many_faces: + many = get_many_faces(frame) + with detection_lock: + detection_result['target_face'] = None + detection_result['many_faces'] = many + else: + face = get_one_face(frame) + with detection_lock: + detection_result['target_face'] = face + detection_result['many_faces'] = None -def _processing_thread_func(capture_queue, processed_queue, stop_event): - """Processing thread: takes raw frames from capture_queue, applies face - processing, and puts results into processed_queue. Drops processed frames - when the output queue is full so the UI always gets the latest result. +def _processing_thread_func(capture_queue, processed_queue, stop_event, + latest_frame_holder, detection_result, detection_lock): + """Processing thread: takes raw frames from capture_queue, reads the + latest detection result from the shared detection_result dict, applies + face swap/enhancement, and puts results into processed_queue. 
- Uses DETECT_EVERY_N to skip expensive face detection on intermediate - frames, reusing cached face positions instead.""" + Face detection runs concurrently in _detection_thread_func — this thread + only reads cached results so it never blocks on detection.""" frame_processors = get_frame_processors_modules(modules.globals.frame_processors) source_image = None + last_source_path = None prev_time = time.time() fps_update_interval = 0.5 frame_count = 0 fps = 0 - proc_frame_index = 0 - cached_target_face = None # cached single-face result - cached_many_faces = None # cached many-faces result while not stop_event.is_set(): try: @@ -1026,32 +1046,31 @@ def _processing_thread_func(capture_queue, processed_queue, stop_event): except queue.Empty: continue - temp_frame = frame.copy() - run_detection = (proc_frame_index % DETECT_EVERY_N == 0) - proc_frame_index += 1 + temp_frame = frame if modules.globals.live_mirror: temp_frame = gpu_flip(temp_frame, 1) + # Publish the mirrored frame for the detection thread to pick up + with detection_lock: + latest_frame_holder[0] = temp_frame + if not modules.globals.map_faces: - if source_image is None and modules.globals.source_path: + if modules.globals.source_path and modules.globals.source_path != last_source_path: + last_source_path = modules.globals.source_path source_image = get_one_face(cv2.imread(modules.globals.source_path)) - # Update face detection cache on detection frames - if run_detection or (cached_target_face is None and cached_many_faces is None): - if modules.globals.many_faces: - cached_many_faces = get_many_faces(temp_frame) - cached_target_face = None - else: - cached_target_face = get_one_face(temp_frame) - cached_many_faces = None + # Read latest detection results (brief lock to avoid blocking detection thread) + with detection_lock: + cached_target_face = detection_result.get('target_face') + cached_many_faces = detection_result.get('many_faces') for frame_processor in frame_processors: if frame_processor.NAME 
== "DLC.FACE-ENHANCER": if modules.globals.fp_ui["face_enhancer"]: temp_frame = frame_processor.process_frame(None, temp_frame) elif frame_processor.NAME == "DLC.FACE-SWAPPER": - # Use cached face positions to skip redundant detection + # Use cached face positions from detection thread swapped_bboxes = [] if modules.globals.many_faces and cached_many_faces: result = temp_frame.copy() @@ -1127,6 +1146,14 @@ def create_webcam_preview(camera_index: int): processed_queue = queue.Queue(maxsize=2) stop_event = threading.Event() + # Shared state for the detection pipeline. + # latest_frame_holder[0] is the latest frame published by the processing thread (mirrored when live_mirror is set) for the detection + # thread; detection_result holds the last detected faces for the + # processing thread to read. Both are guarded by detection_lock. + detection_lock = threading.Lock() + latest_frame_holder = [None] + detection_result = {'target_face': None, 'many_faces': None} + # Start capture thread cap_thread = threading.Thread( target=_capture_thread_func, @@ -1135,21 +1162,45 @@ def create_webcam_preview(camera_index: int): ) cap_thread.start() + # Start detection thread — runs face detection asynchronously so the + # processing/swap thread never blocks on it + det_thread = threading.Thread( + target=_detection_thread_func, + args=(latest_frame_holder, detection_result, detection_lock, stop_event), + daemon=True, + ) + det_thread.start() + # Start processing thread proc_thread = threading.Thread( target=_processing_thread_func, - args=(capture_queue, processed_queue, stop_event), + args=(capture_queue, processed_queue, stop_event, + latest_frame_holder, detection_result, detection_lock), daemon=True, ) proc_thread.start() - # Main (UI) thread: pull processed frames and update the display - while not stop_event.is_set(): + # Cleanup helper called from the display loop when preview closes + def _cleanup(): + stop_event.set() + cap_thread.join(timeout=2.0) + det_thread.join(timeout=2.0) + proc_thread.join(timeout=2.0) + cap.release() + 
PREVIEW.withdraw() + + # Non-blocking display loop using ROOT.after() — avoids blocking the + # Tk event loop which could cause UI freezes or re-entrancy issues + def _display_next_frame(): + if stop_event.is_set() or PREVIEW.state() == "withdrawn": + _cleanup() + return + try: - temp_frame = processed_queue.get(timeout=0.03) + temp_frame = processed_queue.get_nowait() except queue.Empty: - ROOT.update() - continue + ROOT.after(16, _display_next_frame) + return if modules.globals.live_resizable: temp_frame = fit_image_to_size( @@ -1167,17 +1218,11 @@ def create_webcam_preview(camera_index: int): ) image = ctk.CTkImage(image, size=image.size) preview_label.configure(image=image) - ROOT.update() - if PREVIEW.state() == "withdrawn": - break + ROOT.after(16, _display_next_frame) - # Signal threads to stop and wait for them - stop_event.set() - cap_thread.join(timeout=2.0) - proc_thread.join(timeout=2.0) - cap.release() - PREVIEW.withdraw() + # Kick off the non-blocking display loop + ROOT.after(0, _display_next_frame) def create_source_target_popup_for_webcam(