diff --git a/facefusion/camera_manager.py b/facefusion/camera_manager.py index 04194329..a27e5fc0 100644 --- a/facefusion/camera_manager.py +++ b/facefusion/camera_manager.py @@ -43,9 +43,9 @@ def detect_local_camera_ids(id_start : int, id_end : int) -> List[int]: local_camera_ids = [] for camera_id in range(id_start, id_end): - cv2.utils.logging.setLogLevel(0) + cv2.setLogLevel(0) camera_capture = get_local_camera_capture(camera_id) - cv2.utils.logging.setLogLevel(3) + cv2.setLogLevel(3) if camera_capture and camera_capture.isOpened(): local_camera_ids.append(camera_id) diff --git a/facefusion/face_selector.py b/facefusion/face_selector.py index eeafdd2d..12225e46 100644 --- a/facefusion/face_selector.py +++ b/facefusion/face_selector.py @@ -6,6 +6,7 @@ import facefusion.choices from facefusion import state_manager from facefusion.common_helper import get_first from facefusion.face_analyser import get_many_faces, get_one_face, get_static_faces +from facefusion.face_stabilizer import stabilize_faces from facefusion.types import Face, FaceSelectorOrder, Gender, Race, Score, VisionFrame @@ -13,6 +14,9 @@ def select_faces(reference_vision_frame : VisionFrame, source_vision_frames : Li source_faces = get_static_faces(source_vision_frames) target_faces = get_many_faces([ target_vision_frame ]) + if state_manager.get_item('face_stabilizer'): + target_faces = stabilize_faces(target_faces, (target_vision_frame.shape[1], target_vision_frame.shape[0]), state_manager.get_item('face_stabilizer_smoothness')) + if state_manager.get_item('face_selector_mode') == 'many': return sort_and_filter_faces(source_faces, target_faces) diff --git a/facefusion/locales.py b/facefusion/locales.py index de729569..612d59ea 100644 --- a/facefusion/locales.py +++ b/facefusion/locales.py @@ -262,6 +262,8 @@ LOCALES : Locales =\ 'trim_frame_slider': 'TRIM FRAME', 'ui_workflow': 'UI WORKFLOW', 'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY', + 'webcam_face_stabilizer_checkbox': 'WEBCAM FACE STABILIZER', + 'webcam_face_stabilizer_smoothness_slider': 'WEBCAM FACE STABILIZER SMOOTHNESS', 'webcam_fps_slider': 'WEBCAM FPS', 'webcam_image': 'WEBCAM', 'webcam_device_id_dropdown': 'WEBCAM DEVICE ID', diff --git a/facefusion/processors/modules/face_debugger/core.py b/facefusion/processors/modules/face_debugger/core.py index 61f8567c..c875614f 100755 --- a/facefusion/processors/modules/face_debugger/core.py +++ b/facefusion/processors/modules/face_debugger/core.py @@ -10,6 +10,7 @@ from facefusion.face_analyser import scale_face from facefusion.face_helper import warp_face_by_face_landmark_5 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask from facefusion.face_selector import select_faces +from facefusion.face_stabilizer import get_face_stabilizer_trail from facefusion.filesystem import in_directory, is_image, is_video, same_file_extension from facefusion.processors.modules.face_debugger import choices as face_debugger_choices from facefusion.processors.modules.face_debugger.types import FaceDebuggerInputs @@ -68,9 +69,12 @@ def post_process() -> None: face_recognizer.clear_inference_pool() -def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: +def debug_face(face_key : str, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: face_debugger_items = state_manager.get_item('face_debugger_items') + if 'face-stabilizer' in face_debugger_items: + temp_vision_frame = draw_face_stabilizer(face_key, temp_vision_frame) + if 'bounding-box' in face_debugger_items: temp_vision_frame = draw_bounding_box(target_face, temp_vision_frame) @@ -114,6 +118,23 @@ def draw_bounding_box(target_face : Face, temp_vision_frame : VisionFrame) -> Vi return temp_vision_frame +def draw_face_stabilizer(face_key : str, temp_vision_frame : VisionFrame) -> VisionFrame: + temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame) + face_stabilizer_trail = get_face_stabilizer_trail(face_key) + frame_height, frame_width = temp_vision_frame.shape[:2] + line_scale = 1 + raw_color = 0, 0, 255 + smoothed_color = 0, 255, 0 + + if face_stabilizer_trail: + raw_points = (numpy.array([ raw_point for raw_point, _ in face_stabilizer_trail ]) * [ frame_width, frame_height ]).astype(numpy.int32) + smoothed_points = (numpy.array([ smoothed_point for _, smoothed_point in face_stabilizer_trail ]) * [ frame_width, frame_height ]).astype(numpy.int32) + cv2.polylines(temp_vision_frame, [ raw_points ], False, raw_color, line_scale) + cv2.polylines(temp_vision_frame, [ smoothed_points ], False, smoothed_color, line_scale) + + return temp_vision_frame + + def draw_face_mask(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: crop_masks = [] temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame) @@ -239,8 +260,8 @@ def process_frame(inputs : FaceDebuggerInputs) -> ProcessorOutputs: target_faces = select_faces(reference_vision_frame, source_vision_frames, target_vision_frame) if target_faces: - for target_face in target_faces: + for face_index, target_face in enumerate(target_faces): target_face = scale_face(target_face, target_vision_frame, temp_vision_frame) - temp_vision_frame = debug_face(target_face, temp_vision_frame) + temp_vision_frame = debug_face(str(face_index), target_face, temp_vision_frame) return temp_vision_frame, temp_vision_mask diff --git a/facefusion/processors/modules/face_debugger/types.py b/facefusion/processors/modules/face_debugger/types.py index fcd9963f..5e6e4677 100644 --- a/facefusion/processors/modules/face_debugger/types.py +++ b/facefusion/processors/modules/face_debugger/types.py @@ -11,4 +11,4 @@ FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', 'temp_vision_mask' : Mask }) -FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask'] +FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-stabilizer'] diff --git a/facefusion/streamer.py b/facefusion/streamer.py index 80c5b583..3a0233f2 100644 --- a/facefusion/streamer.py +++ b/facefusion/streamer.py @@ -11,6 +11,7 @@ from tqdm import tqdm from facefusion import ffmpeg_builder, logger, state_manager, translator from facefusion.audio import create_empty_audio_frame from facefusion.content_analyser import analyse_stream +from facefusion.face_stabilizer import clear_face_stabilizer from facefusion.ffmpeg import open_ffmpeg from facefusion.filesystem import is_directory from facefusion.processors.core import get_processors_modules @@ -21,6 +22,7 @@ from facefusion.vision import extract_vision_mask, read_static_images def multi_process_capture(camera_capture : cv2.VideoCapture, camera_fps : Fps) -> Iterator[VisionFrame]: capture_deque : Deque[VisionFrame] = deque() source_vision_frames = read_static_images(state_manager.get_item('source_paths')) + clear_face_stabilizer() with tqdm(desc = translator.get('streaming'), unit = 'frame', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress: with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor: diff --git a/facefusion/types.py b/facefusion/types.py index aa866cf2..e3587327 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -1,5 +1,5 @@ from collections import namedtuple -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict +from typing import Any, Callable, Deque, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict import cv2 import numpy @@ -76,6 +76,10 @@ Distance : TypeAlias = NDArray[Any] Matrix : TypeAlias = NDArray[Any] Anchors : TypeAlias = NDArray[Any] Translation : TypeAlias = NDArray[Any] +FaceStabilizerState : TypeAlias = Tuple[Points, Points, float] +FaceStabilizerStore : TypeAlias = Dict[str, FaceStabilizerState] +FaceStabilizerTrailEntry : TypeAlias = Deque[Tuple[Points, Points]] +FaceStabilizerTrail : TypeAlias = Dict[str, FaceStabilizerTrailEntry] AudioBuffer : TypeAlias = bytes Audio : TypeAlias = NDArray[Any] @@ -298,6 +302,8 @@ StateKey = Literal\ 'face_mask_regions', 'face_mask_blur', 'face_mask_padding', + 'face_stabilizer', + 'face_stabilizer_smoothness', 'voice_extractor_model', 'trim_frame_start', 'trim_frame_end', @@ -367,6 +373,8 @@ State = TypedDict('State', 'face_mask_regions' : List[FaceMaskRegion], 'face_mask_blur' : float, 'face_mask_padding' : Padding, + 'face_stabilizer' : bool, + 'face_stabilizer_smoothness' : float, 'voice_extractor_model' : VoiceExtractorModel, 'trim_frame_start' : int, 'trim_frame_end' : int, diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index d04fde65..40deeddd 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -53,10 +53,12 @@ def listen() -> None: webcam_mode_radio = get_ui_component('webcam_mode_radio') webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown') webcam_fps_slider = get_ui_component('webcam_fps_slider') + webcam_face_stabilizer_checkbox = get_ui_component('webcam_face_stabilizer_checkbox') + webcam_face_stabilizer_smoothness_slider = get_ui_component('webcam_face_stabilizer_smoothness_slider') - if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: + if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider and webcam_face_stabilizer_checkbox and webcam_face_stabilizer_smoothness_slider: WEBCAM_START_BUTTON.click(pre_start, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ]) - start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) + start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider, webcam_face_stabilizer_checkbox, webcam_face_stabilizer_smoothness_slider ], outputs = WEBCAM_IMAGE) start_event.then(pre_stop) WEBCAM_STOP_BUTTON.click(stop, cancels = start_event, outputs = WEBCAM_IMAGE) WEBCAM_STOP_BUTTON.click(pre_stop, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ]) @@ -82,8 +84,10 @@ def pre_stop() -> Tuple[gradio.File, gradio.Image, gradio.Button, gradio.Button] return gradio.File(visible = True), gradio.Image(visible = False), gradio.Button(visible = True), gradio.Button(visible = False) -def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Iterator[VisionFrame]: +def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps, webcam_face_stabilizer : bool, webcam_face_stabilizer_smoothness : float) -> Iterator[VisionFrame]: state_manager.init_item('face_selector_mode', 'one') + state_manager.init_item('face_stabilizer', webcam_face_stabilizer) + state_manager.init_item('face_stabilizer_smoothness', webcam_face_stabilizer_smoothness) state_manager.sync_state() camera_capture = get_local_camera_capture(webcam_device_id) diff --git a/facefusion/uis/components/webcam_options.py b/facefusion/uis/components/webcam_options.py index e3a8bf7f..7d9f4527 100644 --- a/facefusion/uis/components/webcam_options.py +++ b/facefusion/uis/components/webcam_options.py @@ -12,6 +12,8 @@ WEBCAM_DEVICE_ID_DROPDOWN : Optional[gradio.Dropdown] = None WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None +WEBCAM_FACE_STABILIZER_CHECKBOX : Optional[gradio.Checkbox] = None +WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER : Optional[gradio.Slider] = None def render() -> None: @@ -19,6 +21,8 @@ def render() -> None: global WEBCAM_MODE_RADIO global WEBCAM_RESOLUTION_DROPDOWN global WEBCAM_FPS_SLIDER + global WEBCAM_FACE_STABILIZER_CHECKBOX + global WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER local_camera_ids = detect_local_camera_ids(0, 10) or [ 'none' ] #type:ignore[list-item] WEBCAM_DEVICE_ID_DROPDOWN = gradio.Dropdown( @@ -43,7 +47,20 @@ def render() -> None: minimum = 1, maximum = 30 ) + WEBCAM_FACE_STABILIZER_CHECKBOX = gradio.Checkbox( + label = translator.get('uis.webcam_face_stabilizer_checkbox'), + value = False + ) + WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER = gradio.Slider( + label = translator.get('uis.webcam_face_stabilizer_smoothness_slider'), + value = 1.0, + step = 0.05, + minimum = 0.0, + maximum = 1.0 + ) register_ui_component('webcam_device_id_dropdown', WEBCAM_DEVICE_ID_DROPDOWN) register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO) register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN) register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER) + register_ui_component('webcam_face_stabilizer_checkbox', WEBCAM_FACE_STABILIZER_CHECKBOX) + register_ui_component('webcam_face_stabilizer_smoothness_slider', WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER)