Implement face stabilizer

2026-08-01 22:58:35 +02:00 · 2026-06-09 22:09:35 +05:30
parent 87f561b1e5
commit c70dd25f83
9 changed files with 68 additions and 10 deletions
@@ -43,9 +43,9 @@ def detect_local_camera_ids(id_start : int, id_end : int) -> List[int]:
 	local_camera_ids = []

 	for camera_id in range(id_start, id_end):
-		cv2.utils.logging.setLogLevel(0)
+		cv2.setLogLevel(0)
 		camera_capture = get_local_camera_capture(camera_id)
-		cv2.utils.logging.setLogLevel(3)
+		cv2.setLogLevel(3)

 		if camera_capture and camera_capture.isOpened():
 			local_camera_ids.append(camera_id)
@@ -6,6 +6,7 @@ import facefusion.choices
 from facefusion import state_manager
 from facefusion.common_helper import get_first
 from facefusion.face_analyser import get_many_faces, get_one_face, get_static_faces
+from facefusion.face_stabilizer import stabilize_faces
 from facefusion.types import Face, FaceSelectorOrder, Gender, Race, Score, VisionFrame


@@ -13,6 +14,9 @@ def select_faces(reference_vision_frame : VisionFrame, source_vision_frames : Li
 	source_faces = get_static_faces(source_vision_frames)
 	target_faces = get_many_faces([ target_vision_frame ])

+	if state_manager.get_item('face_stabilizer'):
+		target_faces = stabilize_faces(target_faces, (target_vision_frame.shape[1], target_vision_frame.shape[0]), state_manager.get_item('face_stabilizer_smoothness'))
+
 	if state_manager.get_item('face_selector_mode') == 'many':
 		return sort_and_filter_faces(source_faces, target_faces)

@@ -262,6 +262,8 @@ LOCALES : Locales =\
 			'trim_frame_slider': 'TRIM FRAME',
 			'ui_workflow': 'UI WORKFLOW',
 			'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY',
+			'webcam_face_stabilizer_checkbox': 'WEBCAM FACE STABILIZER',
+			'webcam_face_stabilizer_smoothness_slider': 'WEBCAM FACE STABILIZER SMOOTHNESS',
 			'webcam_fps_slider': 'WEBCAM FPS',
 			'webcam_image': 'WEBCAM',
 			'webcam_device_id_dropdown': 'WEBCAM DEVICE ID',
@@ -10,6 +10,7 @@ from facefusion.face_analyser import scale_face
 from facefusion.face_helper import warp_face_by_face_landmark_5
 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask
 from facefusion.face_selector import select_faces
+from facefusion.face_stabilizer import get_face_stabilizer_trail
 from facefusion.filesystem import in_directory, is_image, is_video, same_file_extension
 from facefusion.processors.modules.face_debugger import choices as face_debugger_choices
 from facefusion.processors.modules.face_debugger.types import FaceDebuggerInputs
@@ -68,9 +69,12 @@ def post_process() -> None:
 		face_recognizer.clear_inference_pool()


-def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
+def debug_face(face_key : str, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
 	face_debugger_items = state_manager.get_item('face_debugger_items')

+	if 'face-stabilizer' in face_debugger_items:
+		temp_vision_frame = draw_face_stabilizer(face_key, temp_vision_frame)
+
 	if 'bounding-box' in face_debugger_items:
 		temp_vision_frame = draw_bounding_box(target_face, temp_vision_frame)

@@ -114,6 +118,23 @@ def draw_bounding_box(target_face : Face, temp_vision_frame : VisionFrame) -> Vi
 	return temp_vision_frame


+def draw_face_stabilizer(face_key : str, temp_vision_frame : VisionFrame) -> VisionFrame:
+	# draw the raw and the smoothed stabilizer trail recorded for face_key as open polylines
+	temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame)
+	trail = get_face_stabilizer_trail(face_key)
+
+	if not trail:
+		return temp_vision_frame
+
+	# trail points are multiplied by the frame size, presumably normalized coordinates (confirm in face_stabilizer)
+	frame_size = temp_vision_frame.shape[1], temp_vision_frame.shape[0]
+	raw_points, smoothed_points = ((numpy.array(points) * frame_size).astype(numpy.int32) for points in zip(*trail))
+	cv2.polylines(temp_vision_frame, [ raw_points ], False, (0, 0, 255), 1)
+	cv2.polylines(temp_vision_frame, [ smoothed_points ], False, (0, 255, 0), 1)
+
+	return temp_vision_frame
+
+
 def draw_face_mask(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
 	crop_masks = []
 	temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame)
@@ -239,8 +260,8 @@ def process_frame(inputs : FaceDebuggerInputs) -> ProcessorOutputs:
 	target_faces = select_faces(reference_vision_frame, source_vision_frames, target_vision_frame)

 	if target_faces:
-		for target_face in target_faces:
+		for face_index, target_face in enumerate(target_faces):
 			target_face = scale_face(target_face, target_vision_frame, temp_vision_frame)
-			temp_vision_frame = debug_face(target_face, temp_vision_frame)
+			temp_vision_frame = debug_face(str(face_index), target_face, temp_vision_frame)

 	return temp_vision_frame, temp_vision_mask
@@ -11,4 +11,4 @@ FaceDebuggerInputs = TypedDict('FaceDebuggerInputs',
 	'temp_vision_mask' : Mask
 })

-FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask']
+FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-stabilizer']
@@ -11,6 +11,7 @@ from tqdm import tqdm
 from facefusion import ffmpeg_builder, logger, state_manager, translator
 from facefusion.audio import create_empty_audio_frame
 from facefusion.content_analyser import analyse_stream
+from facefusion.face_stabilizer import clear_face_stabilizer
 from facefusion.ffmpeg import open_ffmpeg
 from facefusion.filesystem import is_directory
 from facefusion.processors.core import get_processors_modules
@@ -21,6 +22,7 @@ from facefusion.vision import extract_vision_mask, read_static_images
 def multi_process_capture(camera_capture : cv2.VideoCapture, camera_fps : Fps) -> Iterator[VisionFrame]:
 	capture_deque : Deque[VisionFrame] = deque()
 	source_vision_frames = read_static_images(state_manager.get_item('source_paths'))
+	clear_face_stabilizer()

 	with tqdm(desc = translator.get('streaming'), unit = 'frame', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
 		with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor:
@@ -1,5 +1,5 @@
 from collections import namedtuple
-from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict
+from typing import Any, Callable, Deque, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict

 import cv2
 import numpy
@@ -76,6 +76,10 @@ Distance : TypeAlias = NDArray[Any]
 Matrix : TypeAlias = NDArray[Any]
 Anchors : TypeAlias = NDArray[Any]
 Translation : TypeAlias = NDArray[Any]
+FaceStabilizerState : TypeAlias = Tuple[Points, Points, float]
+FaceStabilizerStore : TypeAlias = Dict[str, FaceStabilizerState]
+FaceStabilizerTrailEntry : TypeAlias = Deque[Tuple[Points, Points]]
+FaceStabilizerTrail : TypeAlias = Dict[str, FaceStabilizerTrailEntry]

 AudioBuffer : TypeAlias = bytes
 Audio : TypeAlias = NDArray[Any]
@@ -298,6 +302,8 @@ StateKey = Literal\
 	'face_mask_regions',
 	'face_mask_blur',
 	'face_mask_padding',
+	'face_stabilizer',
+	'face_stabilizer_smoothness',
 	'voice_extractor_model',
 	'trim_frame_start',
 	'trim_frame_end',
@@ -367,6 +373,8 @@ State = TypedDict('State',
 	'face_mask_regions' : List[FaceMaskRegion],
 	'face_mask_blur' : float,
 	'face_mask_padding' : Padding,
+	'face_stabilizer' : bool,
+	'face_stabilizer_smoothness' : float,
 	'voice_extractor_model' : VoiceExtractorModel,
 	'trim_frame_start' : int,
 	'trim_frame_end' : int,
@@ -53,10 +53,12 @@ def listen() -> None:
 	webcam_mode_radio = get_ui_component('webcam_mode_radio')
 	webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown')
 	webcam_fps_slider = get_ui_component('webcam_fps_slider')
+	webcam_face_stabilizer_checkbox = get_ui_component('webcam_face_stabilizer_checkbox')
+	webcam_face_stabilizer_smoothness_slider = get_ui_component('webcam_face_stabilizer_smoothness_slider')

-	if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider:
+	if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider and webcam_face_stabilizer_checkbox and webcam_face_stabilizer_smoothness_slider:
 		WEBCAM_START_BUTTON.click(pre_start, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ])
-		start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE)
+		start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider, webcam_face_stabilizer_checkbox, webcam_face_stabilizer_smoothness_slider ], outputs = WEBCAM_IMAGE)
 		start_event.then(pre_stop)
 		WEBCAM_STOP_BUTTON.click(stop, cancels = start_event, outputs = WEBCAM_IMAGE)
 		WEBCAM_STOP_BUTTON.click(pre_stop, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ])
@@ -82,8 +84,10 @@ def pre_stop() -> Tuple[gradio.File, gradio.Image, gradio.Button, gradio.Button]
 	return gradio.File(visible = True), gradio.Image(visible = False), gradio.Button(visible = True), gradio.Button(visible = False)


-def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Iterator[VisionFrame]:
+def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps, webcam_face_stabilizer : bool, webcam_face_stabilizer_smoothness : float) -> Iterator[VisionFrame]:
 	state_manager.init_item('face_selector_mode', 'one')
+	state_manager.init_item('face_stabilizer', webcam_face_stabilizer)
+	state_manager.init_item('face_stabilizer_smoothness', webcam_face_stabilizer_smoothness)
 	state_manager.sync_state()

 	camera_capture = get_local_camera_capture(webcam_device_id)
@@ -12,6 +12,8 @@ WEBCAM_DEVICE_ID_DROPDOWN : Optional[gradio.Dropdown] = None
 WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None
 WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
 WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None
+WEBCAM_FACE_STABILIZER_CHECKBOX : Optional[gradio.Checkbox] = None
+WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER : Optional[gradio.Slider] = None


 def render() -> None:
@@ -19,6 +21,8 @@ def render() -> None:
 	global WEBCAM_MODE_RADIO
 	global WEBCAM_RESOLUTION_DROPDOWN
 	global WEBCAM_FPS_SLIDER
+	global WEBCAM_FACE_STABILIZER_CHECKBOX
+	global WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER

 	local_camera_ids = detect_local_camera_ids(0, 10) or [ 'none' ] #type:ignore[list-item]
 	WEBCAM_DEVICE_ID_DROPDOWN = gradio.Dropdown(
@@ -43,7 +47,20 @@ def render() -> None:
 		minimum = 1,
 		maximum = 30
 	)
+	WEBCAM_FACE_STABILIZER_CHECKBOX = gradio.Checkbox(
+		label = translator.get('uis.webcam_face_stabilizer_checkbox'),
+		value = False
+	)
+	WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER = gradio.Slider(
+		label = translator.get('uis.webcam_face_stabilizer_smoothness_slider'),
+		value = 1.0,
+		step = 0.05,
+		minimum = 0.0,
+		maximum = 1.0
+	)
 	register_ui_component('webcam_device_id_dropdown', WEBCAM_DEVICE_ID_DROPDOWN)
 	register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO)
 	register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN)
 	register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER)
+	register_ui_component('webcam_face_stabilizer_checkbox', WEBCAM_FACE_STABILIZER_CHECKBOX)
+	register_ui_component('webcam_face_stabilizer_smoothness_slider', WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER)