implement stabilizer

This commit is contained in:
harisreedhar
2026-06-09 22:09:35 +05:30
parent 87f561b1e5
commit c70dd25f83
9 changed files with 68 additions and 10 deletions
+2 -2
View File
@@ -43,9 +43,9 @@ def detect_local_camera_ids(id_start : int, id_end : int) -> List[int]:
local_camera_ids = []
for camera_id in range(id_start, id_end):
cv2.utils.logging.setLogLevel(0)
cv2.setLogLevel(0)
camera_capture = get_local_camera_capture(camera_id)
cv2.utils.logging.setLogLevel(3)
cv2.setLogLevel(3)
if camera_capture and camera_capture.isOpened():
local_camera_ids.append(camera_id)
+4
View File
@@ -6,6 +6,7 @@ import facefusion.choices
from facefusion import state_manager
from facefusion.common_helper import get_first
from facefusion.face_analyser import get_many_faces, get_one_face, get_static_faces
from facefusion.face_stabilizer import stabilize_faces
from facefusion.types import Face, FaceSelectorOrder, Gender, Race, Score, VisionFrame
@@ -13,6 +14,9 @@ def select_faces(reference_vision_frame : VisionFrame, source_vision_frames : Li
source_faces = get_static_faces(source_vision_frames)
target_faces = get_many_faces([ target_vision_frame ])
if state_manager.get_item('face_stabilizer'):
target_faces = stabilize_faces(target_faces, (target_vision_frame.shape[1], target_vision_frame.shape[0]), state_manager.get_item('face_stabilizer_smoothness'))
if state_manager.get_item('face_selector_mode') == 'many':
return sort_and_filter_faces(source_faces, target_faces)
+2
View File
@@ -262,6 +262,8 @@ LOCALES : Locales =\
'trim_frame_slider': 'TRIM FRAME',
'ui_workflow': 'UI WORKFLOW',
'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY',
'webcam_face_stabilizer_checkbox': 'WEBCAM FACE STABILIZER',
'webcam_face_stabilizer_smoothness_slider': 'WEBCAM FACE STABILIZER SMOOTHNESS',
'webcam_fps_slider': 'WEBCAM FPS',
'webcam_image': 'WEBCAM',
'webcam_device_id_dropdown': 'WEBCAM DEVICE ID',
@@ -10,6 +10,7 @@ from facefusion.face_analyser import scale_face
from facefusion.face_helper import warp_face_by_face_landmark_5
from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask
from facefusion.face_selector import select_faces
from facefusion.face_stabilizer import get_face_stabilizer_trail
from facefusion.filesystem import in_directory, is_image, is_video, same_file_extension
from facefusion.processors.modules.face_debugger import choices as face_debugger_choices
from facefusion.processors.modules.face_debugger.types import FaceDebuggerInputs
@@ -68,9 +69,12 @@ def post_process() -> None:
face_recognizer.clear_inference_pool()
def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
def debug_face(face_key : str, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
face_debugger_items = state_manager.get_item('face_debugger_items')
if 'face-stabilizer' in face_debugger_items:
temp_vision_frame = draw_face_stabilizer(face_key, temp_vision_frame)
if 'bounding-box' in face_debugger_items:
temp_vision_frame = draw_bounding_box(target_face, temp_vision_frame)
@@ -114,6 +118,23 @@ def draw_bounding_box(target_face : Face, temp_vision_frame : VisionFrame) -> Vi
return temp_vision_frame
def draw_face_stabilizer(face_key : str, temp_vision_frame : VisionFrame) -> VisionFrame:
	"""
	Draw the stabilizer trail stored for face_key onto the frame.

	The trail is looked up through get_face_stabilizer_trail and is iterated as
	pairs of (raw point, smoothed point). Both series are multiplied by the
	frame width and height, cast to int32 and drawn as open polylines, the raw
	series first and the smoothed series on top of it. When no trail is
	available the contiguous frame is returned without any drawing.
	"""
	temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame)
	trail = get_face_stabilizer_trail(face_key)
	frame_height, frame_width = temp_vision_frame.shape[:2]

	if not trail:
		return temp_vision_frame

	# the points are scaled by the frame size, so they are presumably normalized - TODO confirm against the stabilizer
	frame_size = [ frame_width, frame_height ]
	raw_trail, smoothed_trail = zip(*trail)
	raw_trail_points = (numpy.array(raw_trail) * frame_size).astype(numpy.int32)
	smoothed_trail_points = (numpy.array(smoothed_trail) * frame_size).astype(numpy.int32)
	trail_layers =\
	[
		(raw_trail_points, (0, 0, 255)),
		(smoothed_trail_points, (0, 255, 0))
	]

	# both series are converted before anything is drawn, then rendered in a fixed order with a line thickness of 1
	for trail_points, trail_color in trail_layers:
		cv2.polylines(temp_vision_frame, [ trail_points ], False, trail_color, 1)
	return temp_vision_frame
def draw_face_mask(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
crop_masks = []
temp_vision_frame = numpy.ascontiguousarray(temp_vision_frame)
@@ -239,8 +260,8 @@ def process_frame(inputs : FaceDebuggerInputs) -> ProcessorOutputs:
target_faces = select_faces(reference_vision_frame, source_vision_frames, target_vision_frame)
if target_faces:
for target_face in target_faces:
for face_index, target_face in enumerate(target_faces):
target_face = scale_face(target_face, target_vision_frame, temp_vision_frame)
temp_vision_frame = debug_face(target_face, temp_vision_frame)
temp_vision_frame = debug_face(str(face_index), target_face, temp_vision_frame)
return temp_vision_frame, temp_vision_mask
@@ -11,4 +11,4 @@ FaceDebuggerInputs = TypedDict('FaceDebuggerInputs',
'temp_vision_mask' : Mask
})
FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask']
FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-stabilizer']
+2
View File
@@ -11,6 +11,7 @@ from tqdm import tqdm
from facefusion import ffmpeg_builder, logger, state_manager, translator
from facefusion.audio import create_empty_audio_frame
from facefusion.content_analyser import analyse_stream
from facefusion.face_stabilizer import clear_face_stabilizer
from facefusion.ffmpeg import open_ffmpeg
from facefusion.filesystem import is_directory
from facefusion.processors.core import get_processors_modules
@@ -21,6 +22,7 @@ from facefusion.vision import extract_vision_mask, read_static_images
def multi_process_capture(camera_capture : cv2.VideoCapture, camera_fps : Fps) -> Iterator[VisionFrame]:
capture_deque : Deque[VisionFrame] = deque()
source_vision_frames = read_static_images(state_manager.get_item('source_paths'))
clear_face_stabilizer()
with tqdm(desc = translator.get('streaming'), unit = 'frame', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor:
+9 -1
View File
@@ -1,5 +1,5 @@
from collections import namedtuple
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict
from typing import Any, Callable, Deque, Dict, List, Literal, Optional, Tuple, TypeAlias, TypedDict
import cv2
import numpy
@@ -76,6 +76,10 @@ Distance : TypeAlias = NDArray[Any]
Matrix : TypeAlias = NDArray[Any]
Anchors : TypeAlias = NDArray[Any]
Translation : TypeAlias = NDArray[Any]
FaceStabilizerState : TypeAlias = Tuple[Points, Points, float]
FaceStabilizerStore : TypeAlias = Dict[str, FaceStabilizerState]
FaceStabilizerTrailEntry : TypeAlias = Deque[Tuple[Points, Points]]
FaceStabilizerTrail : TypeAlias = Dict[str, FaceStabilizerTrailEntry]
AudioBuffer : TypeAlias = bytes
Audio : TypeAlias = NDArray[Any]
@@ -298,6 +302,8 @@ StateKey = Literal\
'face_mask_regions',
'face_mask_blur',
'face_mask_padding',
'face_stabilizer',
'face_stabilizer_smoothness',
'voice_extractor_model',
'trim_frame_start',
'trim_frame_end',
@@ -367,6 +373,8 @@ State = TypedDict('State',
'face_mask_regions' : List[FaceMaskRegion],
'face_mask_blur' : float,
'face_mask_padding' : Padding,
'face_stabilizer' : bool,
'face_stabilizer_smoothness' : float,
'voice_extractor_model' : VoiceExtractorModel,
'trim_frame_start' : int,
'trim_frame_end' : int,
+7 -3
View File
@@ -53,10 +53,12 @@ def listen() -> None:
webcam_mode_radio = get_ui_component('webcam_mode_radio')
webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown')
webcam_fps_slider = get_ui_component('webcam_fps_slider')
webcam_face_stabilizer_checkbox = get_ui_component('webcam_face_stabilizer_checkbox')
webcam_face_stabilizer_smoothness_slider = get_ui_component('webcam_face_stabilizer_smoothness_slider')
if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider:
if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider and webcam_face_stabilizer_checkbox and webcam_face_stabilizer_smoothness_slider:
WEBCAM_START_BUTTON.click(pre_start, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ])
start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE)
start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider, webcam_face_stabilizer_checkbox, webcam_face_stabilizer_smoothness_slider ], outputs = WEBCAM_IMAGE)
start_event.then(pre_stop)
WEBCAM_STOP_BUTTON.click(stop, cancels = start_event, outputs = WEBCAM_IMAGE)
WEBCAM_STOP_BUTTON.click(pre_stop, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ])
@@ -82,8 +84,10 @@ def pre_stop() -> Tuple[gradio.File, gradio.Image, gradio.Button, gradio.Button]
return gradio.File(visible = True), gradio.Image(visible = False), gradio.Button(visible = True), gradio.Button(visible = False)
def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Iterator[VisionFrame]:
def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps, webcam_face_stabilizer : bool, webcam_face_stabilizer_smoothness : float) -> Iterator[VisionFrame]:
state_manager.init_item('face_selector_mode', 'one')
state_manager.init_item('face_stabilizer', webcam_face_stabilizer)
state_manager.init_item('face_stabilizer_smoothness', webcam_face_stabilizer_smoothness)
state_manager.sync_state()
camera_capture = get_local_camera_capture(webcam_device_id)
@@ -12,6 +12,8 @@ WEBCAM_DEVICE_ID_DROPDOWN : Optional[gradio.Dropdown] = None
WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None
WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None
WEBCAM_FACE_STABILIZER_CHECKBOX : Optional[gradio.Checkbox] = None
WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER : Optional[gradio.Slider] = None
def render() -> None:
@@ -19,6 +21,8 @@ def render() -> None:
global WEBCAM_MODE_RADIO
global WEBCAM_RESOLUTION_DROPDOWN
global WEBCAM_FPS_SLIDER
global WEBCAM_FACE_STABILIZER_CHECKBOX
global WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER
local_camera_ids = detect_local_camera_ids(0, 10) or [ 'none' ] #type:ignore[list-item]
WEBCAM_DEVICE_ID_DROPDOWN = gradio.Dropdown(
@@ -43,7 +47,20 @@ def render() -> None:
minimum = 1,
maximum = 30
)
WEBCAM_FACE_STABILIZER_CHECKBOX = gradio.Checkbox(
label = translator.get('uis.webcam_face_stabilizer_checkbox'),
value = False
)
WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER = gradio.Slider(
label = translator.get('uis.webcam_face_stabilizer_smoothness_slider'),
value = 1.0,
step = 0.05,
minimum = 0.0,
maximum = 1.0
)
register_ui_component('webcam_device_id_dropdown', WEBCAM_DEVICE_ID_DROPDOWN)
register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO)
register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN)
register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER)
register_ui_component('webcam_face_stabilizer_checkbox', WEBCAM_FACE_STABILIZER_CHECKBOX)
register_ui_component('webcam_face_stabilizer_smoothness_slider', WEBCAM_FACE_STABILIZER_SMOOTHNESS_SLIDER)