mirror of
https://github.com/facefusion/facefusion.git
synced 2026-04-30 13:27:50 +02:00
feat/yoloface (#334)
* added yolov8 to face_detector (#323) * added yolov8 to face_detector * added yolov8 to face_detector * Initial cleanup and renaming * Update README * refactored detect_with_yoloface (#329) * refactored detect_with_yoloface * apply review * Change order again * Restore working code * modified code (#330) * refactored detect_with_yoloface * apply review * use temp_frame in detect_with_yoloface * reorder * modified * reorder models * Tiny cleanup --------- Co-authored-by: tamoharu <133945583+tamoharu@users.noreply.github.com>
This commit is contained in:
@@ -30,73 +30,74 @@ Run the command:
|
||||
python run.py [options]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-s SOURCE_PATHS, --source SOURCE_PATHS select a source image
|
||||
-t TARGET_PATH, --target TARGET_PATH select a target image or video
|
||||
-o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
|
||||
-v, --version show program's version number and exit
|
||||
-h, --help show this help message and exit
|
||||
-s SOURCE_PATHS, --source SOURCE_PATHS select a source image
|
||||
-t TARGET_PATH, --target TARGET_PATH select a target image or video
|
||||
-o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
|
||||
-v, --version show program's version number and exit
|
||||
|
||||
misc:
|
||||
--skip-download omit automated downloads and lookups
|
||||
--headless run the program in headless mode
|
||||
--log-level {error,warn,info,debug} choose from the available log levels
|
||||
--skip-download omit automated downloads and lookups
|
||||
--headless run the program in headless mode
|
||||
--log-level {error,warn,info,debug} choose from the available log levels
|
||||
|
||||
execution:
|
||||
--execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] choose from the available execution providers (choices: cpu, ...)
|
||||
--execution-thread-count [1-128] specify the number of execution threads
|
||||
--execution-queue-count [1-32] specify the number of execution queries
|
||||
--execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] choose from the available execution providers (choices: cpu, ...)
|
||||
--execution-thread-count [1-128] specify the number of execution threads
|
||||
--execution-queue-count [1-32] specify the number of execution queries
|
||||
|
||||
memory:
|
||||
--video-memory-strategy {strict,moderate,tolerant} specify strategy to handle the video memory
|
||||
--system-memory-limit [0-128] specify the amount (gb) of system memory to be used
|
||||
--video-memory-strategy {strict,moderate,tolerant} specify strategy to handle the video memory
|
||||
--system-memory-limit [0-128] specify the amount (gb) of system memory to be used
|
||||
|
||||
face analyser:
|
||||
--face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order used for the face analyser
|
||||
--face-analyser-age {child,teen,adult,senior} specify the age used for the face analyser
|
||||
--face-analyser-gender {male,female} specify the gender used for the face analyser
|
||||
--face-detector-model {retinaface,yunet} specify the model used for the face detector
|
||||
--face-detector-size {160x160,320x320,480x480,512x512,640x640,768x768,960x960,1024x1024} specify the size threshold used for the face detector
|
||||
--face-detector-score [0.0-1.0] specify the score threshold used for the face detector
|
||||
--face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order used for the face analyser
|
||||
--face-analyser-age {child,teen,adult,senior} specify the age used for the face analyser
|
||||
--face-analyser-gender {male,female} specify the gender used for the face analyser
|
||||
--face-detector-model {retinaface,yoloface,yunet} specify the model used for the face detector
|
||||
--face-detector-size {160x160,320x320,480x480,512x512,640x640,768x768,960x960,1024x1024} specify the size threshold used for the face detector
|
||||
--face-detector-score [0.0-1.0] specify the score threshold used for the face detector
|
||||
|
||||
face selector:
|
||||
--face-selector-mode {reference,one,many} specify the mode for the face selector
|
||||
--reference-face-position REFERENCE_FACE_POSITION specify the position of the reference face
|
||||
--reference-face-distance [0.0-1.5] specify the distance between the reference face and the target face
|
||||
--reference-frame-number REFERENCE_FRAME_NUMBER specify the number of the reference frame
|
||||
--face-selector-mode {reference,one,many} specify the mode for the face selector
|
||||
--reference-face-position REFERENCE_FACE_POSITION specify the position of the reference face
|
||||
--reference-face-distance [0.0-1.5] specify the distance between the reference face and the target face
|
||||
--reference-frame-number REFERENCE_FRAME_NUMBER specify the number of the reference frame
|
||||
|
||||
face mask:
|
||||
--face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] choose from the available face mask types (choices: box, occlusion, region)
|
||||
--face-mask-blur [0.0-1.0] specify the blur amount for face mask
|
||||
--face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] specify the face mask padding (top, right, bottom, left) in percent
|
||||
--face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose from the available face mask regions (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
|
||||
--face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] choose from the available face mask types (choices: box, occlusion, region)
|
||||
--face-mask-blur [0.0-1.0] specify the blur amount for face mask
|
||||
--face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] specify the face mask padding (top, right, bottom, left) in percent
|
||||
--face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose from the available face mask regions (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
|
||||
|
||||
frame extraction:
|
||||
--trim-frame-start TRIM_FRAME_START specify the start frame for extraction
|
||||
--trim-frame-end TRIM_FRAME_END specify the end frame for extraction
|
||||
--temp-frame-format {jpg,png,bmp} specify the image format used for frame extraction
|
||||
--temp-frame-quality [0-100] specify the image quality used for frame extraction
|
||||
--keep-temp retain temporary frames after processing
|
||||
--trim-frame-start TRIM_FRAME_START specify the start frame for extraction
|
||||
--trim-frame-end TRIM_FRAME_END specify the end frame for extraction
|
||||
--temp-frame-format {jpg,png,bmp} specify the image format used for frame extraction
|
||||
--temp-frame-quality [0-100] specify the image quality used for frame extraction
|
||||
--keep-temp retain temporary frames after processing
|
||||
|
||||
output creation:
|
||||
--output-image-quality [0-100] specify the quality used for the output image
|
||||
--output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder used for the output video
|
||||
--output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} specify the preset used for the output video
|
||||
--output-video-quality [0-100] specify the quality used for the output video
|
||||
--output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the resolution used for the output video
|
||||
--output-video-fps OUTPUT_VIDEO_FPS specify the frames per second (fps) used for the output video
|
||||
--skip-audio omit audio from the target
|
||||
--output-image-quality [0-100] specify the quality used for the output image
|
||||
--output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder used for the output video
|
||||
--output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} specify the preset used for the output video
|
||||
--output-video-quality [0-100] specify the quality used for the output video
|
||||
--output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the resolution used for the output video
|
||||
--output-video-fps OUTPUT_VIDEO_FPS specify the frames per second (fps) used for the output video
|
||||
--skip-audio omit audio from the target
|
||||
|
||||
frame processors:
|
||||
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] choose from the available frame processors (choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, ...)
|
||||
--face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] specify the face debugger items (choices: bbox, kps, face-mask, score)
|
||||
--face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer} choose the model for the frame processor
|
||||
--face-enhancer-blend [0-100] specify the blend amount for the frame processor
|
||||
--face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial} choose the model for the frame processor
|
||||
--frame-enhancer-model {real_esrgan_x2plus,real_esrgan_x4plus,real_esrnet_x4plus} choose the model for the frame processor
|
||||
--frame-enhancer-blend [0-100] specify the blend amount for the frame processor
|
||||
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] choose from the available frame processors (choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, ...)
|
||||
--face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] specify the face debugger items (choices: bbox, kps, face-mask, score)
|
||||
--face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer_plus_plus}
|
||||
choose the model for the frame processor
|
||||
--face-enhancer-blend [0-100] specify the blend amount for the frame processor
|
||||
--face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial} choose the model for the frame processor
|
||||
--frame-enhancer-model {real_esrgan_x2plus,real_esrgan_x4plus,real_esrnet_x4plus} choose the model for the frame processor
|
||||
--frame-enhancer-blend [0-100] specify the blend amount for the frame processor
|
||||
|
||||
uis:
|
||||
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] choose from the available ui layouts (choices: benchmark, webcam, default, ...)
|
||||
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] choose from the available ui layouts (choices: benchmark, webcam, default, ...)
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ video_memory_strategies : List[VideoMemoryStrategy] = [ 'strict', 'moderate', 't
|
||||
face_analyser_orders : List[FaceAnalyserOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ]
|
||||
face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior' ]
|
||||
face_analyser_genders : List[FaceAnalyserGender] = [ 'male', 'female' ]
|
||||
face_detector_models : List[str] = [ 'retinaface', 'yunet' ]
|
||||
face_detector_models : List[str] = [ 'retinaface', 'yoloface', 'yunet' ]
|
||||
face_detector_sizes : List[str] = [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ]
|
||||
face_selector_modes : List[FaceSelectorMode] = [ 'reference', 'one', 'many' ]
|
||||
face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ]
|
||||
|
||||
@@ -23,6 +23,11 @@ MODELS : ModelSet =\
|
||||
'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/retinaface_10g.onnx',
|
||||
'path': resolve_relative_path('../.assets/models/retinaface_10g.onnx')
|
||||
},
|
||||
'face_detector_yoloface':
|
||||
{
|
||||
'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx',
|
||||
'path': resolve_relative_path('../.assets/models/yoloface_8n.onnx')
|
||||
},
|
||||
'face_detector_yunet':
|
||||
{
|
||||
'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yunet_2023mar.onnx',
|
||||
@@ -58,6 +63,8 @@ def get_face_analyser() -> Any:
|
||||
if FACE_ANALYSER is None:
|
||||
if facefusion.globals.face_detector_model == 'retinaface':
|
||||
face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
|
||||
if facefusion.globals.face_detector_model == 'yoloface':
|
||||
face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
|
||||
if facefusion.globals.face_detector_model == 'yunet':
|
||||
face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0))
|
||||
if facefusion.globals.face_recognizer_model == 'arcface_blendswap':
|
||||
@@ -88,6 +95,7 @@ def pre_check() -> bool:
|
||||
model_urls =\
|
||||
[
|
||||
MODELS.get('face_detector_retinaface').get('url'),
|
||||
MODELS.get('face_detector_yoloface').get('url'),
|
||||
MODELS.get('face_detector_yunet').get('url'),
|
||||
MODELS.get('face_recognizer_arcface_inswapper').get('url'),
|
||||
MODELS.get('face_recognizer_arcface_simswap').get('url'),
|
||||
@@ -104,10 +112,13 @@ def extract_faces(frame : Frame) -> List[Face]:
|
||||
temp_frame_height, temp_frame_width, _ = temp_frame.shape
|
||||
ratio_height = frame_height / temp_frame_height
|
||||
ratio_width = frame_width / temp_frame_width
|
||||
if facefusion.globals.face_detector_model == 'yoloface':
|
||||
bbox_list, kps_list, score_list = detect_with_yoloface(temp_frame, temp_frame_height, temp_frame_width, face_detector_height, face_detector_width, ratio_height, ratio_width)
|
||||
return create_faces(frame, bbox_list, kps_list, score_list)
|
||||
if facefusion.globals.face_detector_model == 'retinaface':
|
||||
bbox_list, kps_list, score_list = detect_with_retinaface(temp_frame, temp_frame_height, temp_frame_width, face_detector_height, face_detector_width, ratio_height, ratio_width)
|
||||
return create_faces(frame, bbox_list, kps_list, score_list)
|
||||
elif facefusion.globals.face_detector_model == 'yunet':
|
||||
if facefusion.globals.face_detector_model == 'yunet':
|
||||
bbox_list, kps_list, score_list = detect_with_yunet(temp_frame, temp_frame_height, temp_frame_width, ratio_height, ratio_width)
|
||||
return create_faces(frame, bbox_list, kps_list, score_list)
|
||||
return []
|
||||
@@ -153,6 +164,48 @@ def detect_with_retinaface(temp_frame : Frame, temp_frame_height : int, temp_fra
|
||||
return bbox_list, kps_list, score_list
|
||||
|
||||
|
||||
def detect_with_yoloface(temp_frame : Frame, temp_frame_height : int, temp_frame_width : int, face_detector_height : int, face_detector_width : int, ratio_height : float, ratio_width : float) -> Tuple[List[Bbox], List[Kps], List[Score]]:
	"""
	Detect faces in temp_frame with the YOLOv8-face ONNX model.

	:param temp_frame: frame already resized to fit inside the detector input
	:param temp_frame_height: height of temp_frame
	:param temp_frame_width: width of temp_frame
	:param face_detector_height: detector input height
	:param face_detector_width: detector input width
	:param ratio_height: factor to map detections back to the original frame height
	:param ratio_width: factor to map detections back to the original frame width
	:return: lists of bounding boxes, keypoints and scores in original frame coordinates
	"""
	face_detector = get_face_analyser().get('face_detector')
	bbox_list = []
	kps_list = []
	score_list = []
	# letterbox the frame to the detector input size, padding with the conventional gray value (114)
	offset_width = (face_detector_width - temp_frame_width) / 2
	offset_height = (face_detector_height - temp_frame_height) / 2
	temp_frame = cv2.copyMakeBorder(temp_frame, round(offset_height - 0.1), round(offset_height + 0.1), round(offset_width - 0.1), round(offset_width + 0.1), cv2.BORDER_CONSTANT, value = (114, 114, 114))
	# normalize to [0, 1], BGR -> RGB, HWC -> CHW, add batch dimension
	temp_frame = temp_frame.astype(numpy.float32) / 255.0
	temp_frame = temp_frame[..., ::-1].transpose(2, 0, 1)
	temp_frame = numpy.expand_dims(temp_frame, axis = 0)
	temp_frame = numpy.ascontiguousarray(temp_frame)
	with THREAD_SEMAPHORE:
		detections = face_detector.run(None,
		{
			face_detector.get_inputs()[0].name: temp_frame
		})
	detections = numpy.squeeze(detections).T
	# columns 0-3: bbox (center x, center y, width, height), column 4: score, remainder: keypoints in strides of 3
	bbox_raw, score_raw, kps_raw = numpy.split(detections, [ 4, 5 ], axis = 1)
	keep_indices = numpy.where(score_raw > facefusion.globals.face_detector_score)[0]
	# use size instead of any(): keep_indices holds row indices, and any() is False when the only kept row is index 0
	if keep_indices.size > 0:
		bbox_raw, kps_raw, score_raw = bbox_raw[keep_indices], kps_raw[keep_indices], score_raw[keep_indices]
		for bbox in bbox_raw:
			# convert center-based bbox to corner coordinates, undo the letterbox offset and the resize ratio
			bbox_list.append(numpy.array(
			[
				(bbox[0] - bbox[2] / 2 - offset_width) * ratio_width,
				(bbox[1] - bbox[3] / 2 - offset_height) * ratio_height,
				(bbox[0] + bbox[2] / 2 - offset_width) * ratio_width,
				(bbox[1] + bbox[3] / 2 - offset_height) * ratio_height
			]))
		# map keypoint x (every 3rd value from 0) and y (every 3rd value from 1) back to original coordinates
		kps_raw[:, 0::3] = (kps_raw[:, 0::3] - offset_width) * ratio_width
		kps_raw[:, 1::3] = (kps_raw[:, 1::3] - offset_height) * ratio_height
		for kps in kps_raw:
			indexes = numpy.arange(0, len(kps), 3)
			temp_kps = []
			for index in indexes:
				# take (x, y) per keypoint; the third value per stride is dropped
				temp_kps.append([kps[index], kps[index + 1]])
			kps_list.append(numpy.array(temp_kps))
		score_list = score_raw.ravel().tolist()
	return bbox_list, kps_list, score_list
|
||||
|
||||
|
||||
def detect_with_yunet(temp_frame : Frame, temp_frame_height : int, temp_frame_width : int, ratio_height : float, ratio_width : float) -> Tuple[List[Bbox], List[Kps], List[Score]]:
|
||||
face_detector = get_face_analyser().get('face_detector')
|
||||
face_detector.setInputSize((temp_frame_width, temp_frame_height))
|
||||
|
||||
@@ -42,7 +42,7 @@ FaceSelectorMode = Literal['reference', 'one', 'many']
|
||||
FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best']
|
||||
FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior']
|
||||
FaceAnalyserGender = Literal['male', 'female']
|
||||
FaceDetectorModel = Literal['retinaface', 'yunet']
|
||||
FaceDetectorModel = Literal['retinaface', 'yoloface', 'yunet']
|
||||
FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap']
|
||||
FaceMaskType = Literal['box', 'occlusion', 'region']
|
||||
FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip']
|
||||
|
||||
Reference in New Issue
Block a user