commit 47d3520c19eb606ff040b4e0b7bbb02b107243c5 Author: vnyash Date: Thu Jun 13 07:56:13 2024 +0530 deepfuze diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..6508a31 Binary files /dev/null and b/.DS_Store differ diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b88a39d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +indent_size = 4 +indent_style = tab +trim_trailing_whitespace = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bf70b07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.assets +.idea +.vscode +__pycache__ diff --git a/README.md b/README.md new file mode 100644 index 0000000..22cf9b5 --- /dev/null +++ b/README.md @@ -0,0 +1,144 @@ +# DeepFuze + +![DeepFuze Lipsync](https://user-images.githubusercontent.com/4397546/222490039-b1f6156b-bf00-405b-9fda-0c9a9156f991.gif) + + +## Overview + +DeepFuze is a state-of-the-art deep learning tool that seamlessly integrates with [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to revolutionize facial transformations, lipsyncing, video generation, voice cloning, face swapping, and lipsync translation. Leveraging advanced algorithms, DeepFuze enables users to combine audio and video with unparalleled realism, ensuring perfectly synchronized facial movements. This innovative solution is ideal for content creators, animators, developers, and anyone seeking to elevate their video editing projects with sophisticated AI-driven features. + + +[![DeepFuze Lipsync](https://github.com/SamKhoze/ComfyUI-DeepFuze/blob/main/imgs/DeepFuze_Lipsync.jpg)](https://www.youtube.com/watch?v=9WbvlOK_BlI "DeepFuze Lipsync") + +[![IMAGE ALT TEXT HERE](https://github.com/SamKhoze/ComfyUI-DeepFuze/blob/main/imgs/DeepFuze_Lipsync_02.jpg)](https://www.youtube.com/watch?v=1c5TK3zTKr8) + +--- + +## Installation + +### Prerequisites for Voice Cloning and Lipsyncing + +Below are the two ComfyUI repositories required to load video and audio. Install them into your `custom_nodes` folder: + +1. Clone the repositories: + ```bash + cd custom_nodes + git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git + git clone https://github.com/a1lazydog/ComfyUI-AudioScheduler.git + ``` + +### Running the Model and Installation + +2. Clone this repository into the `custom_nodes` folder and install requirements: + ```bash + git clone https://github.com/SamKhoze/CompfyUI-DeepFuze.git + cd CompfyUI-DeepFuze + pip3 install -r requirements.txt + ``` + +3. Download models from the links below, or download all models at once via [DeepFuze Models](https://drive.google.com/drive/folders/1dyu81WAP7_us8-loHjOXZzBJETNeJYJk?usp=sharing). +---- + +### Windows Native + +- Make sure `ffmpeg` is on your `%PATH%`; follow [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) tutorial to install `ffmpeg`, or install it with Scoop. + +---- + +### For Mac users, set this environment variable before running +This method has been tested on M1 and M3 Macs. + +``` +export PYTORCH_ENABLE_MPS_FALLBACK=1 +``` +### macOS also needs the original dlib installed +``` +pip install dlib +``` +--- + +## DeepFuze Lipsync + +This node generates a lipsynced video from video or image input and a WAV audio file. + +**Input Types:** +- `images`: Extracted frame images as PyTorch tensors (see the note on their layout below). +- `audio`: An instance of loaded audio data. +- `mata_batch`: Load batch numbers via the Meta Batch Manager node.
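+ +ComfyUI passes `images` to this node as a single float32 PyTorch tensor of shape `(frames, height, width, channels)` with pixel values in `[0, 1]`. The snippet below is only an illustration of that layout (the frame count and size are made-up values, not a DeepFuze API): + +```python +import torch + +# a batch of 8 RGB frames at 512x512 in ComfyUI's IMAGE layout: (frames, height, width, channels) +images = torch.rand(8, 512, 512, 3, dtype=torch.float32) + +# pixel values stay inside [0, 1] +assert images.min() >= 0.0 and images.max() <= 1.0 +print(images.shape)  # torch.Size([8, 512, 512, 3]) +```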
+ +**Output Types:** +- `IMAGES`: Extracted frame images as PyTorch tensors. +- `frame_count`: The output frame count as an int. +- `audio`: Output audio. +- `video_info`: Output video metadata. + +**DeepFuze Lipsync Features:** +- `enhancer`: Optionally improves the quality of the generated face with a face restoration network (GFPGAN or RestoreFormer). +- `frame_enhancer`: Optionally enhances the whole video frame. +- `face_mask_padding_left`: Padding to the left of the face while lipsyncing. +- `face_mask_padding_right`: Padding to the right of the face while lipsyncing. +- `face_mask_padding_bottom`: Padding below the face while lipsyncing. +- `face_mask_padding_top`: Padding above the face while lipsyncing. +- `device`: [cpu,gpu] +- `trim_frame_start`: The number of frames to remove from the start. +- `trim_frame_end`: The number of frames to remove from the end. +- `save_outpou`: If True, the output is saved. + +![Lipsyncing Node example](https://github.com/SamKhoze/ComfyUI-DeepFuze/blob/main/examples/node.jpeg) + +### DeepFuze_TTS + +**Languages:** + +**DeepFuze_TTS voice cloning supports 17 languages: English (en), Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Turkish (tr), Russian (ru), Dutch (nl), Czech (cs), Arabic (ar), Chinese (zh-cn), Japanese (ja), Hungarian (hu), Korean (ko), and Hindi (hi).** + +This node clones any voice from typed text input. For best results, the reference audio file should be 10-15 seconds long and contain little background noise. + +**Input Types:** +- `audio`: An instance of loaded audio data. +- `text`: Text to generate the cloned voice audio. + +**Output Types:** +- `audio`: An instance of loaded audio data. + +![TTS Node example](https://github.com/SamKhoze/ComfyUI-DeepFuze/blob/main/imgs/DeepFuze_TTS.jpg) + + +**Basic Integration** + +![BasicWorkspace](https://github.com/SamKhoze/ComfyUI-DeepFuze/blob/main/imgs/BasicWorkspace.jpg) + +--- + +## Example of How to Use DeepFuze Programmatically + +```python +from deepfuze import DeepFuze + +# Initialize the DeepFuze instance +deepfuze = DeepFuze() + +# Load video and audio files +deepfuze.load_video('path/to/video.mp4') +deepfuze.load_audio('path/to/audio.mp3') +deepfuze.load_checkpoint('path/to/checkpoint_path') + +# Set parameters (optional) +deepfuze.set_parameters(sync_level=5, transform_intensity=3) + +# Generate lipsynced video +output_path = deepfuze.generate(output='path/to/output.mp4') + +print(f"Lipsynced video saved at {output_path}") +``` + +# Acknowledgements + +This repository could not have been completed without the contributions from [SadTalker](https://github.com/OpenTalker/SadTalker/tree/main), [Facexlib](https://github.com/xinntao/facexlib), [GFPGAN](https://github.com/TencentARC/GFPGAN), [GPEN](https://github.com/yangxy/GPEN), [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN), [TTS](https://github.com/coqui-ai/TTS/tree/dev), [SSD](https://pytorch.org/hub/nvidia_deeplearningexamples_ssd/), and [wav2lip](https://github.com/Rudrabha/Wav2Lip). + +1. Please carefully read and comply with the open-source license applicable to this code and these models before using them. +2. Please carefully read and comply with the intellectual property declaration applicable to this code and these models before using them. +3. This open-source code runs completely offline and does not collect any personal information or other data.
If you use this code to provide services to end users and collect related data, please take the compliance measures required by applicable laws and regulations (such as publishing a privacy policy and adopting necessary data security strategies). If the collected data involves personal information, user consent must be obtained (where applicable). +4. It is prohibited to use this open-source code for activities that harm the legitimate rights and interests of others (including but not limited to fraud, deception, and infringement of others' portrait or reputation rights), or for other behaviors that violate applicable laws and regulations or go against social ethics and good customs (including providing incorrect or false information, or terrorist, child/minor pornographic, or violent material). Otherwise, you may be held legally liable. + +The DeepFuze code is developed by Dr. Sam Khoze and his team. Feel free to use the DeepFuze code for personal, research, academic, and non-commercial purposes. You can create videos with this tool, but please make sure to follow local laws and use it responsibly. The developers will not be responsible for any misuse of the tool by users. For commercial use, please contact us at info@cogidigm.com. diff --git a/__init__.py b/__init__.py new file mode 100755 index 0000000..727763a --- /dev/null +++ b/__init__.py @@ -0,0 +1,6 @@ +from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS +import folder_paths + + +WEB_DIRECTORY = "./web" +__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"] diff --git a/audio_playback.py b/audio_playback.py new file mode 100644 index 0000000..825a6e1 --- /dev/null +++ b/audio_playback.py @@ -0,0 +1,20 @@ +import sounddevice + +class PlayBackAudio: + + @classmethod + def INPUT_TYPES(cls): + return { + "required":{ + "audio": ("AUDIO",) + } + } + OUTPUT_NODE = True + RETURN_NAMES = () + RETURN_TYPES = () + CATEGORY = "DeepFuze (Advanced)" + FUNCTION = "play_audio" + + def play_audio(self, audio): + sounddevice.play(audio.audio_data, audio.sample_rate) + return () \ No newline at end of file diff --git a/deepfuze/.DS_Store b/deepfuze/.DS_Store new file mode 100644 index 0000000..f0a5cfa Binary files /dev/null and b/deepfuze/.DS_Store differ diff --git a/deepfuze/__init__.py b/deepfuze/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deepfuze/audio.py b/deepfuze/audio.py new file mode 100644 index 0000000..30c62b5 --- /dev/null +++ b/deepfuze/audio.py @@ -0,0 +1,137 @@ +from typing import Optional, Any, List +from functools import lru_cache +import numpy +import scipy + +from deepfuze.filesystem import is_audio +from deepfuze.ffmpeg import read_audio_buffer +from deepfuze.typing import Fps, Audio, AudioFrame, Spectrogram, MelFilterBank +from deepfuze.voice_extractor import batch_extract_voice + + +@lru_cache(maxsize = 128) +def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_audio(audio_path, fps) + + +def read_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 48000 + channel_total = 2 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = prepare_audio(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None + + +@lru_cache(maxsize = 128) +def
read_static_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_voice(audio_path, fps) + + +def read_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 48000 + channel_total = 2 + chunk_size = 1024 * 240 + step_size = 1024 * 180 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = batch_extract_voice(audio, chunk_size, step_size) + audio = prepare_voice(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None + + +def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: + if is_audio(audio_path): + audio_frames = read_static_audio(audio_path, fps) + if frame_number in range(len(audio_frames)): + return audio_frames[frame_number] + return None + + +def get_voice_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: + if is_audio(audio_path): + voice_frames = read_static_voice(audio_path, fps) + if frame_number in range(len(voice_frames)): + return voice_frames[frame_number] + return None + + +def create_empty_audio_frame() -> AudioFrame: + mel_filter_total = 80 + step_size = 16 + audio_frame = numpy.zeros((mel_filter_total, step_size)).astype(numpy.int16) + return audio_frame + + +def prepare_audio(audio : numpy.ndarray[Any, Any]) -> Audio: + if audio.ndim > 1: + audio = numpy.mean(audio, axis = 1) + audio = audio / numpy.max(numpy.abs(audio), axis = 0) + audio = scipy.signal.lfilter([ 1.0, -0.97 ], [ 1.0 ], audio) + return audio + + +def prepare_voice(audio : numpy.ndarray[Any, Any]) -> Audio: + sample_rate = 48000 + resample_rate = 16000 + + audio = scipy.signal.resample(audio, int(len(audio) * resample_rate / sample_rate)) + audio = prepare_audio(audio) + return audio + + +def convert_hertz_to_mel(hertz : float) -> float: + return 2595 * numpy.log10(1 + hertz / 700) + + +def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, Any]: + return 700 * (10 ** (mel / 2595) - 1) + + +def create_mel_filter_bank() -> MelFilterBank: + mel_filter_total = 80 + mel_bin_total = 800 + sample_rate = 16000 + min_frequency = 55.0 + max_frequency = 7600.0 + mel_filter_bank = numpy.zeros((mel_filter_total, mel_bin_total // 2 + 1)) + mel_frequency_range = numpy.linspace(convert_hertz_to_mel(min_frequency), convert_hertz_to_mel(max_frequency), mel_filter_total + 2) + indices = numpy.floor((mel_bin_total + 1) * convert_mel_to_hertz(mel_frequency_range) / sample_rate).astype(numpy.int16) + + for index in range(mel_filter_total): + start = indices[index] + end = indices[index + 1] + mel_filter_bank[index, start:end] = scipy.signal.windows.triang(end - start) + return mel_filter_bank + + +def create_spectrogram(audio : Audio) -> Spectrogram: + mel_bin_total = 800 + mel_bin_overlap = 600 + mel_filter_bank = create_mel_filter_bank() + spectrogram = scipy.signal.stft(audio, nperseg = mel_bin_total, nfft = mel_bin_total, noverlap = mel_bin_overlap)[2] + spectrogram = numpy.dot(mel_filter_bank, numpy.abs(spectrogram)) + return spectrogram + + +def extract_audio_frames(spectrogram : Spectrogram, fps : Fps) -> List[AudioFrame]: + mel_filter_total = 80 + step_size = 16 + audio_frames = [] + indices = numpy.arange(0, spectrogram.shape[1], mel_filter_total / fps).astype(numpy.int16) + indices = indices[indices >= step_size] + + for index in indices: + start 
= max(0, index - step_size) + audio_frames.append(spectrogram[:, start:index]) + return audio_frames diff --git a/deepfuze/choices.py b/deepfuze/choices.py new file mode 100755 index 0000000..a063960 --- /dev/null +++ b/deepfuze/choices.py @@ -0,0 +1,37 @@ +from typing import List, Dict + +from deepfuze.typing import VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceDetectorModel, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder, OutputVideoPreset +from deepfuze.common_helper import create_int_range, create_float_range + +video_memory_strategies : List[VideoMemoryStrategy] = [ 'strict', 'moderate', 'tolerant' ] +face_analyser_orders : List[FaceAnalyserOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ] +face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior' ] +face_analyser_genders : List[FaceAnalyserGender] = [ 'female', 'male' ] +face_detector_set : Dict[FaceDetectorModel, List[str]] =\ +{ + 'many': [ '640x640' ], + 'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'scrfd': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'yoloface': [ '640x640' ], + 'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ] +} +face_selector_modes : List[FaceSelectorMode] = [ 'many', 'one', 'reference' ] +face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ] +face_mask_regions : List[FaceMaskRegion] = [ 'skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip' ] +temp_frame_formats : List[TempFrameFormat] = [ 'bmp', 'jpg', 'png' ] +output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf' ] +output_video_presets : List[OutputVideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ] + +image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ] +video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160, 4320 ] + +execution_thread_count_range : List[int] = create_int_range(1, 128, 1) +execution_queue_count_range : List[int] = create_int_range(1, 32, 1) +system_memory_limit_range : List[int] = create_int_range(0, 128, 1) +face_detector_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_landmarker_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_blur_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_padding_range : List[int] = create_int_range(0, 100, 1) +reference_face_distance_range : List[float] = create_float_range(0.0, 1.5, 0.05) +output_image_quality_range : List[int] = create_int_range(0, 100, 1) +output_video_quality_range : List[int] = create_int_range(0, 100, 1) diff --git a/deepfuze/common_helper.py b/deepfuze/common_helper.py new file mode 100644 index 0000000..d37419b --- /dev/null +++ b/deepfuze/common_helper.py @@ -0,0 +1,46 @@ +from typing import List, Any +import platform + + +def create_metavar(ranges : List[Any]) -> str: + return '[' + str(ranges[0]) + '-' + str(ranges[-1]) + ']' + + +def create_int_range(start : int, end : int, step : int) -> List[int]: + int_range = [] + current = start + + while current <= end: + int_range.append(current) + current += step + return int_range + + +def create_float_range(start : float, end : 
float, step : float) -> List[float]: + float_range = [] + current = start + + while current <= end: + float_range.append(round(current, 2)) + current = round(current + step, 2) + return float_range + + +def is_linux() -> bool: + return to_lower_case(platform.system()) == 'linux' + + +def is_macos() -> bool: + return to_lower_case(platform.system()) == 'darwin' + + +def is_windows() -> bool: + return to_lower_case(platform.system()) == 'windows' + + +def to_lower_case(__string__ : Any) -> str: + return str(__string__).lower() + + +def get_first(__list__ : Any) -> Any: + return next(iter(__list__), None) diff --git a/deepfuze/config.py b/deepfuze/config.py new file mode 100644 index 0000000..160f346 --- /dev/null +++ b/deepfuze/config.py @@ -0,0 +1,91 @@ +from configparser import ConfigParser +from typing import Any, Optional, List + +import deepfuze.globals + +CONFIG = None + + +def get_config() -> ConfigParser: + global CONFIG + + if CONFIG is None: + CONFIG = ConfigParser() + CONFIG.read(deepfuze.globals.config_path, encoding = 'utf-8') + return CONFIG + + +def clear_config() -> None: + global CONFIG + + CONFIG = None + + +def get_str_value(key : str, fallback : Optional[str] = None) -> Optional[str]: + value = get_value_by_notation(key) + + if value or fallback: + return str(value or fallback) + return None + + +def get_int_value(key : str, fallback : Optional[str] = None) -> Optional[int]: + value = get_value_by_notation(key) + + if value or fallback: + return int(value or fallback) + return None + + +def get_float_value(key : str, fallback : Optional[str] = None) -> Optional[float]: + value = get_value_by_notation(key) + + if value or fallback: + return float(value or fallback) + return None + + +def get_bool_value(key : str, fallback : Optional[str] = None) -> Optional[bool]: + value = get_value_by_notation(key) + + if value == 'True' or fallback == 'True': + return True + if value == 'False' or fallback == 'False': + return False + return None + + +def get_str_list(key : str, fallback : Optional[str] = None) -> Optional[List[str]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ str(value) for value in (value or fallback).split(' ') ] + return None + + +def get_int_list(key : str, fallback : Optional[str] = None) -> Optional[List[int]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ int(value) for value in (value or fallback).split(' ') ] + return None + + +def get_float_list(key : str, fallback : Optional[str] = None) -> Optional[List[float]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ float(value) for value in (value or fallback).split(' ') ] + return None + + +def get_value_by_notation(key : str) -> Optional[Any]: + config = get_config() + + if '.' 
in key: + section, name = key.split('.') + if section in config and name in config[section]: + return config[section][name] + if key in config: + return config[key] + return None diff --git a/deepfuze/content_analyser.py b/deepfuze/content_analyser.py new file mode 100644 index 0000000..71bf91f --- /dev/null +++ b/deepfuze/content_analyser.py @@ -0,0 +1,112 @@ +from typing import Any +from functools import lru_cache +from time import sleep +import cv2 +import numpy +import onnxruntime +from tqdm import tqdm + +import deepfuze.globals +from deepfuze import process_manager, wording +from deepfuze.thread_helper import thread_lock, conditional_thread_semaphore +from deepfuze.typing import VisionFrame, ModelSet, Fps +from deepfuze.execution import apply_execution_provider_options +from deepfuze.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps +from deepfuze.filesystem import resolve_relative_path, is_file +from deepfuze.download import conditional_download + +CONTENT_ANALYSER = None +MODELS : ModelSet =\ +{ + 'open_nsfw': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/open_nsfw.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/open_nsfw.onnx') + } +} +PROBABILITY_LIMIT = 0.80 +RATE_LIMIT = 10 +STREAM_COUNTER = 0 + + +def get_content_analyser() -> Any: + global CONTENT_ANALYSER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if CONTENT_ANALYSER is None: + model_path = MODELS.get('open_nsfw').get('path') + CONTENT_ANALYSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return CONTENT_ANALYSER + + +def clear_content_analyser() -> None: + global CONTENT_ANALYSER + + CONTENT_ANALYSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_url = MODELS.get('open_nsfw').get('url') + model_path = MODELS.get('open_nsfw').get('path') + + if not deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool: + global STREAM_COUNTER + + STREAM_COUNTER = STREAM_COUNTER + 1 + if STREAM_COUNTER % int(video_fps) == 0: + return analyse_frame(vision_frame) + return False + + +def analyse_frame(vision_frame : VisionFrame) -> bool: + content_analyser = get_content_analyser() + vision_frame = prepare_frame(vision_frame) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + probability = content_analyser.run(None, + { + content_analyser.get_inputs()[0].name: vision_frame + })[0][0][1] + return probability > PROBABILITY_LIMIT + + +def prepare_frame(vision_frame : VisionFrame) -> VisionFrame: + vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32) + vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) + vision_frame = numpy.expand_dims(vision_frame, axis = 0) + return vision_frame + + +@lru_cache(maxsize = None) +def analyse_image(image_path : str) -> bool: + frame = read_image(image_path) + return analyse_frame(frame) + + +@lru_cache(maxsize = None) +def analyse_video(video_path : str, start_frame : int, end_frame : int) -> bool: + video_frame_total = count_video_frame_total(video_path) + video_fps = detect_video_fps(video_path) + frame_range = range(start_frame or 0, 
end_frame or video_frame_total) + rate = 0.0 + counter = 0 + + with tqdm(total = len(frame_range), desc = wording.get('analysing'), unit = 'frame', ascii = ' =', disable = deepfuze.globals.log_level in [ 'warn', 'error' ]) as progress: + for frame_number in frame_range: + if frame_number % int(video_fps) == 0: + frame = get_video_frame(video_path, frame_number) + if analyse_frame(frame): + counter += 1 + rate = counter * int(video_fps) / len(frame_range) * 100 + progress.update() + progress.set_postfix(rate = rate) + return rate > RATE_LIMIT diff --git a/deepfuze/core.py b/deepfuze/core.py new file mode 100755 index 0000000..2f77f79 --- /dev/null +++ b/deepfuze/core.py @@ -0,0 +1,438 @@ +import os + +os.environ['OMP_NUM_THREADS'] = '1' + +import signal +import sys +import warnings +import shutil +import numpy +import onnxruntime +from time import sleep, time +from argparse import ArgumentParser, HelpFormatter + +import deepfuze.choices +import deepfuze.globals +from deepfuze.face_analyser import get_one_face, get_average_face +from deepfuze.face_store import get_reference_faces, append_reference_face +from deepfuze import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording, voice_extractor +from deepfuze.content_analyser import analyse_image, analyse_video +from deepfuze.processors.frame.core import get_frame_processors_modules, load_frame_processor_module +from deepfuze.common_helper import create_metavar, get_first +from deepfuze.execution import encode_execution_providers, decode_execution_providers +from deepfuze.normalizer import normalize_output_path, normalize_padding, normalize_fps +from deepfuze.memory import limit_system_memory +from deepfuze.statistics import conditional_log_statistics +from deepfuze.download import conditional_download +from deepfuze.filesystem import get_temp_frame_paths, get_temp_file_path, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths, resolve_relative_path, list_directory +from deepfuze.ffmpeg import extract_frames, merge_video, copy_image, finalize_image, restore_audio, replace_audio +from deepfuze.vision import read_image, read_static_images, detect_image_resolution, restrict_video_fps, create_image_resolutions, get_video_frame, detect_video_resolution, detect_video_fps, restrict_video_resolution, restrict_image_resolution, create_video_resolutions, pack_resolution, unpack_resolution + +onnxruntime.set_default_logger_severity(3) +warnings.filterwarnings('ignore', category = UserWarning, module = 'gradio') + + +def cli() -> None: + signal.signal(signal.SIGINT, lambda signal_number, frame: destroy()) + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 200), add_help = False) + # general + program.add_argument('-c', '--config', help = wording.get('help.config'), dest = 'config_path', default = 'deepfuze.ini') + apply_config(program) + program.add_argument('-s', '--source', help = wording.get('help.source'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) + program.add_argument('-t', '--target', help = wording.get('help.target'), dest = 'target_path', default = config.get_str_value('general.target_path')) + program.add_argument('-o', '--output', help = wording.get('help.output'), dest = 'output_path', default = config.get_str_value('general.output_path')) + program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') + # misc + 
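+	# each option below resolves its default from deepfuze.ini (via deepfuze/config.py) when its flag is omitted on the command line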
group_misc = program.add_argument_group('misc') + group_misc.add_argument('--force-download', help = wording.get('help.force_download'), action = 'store_true', default = config.get_bool_value('misc.force_download')) + group_misc.add_argument('--skip-download', help = wording.get('help.skip_download'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) + group_misc.add_argument('--headless', help = wording.get('help.headless'), action = 'store_true', default = config.get_bool_value('misc.headless')) + group_misc.add_argument('--log-level', help = wording.get('help.log_level'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) + # execution + execution_providers = encode_execution_providers(onnxruntime.get_available_providers()) + group_execution = program.add_argument_group('execution') + group_execution.add_argument('--execution-device-id', help = wording.get('help.execution_device_id'), default = config.get_str_value('execution.execution_device_id', '0')) + group_execution.add_argument('--execution-providers', help = wording.get('help.execution_providers').format(choices = ', '.join(execution_providers)), default = config.get_str_list('execution.execution_providers', 'cpu'), choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') + group_execution.add_argument('--execution-thread-count', help = wording.get('help.execution_thread_count'), type = int, default = config.get_int_value('execution.execution_thread_count', '4'), choices = deepfuze.choices.execution_thread_count_range, metavar = create_metavar(deepfuze.choices.execution_thread_count_range)) + group_execution.add_argument('--execution-queue-count', help = wording.get('help.execution_queue_count'), type = int, default = config.get_int_value('execution.execution_queue_count', '1'), choices = deepfuze.choices.execution_queue_count_range, metavar = create_metavar(deepfuze.choices.execution_queue_count_range)) + # memory + group_memory = program.add_argument_group('memory') + group_memory.add_argument('--video-memory-strategy', help = wording.get('help.video_memory_strategy'), default = config.get_str_value('memory.video_memory_strategy', 'strict'), choices = deepfuze.choices.video_memory_strategies) + group_memory.add_argument('--system-memory-limit', help = wording.get('help.system_memory_limit'), type = int, default = config.get_int_value('memory.system_memory_limit', '0'), choices = deepfuze.choices.system_memory_limit_range, metavar = create_metavar(deepfuze.choices.system_memory_limit_range)) + # face analyser + group_face_analyser = program.add_argument_group('face analyser') + group_face_analyser.add_argument('--face-analyser-order', help = wording.get('help.face_analyser_order'), default = config.get_str_value('face_analyser.face_analyser_order', 'left-right'), choices = deepfuze.choices.face_analyser_orders) + group_face_analyser.add_argument('--face-analyser-age', help = wording.get('help.face_analyser_age'), default = config.get_str_value('face_analyser.face_analyser_age'), choices = deepfuze.choices.face_analyser_ages) + group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('help.face_analyser_gender'), default = config.get_str_value('face_analyser.face_analyser_gender'), choices = deepfuze.choices.face_analyser_genders) + group_face_analyser.add_argument('--face-detector-model', help = wording.get('help.face_detector_model'), default = config.get_str_value('face_analyser.face_detector_model', 'yoloface'),
choices = deepfuze.choices.face_detector_set.keys()) + group_face_analyser.add_argument('--face-detector-size', help = wording.get('help.face_detector_size'), default = config.get_str_value('face_analyser.face_detector_size', '640x640')) + group_face_analyser.add_argument('--face-detector-score', help = wording.get('help.face_detector_score'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = deepfuze.choices.face_detector_score_range, metavar = create_metavar(deepfuze.choices.face_detector_score_range)) + group_face_analyser.add_argument('--face-landmarker-score', help = wording.get('help.face_landmarker_score'), type = float, default = config.get_float_value('face_analyser.face_landmarker_score', '0.5'), choices = deepfuze.choices.face_landmarker_score_range, metavar = create_metavar(deepfuze.choices.face_landmarker_score_range)) + # face selector + group_face_selector = program.add_argument_group('face selector') + group_face_selector.add_argument('--face-selector-mode', help = wording.get('help.face_selector_mode'), default = config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = deepfuze.choices.face_selector_modes) + group_face_selector.add_argument('--reference-face-position', help = wording.get('help.reference_face_position'), type = int, default = config.get_int_value('face_selector.reference_face_position', '0')) + group_face_selector.add_argument('--reference-face-distance', help = wording.get('help.reference_face_distance'), type = float, default = config.get_float_value('face_selector.reference_face_distance', '0.6'), choices = deepfuze.choices.reference_face_distance_range, metavar = create_metavar(deepfuze.choices.reference_face_distance_range)) + group_face_selector.add_argument('--reference-frame-number', help = wording.get('help.reference_frame_number'), type = int, default = config.get_int_value('face_selector.reference_frame_number', '0')) + # face mask + group_face_mask = program.add_argument_group('face mask') + group_face_mask.add_argument('--face-mask-types', help = wording.get('help.face_mask_types').format(choices = ', '.join(deepfuze.choices.face_mask_types)), default = config.get_str_list('face_mask.face_mask_types', 'box'), choices = deepfuze.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') + group_face_mask.add_argument('--face-mask-blur', help = wording.get('help.face_mask_blur'), type = float, default = config.get_float_value('face_mask.face_mask_blur', '0.3'), choices = deepfuze.choices.face_mask_blur_range, metavar = create_metavar(deepfuze.choices.face_mask_blur_range)) + group_face_mask.add_argument('--face-mask-padding', help = wording.get('help.face_mask_padding'), type = int, default = config.get_int_list('face_mask.face_mask_padding', '0 0 0 0'), nargs = '+') + group_face_mask.add_argument('--face-mask-regions', help = wording.get('help.face_mask_regions').format(choices = ', '.join(deepfuze.choices.face_mask_regions)), default = config.get_str_list('face_mask.face_mask_regions', ' '.join(deepfuze.choices.face_mask_regions)), choices = deepfuze.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') + # frame extraction + group_frame_extraction = program.add_argument_group('frame extraction') + group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('help.trim_frame_start'), type = int, default = deepfuze.config.get_int_value('frame_extraction.trim_frame_start')) + group_frame_extraction.add_argument('--trim-frame-end', 
help = wording.get('help.trim_frame_end'), type = int, default = deepfuze.config.get_int_value('frame_extraction.trim_frame_end')) + group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('help.temp_frame_format'), default = config.get_str_value('frame_extraction.temp_frame_format', 'png'), choices = deepfuze.choices.temp_frame_formats) + group_frame_extraction.add_argument('--keep-temp', help = wording.get('help.keep_temp'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) + # output creation + group_output_creation = program.add_argument_group('output creation') + group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = deepfuze.choices.output_image_quality_range, metavar = create_metavar(deepfuze.choices.output_image_quality_range)) + group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution')) + group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = deepfuze.choices.output_video_encoders) + group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = deepfuze.choices.output_video_presets) + group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = deepfuze.choices.output_video_quality_range, metavar = create_metavar(deepfuze.choices.output_video_quality_range)) + group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) + group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation.output_video_fps')) + group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) + # frame processors + available_frame_processors = list_directory('deepfuze/processors/frame/modules') + program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True) + group_frame_processors = program.add_argument_group('frame processors') + group_frame_processors.add_argument('--frame-processors', help = wording.get('help.frame_processors').format(choices = ', '.join(available_frame_processors)), default = config.get_str_list('frame_processors.frame_processors', 'face_swapper'), nargs = '+') + for frame_processor in available_frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + frame_processor_module.register_args(group_frame_processors) + # uis + available_ui_layouts = list_directory('deepfuze/uis/layouts') + group_uis = program.add_argument_group('uis') + group_uis.add_argument('--open-browser', help=wording.get('help.open_browser'), action = 'store_true', default = config.get_bool_value('uis.open_browser')) + 
group_uis.add_argument('--ui-layouts', help = wording.get('help.ui_layouts').format(choices = ', '.join(available_ui_layouts)), default = config.get_str_list('uis.ui_layouts', 'default'), nargs = '+') + run(program) + + +def apply_config(program : ArgumentParser) -> None: + known_args = program.parse_known_args() + deepfuze.globals.config_path = get_first(known_args).config_path + + +def validate_args(program : ArgumentParser) -> None: + try: + for action in program._actions: + if action.default: + if isinstance(action.default, list): + for default in action.default: + program._check_value(action, default) + else: + program._check_value(action, action.default) + except Exception as exception: + program.error(str(exception)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + # general + deepfuze.globals.source_paths = args.source_paths + deepfuze.globals.target_path = args.target_path + deepfuze.globals.output_path = args.output_path + # misc + deepfuze.globals.force_download = args.force_download + deepfuze.globals.skip_download = args.skip_download + deepfuze.globals.headless = args.headless + deepfuze.globals.log_level = args.log_level + # execution + deepfuze.globals.execution_device_id = args.execution_device_id + deepfuze.globals.execution_providers = decode_execution_providers(args.execution_providers) + deepfuze.globals.execution_thread_count = args.execution_thread_count + deepfuze.globals.execution_queue_count = args.execution_queue_count + # memory + deepfuze.globals.video_memory_strategy = args.video_memory_strategy + deepfuze.globals.system_memory_limit = args.system_memory_limit + # face analyser + deepfuze.globals.face_analyser_order = args.face_analyser_order + deepfuze.globals.face_analyser_age = args.face_analyser_age + deepfuze.globals.face_analyser_gender = args.face_analyser_gender + deepfuze.globals.face_detector_model = args.face_detector_model + if args.face_detector_size in deepfuze.choices.face_detector_set[args.face_detector_model]: + deepfuze.globals.face_detector_size = args.face_detector_size + else: + deepfuze.globals.face_detector_size = '640x640' + deepfuze.globals.face_detector_score = args.face_detector_score + deepfuze.globals.face_landmarker_score = args.face_landmarker_score + # face selector + deepfuze.globals.face_selector_mode = args.face_selector_mode + deepfuze.globals.reference_face_position = args.reference_face_position + deepfuze.globals.reference_face_distance = args.reference_face_distance + deepfuze.globals.reference_frame_number = args.reference_frame_number + # face mask + deepfuze.globals.face_mask_types = args.face_mask_types + deepfuze.globals.face_mask_blur = args.face_mask_blur + deepfuze.globals.face_mask_padding = normalize_padding(args.face_mask_padding) + deepfuze.globals.face_mask_regions = args.face_mask_regions + # frame extraction + deepfuze.globals.trim_frame_start = args.trim_frame_start + deepfuze.globals.trim_frame_end = args.trim_frame_end + deepfuze.globals.temp_frame_format = args.temp_frame_format + deepfuze.globals.keep_temp = args.keep_temp + # output creation + deepfuze.globals.output_image_quality = args.output_image_quality + if is_image(args.target_path): + output_image_resolution = detect_image_resolution(args.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + if args.output_image_resolution in output_image_resolutions: + deepfuze.globals.output_image_resolution = args.output_image_resolution + else: + 
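+		# fall back to the target image's own resolution when the requested output resolution is not supported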
deepfuze.globals.output_image_resolution = pack_resolution(output_image_resolution) + deepfuze.globals.output_video_encoder = args.output_video_encoder + deepfuze.globals.output_video_preset = args.output_video_preset + deepfuze.globals.output_video_quality = args.output_video_quality + if is_video(args.target_path): + output_video_resolution = detect_video_resolution(args.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + if args.output_video_resolution in output_video_resolutions: + deepfuze.globals.output_video_resolution = args.output_video_resolution + else: + deepfuze.globals.output_video_resolution = pack_resolution(output_video_resolution) + if args.output_video_fps or is_video(args.target_path): + deepfuze.globals.output_video_fps = normalize_fps(args.output_video_fps) or detect_video_fps(args.target_path) + deepfuze.globals.skip_audio = args.skip_audio + # frame processors + available_frame_processors = list_directory('deepfuze/processors/frame/modules') + deepfuze.globals.frame_processors = args.frame_processors + for frame_processor in available_frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + frame_processor_module.apply_args(program) + # uis + deepfuze.globals.open_browser = args.open_browser + deepfuze.globals.ui_layouts = args.ui_layouts + + +def run(program : ArgumentParser) -> None: + validate_args(program) + apply_args(program) + logger.init(deepfuze.globals.log_level) + + if deepfuze.globals.system_memory_limit > 0: + limit_system_memory(deepfuze.globals.system_memory_limit) + if deepfuze.globals.force_download: + force_download() + return + if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check() or not voice_extractor.pre_check(): + return + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + if not frame_processor_module.pre_check(): + return + if deepfuze.globals.headless: + conditional_process() + else: + import deepfuze.uis.core as ui + + for ui_layout in ui.get_ui_layouts_modules(deepfuze.globals.ui_layouts): + if not ui_layout.pre_check(): + return + ui.launch() + + +def destroy() -> None: + process_manager.stop() + while process_manager.is_processing(): + sleep(0.5) + if deepfuze.globals.target_path: + clear_temp(deepfuze.globals.target_path) + sys.exit(0) + + +def pre_check() -> bool: + if sys.version_info < (3, 9): + logger.error(wording.get('python_not_supported').format(version = '3.9'), __name__.upper()) + return False + if not shutil.which('ffmpeg'): + logger.error(wording.get('ffmpeg_not_installed'), __name__.upper()) + return False + return True + + +def conditional_process() -> None: + start_time = time() + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + if not frame_processor_module.pre_process('output'): + return + conditional_append_reference_faces() + if is_image(deepfuze.globals.target_path): + process_image(start_time) + if is_video(deepfuze.globals.target_path): + process_video(start_time) + + +def conditional_append_reference_faces() -> None: + if 'reference' in deepfuze.globals.face_selector_mode and not get_reference_faces(): + source_frames = read_static_images(deepfuze.globals.source_paths) + source_face = get_average_face(source_frames) + if is_video(deepfuze.globals.target_path): + reference_frame = 
get_video_frame(deepfuze.globals.target_path, deepfuze.globals.reference_frame_number) + else: + reference_frame = read_image(deepfuze.globals.target_path) + reference_face = get_one_face(reference_frame, deepfuze.globals.reference_face_position) + append_reference_face('origin', reference_face) + if source_face and reference_face: + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + abstract_reference_frame = frame_processor_module.get_reference_frame(source_face, reference_face, reference_frame) + if numpy.any(abstract_reference_frame): + reference_frame = abstract_reference_frame + reference_face = get_one_face(reference_frame, deepfuze.globals.reference_face_position) + append_reference_face(frame_processor_module.__name__, reference_face) + + +def force_download() -> None: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + available_frame_processors = list_directory('deepfuze/processors/frame/modules') + model_list =\ + [ + content_analyser.MODELS, + face_analyser.MODELS, + face_masker.MODELS, + voice_extractor.MODELS + ] + + for frame_processor_module in get_frame_processors_modules(available_frame_processors): + if hasattr(frame_processor_module, 'MODELS'): + model_list.append(frame_processor_module.MODELS) + model_urls = [ models[model].get('url') for models in model_list for model in models ] + + conditional_download(download_directory_path, model_urls) + + +def process_image(start_time : float) -> None: + normed_output_path = normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path) + if analyse_image(deepfuze.globals.target_path): + return + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(deepfuze.globals.target_path) + # create temp + logger.debug(wording.get('creating_temp'), __name__.upper()) + create_temp(deepfuze.globals.target_path) + # copy image + process_manager.start() + temp_image_resolution = pack_resolution(restrict_image_resolution(deepfuze.globals.target_path, unpack_resolution(deepfuze.globals.output_image_resolution))) + logger.info(wording.get('copying_image').format(resolution = temp_image_resolution), __name__.upper()) + if copy_image(deepfuze.globals.target_path, temp_image_resolution): + logger.debug(wording.get('copying_image_succeed'), __name__.upper()) + else: + logger.error(wording.get('copying_image_failed'), __name__.upper()) + return + # process image + temp_file_path = get_temp_file_path(deepfuze.globals.target_path) + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + logger.info(wording.get('processing'), frame_processor_module.NAME) + frame_processor_module.process_image(deepfuze.globals.source_paths, temp_file_path, temp_file_path) + frame_processor_module.post_process() + if is_process_stopping(): + return + # finalize image + logger.info(wording.get('finalizing_image').format(resolution = deepfuze.globals.output_image_resolution), __name__.upper()) + if finalize_image(deepfuze.globals.target_path, normed_output_path, deepfuze.globals.output_image_resolution): + logger.debug(wording.get('finalizing_image_succeed'), __name__.upper()) + else: + logger.warn(wording.get('finalizing_image_skipped'), __name__.upper()) + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(deepfuze.globals.target_path) + # validate image + if is_image(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time) % 60) + 
logger.info(wording.get('processing_image_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() + else: + logger.error(wording.get('processing_image_failed'), __name__.upper()) + process_manager.end() + + +def process_video(start_time : float) -> None: + normed_output_path = normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path) + if analyse_video(deepfuze.globals.target_path, deepfuze.globals.trim_frame_start, deepfuze.globals.trim_frame_end): + return + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(deepfuze.globals.target_path) + # create temp + logger.debug(wording.get('creating_temp'), __name__.upper()) + create_temp(deepfuze.globals.target_path) + # extract frames + process_manager.start() + temp_video_resolution = pack_resolution(restrict_video_resolution(deepfuze.globals.target_path, unpack_resolution(deepfuze.globals.output_video_resolution))) + temp_video_fps = restrict_video_fps(deepfuze.globals.target_path, deepfuze.globals.output_video_fps) + logger.info(wording.get('extracting_frames').format(resolution = temp_video_resolution, fps = temp_video_fps), __name__.upper()) + if extract_frames(deepfuze.globals.target_path, temp_video_resolution, temp_video_fps): + logger.debug(wording.get('extracting_frames_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.error(wording.get('extracting_frames_failed'), __name__.upper()) + return + # process frames + temp_frame_paths = get_temp_frame_paths(deepfuze.globals.target_path) + if temp_frame_paths: + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + logger.info(wording.get('processing'), frame_processor_module.NAME) + frame_processor_module.process_video(deepfuze.globals.source_paths, temp_frame_paths) + frame_processor_module.post_process() + if is_process_stopping(): + return + else: + logger.error(wording.get('temp_frames_not_found'), __name__.upper()) + return + # merge video + logger.info(wording.get('merging_video').format(resolution = deepfuze.globals.output_video_resolution, fps = deepfuze.globals.output_video_fps), __name__.upper()) + if merge_video(deepfuze.globals.target_path, deepfuze.globals.output_video_resolution, deepfuze.globals.output_video_fps): + logger.debug(wording.get('merging_video_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.error(wording.get('merging_video_failed'), __name__.upper()) + return + # handle audio + if deepfuze.globals.skip_audio: + logger.info(wording.get('skipping_audio'), __name__.upper()) + move_temp(deepfuze.globals.target_path, normed_output_path) + else: + if 'lip_syncer' in deepfuze.globals.frame_processors: + source_audio_path = get_first(filter_audio_paths(deepfuze.globals.source_paths)) + if source_audio_path and replace_audio(deepfuze.globals.target_path, source_audio_path, normed_output_path): + logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + move_temp(deepfuze.globals.target_path, normed_output_path) + else: + if restore_audio(deepfuze.globals.target_path, normed_output_path, deepfuze.globals.output_video_fps): + logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + 
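+					# keep the merged video without restored audio and move it to the output path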
move_temp(deepfuze.globals.target_path, normed_output_path) + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(deepfuze.globals.target_path) + # validate video + if is_video(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time)) + logger.info(wording.get('processing_video_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() + else: + logger.error(wording.get('processing_video_failed'), __name__.upper()) + process_manager.end() + + +def is_process_stopping() -> bool: + if process_manager.is_stopping(): + process_manager.end() + logger.info(wording.get('processing_stopped'), __name__.upper()) + return process_manager.is_pending() diff --git a/deepfuze/download.py b/deepfuze/download.py new file mode 100644 index 0000000..5e53edd --- /dev/null +++ b/deepfuze/download.py @@ -0,0 +1,49 @@ +import os +import subprocess +import ssl +import urllib.request +from typing import List +from functools import lru_cache +from tqdm import tqdm + +import deepfuze.globals +from deepfuze import wording +from deepfuze.common_helper import is_macos +from deepfuze.filesystem import get_file_size, is_file + +if is_macos(): + ssl._create_default_https_context = ssl._create_unverified_context + + +def conditional_download(download_directory_path : str, urls : List[str]) -> None: + for url in urls: + download_file_path = os.path.join(download_directory_path, os.path.basename(url)) + initial_size = get_file_size(download_file_path) + download_size = get_download_size(url) + if initial_size < download_size: + with tqdm(total = download_size, initial = initial_size, desc = wording.get('downloading'), unit = 'B', unit_scale = True, unit_divisor = 1024, ascii = ' =', disable = deepfuze.globals.log_level in [ 'warn', 'error' ]) as progress: + subprocess.Popen([ 'curl', '--create-dirs', '--silent', '--insecure', '--location', '--continue-at', '-', '--output', download_file_path, url ]) + current_size = initial_size + while current_size < download_size: + if is_file(download_file_path): + current_size = get_file_size(download_file_path) + progress.update(current_size - progress.n) + if download_size and not is_download_done(url, download_file_path): + os.remove(download_file_path) + conditional_download(download_directory_path, [ url ]) + + +@lru_cache(maxsize = None) +def get_download_size(url : str) -> int: + try: + response = urllib.request.urlopen(url, timeout = 10) + return int(response.getheader('Content-Length')) + except (OSError, ValueError): + return 0 + + +def is_download_done(url : str, file_path : str) -> bool: + if is_file(file_path): + return get_download_size(url) == get_file_size(file_path) + return False diff --git a/deepfuze/execution.py b/deepfuze/execution.py new file mode 100644 index 0000000..00843e1 --- /dev/null +++ b/deepfuze/execution.py @@ -0,0 +1,112 @@ +from typing import List, Any +from functools import lru_cache +import subprocess +import xml.etree.ElementTree as ElementTree +import onnxruntime + +from deepfuze.typing import ExecutionDevice, ValueAndUnit + + +def encode_execution_providers(execution_providers : List[str]) -> List[str]: + return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ] + + +def decode_execution_providers(execution_providers : List[str]) -> List[str]: + available_execution_providers = onnxruntime.get_available_providers() + encoded_execution_providers =
encode_execution_providers(available_execution_providers) + + return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ] + + +def has_execution_provider(execution_provider : str) -> bool: + return execution_provider in onnxruntime.get_available_providers() + + +def apply_execution_provider_options(execution_device_id : str, execution_providers : List[str]) -> List[Any]: + execution_providers_with_options : List[Any] = [] + + for execution_provider in execution_providers: + if execution_provider == 'CUDAExecutionProvider': + execution_providers_with_options.append((execution_provider, + { + 'device_id': execution_device_id, + 'cudnn_conv_algo_search': 'EXHAUSTIVE' if use_exhaustive() else 'DEFAULT' + })) + elif execution_provider == 'OpenVINOExecutionProvider': + execution_providers_with_options.append((execution_provider, + { + 'device_id': execution_device_id, + 'device_type': execution_device_id + '_FP32' + })) + elif execution_provider in [ 'DmlExecutionProvider', 'ROCMExecutionProvider' ]: + execution_providers_with_options.append((execution_provider, + { + 'device_id': execution_device_id + })) + else: + execution_providers_with_options.append(execution_provider) + return execution_providers_with_options + + +def use_exhaustive() -> bool: + execution_devices = detect_static_execution_devices() + product_names = ('GeForce GTX 1630', 'GeForce GTX 1650', 'GeForce GTX 1660') + + return any(execution_device.get('product').get('name').startswith(product_names) for execution_device in execution_devices) + + +def run_nvidia_smi() -> subprocess.Popen[bytes]: + commands = [ 'nvidia-smi', '--query', '--xml-format' ] + return subprocess.Popen(commands, stdout = subprocess.PIPE) + + +@lru_cache(maxsize = None) +def detect_static_execution_devices() -> List[ExecutionDevice]: + return detect_execution_devices() + + +def detect_execution_devices() -> List[ExecutionDevice]: + execution_devices : List[ExecutionDevice] = [] + try: + output, _ = run_nvidia_smi().communicate() + root_element = ElementTree.fromstring(output) + except Exception: + root_element = ElementTree.Element('xml') + + for gpu_element in root_element.findall('gpu'): + execution_devices.append( + { + 'driver_version': root_element.find('driver_version').text, + 'framework': + { + 'name': 'CUDA', + 'version': root_element.find('cuda_version').text + }, + 'product': + { + 'vendor': 'NVIDIA', + 'name': gpu_element.find('product_name').text.replace('NVIDIA ', '') + }, + 'video_memory': + { + 'total': create_value_and_unit(gpu_element.find('fb_memory_usage/total').text), + 'free': create_value_and_unit(gpu_element.find('fb_memory_usage/free').text) + }, + 'utilization': + { + 'gpu': create_value_and_unit(gpu_element.find('utilization/gpu_util').text), + 'memory': create_value_and_unit(gpu_element.find('utilization/memory_util').text) + } + }) + return execution_devices + + +def create_value_and_unit(text : str) -> ValueAndUnit: + value, unit = text.split() + value_and_unit : ValueAndUnit =\ + { + 'value': value, + 'unit': unit + } + + return value_and_unit diff --git a/deepfuze/face_analyser.py b/deepfuze/face_analyser.py new file mode 100644 index 0000000..1cc3c85 --- /dev/null +++ b/deepfuze/face_analyser.py @@ -0,0 +1,586 @@ +from typing import Any, Optional, List, Tuple +from time import sleep +import cv2 +import numpy +import onnxruntime + 
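+# this module lazily builds a single FACE_ANALYSER bundle (face detectors, recognizer, landmarkers and gender/age model) on first use; see get_face_analyser() below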
+import deepfuze.globals +from deepfuze import process_manager +from deepfuze.common_helper import get_first +from deepfuze.face_helper import estimate_matrix_by_face_landmark_5, warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender +from deepfuze.face_store import get_static_faces, set_static_faces +from deepfuze.execution import apply_execution_provider_options +from deepfuze.download import conditional_download +from deepfuze.filesystem import resolve_relative_path, is_file +from deepfuze.thread_helper import thread_lock, thread_semaphore, conditional_thread_semaphore +from deepfuze.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding +from deepfuze.vision import resize_frame_resolution, unpack_resolution + +FACE_ANALYSER = None +MODELS : ModelSet =\ +{ + 'face_detector_retinaface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/retinaface_10g.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/retinaface_10g.onnx') + }, + 'face_detector_scrfd': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/scrfd_2.5g.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/scrfd_2.5g.onnx') + }, + 'face_detector_yoloface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/yoloface_8n.onnx') + }, + 'face_detector_yunet': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yunet_2023mar.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/yunet_2023mar.onnx') + }, + 'face_recognizer_arcface_blendswap': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/arcface_w600k_r50.onnx') + }, + 'face_recognizer_arcface_inswapper': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/arcface_w600k_r50.onnx') + }, + 'face_recognizer_arcface_simswap': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_simswap.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/arcface_simswap.onnx') + }, + 'face_recognizer_arcface_uniface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/arcface_w600k_r50.onnx') + }, + 'face_landmarker_68': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/2dfan4.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/2dfan4.onnx') + }, + 'face_landmarker_68_5': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_landmarker_68_5.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/face_landmarker_68_5.onnx') + }, + 'gender_age': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gender_age.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gender_age.onnx') + } +} + + +def get_face_analyser() 
-> Any: + global FACE_ANALYSER + + face_detectors = {} + face_landmarkers = {} + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_ANALYSER is None: + if deepfuze.globals.face_detector_model in [ 'many', 'retinaface' ]: + face_detectors['retinaface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_detector_model in [ 'many', 'scrfd' ]: + face_detectors['scrfd'] = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_detector_model in [ 'many', 'yoloface' ]: + face_detectors['yoloface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_detector_model in [ 'yunet' ]: + face_detectors['yunet'] = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) + if deepfuze.globals.face_recognizer_model == 'arcface_blendswap': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_recognizer_model == 'arcface_inswapper': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_inswapper').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_recognizer_model == 'arcface_simswap': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + if deepfuze.globals.face_recognizer_model == 'arcface_uniface': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_uniface').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + face_landmarkers['68'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + face_landmarkers['68_5'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68_5').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + FACE_ANALYSER =\ + { + 'face_detectors': face_detectors, + 'face_recognizer': face_recognizer, + 'face_landmarkers': face_landmarkers, + 'gender_age': gender_age + } + return FACE_ANALYSER + + +def clear_face_analyser() -> Any: + global FACE_ANALYSER + + FACE_ANALYSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_urls =\ + [ + MODELS.get('face_landmarker_68').get('url'), 
+ MODELS.get('face_landmarker_68_5').get('url'), + MODELS.get('gender_age').get('url') + ] + model_paths =\ + [ + MODELS.get('face_landmarker_68').get('path'), + MODELS.get('face_landmarker_68_5').get('path'), + MODELS.get('gender_age').get('path') + ] + + if deepfuze.globals.face_detector_model in [ 'many', 'retinaface' ]: + model_urls.append(MODELS.get('face_detector_retinaface').get('url')) + model_paths.append(MODELS.get('face_detector_retinaface').get('path')) + if deepfuze.globals.face_detector_model in [ 'many', 'scrfd' ]: + model_urls.append(MODELS.get('face_detector_scrfd').get('url')) + model_paths.append(MODELS.get('face_detector_scrfd').get('path')) + if deepfuze.globals.face_detector_model in [ 'many', 'yoloface' ]: + model_urls.append(MODELS.get('face_detector_yoloface').get('url')) + model_paths.append(MODELS.get('face_detector_yoloface').get('path')) + if deepfuze.globals.face_detector_model in [ 'yunet' ]: + model_urls.append(MODELS.get('face_detector_yunet').get('url')) + model_paths.append(MODELS.get('face_detector_yunet').get('path')) + if deepfuze.globals.face_recognizer_model == 'arcface_blendswap': + model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_blendswap').get('path')) + if deepfuze.globals.face_recognizer_model == 'arcface_inswapper': + model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_inswapper').get('path')) + if deepfuze.globals.face_recognizer_model == 'arcface_simswap': + model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_simswap').get('path')) + if deepfuze.globals.face_recognizer_model == 'arcface_uniface': + model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_uniface').get('path')) + + if not deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, model_urls) + process_manager.end() + return all(is_file(model_path) for model_path in model_paths) + + +def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('retinaface') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 2 + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame + }) + for index, feature_stride in enumerate(feature_strides): + keep_indices = numpy.where(detections[index] >= deepfuze.globals.face_detector_score)[0] + if keep_indices.any(): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_box_raw = detections[index + 
feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + bounding_box_list.append(numpy.array( + [ + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height + ])) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) + for score in detections[index][keep_indices]: + score_list.append(score[0]) + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('scrfd') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 2 + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame + }) + for index, feature_stride in enumerate(feature_strides): + keep_indices = numpy.where(detections[index] >= deepfuze.globals.face_detector_score)[0] + if keep_indices.any(): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_box_raw = detections[index + feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + bounding_box_list.append(numpy.array( + [ + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height + ])) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) + for score in detections[index][keep_indices]: + score_list.append(score[0]) + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('yoloface') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + 
face_detector.get_inputs()[0].name: detect_vision_frame + }) + detections = numpy.squeeze(detections).T + bounding_box_raw, score_raw, face_landmark_5_raw = numpy.split(detections, [ 4, 5 ], axis = 1) + keep_indices = numpy.where(score_raw > deepfuze.globals.face_detector_score)[0] + if keep_indices.any(): + bounding_box_raw, face_landmark_5_raw, score_raw = bounding_box_raw[keep_indices], face_landmark_5_raw[keep_indices], score_raw[keep_indices] + for bounding_box in bounding_box_raw: + bounding_box_list.append(numpy.array( + [ + (bounding_box[0] - bounding_box[2] / 2) * ratio_width, + (bounding_box[1] - bounding_box[3] / 2) * ratio_height, + (bounding_box[0] + bounding_box[2] / 2) * ratio_width, + (bounding_box[1] + bounding_box[3] / 2) * ratio_height + ])) + face_landmark_5_raw[:, 0::3] = (face_landmark_5_raw[:, 0::3]) * ratio_width + face_landmark_5_raw[:, 1::3] = (face_landmark_5_raw[:, 1::3]) * ratio_height + for face_landmark_5 in face_landmark_5_raw: + face_landmark_5_list.append(numpy.array(face_landmark_5.reshape(-1, 3)[:, :2])) + score_list = score_raw.ravel().tolist() + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('yunet') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + face_detector.setInputSize((temp_vision_frame.shape[1], temp_vision_frame.shape[0])) + face_detector.setScoreThreshold(deepfuze.globals.face_detector_score) + with thread_semaphore(): + _, detections = face_detector.detect(temp_vision_frame) + if numpy.any(detections): + for detection in detections: + bounding_box_list.append(numpy.array( + [ + detection[0] * ratio_width, + detection[1] * ratio_height, + (detection[0] + detection[2]) * ratio_width, + (detection[1] + detection[3]) * ratio_height + ])) + face_landmark_5_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height ]) + score_list.append(detection[14]) + return bounding_box_list, face_landmark_5_list, score_list + + +def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : str) -> VisionFrame: + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + detect_vision_frame = numpy.zeros((face_detector_height, face_detector_width, 3)) + detect_vision_frame[:temp_vision_frame.shape[0], :temp_vision_frame.shape[1], :] = temp_vision_frame + detect_vision_frame = (detect_vision_frame - 127.5) / 128.0 + detect_vision_frame = numpy.expand_dims(detect_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return detect_vision_frame + + +def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBox], face_landmark_5_list : List[FaceLandmark5], score_list : List[Score]) -> List[Face]: + faces = [] + if deepfuze.globals.face_detector_score > 0: + sort_indices = numpy.argsort(-numpy.array(score_list)) + bounding_box_list = [ bounding_box_list[index] for index in sort_indices ] + face_landmark_5_list = [face_landmark_5_list[index] for index in sort_indices] + score_list = [ score_list[index] for index in 
sort_indices ] + iou_threshold = 0.1 if deepfuze.globals.face_detector_model == 'many' else 0.4 + keep_indices = apply_nms(bounding_box_list, iou_threshold) + for index in keep_indices: + bounding_box = bounding_box_list[index] + face_landmark_5_68 = face_landmark_5_list[index] + face_landmark_68_5 = expand_face_landmark_68_from_5(face_landmark_5_68) + face_landmark_68 = face_landmark_68_5 + face_landmark_68_score = 0.0 + if deepfuze.globals.face_landmarker_score > 0: + face_landmark_68, face_landmark_68_score = detect_face_landmark_68(vision_frame, bounding_box) + if face_landmark_68_score > deepfuze.globals.face_landmarker_score: + face_landmark_5_68 = convert_face_landmark_68_to_5(face_landmark_68) + landmarks : FaceLandmarkSet =\ + { + '5': face_landmark_5_list[index], + '5/68': face_landmark_5_68, + '68': face_landmark_68, + '68/5': face_landmark_68_5 + } + scores : FaceScoreSet = \ + { + 'detector': score_list[index], + 'landmarker': face_landmark_68_score + } + embedding, normed_embedding = calc_embedding(vision_frame, landmarks.get('5/68')) + gender, age = detect_gender_age(vision_frame, bounding_box) + faces.append(Face( + bounding_box = bounding_box, + landmarks = landmarks, + scores = scores, + embedding = embedding, + normed_embedding = normed_embedding, + gender = gender, + age = age + )) + return faces + + +def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5) -> Tuple[Embedding, Embedding]: + face_recognizer = get_face_analyser().get('face_recognizer') + crop_vision_frame, matrix = warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, 'arcface_112_v2', (112, 112)) + crop_vision_frame = crop_vision_frame / 127.5 - 1 + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + embedding = face_recognizer.run(None, + { + face_recognizer.get_inputs()[0].name: crop_vision_frame + })[0] + embedding = embedding.ravel() + normed_embedding = embedding / numpy.linalg.norm(embedding) + return embedding, normed_embedding + + +def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[FaceLandmark68, Score]: + face_landmarker = get_face_analyser().get('face_landmarkers').get('68') + scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max() + translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256)) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_RGB2Lab) + if numpy.mean(crop_vision_frame[:, :, 0]) < 30: + crop_vision_frame[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_vision_frame[:, :, 0]) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_Lab2RGB) + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + face_landmark_68, face_heatmap = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ crop_vision_frame ] + }) + face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 + face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256 + face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) + face_landmark_68 = face_landmark_68.reshape(-1, 2) + face_landmark_68_score = numpy.amax(face_heatmap, 
axis = (2, 3)) + face_landmark_68_score = numpy.mean(face_landmark_68_score) + return face_landmark_68, face_landmark_68_score + + +def expand_face_landmark_68_from_5(face_landmark_5 : FaceLandmark5) -> FaceLandmark68: + face_landmarker = get_face_analyser().get('face_landmarkers').get('68_5') + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, 'ffhq_512', (1, 1)) + face_landmark_5 = cv2.transform(face_landmark_5.reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + face_landmark_68_5 = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ face_landmark_5 ] + })[0][0] + face_landmark_68_5 = cv2.transform(face_landmark_68_5.reshape(1, -1, 2), cv2.invertAffineTransform(affine_matrix)).reshape(-1, 2) + return face_landmark_68_5 + + +def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]: + gender_age = get_face_analyser().get('gender_age') + bounding_box = bounding_box.reshape(2, -1) + scale = 64 / numpy.subtract(*bounding_box[::-1]).max() + translation = 48 - bounding_box.sum(axis = 0) * scale * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (96, 96)) + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + prediction = gender_age.run(None, + { + gender_age.get_inputs()[0].name: crop_vision_frame + })[0][0] + gender = int(numpy.argmax(prediction[:2])) + age = int(numpy.round(prediction[2] * 100)) + return gender, age + + +def get_one_face(vision_frame : VisionFrame, position : int = 0) -> Optional[Face]: + many_faces = get_many_faces(vision_frame) + if many_faces: + try: + return many_faces[position] + except IndexError: + return many_faces[-1] + return None + + +def get_average_face(vision_frames : List[VisionFrame], position : int = 0) -> Optional[Face]: + average_face = None + faces = [] + embedding_list = [] + normed_embedding_list = [] + + for vision_frame in vision_frames: + face = get_one_face(vision_frame, position) + if face: + faces.append(face) + embedding_list.append(face.embedding) + normed_embedding_list.append(face.normed_embedding) + if faces: + first_face = get_first(faces) + average_face = Face( + bounding_box = first_face.bounding_box, + landmarks = first_face.landmarks, + scores = first_face.scores, + embedding = numpy.mean(embedding_list, axis = 0), + normed_embedding = numpy.mean(normed_embedding_list, axis = 0), + gender = first_face.gender, + age = first_face.age + ) + return average_face + + +def get_many_faces(vision_frame : VisionFrame) -> List[Face]: + faces = [] + try: + faces_cache = get_static_faces(vision_frame) + if faces_cache: + faces = faces_cache + else: + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + if deepfuze.globals.face_detector_model in [ 'many', 'retinaface']: + bounding_box_list_retinaface, face_landmark_5_list_retinaface, score_list_retinaface = detect_with_retinaface(vision_frame, deepfuze.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_retinaface) + face_landmark_5_list.extend(face_landmark_5_list_retinaface) + score_list.extend(score_list_retinaface) + if deepfuze.globals.face_detector_model in [ 'many', 'scrfd' ]: + bounding_box_list_scrfd, face_landmark_5_list_scrfd, score_list_scrfd = 
detect_with_scrfd(vision_frame, deepfuze.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_scrfd) + face_landmark_5_list.extend(face_landmark_5_list_scrfd) + score_list.extend(score_list_scrfd) + if deepfuze.globals.face_detector_model in [ 'many', 'yoloface' ]: + bounding_box_list_yoloface, face_landmark_5_list_yoloface, score_list_yoloface = detect_with_yoloface(vision_frame, deepfuze.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yoloface) + face_landmark_5_list.extend(face_landmark_5_list_yoloface) + score_list.extend(score_list_yoloface) + if deepfuze.globals.face_detector_model in [ 'yunet' ]: + bounding_box_list_yunet, face_landmark_5_list_yunet, score_list_yunet = detect_with_yunet(vision_frame, deepfuze.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yunet) + face_landmark_5_list.extend(face_landmark_5_list_yunet) + score_list.extend(score_list_yunet) + if bounding_box_list and face_landmark_5_list and score_list: + faces = create_faces(vision_frame, bounding_box_list, face_landmark_5_list, score_list) + if faces: + set_static_faces(vision_frame, faces) + if deepfuze.globals.face_analyser_order: + faces = sort_by_order(faces, deepfuze.globals.face_analyser_order) + if deepfuze.globals.face_analyser_age: + faces = filter_by_age(faces, deepfuze.globals.face_analyser_age) + if deepfuze.globals.face_analyser_gender: + faces = filter_by_gender(faces, deepfuze.globals.face_analyser_gender) + except (AttributeError, ValueError): + pass + return faces + + +def find_similar_faces(reference_faces : FaceSet, vision_frame : VisionFrame, face_distance : float) -> List[Face]: + similar_faces : List[Face] = [] + many_faces = get_many_faces(vision_frame) + + if reference_faces: + for reference_set in reference_faces: + if not similar_faces: + for reference_face in reference_faces[reference_set]: + for face in many_faces: + if compare_faces(face, reference_face, face_distance): + similar_faces.append(face) + return similar_faces + + +def compare_faces(face : Face, reference_face : Face, face_distance : float) -> bool: + current_face_distance = calc_face_distance(face, reference_face) + return current_face_distance < face_distance + + +def calc_face_distance(face : Face, reference_face : Face) -> float: + if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'): + return 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding) + return 0 + + +def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: + if order == 'left-right': + return sorted(faces, key = lambda face: face.bounding_box[0]) + if order == 'right-left': + return sorted(faces, key = lambda face: face.bounding_box[0], reverse = True) + if order == 'top-bottom': + return sorted(faces, key = lambda face: face.bounding_box[1]) + if order == 'bottom-top': + return sorted(faces, key = lambda face: face.bounding_box[1], reverse = True) + if order == 'small-large': + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1])) + if order == 'large-small': + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1]), reverse = True) + if order == 'best-worst': + return sorted(faces, key = lambda face: face.scores.get('detector'), reverse = True) + if order == 'worst-best': + return sorted(faces, key = lambda face: face.scores.get('detector')) + return faces + + 
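A note on the matching logic above: `Face.normed_embedding` is unit-length, so `calc_face_distance` is one minus the cosine similarity of the two embeddings, and `compare_faces` accepts a pair when that distance stays below `face_distance`. A minimal standalone sketch of the same arithmetic (the two arrays are hypothetical stand-ins for real embeddings):

```python
import numpy

def face_distance(normed_a : numpy.ndarray, normed_b : numpy.ndarray) -> float:
	# For unit-length vectors the dot product equals the cosine similarity,
	# matching calc_face_distance above.
	return 1 - numpy.dot(normed_a, normed_b)

same = numpy.array([ 1.0, 0.0 ])
other = numpy.array([ 0.0, 1.0 ])
print(face_distance(same, same))   # 0.0 -> identical, accepted by any threshold
print(face_distance(same, other))  # 1.0 -> rejected by a typical threshold like 0.6
```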
+def filter_by_age(faces : List[Face], age : FaceAnalyserAge) -> List[Face]: + filter_faces = [] + for face in faces: + if categorize_age(face.age) == age: + filter_faces.append(face) + return filter_faces + + +def filter_by_gender(faces : List[Face], gender : FaceAnalyserGender) -> List[Face]: + filter_faces = [] + for face in faces: + if categorize_gender(face.gender) == gender: + filter_faces.append(face) + return filter_faces diff --git a/deepfuze/face_helper.py b/deepfuze/face_helper.py new file mode 100644 index 0000000..ffc8725 --- /dev/null +++ b/deepfuze/face_helper.py @@ -0,0 +1,169 @@ +from typing import Any, Tuple, List +from cv2.typing import Size +from functools import lru_cache +import cv2 +import numpy + +from deepfuze.typing import BoundingBox, FaceLandmark5, FaceLandmark68, VisionFrame, Mask, Matrix, Translation, WarpTemplate, WarpTemplateSet, FaceAnalyserAge, FaceAnalyserGender + +WARP_TEMPLATES : WarpTemplateSet =\ +{ + 'arcface_112_v1': numpy.array( + [ + [ 0.35473214, 0.45658929 ], + [ 0.64526786, 0.45658929 ], + [ 0.50000000, 0.61154464 ], + [ 0.37913393, 0.77687500 ], + [ 0.62086607, 0.77687500 ] + ]), + 'arcface_112_v2': numpy.array( + [ + [ 0.34191607, 0.46157411 ], + [ 0.65653393, 0.45983393 ], + [ 0.50022500, 0.64050536 ], + [ 0.37097589, 0.82469196 ], + [ 0.63151696, 0.82325089 ] + ]), + 'arcface_128_v2': numpy.array( + [ + [ 0.36167656, 0.40387734 ], + [ 0.63696719, 0.40235469 ], + [ 0.50019687, 0.56044219 ], + [ 0.38710391, 0.72160547 ], + [ 0.61507734, 0.72034453 ] + ]), + 'ffhq_512': numpy.array( + [ + [ 0.37691676, 0.46864664 ], + [ 0.62285697, 0.46912813 ], + [ 0.50123859, 0.61331904 ], + [ 0.39308822, 0.72541100 ], + [ 0.61150205, 0.72490465 ] + ]) +} + + +def estimate_matrix_by_face_landmark_5(face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Matrix: + normed_warp_template = WARP_TEMPLATES.get(warp_template) * crop_size + affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] + return affine_matrix + + +def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, warp_template, crop_size) + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) + return crop_vision_frame, affine_matrix + + +def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : BoundingBox, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + source_points = numpy.array([ [ bounding_box[0], bounding_box[1] ], [bounding_box[2], bounding_box[1] ], [ bounding_box[0], bounding_box[3] ] ]).astype(numpy.float32) + target_points = numpy.array([ [ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ] ]).astype(numpy.float32) + affine_matrix = cv2.getAffineTransform(source_points, target_points) + if bounding_box[2] - bounding_box[0] > crop_size[0] or bounding_box[3] - bounding_box[1] > crop_size[1]: + interpolation_method = cv2.INTER_AREA + else: + interpolation_method = cv2.INTER_LINEAR + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, flags = interpolation_method) + return crop_vision_frame, affine_matrix + + +def warp_face_by_translation(temp_vision_frame : VisionFrame, translation : Translation, scale : float, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + 
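+	# A minimal affine: [[scale, 0, tx], [0, scale, ty]] scales the face region
+	# uniformly and shifts it into the fixed crop_size canvas.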
affine_matrix = numpy.array([ [ scale, 0, translation[0] ], [ 0, scale, translation[1] ] ]) + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size) + return crop_vision_frame, affine_matrix + + +def paste_back(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, crop_mask : Mask, affine_matrix : Matrix) -> VisionFrame: + inverse_matrix = cv2.invertAffineTransform(affine_matrix) + temp_size = temp_vision_frame.shape[:2][::-1] + inverse_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_size).clip(0, 1) + inverse_vision_frame = cv2.warpAffine(crop_vision_frame, inverse_matrix, temp_size, borderMode = cv2.BORDER_REPLICATE) + paste_vision_frame = temp_vision_frame.copy() + paste_vision_frame[:, :, 0] = inverse_mask * inverse_vision_frame[:, :, 0] + (1 - inverse_mask) * temp_vision_frame[:, :, 0] + paste_vision_frame[:, :, 1] = inverse_mask * inverse_vision_frame[:, :, 1] + (1 - inverse_mask) * temp_vision_frame[:, :, 1] + paste_vision_frame[:, :, 2] = inverse_mask * inverse_vision_frame[:, :, 2] + (1 - inverse_mask) * temp_vision_frame[:, :, 2] + return paste_vision_frame + + +@lru_cache(maxsize = None) +def create_static_anchors(feature_stride : int, anchor_total : int, stride_height : int, stride_width : int) -> numpy.ndarray[Any, Any]: + y, x = numpy.mgrid[:stride_height, :stride_width][::-1] + anchors = numpy.stack((y, x), axis = -1) + anchors = (anchors * feature_stride).reshape((-1, 2)) + anchors = numpy.stack([ anchors ] * anchor_total, axis = 1).reshape((-1, 2)) + return anchors + + +def create_bounding_box_from_face_landmark_68(face_landmark_68 : FaceLandmark68) -> BoundingBox: + min_x, min_y = numpy.min(face_landmark_68, axis = 0) + max_x, max_y = numpy.max(face_landmark_68, axis = 0) + bounding_box = numpy.array([ min_x, min_y, max_x, max_y ]).astype(numpy.int16) + return bounding_box + + +def distance_to_bounding_box(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> BoundingBox: + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + bounding_box = numpy.column_stack([ x1, y1, x2, y2 ]) + return bounding_box + + +def distance_to_face_landmark_5(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> FaceLandmark5: + x = points[:, 0::2] + distance[:, 0::2] + y = points[:, 1::2] + distance[:, 1::2] + face_landmark_5 = numpy.stack((x, y), axis = -1) + return face_landmark_5 + + +def convert_face_landmark_68_to_5(face_landmark_68 : FaceLandmark68) -> FaceLandmark5: + face_landmark_5 = numpy.array( + [ + numpy.mean(face_landmark_68[36:42], axis = 0), + numpy.mean(face_landmark_68[42:48], axis = 0), + face_landmark_68[30], + face_landmark_68[48], + face_landmark_68[54] + ]) + return face_landmark_5 + + +def apply_nms(bounding_box_list : List[BoundingBox], iou_threshold : float) -> List[int]: + keep_indices = [] + dimension_list = numpy.reshape(bounding_box_list, (-1, 4)) + x1 = dimension_list[:, 0] + y1 = dimension_list[:, 1] + x2 = dimension_list[:, 2] + y2 = dimension_list[:, 3] + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + indices = numpy.arange(len(bounding_box_list)) + while indices.size > 0: + index = indices[0] + remain_indices = indices[1:] + keep_indices.append(index) + xx1 = numpy.maximum(x1[index], x1[remain_indices]) + yy1 = numpy.maximum(y1[index], y1[remain_indices]) + xx2 = numpy.minimum(x2[index], x2[remain_indices]) + yy2 = numpy.minimum(y2[index], y2[remain_indices]) + width = numpy.maximum(0, xx2 - 
xx1 + 1) + height = numpy.maximum(0, yy2 - yy1 + 1) + iou = width * height / (areas[index] + areas[remain_indices] - width * height) + indices = indices[numpy.where(iou <= iou_threshold)[0] + 1] + return keep_indices + + +def categorize_age(age : int) -> FaceAnalyserAge: + if age < 13: + return 'child' + elif age < 19: + return 'teen' + elif age < 60: + return 'adult' + return 'senior' + + +def categorize_gender(gender : int) -> FaceAnalyserGender: + if gender == 0: + return 'female' + return 'male' diff --git a/deepfuze/face_masker.py b/deepfuze/face_masker.py new file mode 100755 index 0000000..bf3c366 --- /dev/null +++ b/deepfuze/face_masker.py @@ -0,0 +1,155 @@ +from typing import Any, Dict, List +from cv2.typing import Size +from functools import lru_cache +from time import sleep +import cv2 +import numpy +import onnxruntime + +import deepfuze.globals +from deepfuze import process_manager +from deepfuze.thread_helper import thread_lock, conditional_thread_semaphore +from deepfuze.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet +from deepfuze.execution import apply_execution_provider_options +from deepfuze.filesystem import resolve_relative_path, is_file +from deepfuze.download import conditional_download + +FACE_OCCLUDER = None +FACE_PARSER = None +MODELS : ModelSet =\ +{ + 'face_occluder': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_occluder.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/face_occluder.onnx') + }, + 'face_parser': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_parser.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/face_parser.onnx') + } +} +FACE_MASK_REGIONS : Dict[FaceMaskRegion, int] =\ +{ + 'skin': 1, + 'left-eyebrow': 2, + 'right-eyebrow': 3, + 'left-eye': 4, + 'right-eye': 5, + 'glasses': 6, + 'nose': 10, + 'mouth': 11, + 'upper-lip': 12, + 'lower-lip': 13 +} + + +def get_face_occluder() -> Any: + global FACE_OCCLUDER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_OCCLUDER is None: + model_path = MODELS.get('face_occluder').get('path') + FACE_OCCLUDER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return FACE_OCCLUDER + + +def get_face_parser() -> Any: + global FACE_PARSER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_PARSER is None: + model_path = MODELS.get('face_parser').get('path') + FACE_PARSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return FACE_PARSER + + +def clear_face_occluder() -> None: + global FACE_OCCLUDER + + FACE_OCCLUDER = None + + +def clear_face_parser() -> None: + global FACE_PARSER + + FACE_PARSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_urls =\ + [ + MODELS.get('face_occluder').get('url'), + MODELS.get('face_parser').get('url') + ] + model_paths =\ + [ + MODELS.get('face_occluder').get('path'), + MODELS.get('face_parser').get('path') + ] + + if not deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, model_urls) + process_manager.end() + return all(is_file(model_path) for model_path in model_paths) + + +@lru_cache(maxsize = None) 
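+# Safe to memoise: the mask depends only on its arguments, so one mask per
+# (crop_size, blur, padding) combination is reused across all frames.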
+def create_static_box_mask(crop_size : Size, face_mask_blur : float, face_mask_padding : Padding) -> Mask: + blur_amount = int(crop_size[0] * 0.5 * face_mask_blur) + blur_area = max(blur_amount // 2, 1) + box_mask : Mask = numpy.ones(crop_size, numpy.float32) + box_mask[:max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100)), :] = 0 + box_mask[-max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100)):, :] = 0 + box_mask[:, :max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100))] = 0 + box_mask[:, -max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100)):] = 0 + if blur_amount > 0: + box_mask = cv2.GaussianBlur(box_mask, (0, 0), blur_amount * 0.25) + return box_mask + + +def create_occlusion_mask(crop_vision_frame : VisionFrame) -> Mask: + face_occluder = get_face_occluder() + prepare_vision_frame = cv2.resize(crop_vision_frame, face_occluder.get_inputs()[0].shape[1:3][::-1]) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255 + prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + occlusion_mask : Mask = face_occluder.run(None, + { + face_occluder.get_inputs()[0].name: prepare_vision_frame + })[0][0] + occlusion_mask = occlusion_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32) + occlusion_mask = cv2.resize(occlusion_mask, crop_vision_frame.shape[:2][::-1]) + occlusion_mask = (cv2.GaussianBlur(occlusion_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 + return occlusion_mask + + +def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List[FaceMaskRegion]) -> Mask: + face_parser = get_face_parser() + prepare_vision_frame = cv2.flip(cv2.resize(crop_vision_frame, (512, 512)), 1) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32)[:, :, ::-1] / 127.5 - 1 + prepare_vision_frame = prepare_vision_frame.transpose(0, 3, 1, 2) + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + region_mask : Mask = face_parser.run(None, + { + face_parser.get_inputs()[0].name: prepare_vision_frame + })[0][0] + region_mask = numpy.isin(region_mask.argmax(0), [ FACE_MASK_REGIONS[region] for region in face_mask_regions ]) + region_mask = cv2.resize(region_mask.astype(numpy.float32), crop_vision_frame.shape[:2][::-1]) + region_mask = (cv2.GaussianBlur(region_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 + return region_mask + + +def create_mouth_mask(face_landmark_68 : FaceLandmark68) -> Mask: + convex_hull = cv2.convexHull(face_landmark_68[numpy.r_[3:14, 31:36]].astype(numpy.int32)) + mouth_mask : Mask = numpy.zeros((512, 512)).astype(numpy.float32) + mouth_mask = cv2.fillConvexPoly(mouth_mask, convex_hull, 1.0) + mouth_mask = cv2.erode(mouth_mask.clip(0, 1), numpy.ones((21, 3))) + mouth_mask = cv2.GaussianBlur(mouth_mask, (0, 0), sigmaX = 1, sigmaY = 15) + return mouth_mask diff --git a/deepfuze/face_store.py b/deepfuze/face_store.py new file mode 100644 index 0000000..b05fdbf --- /dev/null +++ b/deepfuze/face_store.py @@ -0,0 +1,48 @@ +from typing import Optional, List +import hashlib +import numpy + +from deepfuze.typing import VisionFrame, Face, FaceStore, FaceSet + +FACE_STORE: FaceStore =\ +{ + 'static_faces': {}, + 'reference_faces': {} +} + + +def get_static_faces(vision_frame : VisionFrame) -> Optional[List[Face]]: + frame_hash = create_frame_hash(vision_frame) + if frame_hash in FACE_STORE['static_faces']: + return 
FACE_STORE['static_faces'][frame_hash] + return None + + +def set_static_faces(vision_frame : VisionFrame, faces : List[Face]) -> None: + frame_hash = create_frame_hash(vision_frame) + if frame_hash: + FACE_STORE['static_faces'][frame_hash] = faces + + +def clear_static_faces() -> None: + FACE_STORE['static_faces'] = {} + + +def create_frame_hash(vision_frame : VisionFrame) -> Optional[str]: + return hashlib.sha1(vision_frame.tobytes()).hexdigest() if numpy.any(vision_frame) else None + + +def get_reference_faces() -> Optional[FaceSet]: + if FACE_STORE['reference_faces']: + return FACE_STORE['reference_faces'] + return None + + +def append_reference_face(name : str, face : Face) -> None: + if name not in FACE_STORE['reference_faces']: + FACE_STORE['reference_faces'][name] = [] + FACE_STORE['reference_faces'][name].append(face) + + +def clear_reference_faces() -> None: + FACE_STORE['reference_faces'] = {} diff --git a/deepfuze/ffmpeg.py b/deepfuze/ffmpeg.py new file mode 100644 index 0000000..a2aebba --- /dev/null +++ b/deepfuze/ffmpeg.py @@ -0,0 +1,146 @@ +from typing import List, Optional +import os +import subprocess +import filetype + +import deepfuze.globals +from deepfuze import logger, process_manager +from deepfuze.typing import OutputVideoPreset, Fps, AudioBuffer +from deepfuze.filesystem import get_temp_frames_pattern, get_temp_file_path +from deepfuze.vision import restrict_video_fps + + +def run_ffmpeg(args : List[str]) -> bool: + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] + commands.extend(args) + process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE) + + while process_manager.is_processing(): + try: + if deepfuze.globals.log_level == 'debug': + log_debug(process) + return process.wait(timeout = 0.5) == 0 + except subprocess.TimeoutExpired: + continue + return process.returncode == 0 + + +def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] + commands.extend(args) + return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) + + +def log_debug(process : subprocess.Popen[bytes]) -> None: + _, stderr = process.communicate() + errors = stderr.decode().split(os.linesep) + + for error in errors: + if error.strip(): + logger.debug(error.strip(), __name__.upper()) + + +def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps) -> bool: + trim_frame_start = deepfuze.globals.trim_frame_start + trim_frame_end = deepfuze.globals.trim_frame_end + temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') + commands = [ '-i', target_path, '-s', str(temp_video_resolution), '-q:v', '0' ] + + if trim_frame_start is not None and trim_frame_end is not None: + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ]) + elif trim_frame_start is not None: + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(temp_video_fps) ]) + elif trim_frame_end is not None: + commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ]) + else: + commands.extend([ '-vf', 'fps=' + str(temp_video_fps) ]) + commands.extend([ '-vsync', '0', temp_frames_pattern ]) + return run_ffmpeg(commands) + + +def merge_video(target_path : str, output_video_resolution : str, output_video_fps : Fps) -> bool: + temp_video_fps = restrict_video_fps(target_path, output_video_fps) + 
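+	# Read the extracted frames back at the capped temp fps; the framerate
+	# filter appended below resamples the encode to the requested output fps.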
temp_file_path = get_temp_file_path(target_path) + temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') + commands = [ '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-s', str(output_video_resolution), '-c:v', deepfuze.globals.output_video_encoder ] + + if deepfuze.globals.output_video_encoder in [ 'libx264', 'libx265' ]: + output_video_compression = round(51 - (deepfuze.globals.output_video_quality * 0.51)) + commands.extend([ '-crf', str(output_video_compression), '-preset', deepfuze.globals.output_video_preset ]) + if deepfuze.globals.output_video_encoder in [ 'libvpx-vp9' ]: + output_video_compression = round(63 - (deepfuze.globals.output_video_quality * 0.63)) + commands.extend([ '-crf', str(output_video_compression) ]) + if deepfuze.globals.output_video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: + output_video_compression = round(51 - (deepfuze.globals.output_video_quality * 0.51)) + commands.extend([ '-cq', str(output_video_compression), '-preset', map_nvenc_preset(deepfuze.globals.output_video_preset) ]) + if deepfuze.globals.output_video_encoder in [ 'h264_amf', 'hevc_amf' ]: + output_video_compression = round(51 - (deepfuze.globals.output_video_quality * 0.51)) + commands.extend([ '-qp_i', str(output_video_compression), '-qp_p', str(output_video_compression), '-quality', map_amf_preset(deepfuze.globals.output_video_preset) ]) + commands.extend([ '-vf', 'framerate=fps=' + str(output_video_fps), '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_file_path ]) + return run_ffmpeg(commands) + + +def copy_image(target_path : str, temp_image_resolution : str) -> bool: + temp_file_path = get_temp_file_path(target_path) + is_webp = filetype.guess_mime(target_path) == 'image/webp' + temp_image_compression = 100 if is_webp else 0 + commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(temp_image_compression), '-y', temp_file_path ] + return run_ffmpeg(commands) + + +def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool: + temp_file_path = get_temp_file_path(target_path) + output_image_compression = round(31 - (deepfuze.globals.output_image_quality * 0.31)) + commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ] + return run_ffmpeg(commands) + + +def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: + commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-'] + process = open_ffmpeg(commands) + audio_buffer, _ = process.communicate() + if process.returncode == 0: + return audio_buffer + return None + + +def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) -> bool: + trim_frame_start = deepfuze.globals.trim_frame_start + trim_frame_end = deepfuze.globals.trim_frame_end + temp_file_path = get_temp_file_path(target_path) + commands = [ '-i', temp_file_path ] + + if trim_frame_start is not None: + start_time = trim_frame_start / output_video_fps + commands.extend([ '-ss', str(start_time) ]) + if trim_frame_end is not None: + end_time = trim_frame_end / output_video_fps + commands.extend([ '-to', str(end_time) ]) + commands.extend([ '-i', target_path, '-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ]) + return run_ffmpeg(commands) + + +def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: + temp_file_path = 
get_temp_file_path(target_path) + commands = [ '-i', temp_file_path, '-i', audio_path, '-af', 'apad', '-shortest', '-y', output_path ] + return run_ffmpeg(commands) + + +def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]: + return 'fast' + if output_video_preset == 'medium': + return 'medium' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'slow' + return None + + +def map_amf_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: + return 'speed' + if output_video_preset in [ 'faster', 'fast', 'medium' ]: + return 'balanced' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'quality' + return None diff --git a/deepfuze/filesystem.py b/deepfuze/filesystem.py new file mode 100644 index 0000000..ff965d6 --- /dev/null +++ b/deepfuze/filesystem.py @@ -0,0 +1,135 @@ +from typing import List, Optional +import glob +import os +import shutil +import tempfile +import filetype +from pathlib import Path + +import deepfuze.globals +from deepfuze.common_helper import is_windows + +if is_windows(): + import ctypes + + +def get_temp_frame_paths(target_path : str) -> List[str]: + temp_frames_pattern = get_temp_frames_pattern(target_path, '*') + return sorted(glob.glob(temp_frames_pattern)) + + +def get_temp_frames_pattern(target_path : str, temp_frame_prefix : str) -> str: + temp_directory_path = get_temp_directory_path(target_path) + return os.path.join(temp_directory_path, temp_frame_prefix + '.' + deepfuze.globals.temp_frame_format) + + +def get_temp_file_path(target_path : str) -> str: + _, target_extension = os.path.splitext(os.path.basename(target_path)) + temp_directory_path = get_temp_directory_path(target_path) + return os.path.join(temp_directory_path, 'temp' + target_extension) + + +def get_temp_directory_path(target_path : str) -> str: + target_name, _ = os.path.splitext(os.path.basename(target_path)) + temp_directory_path = os.path.join(tempfile.gettempdir(), 'facefusion') + return os.path.join(temp_directory_path, target_name) + + +def create_temp(target_path : str) -> None: + temp_directory_path = get_temp_directory_path(target_path) + Path(temp_directory_path).mkdir(parents = True, exist_ok = True) + + +def move_temp(target_path : str, output_path : str) -> None: + temp_file_path = get_temp_file_path(target_path) + + if is_file(temp_file_path): + if is_file(output_path): + os.remove(output_path) + shutil.move(temp_file_path, output_path) + + +def clear_temp(target_path : str) -> None: + temp_directory_path = get_temp_directory_path(target_path) + parent_directory_path = os.path.dirname(temp_directory_path) + + if not deepfuze.globals.keep_temp and is_directory(temp_directory_path): + shutil.rmtree(temp_directory_path, ignore_errors = True) + if os.path.exists(parent_directory_path) and not os.listdir(parent_directory_path): + os.rmdir(parent_directory_path) + + +def get_file_size(file_path : str) -> int: + if is_file(file_path): + return os.path.getsize(file_path) + return 0 + + +def is_file(file_path : str) -> bool: + return bool(file_path and os.path.isfile(file_path)) + + +def is_directory(directory_path : str) -> bool: + return bool(directory_path and os.path.isdir(directory_path)) + + +def is_audio(audio_path : str) -> bool: + return is_file(audio_path) and filetype.helpers.is_audio(audio_path) + + +def has_audio(audio_paths : List[str]) -> bool: + 
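+	# True when at least one of the given paths exists and filetype recognises it as audio.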
if audio_paths: + return any(is_audio(audio_path) for audio_path in audio_paths) + return False + + +def is_image(image_path : str) -> bool: + return is_file(image_path) and filetype.helpers.is_image(image_path) + + +def has_image(image_paths: List[str]) -> bool: + if image_paths: + return any(is_image(image_path) for image_path in image_paths) + return False + + +def is_video(video_path : str) -> bool: + return is_file(video_path) and filetype.helpers.is_video(video_path) + + +def filter_audio_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_audio(path) ] + return [] + + +def filter_image_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_image(path) ] + return [] + + +def resolve_relative_path(path : str) -> str: + return os.path.abspath(os.path.join(os.path.dirname(__file__), path)) + + +def list_directory(directory_path : str) -> Optional[List[str]]: + if is_directory(directory_path): + files = os.listdir(directory_path) + files = [ Path(file).stem for file in files if not Path(file).stem.startswith(('.', '__')) ] + return sorted(files) + return None + + +def sanitize_path_for_windows(full_path : str) -> Optional[str]: + buffer_size = 0 + + while True: + unicode_buffer = ctypes.create_unicode_buffer(buffer_size) + buffer_threshold = ctypes.windll.kernel32.GetShortPathNameW(full_path, unicode_buffer, buffer_size) #type:ignore[attr-defined] + + if buffer_size > buffer_threshold: + return unicode_buffer.value + if buffer_threshold == 0: + return None + buffer_size = buffer_threshold diff --git a/deepfuze/globals.py b/deepfuze/globals.py new file mode 100755 index 0000000..9b1a941 --- /dev/null +++ b/deepfuze/globals.py @@ -0,0 +1,60 @@ +from typing import List, Optional + +from deepfuze.typing import LogLevel, VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, OutputVideoEncoder, OutputVideoPreset, FaceDetectorModel, FaceRecognizerModel, TempFrameFormat, Padding + +# general +config_path : Optional[str] = None +source_paths : Optional[List[str]] = None +target_path : Optional[str] = None +output_path : Optional[str] = None +# misc +force_download : Optional[bool] = None +skip_download : Optional[bool] = None +headless : Optional[bool] = None +log_level : Optional[LogLevel] = None +# execution +execution_device_id : Optional[str] = None +execution_providers : List[str] = [] +execution_thread_count : Optional[int] = None +execution_queue_count : Optional[int] = None +# memory +video_memory_strategy : Optional[VideoMemoryStrategy] = None +system_memory_limit : Optional[int] = None +# face analyser +face_analyser_order : Optional[FaceAnalyserOrder] = None +face_analyser_age : Optional[FaceAnalyserAge] = None +face_analyser_gender : Optional[FaceAnalyserGender] = None +face_detector_model : Optional[FaceDetectorModel] = None +face_detector_size : Optional[str] = None +face_detector_score : Optional[float] = None +face_landmarker_score : Optional[float] = None +face_recognizer_model : Optional[FaceRecognizerModel] = None +# face selector +face_selector_mode : Optional[FaceSelectorMode] = None +reference_face_position : Optional[int] = None +reference_face_distance : Optional[float] = None +reference_frame_number : Optional[int] = None +# face mask +face_mask_types : Optional[List[FaceMaskType]] = None +face_mask_blur : Optional[float] = None +face_mask_padding : Optional[Padding] = None +face_mask_regions : Optional[List[FaceMaskRegion]] = 
None +# frame extraction +trim_frame_start : Optional[int] = None +trim_frame_end : Optional[int] = None +temp_frame_format : Optional[TempFrameFormat] = None +keep_temp : Optional[bool] = None +# output creation +output_image_quality : Optional[int] = None +output_image_resolution : Optional[str] = None +output_video_encoder : Optional[OutputVideoEncoder] = None +output_video_preset : Optional[OutputVideoPreset] = None +output_video_quality : Optional[int] = None +output_video_resolution : Optional[str] = None +output_video_fps : Optional[float] = None +skip_audio : Optional[bool] = None +# frame processors +frame_processors : List[str] = [] +# uis +open_browser : Optional[bool] = None +ui_layouts : List[str] = [] diff --git a/deepfuze/installer.py b/deepfuze/installer.py new file mode 100644 index 0000000..bb18203 --- /dev/null +++ b/deepfuze/installer.py @@ -0,0 +1,77 @@ +from typing import Dict, Tuple +import sys +import os +import tempfile +import subprocess +import inquirer +from argparse import ArgumentParser, HelpFormatter + +from deepfuze import metadata, wording +from deepfuze.common_helper import is_linux, is_macos, is_windows + +if is_macos(): + os.environ['SYSTEM_VERSION_COMPAT'] = '0' + +ONNXRUNTIMES : Dict[str, Tuple[str, str]] = {} + +if is_macos(): + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.3') +else: + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.3') + ONNXRUNTIMES['cuda-12.2'] = ('onnxruntime-gpu', '1.17.1') + ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.17.1') + ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.15.0') +if is_linux(): + ONNXRUNTIMES['rocm-5.4.2'] = ('onnxruntime-rocm', '1.16.3') + ONNXRUNTIMES['rocm-5.6'] = ('onnxruntime-rocm', '1.16.3') +if is_windows(): + ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.17.3') + + +def cli() -> None: + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 200)) + program.add_argument('--onnxruntime', help = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) + program.add_argument('--skip-conda', help = wording.get('help.skip_conda'), action = 'store_true') + program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') + run(program) + + +def run(program : ArgumentParser) -> None: + args = program.parse_args() + python_id = 'cp' + str(sys.version_info.major) + str(sys.version_info.minor) + + if not args.skip_conda and 'CONDA_PREFIX' not in os.environ: + sys.stdout.write(wording.get('conda_not_activated') + os.linesep) + sys.exit(1) + if args.onnxruntime: + answers =\ + { + 'onnxruntime': args.onnxruntime + } + else: + answers = inquirer.prompt( + [ + inquirer.List('onnxruntime', message = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = list(ONNXRUNTIMES.keys())) + ]) + if answers: + onnxruntime = answers['onnxruntime'] + onnxruntime_name, onnxruntime_version = ONNXRUNTIMES[onnxruntime] + + subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) + if onnxruntime == 'rocm-5.4.2' or onnxruntime == 'rocm-5.6': + if python_id in [ 'cp39', 'cp310', 'cp311' ]: + rocm_version = onnxruntime.replace('-', '') + rocm_version = rocm_version.replace('.', '') + wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+' + rocm_version + '-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' + wheel_path = os.path.join(tempfile.gettempdir(), 
wheel_name) + wheel_url = 'https://download.onnxruntime.ai/' + wheel_name + subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) + subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) + subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) + os.remove(wheel_path) + else: + subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) + if onnxruntime == 'cuda-12.2': + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) + else: + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ]) diff --git a/deepfuze/logger.py b/deepfuze/logger.py new file mode 100644 index 0000000..7409851 --- /dev/null +++ b/deepfuze/logger.py @@ -0,0 +1,47 @@ +from typing import Dict +from logging import basicConfig, getLogger, Logger, DEBUG, INFO, WARNING, ERROR + +from deepfuze.typing import LogLevel + + +def init(log_level : LogLevel) -> None: + basicConfig(format = None) + get_package_logger().setLevel(get_log_levels()[log_level]) + + +def get_package_logger() -> Logger: + return getLogger('facefusion') + + +def debug(message : str, scope : str) -> None: + get_package_logger().debug('[' + scope + '] ' + message) + + +def info(message : str, scope : str) -> None: + get_package_logger().info('[' + scope + '] ' + message) + + +def warn(message : str, scope : str) -> None: + get_package_logger().warning('[' + scope + '] ' + message) + + +def error(message : str, scope : str) -> None: + get_package_logger().error('[' + scope + '] ' + message) + + +def enable() -> None: + get_package_logger().disabled = False + + +def disable() -> None: + get_package_logger().disabled = True + + +def get_log_levels() -> Dict[LogLevel, int]: + return\ + { + 'error': ERROR, + 'warn': WARNING, + 'info': INFO, + 'debug': DEBUG + } diff --git a/deepfuze/memory.py b/deepfuze/memory.py new file mode 100644 index 0000000..7b746be --- /dev/null +++ b/deepfuze/memory.py @@ -0,0 +1,21 @@ +from deepfuze.common_helper import is_macos, is_windows + +if is_windows(): + import ctypes +else: + import resource + + +def limit_system_memory(system_memory_limit : int = 1) -> bool: + if is_macos(): + system_memory_limit = system_memory_limit * (1024 ** 6) + else: + system_memory_limit = system_memory_limit * (1024 ** 3) + try: + if is_windows(): + ctypes.windll.kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(system_memory_limit), ctypes.c_size_t(system_memory_limit)) #type:ignore[attr-defined] + else: + resource.setrlimit(resource.RLIMIT_DATA, (system_memory_limit, system_memory_limit)) + return True + except Exception: + return False diff --git a/deepfuze/metadata.py b/deepfuze/metadata.py new file mode 100644 index 0000000..402cdc6 --- /dev/null +++ b/deepfuze/metadata.py @@ -0,0 +1,13 @@ +METADATA =\ +{ + 'name': 'FaceFusion', + 'description': 'Next generation face swapper and enhancer', + 'version': '2.6.0', + 'license': 'MIT', + 'author': 'Henry Ruhs', + 'url': 'https://deepfuze.io' +} + + +def get(key : str) -> str: + return METADATA[key] diff --git a/deepfuze/normalizer.py b/deepfuze/normalizer.py new file mode 100644 index 0000000..5068dce --- /dev/null +++ b/deepfuze/normalizer.py @@ -0,0 +1,39 @@ +from typing import List, Optional +import hashlib +import os + +import deepfuze.globals +from deepfuze.filesystem
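A worked example (not part of the commit) of the unit conversion in `limit_system_memory`; the argument is interpreted as gibibytes on Linux and Windows (note that the macOS branch above multiplies by `1024 ** 6` rather than `1024 ** 3`):

```python
system_memory_limit = 4                    # gibibytes
print(system_memory_limit * (1024 ** 3))   # 4294967296 bytes handed to setrlimit / SetProcessWorkingSetSize
```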
import is_directory +from deepfuze.typing import Padding, Fps + + +def normalize_output_path(target_path : Optional[str], output_path : Optional[str]) -> Optional[str]: + if target_path and output_path: + target_name, target_extension = os.path.splitext(os.path.basename(target_path)) + if is_directory(output_path): + output_hash = hashlib.sha1(str(deepfuze.globals.__dict__).encode('utf-8')).hexdigest()[:8] + output_name = target_name + '-' + output_hash + return os.path.join(output_path, output_name + target_extension) + output_name, output_extension = os.path.splitext(os.path.basename(output_path)) + output_directory_path = os.path.dirname(output_path) + if is_directory(output_directory_path) and output_extension: + return os.path.join(output_directory_path, output_name + target_extension) + return None + + +def normalize_padding(padding : Optional[List[int]]) -> Optional[Padding]: + if padding and len(padding) == 1: + return tuple([ padding[0] ] * 4) #type:ignore[return-value] + if padding and len(padding) == 2: + return tuple([ padding[0], padding[1], padding[0], padding[1] ]) #type:ignore[return-value] + if padding and len(padding) == 3: + return tuple([ padding[0], padding[1], padding[2], padding[1] ]) #type:ignore[return-value] + if padding and len(padding) == 4: + return tuple(padding) #type:ignore[return-value] + return None + + +def normalize_fps(fps : Optional[float]) -> Optional[Fps]: + if fps is not None: + return max(1.0, min(fps, 60.0)) + return None diff --git a/deepfuze/process_manager.py b/deepfuze/process_manager.py new file mode 100644 index 0000000..1dfa0dd --- /dev/null +++ b/deepfuze/process_manager.py @@ -0,0 +1,53 @@ +from typing import Generator, List + +from deepfuze.typing import QueuePayload, ProcessState + +PROCESS_STATE : ProcessState = 'pending' + + +def get_process_state() -> ProcessState: + return PROCESS_STATE + + +def set_process_state(process_state : ProcessState) -> None: + global PROCESS_STATE + + PROCESS_STATE = process_state + + +def is_checking() -> bool: + return get_process_state() == 'checking' + + +def is_processing() -> bool: + return get_process_state() == 'processing' + + +def is_stopping() -> bool: + return get_process_state() == 'stopping' + + +def is_pending() -> bool: + return get_process_state() == 'pending' + + +def check() -> None: + set_process_state('checking') + + +def start() -> None: + set_process_state('processing') + + +def stop() -> None: + set_process_state('stopping') + + +def end() -> None: + set_process_state('pending') + + +def manage(queue_payloads : List[QueuePayload]) -> Generator[QueuePayload, None, None]: + for query_payload in queue_payloads: + if is_processing(): + yield query_payload diff --git a/deepfuze/processors/__init__.py b/deepfuze/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deepfuze/processors/__pycache__/__init__.cpython-310.pyc b/deepfuze/processors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..c08eed1 Binary files /dev/null and b/deepfuze/processors/__pycache__/__init__.cpython-310.pyc differ diff --git a/deepfuze/processors/__pycache__/__init__.cpython-311.pyc b/deepfuze/processors/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..96438c6 Binary files /dev/null and b/deepfuze/processors/__pycache__/__init__.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/__init__.py b/deepfuze/processors/frame/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
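Worked examples (not part of the commit) for the CSS-style shorthand expansion in `normalize_padding` and the clamp in `normalize_fps`:

```python
from deepfuze.normalizer import normalize_fps, normalize_padding

print(normalize_padding([ 10 ]))           # (10, 10, 10, 10)
print(normalize_padding([ 10, 20 ]))       # (10, 20, 10, 20)
print(normalize_padding([ 10, 20, 30 ]))   # (10, 20, 30, 20)
print(normalize_fps(120.0))                # 60.0 -- clamped into [1.0, 60.0]
print(normalize_fps(None))                 # None
```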
a/deepfuze/processors/frame/__pycache__/__init__.cpython-310.pyc b/deepfuze/processors/frame/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..36eb4f3 Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/__init__.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/__init__.cpython-311.pyc b/deepfuze/processors/frame/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..5720c24 Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/__init__.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/choices.cpython-310.pyc b/deepfuze/processors/frame/__pycache__/choices.cpython-310.pyc new file mode 100644 index 0000000..93efcdf Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/choices.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/choices.cpython-311.pyc b/deepfuze/processors/frame/__pycache__/choices.cpython-311.pyc new file mode 100644 index 0000000..320bd46 Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/choices.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/core.cpython-310.pyc b/deepfuze/processors/frame/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000..c01a8ba Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/core.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/core.cpython-311.pyc b/deepfuze/processors/frame/__pycache__/core.cpython-311.pyc new file mode 100644 index 0000000..8a7818e Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/core.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/globals.cpython-310.pyc b/deepfuze/processors/frame/__pycache__/globals.cpython-310.pyc new file mode 100644 index 0000000..518cdfc Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/globals.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/globals.cpython-311.pyc b/deepfuze/processors/frame/__pycache__/globals.cpython-311.pyc new file mode 100644 index 0000000..4d035fa Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/globals.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/typings.cpython-310.pyc b/deepfuze/processors/frame/__pycache__/typings.cpython-310.pyc new file mode 100644 index 0000000..c7ff5d2 Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/typings.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/__pycache__/typings.cpython-311.pyc b/deepfuze/processors/frame/__pycache__/typings.cpython-311.pyc new file mode 100644 index 0000000..3c16fe3 Binary files /dev/null and b/deepfuze/processors/frame/__pycache__/typings.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/choices.py b/deepfuze/processors/frame/choices.py new file mode 100755 index 0000000..55b9940 --- /dev/null +++ b/deepfuze/processors/frame/choices.py @@ -0,0 +1,16 @@ +from typing import List + +from deepfuze.common_helper import create_int_range +from deepfuze.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel + +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender' ] +face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 
'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus' ] +face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256' ] +frame_colorizer_models : List[FrameColorizerModel] = [ 'ddcolor', 'ddcolor_artistic', 'deoldify', 'deoldify_artistic', 'deoldify_stable' ] +frame_colorizer_sizes : List[str] = [ '192x192', '256x256', '384x384', '512x512' ] +frame_enhancer_models : List[FrameEnhancerModel] = [ 'clear_reality_x4', 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4', 'ultra_sharp_x4' ] +lip_syncer_models : List[LipSyncerModel] = [ 'wav2lip_gan' ] + +face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_colorizer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) diff --git a/deepfuze/processors/frame/core.py b/deepfuze/processors/frame/core.py new file mode 100644 index 0000000..f0bec91 --- /dev/null +++ b/deepfuze/processors/frame/core.py @@ -0,0 +1,116 @@ +import os +import sys +import importlib +from concurrent.futures import ThreadPoolExecutor, as_completed +from queue import Queue +from types import ModuleType +from typing import Any, List +from tqdm import tqdm + +import deepfuze.globals +from deepfuze.typing import ProcessFrames, QueuePayload +from deepfuze.execution import encode_execution_providers +from deepfuze import logger, wording + +FRAME_PROCESSORS_MODULES : List[ModuleType] = [] +FRAME_PROCESSORS_METHODS =\ +[ + 'get_frame_processor', + 'clear_frame_processor', + 'get_options', + 'set_options', + 'register_args', + 'apply_args', + 'pre_check', + 'post_check', + 'pre_process', + 'post_process', + 'get_reference_frame', + 'process_frame', + 'process_frames', + 'process_image', + 'process_video' +] + + +def load_frame_processor_module(frame_processor : str) -> Any: + try: + frame_processor_module = importlib.import_module('deepfuze.processors.frame.modules.' 
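An interface sketch (hypothetical, not part of the commit): a module importable as `deepfuze.processors.frame.modules.<name>` is accepted by `load_frame_processor_module` only if it exposes every name in `FRAME_PROCESSORS_METHODS`; a stub that passes the `hasattr` check can be built like this:

```python
import types

REQUIRED_METHODS = [ 'get_frame_processor', 'clear_frame_processor', 'process_frame', 'process_video' ]   # excerpt of the full list above

noop_processor = types.ModuleType('noop_processor')
for method_name in REQUIRED_METHODS:
	setattr(noop_processor, method_name, lambda *args, **kwargs: None)
assert all(hasattr(noop_processor, method_name) for method_name in REQUIRED_METHODS)
```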
+ frame_processor) + for method_name in FRAME_PROCESSORS_METHODS: + if not hasattr(frame_processor_module, method_name): + raise NotImplementedError + except ModuleNotFoundError as exception: + logger.error(wording.get('frame_processor_not_loaded').format(frame_processor = frame_processor), __name__.upper()) + logger.debug(exception.msg, __name__.upper()) + sys.exit(1) + except NotImplementedError: + logger.error(wording.get('frame_processor_not_implemented').format(frame_processor = frame_processor), __name__.upper()) + sys.exit(1) + return frame_processor_module + + +def get_frame_processors_modules(frame_processors : List[str]) -> List[ModuleType]: + global FRAME_PROCESSORS_MODULES + + if not FRAME_PROCESSORS_MODULES: + for frame_processor in frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + FRAME_PROCESSORS_MODULES.append(frame_processor_module) + return FRAME_PROCESSORS_MODULES + + +def clear_frame_processors_modules() -> None: + global FRAME_PROCESSORS_MODULES + + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + frame_processor_module.clear_frame_processor() + FRAME_PROCESSORS_MODULES = [] + + +def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : ProcessFrames) -> None: + queue_payloads = create_queue_payloads(temp_frame_paths) + with tqdm(total = len(queue_payloads), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = deepfuze.globals.log_level in [ 'warn', 'error' ]) as progress: + progress.set_postfix( + { + 'execution_providers': encode_execution_providers(deepfuze.globals.execution_providers), + 'execution_thread_count': deepfuze.globals.execution_thread_count, + 'execution_queue_count': deepfuze.globals.execution_queue_count + }) + with ThreadPoolExecutor(max_workers = deepfuze.globals.execution_thread_count) as executor: + futures = [] + queue : Queue[QueuePayload] = create_queue(queue_payloads) + queue_per_future = max(len(queue_payloads) // deepfuze.globals.execution_thread_count * deepfuze.globals.execution_queue_count, 1) + while not queue.empty(): + future = executor.submit(process_frames, source_paths, pick_queue(queue, queue_per_future), progress.update) + futures.append(future) + for future_done in as_completed(futures): + future_done.result() + + +def create_queue(queue_payloads : List[QueuePayload]) -> Queue[QueuePayload]: + queue : Queue[QueuePayload] = Queue() + for queue_payload in queue_payloads: + queue.put(queue_payload) + return queue + + +def pick_queue(queue : Queue[QueuePayload], queue_per_future : int) -> List[QueuePayload]: + queues = [] + for _ in range(queue_per_future): + if not queue.empty(): + queues.append(queue.get()) + return queues + + +def create_queue_payloads(temp_frame_paths : List[str]) -> List[QueuePayload]: + queue_payloads = [] + temp_frame_paths = sorted(temp_frame_paths, key = os.path.basename) + + for frame_number, frame_path in enumerate(temp_frame_paths): + frame_payload : QueuePayload =\ + { + 'frame_number': frame_number, + 'frame_path': frame_path + } + queue_payloads.append(frame_payload) + return queue_payloads diff --git a/deepfuze/processors/frame/globals.py b/deepfuze/processors/frame/globals.py new file mode 100755 index 0000000..a94be83 --- /dev/null +++ b/deepfuze/processors/frame/globals.py @@ -0,0 +1,14 @@ +from typing import List, Optional + +from deepfuze.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, 
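A worked example (not part of the commit) of the chunk sizing in `multi_process_frames`: with 100 extracted frames, an `execution_thread_count` of 4 and an `execution_queue_count` of 1:

```python
queue_payloads = list(range(100))
execution_thread_count = 4
execution_queue_count = 1
queue_per_future = max(len(queue_payloads) // execution_thread_count * execution_queue_count, 1)
print(queue_per_future)   # 25 -- pick_queue() pops up to 25 payloads for each submitted future
```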
FrameEnhancerModel, LipSyncerModel + +face_debugger_items : Optional[List[FaceDebuggerItem]] = None +face_enhancer_model : Optional[FaceEnhancerModel] = None +face_enhancer_blend : Optional[int] = None +face_swapper_model : Optional[FaceSwapperModel] = None +frame_colorizer_model : Optional[FrameColorizerModel] = None +frame_colorizer_blend : Optional[int] = None +frame_colorizer_size : Optional[str] = None +frame_enhancer_model : Optional[FrameEnhancerModel] = None +frame_enhancer_blend : Optional[int] = None +lip_syncer_model : Optional[LipSyncerModel] = None diff --git a/deepfuze/processors/frame/modules/__init__.py b/deepfuze/processors/frame/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..7a49965 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..36321b5 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/__init__.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc new file mode 100644 index 0000000..4828999 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-311.pyc new file mode 100644 index 0000000..d726eef Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_debugger.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc new file mode 100644 index 0000000..c0bc8d9 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-311.pyc new file mode 100644 index 0000000..25adbdc Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_enhancer.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc new file mode 100644 index 0000000..59e522c Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-311.pyc new file mode 100644 index 0000000..9262bfd Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/face_swapper.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc new file mode 100644 index 0000000..54ed46a Binary files /dev/null and 
b/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-311.pyc new file mode 100644 index 0000000..4de53d7 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/frame_colorizer.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc new file mode 100644 index 0000000..df8ddbc Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-311.pyc new file mode 100644 index 0000000..5e04369 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/frame_enhancer.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc b/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc new file mode 100644 index 0000000..f3c0e56 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc differ diff --git a/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-311.pyc b/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-311.pyc new file mode 100644 index 0000000..4f179b2 Binary files /dev/null and b/deepfuze/processors/frame/modules/__pycache__/lip_syncer.cpython-311.pyc differ diff --git a/deepfuze/processors/frame/modules/face_debugger.py b/deepfuze/processors/frame/modules/face_debugger.py new file mode 100755 index 0000000..dbd3a25 --- /dev/null +++ b/deepfuze/processors/frame/modules/face_debugger.py @@ -0,0 +1,192 @@ +from typing import Any, List, Literal +from argparse import ArgumentParser +import cv2 +import numpy + +import deepfuze.globals +import deepfuze.processors.frame.core as frame_processors +from deepfuze import config, process_manager, wording +from deepfuze.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser +from deepfuze.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from deepfuze.face_helper import warp_face_by_face_landmark_5, categorize_age, categorize_gender +from deepfuze.face_store import get_reference_faces +from deepfuze.content_analyser import clear_content_analyser +from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, QueuePayload +from deepfuze.vision import read_image, read_static_image, write_image +from deepfuze.processors.frame.typings import FaceDebuggerInputs +from deepfuze.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices + +NAME = __name__.upper() + + +def get_frame_processor() -> None: + pass + + +def clear_frame_processor() -> None: + pass + + +def get_options(key : Literal['model']) -> None: + pass + + +def set_options(key : Literal['model'], value : Any) -> None: + pass + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--face-debugger-items', help = wording.get('help.face_debugger_items').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 
'face-landmark-5/68 face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_debugger_items = args.face_debugger_items + + +def pre_check() -> bool: + return True + + +def post_check() -> bool: + return True + + +def pre_process(mode : ProcessMode) -> bool: + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if deepfuze.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + + +def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + primary_color = (0, 0, 255) + secondary_color = (0, 255, 0) + tertiary_color = (255, 255, 0) + bounding_box = target_face.bounding_box.astype(numpy.int32) + temp_vision_frame = temp_vision_frame.copy() + has_face_landmark_5_fallback = numpy.array_equal(target_face.landmarks.get('5'), target_face.landmarks.get('5/68')) + has_face_landmark_68_fallback = numpy.array_equal(target_face.landmarks.get('68'), target_face.landmarks.get('68/5')) + + if 'bounding-box' in frame_processors_globals.face_debugger_items: + cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), primary_color, 2) + if 'face-mask' in frame_processors_globals.face_debugger_items: + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'arcface_128_v2', (512, 512)) + inverse_matrix = cv2.invertAffineTransform(affine_matrix) + temp_size = temp_vision_frame.shape[:2][::-1] + crop_mask_list = [] + if 'box' in deepfuze.globals.face_mask_types: + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], 0, deepfuze.globals.face_mask_padding) + crop_mask_list.append(box_mask) + if 'occlusion' in deepfuze.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + if 'region' in deepfuze.globals.face_mask_types: + region_mask = create_region_mask(crop_vision_frame, deepfuze.globals.face_mask_regions) + crop_mask_list.append(region_mask) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + crop_mask = (crop_mask * 255).astype(numpy.uint8) + inverse_vision_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_size) + inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] + inverse_vision_frame[inverse_vision_frame > 0] = 255 + inverse_contours = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] + cv2.drawContours(temp_vision_frame, inverse_contours, -1, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) + if 'face-landmark-5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5')): + face_landmark_5 = target_face.landmarks.get('5').astype(numpy.int32) + for index in range(face_landmark_5.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) + if 'face-landmark-5/68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5/68')): + face_landmark_5_68 = target_face.landmarks.get('5/68').astype(numpy.int32) + for index in 
range(face_landmark_5_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5_68[index][0], face_landmark_5_68[index][1]), 3, tertiary_color if has_face_landmark_5_fallback else secondary_color, -1) + if 'face-landmark-68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, tertiary_color if has_face_landmark_68_fallback else secondary_color, -1) + if 'face-landmark-68/5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68/5').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, primary_color, -1) + if bounding_box[3] - bounding_box[1] > 50 and bounding_box[2] - bounding_box[0] > 50: + top = bounding_box[1] + left = bounding_box[0] - 20 + if 'face-detector-score' in frame_processors_globals.face_debugger_items: + face_score_text = str(round(target_face.scores.get('detector'), 2)) + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'face-landmarker-score' in frame_processors_globals.face_debugger_items: + face_score_text = str(round(target_face.scores.get('landmarker'), 2)) + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) + if 'age' in frame_processors_globals.face_debugger_items: + face_age_text = categorize_age(target_face.age) + top = top + 20 + cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'gender' in frame_processors_globals.face_debugger_items: + face_gender_text = categorize_gender(target_face.gender) + top = top + 20 + cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') + + if deepfuze.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = debug_face(target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = debug_face(target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, deepfuze.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = debug_face(similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + + for queue_payload in 
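A standalone sketch (not part of the commit, hypothetical coordinates) of the OpenCV primitives `debug_face` relies on: a bounding box in the primary color plus a score label stepped 20 px down from the top-left corner:

```python
import cv2
import numpy

frame = numpy.zeros((480, 640, 3), dtype = numpy.uint8)
bounding_box = [ 200, 120, 400, 360 ]   # x1, y1, x2, y2
cv2.rectangle(frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), (0, 0, 255), 2)
cv2.putText(frame, '0.92', (bounding_box[0] - 20, bounding_box[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
```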
process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/deepfuze/processors/frame/modules/face_enhancer.py b/deepfuze/processors/frame/modules/face_enhancer.py new file mode 100755 index 0000000..de58d77 --- /dev/null +++ b/deepfuze/processors/frame/modules/face_enhancer.py @@ -0,0 +1,301 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import deepfuze.globals +import deepfuze.processors.frame.core as frame_processors +from deepfuze import config, process_manager, logger, wording +from deepfuze.face_analyser import get_many_faces, clear_face_analyser, find_similar_faces, get_one_face +from deepfuze.face_masker import create_static_box_mask, create_occlusion_mask, clear_face_occluder +from deepfuze.face_helper import warp_face_by_face_landmark_5, paste_back +from deepfuze.execution import apply_execution_provider_options +from deepfuze.content_analyser import clear_content_analyser +from deepfuze.face_store import get_reference_faces +from deepfuze.normalizer import normalize_output_path +from deepfuze.thread_helper import thread_lock, thread_semaphore +from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from deepfuze.common_helper import create_metavar +from deepfuze.filesystem import is_file, is_image, is_video, resolve_relative_path +from deepfuze.download import conditional_download, is_download_done +from deepfuze.vision import read_image, read_static_image, write_image +from deepfuze.processors.frame.typings import FaceEnhancerInputs +from deepfuze.processors.frame import globals as frame_processors_globals +from deepfuze.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'codeformer': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/codeformer.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/codeformer.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gfpgan_1.2': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.2.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gfpgan_1.2.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gfpgan_1.3': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.3.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gfpgan_1.3.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 
'gfpgan_1.4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.4.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gfpgan_1.4.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gpen_bfr_256': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_256.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gpen_bfr_256.onnx'), + 'template': 'arcface_128_v2', + 'size': (256, 256) + }, + 'gpen_bfr_512': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_512.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gpen_bfr_512.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gpen_bfr_1024': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_1024.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gpen_bfr_1024.onnx'), + 'template': 'ffhq_512', + 'size': (1024, 1024) + }, + 'gpen_bfr_2048': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_2048.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/gpen_bfr_2048.onnx'), + 'template': 'ffhq_512', + 'size': (2048, 2048) + }, + 'restoreformer_plus_plus': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/restoreformer_plus_plus.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/restoreformer_plus_plus.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.face_enhancer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--face-enhancer-model', help = wording.get('help.face_enhancer_model'), default = config.get_str_value('frame_processors.face_enhancer_model', 'gfpgan_1.4'), choices = frame_processors_choices.face_enhancer_models) + program.add_argument('--face-enhancer-blend', help = wording.get('help.face_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.face_enhancer_blend', '80'), choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_enhancer_model = args.face_enhancer_model + frame_processors_globals.face_enhancer_blend = args.face_enhancer_blend + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not 
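A usage sketch (not part of the commit) of the lazy-singleton pattern above: the ONNX session is created once under a thread lock from the selected model's path, then reused until the module is asked to release it:

```python
session = get_frame_processor()            # first call builds the InferenceSession (e.g. gfpgan_1.4.onnx)
assert session is get_frame_processor()    # later calls return the cached session
clear_frame_processor()                    # post_process() does this under 'strict'/'moderate' memory strategies
```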
deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not deepfuze.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if mode in [ 'output', 'preview' ] and not is_image(deepfuze.globals.target_path) and not is_video(deepfuze.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if deepfuze.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + + +def enhance_face(target_face: Face, temp_vision_frame : VisionFrame) -> VisionFrame: + model_template = get_options('model').get('template') + model_size = get_options('model').get('size') + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], deepfuze.globals.face_mask_blur, (0, 0, 0, 0)) + crop_mask_list =\ + [ + box_mask + ] + + if 'occlusion' in deepfuze.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_enhance(crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + return temp_vision_frame + + +def apply_enhance(crop_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + frame_processor_inputs = {} + + for frame_processor_input in frame_processor.get_inputs(): + if frame_processor_input.name == 'input': + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame + if frame_processor_input.name == 'weight': + weight = numpy.array([ 1 ]).astype(numpy.double) + frame_processor_inputs[frame_processor_input.name] = weight + with thread_semaphore(): + crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame + + +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - 0.5) / 0.5 + crop_vision_frame = numpy.expand_dims(crop_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return 
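A worked example (not part of the commit) of the pixel mapping in `prepare_crop_frame` and its inverse in `normalize_crop_frame` below: BGR `uint8` values are scaled into the `[-1, 1]` float range for the model and restored exactly on the way out:

```python
import numpy

pixel = numpy.uint8(200)
normalized = (pixel / 255.0 - 0.5) / 0.5                               # 0.5686...
restored = ((numpy.clip(normalized, -1, 1) + 1) / 2 * 255.0).round()   # 200.0 again
print(restored)
```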
crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = numpy.clip(crop_vision_frame, -1, 1) + crop_vision_frame = (crop_vision_frame + 1) / 2 + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame.astype(numpy.uint8)[:, :, ::-1] + return crop_vision_frame + + +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: + face_enhancer_blend = 1 - (frame_processors_globals.face_enhancer_blend / 100) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return enhance_face(target_face, temp_vision_frame) + + +def process_frame(inputs : FaceEnhancerInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') + + if deepfuze.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = enhance_face(target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = enhance_face(target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, deepfuze.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = enhance_face(similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_path : str, target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/deepfuze/processors/frame/modules/face_swapper.py b/deepfuze/processors/frame/modules/face_swapper.py new file mode 100755 index 0000000..49f5068 --- /dev/null +++ b/deepfuze/processors/frame/modules/face_swapper.py @@ -0,0 +1,369 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import numpy +import onnx +import onnxruntime +from onnx import numpy_helper + +import deepfuze.globals +import 
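A worked example (not part of the commit) of the blend weighting above: with the default `face_enhancer_blend` of 80, `blend_frame` keeps 20 % of the original frame and mixes in 80 % of the enhanced paste:

```python
face_enhancer_blend = 1 - (80 / 100)   # 0.2 -- weight of the original frame
# equivalent call: cv2.addWeighted(temp_vision_frame, 0.2, paste_vision_frame, 0.8, 0)
```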
deepfuze.processors.frame.core as frame_processors +from deepfuze import config, process_manager, logger, wording +from deepfuze.execution import has_execution_provider, apply_execution_provider_options +from deepfuze.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser +from deepfuze.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from deepfuze.face_helper import warp_face_by_face_landmark_5, paste_back +from deepfuze.face_store import get_reference_faces +from deepfuze.content_analyser import clear_content_analyser +from deepfuze.normalizer import normalize_output_path +from deepfuze.thread_helper import thread_lock, conditional_thread_semaphore +from deepfuze.typing import Face, Embedding, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from deepfuze.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path +from deepfuze.download import conditional_download, is_download_done +from deepfuze.vision import read_image, read_static_image, read_static_images, write_image +from deepfuze.processors.frame.typings import FaceSwapperInputs +from deepfuze.processors.frame import globals as frame_processors_globals +from deepfuze.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +MODEL_INITIALIZER = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'blendswap_256': + { + 'type': 'blendswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/blendswap_256.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/blendswap_256.onnx'), + 'template': 'ffhq_512', + 'size': (256, 256), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'inswapper_128': + { + 'type': 'inswapper', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/inswapper_128.onnx'), + 'template': 'arcface_128_v2', + 'size': (128, 128), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'inswapper_128_fp16': + { + 'type': 'inswapper', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128_fp16.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/inswapper_128_fp16.onnx'), + 'template': 'arcface_128_v2', + 'size': (128, 128), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'simswap_256': + { + 'type': 'simswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_256.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/simswap_256.onnx'), + 'template': 'arcface_112_v1', + 'size': (256, 256), + 'mean': [ 0.485, 0.456, 0.406 ], + 'standard_deviation': [ 0.229, 0.224, 0.225 ] + }, + 'simswap_512_unofficial': + { + 'type': 'simswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_512_unofficial.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/simswap_512_unofficial.onnx'), + 'template': 'arcface_112_v1', + 'size': (512, 512), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'uniface_256': + { + 'type': 'uniface', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/uniface_256.onnx', + 'path': 
resolve_relative_path('../../../models/deepfuze/uniface_256.onnx'), + 'template': 'ffhq_512', + 'size': (256, 256), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_model_initializer() -> Any: + global MODEL_INITIALIZER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if MODEL_INITIALIZER is None: + model_path = get_options('model').get('path') + model = onnx.load(model_path) + MODEL_INITIALIZER = numpy_helper.to_array(model.graph.initializer[-1]) + return MODEL_INITIALIZER + + +def clear_model_initializer() -> None: + global MODEL_INITIALIZER + + MODEL_INITIALIZER = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.face_swapper_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + if has_execution_provider('CoreMLExecutionProvider') or has_execution_provider('OpenVINOExecutionProvider'): + face_swapper_model_fallback = 'inswapper_128' + else: + face_swapper_model_fallback = 'inswapper_128_fp16' + program.add_argument('--face-swapper-model', help = wording.get('help.face_swapper_model'), default = config.get_str_value('frame_processors.face_swapper_model', face_swapper_model_fallback), choices = frame_processors_choices.face_swapper_models) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_swapper_model = args.face_swapper_model + if args.face_swapper_model == 'blendswap_256': + deepfuze.globals.face_recognizer_model = 'arcface_blendswap' + if args.face_swapper_model == 'inswapper_128' or args.face_swapper_model == 'inswapper_128_fp16': + deepfuze.globals.face_recognizer_model = 'arcface_inswapper' + if args.face_swapper_model == 'simswap_256' or args.face_swapper_model == 'simswap_512_unofficial': + deepfuze.globals.face_recognizer_model = 'arcface_simswap' + if args.face_swapper_model == 'uniface_256': + deepfuze.globals.face_recognizer_model = 'arcface_uniface' + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not deepfuze.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') 
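A summary sketch (not part of the commit) of the coupling `apply_args` enforces above: each swapper checkpoint dictates the face recognizer used for similarity search, and `register_args` falls back from the fp16 checkpoint to `inswapper_128` when CoreML or OpenVINO is among the execution providers:

```python
FACE_RECOGNIZER_BY_SWAPPER = \
{
	'blendswap_256': 'arcface_blendswap',
	'inswapper_128': 'arcface_inswapper',
	'inswapper_128_fp16': 'arcface_inswapper',
	'simswap_256': 'arcface_simswap',
	'simswap_512_unofficial': 'arcface_simswap',
	'uniface_256': 'arcface_uniface'
}
```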
+ wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if not has_image(deepfuze.globals.source_paths): + logger.error(wording.get('select_image_source') + wording.get('exclamation_mark'), NAME) + return False + source_image_paths = filter_image_paths(deepfuze.globals.source_paths) + source_frames = read_static_images(source_image_paths) + for source_frame in source_frames: + if not get_one_face(source_frame): + logger.error(wording.get('no_source_face_detected') + wording.get('exclamation_mark'), NAME) + return False + if mode in [ 'output', 'preview' ] and not is_image(deepfuze.globals.target_path) and not is_video(deepfuze.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate': + clear_model_initializer() + clear_frame_processor() + if deepfuze.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + + +def swap_face(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + model_template = get_options('model').get('template') + model_size = get_options('model').get('size') + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) + crop_mask_list = [] + + if 'box' in deepfuze.globals.face_mask_types: + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], deepfuze.globals.face_mask_blur, deepfuze.globals.face_mask_padding) + crop_mask_list.append(box_mask) + if 'occlusion' in deepfuze.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_swap(source_face, crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) + if 'region' in deepfuze.globals.face_mask_types: + region_mask = create_region_mask(crop_vision_frame, deepfuze.globals.face_mask_regions) + crop_mask_list.append(region_mask) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + temp_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return temp_vision_frame + + +def apply_swap(source_face : Face, crop_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + model_type = get_options('model').get('type') + frame_processor_inputs = {} + + for frame_processor_input in frame_processor.get_inputs(): + if frame_processor_input.name == 'source': + if model_type == 'blendswap' or model_type == 'uniface': + frame_processor_inputs[frame_processor_input.name] = prepare_source_frame(source_face) + else: + frame_processor_inputs[frame_processor_input.name] = prepare_source_embedding(source_face) + if frame_processor_input.name == 'target': + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame + with conditional_thread_semaphore(deepfuze.globals.execution_providers): + 
crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame + + +def prepare_source_frame(source_face : Face) -> VisionFrame: + model_type = get_options('model').get('type') + source_vision_frame = read_static_image(deepfuze.globals.source_paths[0]) + if model_type == 'blendswap': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'arcface_112_v2', (112, 112)) + if model_type == 'uniface': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'ffhq_512', (256, 256)) + source_vision_frame = source_vision_frame[:, :, ::-1] / 255.0 + source_vision_frame = source_vision_frame.transpose(2, 0, 1) + source_vision_frame = numpy.expand_dims(source_vision_frame, axis = 0).astype(numpy.float32) + return source_vision_frame + + +def prepare_source_embedding(source_face : Face) -> Embedding: + model_type = get_options('model').get('type') + if model_type == 'inswapper': + model_initializer = get_model_initializer() + source_embedding = source_face.embedding.reshape((1, -1)) + source_embedding = numpy.dot(source_embedding, model_initializer) / numpy.linalg.norm(source_embedding) + else: + source_embedding = source_face.normed_embedding.reshape(1, -1) + return source_embedding + + +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + model_mean = get_options('model').get('mean') + model_standard_deviation = get_options('model').get('standard_deviation') + crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - model_mean) / model_standard_deviation + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0).astype(numpy.float32) + return crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame[:, :, ::-1] + return crop_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return swap_face(source_face, target_face, temp_vision_frame) + + +def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + source_face = inputs.get('source_face') + target_vision_frame = inputs.get('target_vision_frame') + + if deepfuze.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if deepfuze.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, deepfuze.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + 
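A shape sketch (not part of the commit, dummy data) of `prepare_source_embedding` for the inswapper models: the raw embedding is projected through the model's trailing graph initializer and scaled by the norm of the unprojected embedding:

```python
import numpy

source_embedding = numpy.random.rand(512).astype(numpy.float32)        # hypothetical ArcFace embedding
model_initializer = numpy.random.rand(512, 512).astype(numpy.float32)  # stands in for get_model_initializer()
embedding = source_embedding.reshape((1, -1))
projected = numpy.dot(embedding, model_initializer) / numpy.linalg.norm(embedding)
print(projected.shape)   # (1, 512)
```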
source_frames = read_static_images(source_paths) + source_face = get_average_face(source_frames) + + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + source_frames = read_static_images(source_paths) + source_face = get_average_face(source_frames) + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/deepfuze/processors/frame/modules/frame_colorizer.py b/deepfuze/processors/frame/modules/frame_colorizer.py new file mode 100644 index 0000000..1305746 --- /dev/null +++ b/deepfuze/processors/frame/modules/frame_colorizer.py @@ -0,0 +1,241 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import deepfuze.globals +import deepfuze.processors.frame.core as frame_processors +from deepfuze import config, process_manager, logger, wording +from deepfuze.face_analyser import clear_face_analyser +from deepfuze.content_analyser import clear_content_analyser +from deepfuze.execution import apply_execution_provider_options +from deepfuze.normalizer import normalize_output_path +from deepfuze.thread_helper import thread_lock, thread_semaphore +from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from deepfuze.common_helper import create_metavar +from deepfuze.filesystem import is_file, resolve_relative_path, is_image, is_video +from deepfuze.download import conditional_download, is_download_done +from deepfuze.vision import read_image, read_static_image, write_image, unpack_resolution +from deepfuze.processors.frame.typings import FrameColorizerInputs +from deepfuze.processors.frame import globals as frame_processors_globals +from deepfuze.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'ddcolor': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/ddcolor.onnx') + }, + 'ddcolor_artistic': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor_artistic.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/ddcolor_artistic.onnx') + }, + 'deoldify': + { + 'type': 'deoldify', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/deoldify.onnx') + }, + 'deoldify_artistic': + { + 'type': 'deoldify', + 'url': 
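The mask pipeline in `swap_face` above composes every enabled mask with `numpy.minimum.reduce`, so the most restrictive mask always wins per pixel. Below is a minimal standalone sketch of that composition, using hypothetical placeholder masks rather than the real `create_static_box_mask`/`create_occlusion_mask` outputs:

```python
import numpy

# Hypothetical stand-ins for the soft masks the face_masker module produces
# (float32 maps in [0, 1], same spatial size as the cropped face).
box_mask = numpy.ones((512, 512), numpy.float32)
occlusion_mask = numpy.ones((512, 512), numpy.float32)
occlusion_mask[200:300, 200:300] = 0.0  # pretend a hand covers this area

# The element-wise minimum acts as a logical AND over the soft masks:
# a pixel is pasted back only if every mask allows it.
crop_mask = numpy.minimum.reduce([ box_mask, occlusion_mask ]).clip(0, 1)
assert crop_mask[250, 250] == 0.0 and crop_mask[0, 0] == 1.0
```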
diff --git a/deepfuze/processors/frame/modules/frame_colorizer.py b/deepfuze/processors/frame/modules/frame_colorizer.py
new file mode 100644
index 0000000..1305746
--- /dev/null
+++ b/deepfuze/processors/frame/modules/frame_colorizer.py
@@ -0,0 +1,241 @@
+from typing import Any, List, Literal, Optional
+from argparse import ArgumentParser
+from time import sleep
+import cv2
+import numpy
+import onnxruntime
+
+import deepfuze.globals
+import deepfuze.processors.frame.core as frame_processors
+from deepfuze import config, process_manager, logger, wording
+from deepfuze.face_analyser import clear_face_analyser
+from deepfuze.content_analyser import clear_content_analyser
+from deepfuze.execution import apply_execution_provider_options
+from deepfuze.normalizer import normalize_output_path
+from deepfuze.thread_helper import thread_lock, thread_semaphore
+from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload
+from deepfuze.common_helper import create_metavar
+from deepfuze.filesystem import is_file, resolve_relative_path, is_image, is_video
+from deepfuze.download import conditional_download, is_download_done
+from deepfuze.vision import read_image, read_static_image, write_image, unpack_resolution
+from deepfuze.processors.frame.typings import FrameColorizerInputs
+from deepfuze.processors.frame import globals as frame_processors_globals
+from deepfuze.processors.frame import choices as frame_processors_choices
+
+FRAME_PROCESSOR = None
+NAME = __name__.upper()
+MODELS : ModelSet =\
+{
+	'ddcolor':
+	{
+		'type': 'ddcolor',
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/ddcolor.onnx')
+	},
+	'ddcolor_artistic':
+	{
+		'type': 'ddcolor',
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor_artistic.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/ddcolor_artistic.onnx')
+	},
+	'deoldify':
+	{
+		'type': 'deoldify',
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/deoldify.onnx')
+	},
+	'deoldify_artistic':
+	{
+		'type': 'deoldify',
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify_artistic.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/deoldify_artistic.onnx')
+	},
+	'deoldify_stable':
+	{
+		'type': 'deoldify',
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify_stable.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/deoldify_stable.onnx')
+	}
+}
+OPTIONS : Optional[OptionsWithModel] = None
+
+
+def get_frame_processor() -> Any:
+	global FRAME_PROCESSOR
+
+	with thread_lock():
+		while process_manager.is_checking():
+			sleep(0.5)
+		if FRAME_PROCESSOR is None:
+			model_path = get_options('model').get('path')
+			FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers))
+	return FRAME_PROCESSOR
+
+
+def clear_frame_processor() -> None:
+	global FRAME_PROCESSOR
+
+	FRAME_PROCESSOR = None
+
+
+def get_options(key : Literal['model']) -> Any:
+	global OPTIONS
+
+	if OPTIONS is None:
+		OPTIONS =\
+		{
+			'model': MODELS[frame_processors_globals.frame_colorizer_model]
+		}
+	return OPTIONS.get(key)
+
+
+def set_options(key : Literal['model'], value : Any) -> None:
+	global OPTIONS
+
+	OPTIONS[key] = value
+
+
+def register_args(program : ArgumentParser) -> None:
+	program.add_argument('--frame-colorizer-model', help = wording.get('help.frame_colorizer_model'), default = config.get_str_value('frame_processors.frame_colorizer_model', 'ddcolor'), choices = frame_processors_choices.frame_colorizer_models)
+	program.add_argument('--frame-colorizer-blend', help = wording.get('help.frame_colorizer_blend'), type = int, default = config.get_int_value('frame_processors.frame_colorizer_blend', '100'), choices = frame_processors_choices.frame_colorizer_blend_range, metavar = create_metavar(frame_processors_choices.frame_colorizer_blend_range))
+	program.add_argument('--frame-colorizer-size', help = wording.get('help.frame_colorizer_size'), type = str, default = config.get_str_value('frame_processors.frame_colorizer_size', '256x256'), choices = frame_processors_choices.frame_colorizer_sizes)
+
+
+def apply_args(program : ArgumentParser) -> None:
+	args = program.parse_args()
+	frame_processors_globals.frame_colorizer_model = args.frame_colorizer_model
+	frame_processors_globals.frame_colorizer_blend = args.frame_colorizer_blend
+	frame_processors_globals.frame_colorizer_size = args.frame_colorizer_size
+
+
+def pre_check() -> bool:
+	download_directory_path = resolve_relative_path('../../../models/deepfuze')
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download:
+		process_manager.check()
+		conditional_download(download_directory_path, [ model_url ])
+		process_manager.end()
+	return is_file(model_path)
+
+
+def post_check() -> bool:
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download and not is_download_done(model_url, model_path):
+		logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME)
+		return False
+	if not is_file(model_path):
+		logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def pre_process(mode : ProcessMode) -> bool:
+	if mode in [ 'output', 'preview' ] and not is_image(deepfuze.globals.target_path) and not is_video(deepfuze.globals.target_path):
+		logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME)
+		return False
+	if mode == 'output' and not normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path):
+		logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def post_process() -> None:
+	read_static_image.cache_clear()
+	if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate':
+		clear_frame_processor()
+	if deepfuze.globals.video_memory_strategy == 'strict':
+		clear_face_analyser()
+		clear_content_analyser()
+
+
+def colorize_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
+	frame_processor = get_frame_processor()
+	prepare_vision_frame = prepare_temp_frame(temp_vision_frame)
+	with thread_semaphore():
+		color_vision_frame = frame_processor.run(None,
+		{
+			frame_processor.get_inputs()[0].name: prepare_vision_frame
+		})[0][0]
+	color_vision_frame = merge_color_frame(temp_vision_frame, color_vision_frame)
+	color_vision_frame = blend_frame(temp_vision_frame, color_vision_frame)
+	return color_vision_frame
+
+
+def prepare_temp_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
+	model_size = unpack_resolution(frame_processors_globals.frame_colorizer_size)
+	model_type = get_options('model').get('type')
+	temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_BGR2GRAY)
+	temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_GRAY2RGB)
+	if model_type == 'ddcolor':
+		temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32)
+		temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_RGB2LAB)[:, :, :1]
+		temp_vision_frame = numpy.concatenate((temp_vision_frame, numpy.zeros_like(temp_vision_frame), numpy.zeros_like(temp_vision_frame)), axis = -1)
+		temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_LAB2RGB)
+	temp_vision_frame = cv2.resize(temp_vision_frame, model_size)
+	temp_vision_frame = temp_vision_frame.transpose((2, 0, 1))
+	temp_vision_frame = numpy.expand_dims(temp_vision_frame, axis = 0).astype(numpy.float32)
+	return temp_vision_frame
+
+
+def merge_color_frame(temp_vision_frame : VisionFrame, color_vision_frame : VisionFrame) -> VisionFrame:
+	model_type = get_options('model').get('type')
+	color_vision_frame = color_vision_frame.transpose(1, 2, 0)
+	color_vision_frame = cv2.resize(color_vision_frame, (temp_vision_frame.shape[1], temp_vision_frame.shape[0]))
+	if model_type == 'ddcolor':
+		temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32)
+		temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_BGR2LAB)[:, :, :1]
+		color_vision_frame = numpy.concatenate((temp_vision_frame, color_vision_frame), axis = -1)
+		color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR)
+		color_vision_frame = (color_vision_frame * 255.0).round().astype(numpy.uint8)
+	if model_type == 'deoldify':
+		temp_blue_channel, _, _ = cv2.split(temp_vision_frame)
+		color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2RGB).astype(numpy.uint8)
+		color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2LAB)
+		_, color_green_channel, color_red_channel = cv2.split(color_vision_frame)
+		color_vision_frame = cv2.merge((temp_blue_channel, color_green_channel, color_red_channel))
+		color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR)
+	return color_vision_frame
+
+
+def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame:
+	frame_colorizer_blend = 1 - (frame_processors_globals.frame_colorizer_blend / 100)
+	temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_colorizer_blend, paste_vision_frame, 1 - frame_colorizer_blend, 0)
+	return temp_vision_frame
+
+
+def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
+	pass
+
+
+def process_frame(inputs : FrameColorizerInputs) -> VisionFrame:
+	target_vision_frame = inputs.get('target_vision_frame')
+	return colorize_frame(target_vision_frame)
+
+
+def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
+	for queue_payload in process_manager.manage(queue_payloads):
+		target_vision_path = queue_payload['frame_path']
+		target_vision_frame = read_image(target_vision_path)
+		output_vision_frame = process_frame(
+		{
+			'target_vision_frame': target_vision_frame
+		})
+		write_image(target_vision_path, output_vision_frame)
+		update_progress(1)
+
+
+def process_image(source_paths : List[str], target_path : str, output_path : str) -> None:
+	target_vision_frame = read_static_image(target_path)
+	output_vision_frame = process_frame(
+	{
+		'target_vision_frame': target_vision_frame
+	})
+	write_image(output_path, output_vision_frame)
+
+
+def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
+	frame_processors.multi_process_frames(None, temp_frame_paths, process_frames)
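Note how `blend_frame` above inverts the `--frame-colorizer-blend` percentage before calling `cv2.addWeighted`: a blend of 100 means fully colorized, while 0 returns the original frame untouched. A small self-contained check of that weighting, using dummy frames rather than the module's API:

```python
import cv2
import numpy

temp_vision_frame = numpy.full((4, 4, 3), 200, numpy.uint8)   # stand-in for the original frame
paste_vision_frame = numpy.full((4, 4, 3), 100, numpy.uint8)  # stand-in for the colorized frame

for frame_colorizer_blend in (0, 50, 100):
    weight = 1 - (frame_colorizer_blend / 100)
    blended = cv2.addWeighted(temp_vision_frame, weight, paste_vision_frame, 1 - weight, 0)
    print(frame_colorizer_blend, blended[0, 0, 0])
# 0 -> 200 (original kept), 50 -> 150 (even mix), 100 -> 100 (fully colorized)
```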
diff --git a/deepfuze/processors/frame/modules/frame_enhancer.py b/deepfuze/processors/frame/modules/frame_enhancer.py
new file mode 100644
index 0000000..40d2f3e
--- /dev/null
+++ b/deepfuze/processors/frame/modules/frame_enhancer.py
@@ -0,0 +1,263 @@
+from typing import Any, List, Literal, Optional
+from argparse import ArgumentParser
+from time import sleep
+import cv2
+import numpy
+import onnxruntime
+
+import deepfuze.globals
+import deepfuze.processors.frame.core as frame_processors
+from deepfuze import config, process_manager, logger, wording
+from deepfuze.face_analyser import clear_face_analyser
+from deepfuze.content_analyser import clear_content_analyser
+from deepfuze.execution import apply_execution_provider_options
+from deepfuze.normalizer import normalize_output_path
+from deepfuze.thread_helper import thread_lock, conditional_thread_semaphore
+from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload
+from deepfuze.common_helper import create_metavar
+from deepfuze.filesystem import is_file, resolve_relative_path, is_image, is_video
+from deepfuze.download import conditional_download, is_download_done
+from deepfuze.vision import read_image, read_static_image, write_image, merge_tile_frames, create_tile_frames
+from deepfuze.processors.frame.typings import FrameEnhancerInputs
+from deepfuze.processors.frame import globals as frame_processors_globals
+from deepfuze.processors.frame import choices as frame_processors_choices
+
+FRAME_PROCESSOR = None
+NAME = __name__.upper()
+MODELS : ModelSet =\
+{
+	'clear_reality_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/clear_reality_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/clear_reality_x4.onnx'),
+		'size': (128, 8, 4),
+		'scale': 4
+	},
+	'lsdir_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/lsdir_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/lsdir_x4.onnx'),
+		'size': (128, 8, 4),
+		'scale': 4
+	},
+	'nomos8k_sc_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/nomos8k_sc_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/nomos8k_sc_x4.onnx'),
+		'size': (128, 8, 4),
+		'scale': 4
+	},
+	'real_esrgan_x2':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/real_esrgan_x2.onnx'),
+		'size': (256, 16, 8),
+		'scale': 2
+	},
+	'real_esrgan_x2_fp16':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2_fp16.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/real_esrgan_x2_fp16.onnx'),
+		'size': (256, 16, 8),
+		'scale': 2
+	},
+	'real_esrgan_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/real_esrgan_x4.onnx'),
+		'size': (256, 16, 8),
+		'scale': 4
+	},
+	'real_esrgan_x4_fp16':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4_fp16.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/real_esrgan_x4_fp16.onnx'),
+		'size': (256, 16, 8),
+		'scale': 4
+	},
+	'real_hatgan_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_hatgan_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/real_hatgan_x4.onnx'),
+		'size': (256, 16, 8),
+		'scale': 4
+	},
+	'span_kendata_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/span_kendata_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/span_kendata_x4.onnx'),
+		'size': (128, 8, 4),
+		'scale': 4
+	},
+	'ultra_sharp_x4':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ultra_sharp_x4.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/ultra_sharp_x4.onnx'),
+		'size': (128, 8, 4),
+		'scale': 4
+	}
+}
+OPTIONS : Optional[OptionsWithModel] = None
+
+
+def get_frame_processor() -> Any:
+	global FRAME_PROCESSOR
+
+	with thread_lock():
+		while process_manager.is_checking():
+			sleep(0.5)
+		if FRAME_PROCESSOR is None:
+			model_path = get_options('model').get('path')
+			FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers))
+	return FRAME_PROCESSOR
+
+
+def clear_frame_processor() -> None:
+	global FRAME_PROCESSOR
+
+	FRAME_PROCESSOR = None
+
+
+def get_options(key : Literal['model']) -> Any:
+	global OPTIONS
+
+	if OPTIONS is None:
+		OPTIONS =\
+		{
+			'model': MODELS[frame_processors_globals.frame_enhancer_model]
+		}
+	return OPTIONS.get(key)
+
+
+def set_options(key : Literal['model'], value : Any) -> None:
+	global OPTIONS
+
+	OPTIONS[key] = value
+
+
+def register_args(program : ArgumentParser) -> None:
+	program.add_argument('--frame-enhancer-model', help = wording.get('help.frame_enhancer_model'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'span_kendata_x4'), choices = frame_processors_choices.frame_enhancer_models)
+	program.add_argument('--frame-enhancer-blend', help = wording.get('help.frame_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range))
+
+
+def apply_args(program : ArgumentParser) -> None:
+	args = program.parse_args()
+	frame_processors_globals.frame_enhancer_model = args.frame_enhancer_model
+	frame_processors_globals.frame_enhancer_blend = args.frame_enhancer_blend
+
+
+def pre_check() -> bool:
+	download_directory_path = resolve_relative_path('../../../models/deepfuze')
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download:
+		process_manager.check()
+		conditional_download(download_directory_path, [ model_url ])
+		process_manager.end()
+	return is_file(model_path)
+
+
+def post_check() -> bool:
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download and not is_download_done(model_url, model_path):
+		logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME)
+		return False
+	if not is_file(model_path):
+		logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def pre_process(mode : ProcessMode) -> bool:
+	if mode in [ 'output', 'preview' ] and not is_image(deepfuze.globals.target_path) and not is_video(deepfuze.globals.target_path):
+		logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME)
+		return False
+	if mode == 'output' and not normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path):
+		logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def post_process() -> None:
+	read_static_image.cache_clear()
+	if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate':
+		clear_frame_processor()
+	if deepfuze.globals.video_memory_strategy == 'strict':
+		clear_face_analyser()
+		clear_content_analyser()
+
+
+def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
+	frame_processor = get_frame_processor()
+	size = get_options('model').get('size')
+	scale = get_options('model').get('scale')
+	temp_height, temp_width = temp_vision_frame.shape[:2]
+	tile_vision_frames, pad_width, pad_height = create_tile_frames(temp_vision_frame, size)
+
+	for index, tile_vision_frame in enumerate(tile_vision_frames):
+		with conditional_thread_semaphore(deepfuze.globals.execution_providers):
+			tile_vision_frame = frame_processor.run(None,
+			{
+				frame_processor.get_inputs()[0].name : prepare_tile_frame(tile_vision_frame)
+			})[0]
+		tile_vision_frames[index] = normalize_tile_frame(tile_vision_frame)
+	merge_vision_frame = merge_tile_frames(tile_vision_frames, temp_width * scale, temp_height * scale, pad_width * scale, pad_height * scale, (size[0] * scale, size[1] * scale, size[2] * scale))
+	temp_vision_frame = blend_frame(temp_vision_frame, merge_vision_frame)
+	return temp_vision_frame
+
+
+def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame:
+	vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0)
+	vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2)
+	vision_tile_frame = vision_tile_frame.astype(numpy.float32) / 255
+	return vision_tile_frame
+
+
+def normalize_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame:
+	vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255
+	vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1]
+	return vision_tile_frame
+
+
+def blend_frame(temp_vision_frame : VisionFrame, merge_vision_frame : VisionFrame) -> VisionFrame:
+	frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100)
+	temp_vision_frame = cv2.resize(temp_vision_frame, (merge_vision_frame.shape[1], merge_vision_frame.shape[0]))
+	temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, merge_vision_frame, 1 - frame_enhancer_blend, 0)
+	return temp_vision_frame
+
+
+def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
+	pass
+
+
+def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame:
+	target_vision_frame = inputs.get('target_vision_frame')
+	return enhance_frame(target_vision_frame)
+
+
+def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
+	for queue_payload in process_manager.manage(queue_payloads):
+		target_vision_path = queue_payload['frame_path']
+		target_vision_frame = read_image(target_vision_path)
+		output_vision_frame = process_frame(
+		{
+			'target_vision_frame': target_vision_frame
+		})
+		write_image(target_vision_path, output_vision_frame)
+		update_progress(1)
+
+
+def process_image(source_paths : List[str], target_path : str, output_path : str) -> None:
+	target_vision_frame = read_static_image(target_path)
+	output_vision_frame = process_frame(
+	{
+		'target_vision_frame': target_vision_frame
+	})
+	write_image(output_path, output_vision_frame)
+
+
+def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
+	frame_processors.multi_process_frames(None, temp_frame_paths, process_frames)
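`prepare_tile_frame` and `normalize_tile_frame` above are inverses of each other up to float rounding: BGR `uint8` tiles become RGB `NCHW` floats in [0, 1] for ONNX inference and are converted back afterwards. A standalone round-trip sketch (the two functions are copied here so the snippet runs without the package):

```python
import numpy

def prepare_tile_frame(vision_tile_frame):
    # BGR uint8 HxWxC -> RGB NCHW float32 in [0, 1]
    vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0)
    vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2)
    return vision_tile_frame.astype(numpy.float32) / 255

def normalize_tile_frame(vision_tile_frame):
    # RGB NCHW float32 -> BGR uint8 HxWxC, the inverse of the step above
    vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255
    return vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1]

tile = numpy.random.randint(0, 256, (128, 128, 3), numpy.uint8)
restored = normalize_tile_frame(prepare_tile_frame(tile))
# float32 truncation can shift a value by at most one step
assert int(numpy.abs(restored.astype(int) - tile.astype(int)).max()) <= 1
```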
diff --git a/deepfuze/processors/frame/modules/lip_syncer.py b/deepfuze/processors/frame/modules/lip_syncer.py
new file mode 100755
index 0000000..599eb49
--- /dev/null
+++ b/deepfuze/processors/frame/modules/lip_syncer.py
@@ -0,0 +1,260 @@
+from typing import Any, List, Literal, Optional
+from argparse import ArgumentParser
+from time import sleep
+import cv2
+import numpy
+import onnxruntime
+
+import deepfuze.globals
+import deepfuze.processors.frame.core as frame_processors
+from deepfuze import config, process_manager, logger, wording
+from deepfuze.execution import apply_execution_provider_options
+from deepfuze.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser
+from deepfuze.face_masker import create_static_box_mask, create_occlusion_mask, create_mouth_mask, clear_face_occluder, clear_face_parser
+from deepfuze.face_helper import warp_face_by_face_landmark_5, warp_face_by_bounding_box, paste_back, create_bounding_box_from_face_landmark_68
+from deepfuze.face_store import get_reference_faces
+from deepfuze.content_analyser import clear_content_analyser
+from deepfuze.normalizer import normalize_output_path
+from deepfuze.thread_helper import thread_lock, conditional_thread_semaphore
+from deepfuze.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload
+from deepfuze.filesystem import is_file, has_audio, resolve_relative_path
+from deepfuze.download import conditional_download, is_download_done
+from deepfuze.audio import read_static_voice, get_voice_frame, create_empty_audio_frame
+from deepfuze.filesystem import is_image, is_video, filter_audio_paths
+from deepfuze.common_helper import get_first
+from deepfuze.vision import read_image, read_static_image, write_image, restrict_video_fps
+from deepfuze.processors.frame.typings import LipSyncerInputs
+from deepfuze.voice_extractor import clear_voice_extractor
+from deepfuze.processors.frame import globals as frame_processors_globals
+from deepfuze.processors.frame import choices as frame_processors_choices
+
+FRAME_PROCESSOR = None
+NAME = __name__.upper()
+MODELS : ModelSet =\
+{
+	'wav2lip_gan':
+	{
+		'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/wav2lip_gan.onnx',
+		'path': resolve_relative_path('../../../models/deepfuze/wav2lip_gan.onnx')
+	}
+}
+OPTIONS : Optional[OptionsWithModel] = None
+
+
+def get_frame_processor() -> Any:
+	global FRAME_PROCESSOR
+
+	with thread_lock():
+		while process_manager.is_checking():
+			sleep(0.5)
+		if FRAME_PROCESSOR is None:
+			model_path = get_options('model').get('path')
+			FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers))
+	return FRAME_PROCESSOR
+
+
+def clear_frame_processor() -> None:
+	global FRAME_PROCESSOR
+
+	FRAME_PROCESSOR = None
+
+
+def get_options(key : Literal['model']) -> Any:
+	global OPTIONS
+
+	if OPTIONS is None:
+		OPTIONS =\
+		{
+			'model': MODELS[frame_processors_globals.lip_syncer_model]
+		}
+	return OPTIONS.get(key)
+
+
+def set_options(key : Literal['model'], value : Any) -> None:
+	global OPTIONS
+
+	OPTIONS[key] = value
+
+
+def register_args(program : ArgumentParser) -> None:
+	program.add_argument('--lip-syncer-model', help = wording.get('help.lip_syncer_model'), default = config.get_str_value('frame_processors.lip_syncer_model', 'wav2lip_gan'), choices = frame_processors_choices.lip_syncer_models)
+
+
+def apply_args(program : ArgumentParser) -> None:
+	args = program.parse_args()
+	frame_processors_globals.lip_syncer_model = args.lip_syncer_model
+
+
+def pre_check() -> bool:
+	download_directory_path = resolve_relative_path('../../../models/deepfuze')
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download:
+		process_manager.check()
+		conditional_download(download_directory_path, [ model_url ])
+		process_manager.end()
+	return is_file(model_path)
+
+
+def post_check() -> bool:
+	model_url = get_options('model').get('url')
+	model_path = get_options('model').get('path')
+
+	if not deepfuze.globals.skip_download and not is_download_done(model_url, model_path):
+		logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME)
+		return False
+	if not is_file(model_path):
+		logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def pre_process(mode : ProcessMode) -> bool:
+	if not has_audio(deepfuze.globals.source_paths):
+		logger.error(wording.get('select_audio_source') + wording.get('exclamation_mark'), NAME)
+		return False
+	if mode in [ 'output', 'preview' ] and not is_image(deepfuze.globals.target_path) and not is_video(deepfuze.globals.target_path):
+		logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME)
+		return False
+	if mode == 'output' and not normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path):
+		logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME)
+		return False
+	return True
+
+
+def post_process() -> None:
+	read_static_image.cache_clear()
+	read_static_voice.cache_clear()
+	if deepfuze.globals.video_memory_strategy == 'strict' or deepfuze.globals.video_memory_strategy == 'moderate':
+		clear_frame_processor()
+	if deepfuze.globals.video_memory_strategy == 'strict':
+		clear_face_analyser()
+		clear_content_analyser()
+		clear_face_occluder()
+		clear_face_parser()
+		clear_voice_extractor()
+
+
+def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame:
+	frame_processor = get_frame_processor()
+	crop_mask_list = []
+	temp_audio_frame = prepare_audio_frame(temp_audio_frame)
+	crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'ffhq_512', (512, 512))
+	face_landmark_68 = cv2.transform(target_face.landmarks.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2)
+	bounding_box = create_bounding_box_from_face_landmark_68(face_landmark_68)
+	bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125
+	mouth_mask = create_mouth_mask(face_landmark_68)
+	crop_mask_list.append(mouth_mask)
+	box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], deepfuze.globals.face_mask_blur, deepfuze.globals.face_mask_padding)
+	crop_mask_list.append(box_mask)
+
+	if 'occlusion' in deepfuze.globals.face_mask_types:
+		occlusion_mask = create_occlusion_mask(crop_vision_frame)
+		crop_mask_list.append(occlusion_mask)
+	close_vision_frame, close_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, (96, 96))
+	close_vision_frame = prepare_crop_frame(close_vision_frame)
+	with conditional_thread_semaphore(deepfuze.globals.execution_providers):
+		close_vision_frame = frame_processor.run(None,
+		{
+			'source': temp_audio_frame,
+			'target': close_vision_frame
+		})[0]
+	crop_vision_frame = normalize_crop_frame(close_vision_frame)
+	crop_vision_frame = cv2.warpAffine(crop_vision_frame, cv2.invertAffineTransform(close_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE)
+	crop_mask = numpy.minimum.reduce(crop_mask_list)
+	paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix)
+	return paste_vision_frame
+
+
+def prepare_audio_frame(temp_audio_frame : AudioFrame) -> AudioFrame:
+	temp_audio_frame = numpy.maximum(numpy.exp(-5 * numpy.log(10)), temp_audio_frame)
+	temp_audio_frame = numpy.log10(temp_audio_frame) * 1.6 + 3.2
+	temp_audio_frame = temp_audio_frame.clip(-4, 4).astype(numpy.float32)
+	temp_audio_frame = numpy.expand_dims(temp_audio_frame, axis = (0, 1))
+	return temp_audio_frame
+
+
+def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame:
+	crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0)
+	prepare_vision_frame = crop_vision_frame.copy()
+	prepare_vision_frame[:, 48:] = 0
+	crop_vision_frame = numpy.concatenate((prepare_vision_frame, crop_vision_frame), axis = 3)
+	crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0
+	return crop_vision_frame
+
+
+def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame:
+	crop_vision_frame = crop_vision_frame[0].transpose(1, 2, 0)
+	crop_vision_frame = crop_vision_frame.clip(0, 1) * 255
+	crop_vision_frame = crop_vision_frame.astype(numpy.uint8)
+	return crop_vision_frame
+
+
+def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
+	pass
+
+
+def process_frame(inputs : LipSyncerInputs) -> VisionFrame:
+	reference_faces = inputs.get('reference_faces')
+	source_audio_frame = inputs.get('source_audio_frame')
+	target_vision_frame = inputs.get('target_vision_frame')
+
+	if deepfuze.globals.face_selector_mode == 'many':
+		many_faces = get_many_faces(target_vision_frame)
+		if many_faces:
+			for target_face in many_faces:
+				target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame)
+	if deepfuze.globals.face_selector_mode == 'one':
+		target_face = get_one_face(target_vision_frame)
+		if target_face:
+			target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame)
+	if deepfuze.globals.face_selector_mode == 'reference':
+		similar_faces = find_similar_faces(reference_faces, target_vision_frame, deepfuze.globals.reference_face_distance)
+		if similar_faces:
+			for similar_face in similar_faces:
+				target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame)
+	return target_vision_frame
+
+
+def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
+	reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None
+	source_audio_path = get_first(filter_audio_paths(source_paths))
+	temp_video_fps = restrict_video_fps(deepfuze.globals.target_path, deepfuze.globals.output_video_fps)
+
+	for queue_payload in process_manager.manage(queue_payloads):
+		frame_number = queue_payload['frame_number']
+		target_vision_path = queue_payload['frame_path']
+		source_audio_frame = get_voice_frame(source_audio_path, temp_video_fps, frame_number)
+		if not numpy.any(source_audio_frame):
+			source_audio_frame = create_empty_audio_frame()
+		target_vision_frame = read_image(target_vision_path)
+		output_vision_frame = process_frame(
+		{
+			'reference_faces': reference_faces,
+			'source_audio_frame': source_audio_frame,
+			'target_vision_frame': target_vision_frame
+		})
+		write_image(target_vision_path, output_vision_frame)
+		update_progress(1)
+
+
+def process_image(source_paths : List[str], target_path : str, output_path : str) -> None:
+	reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None
+	source_audio_frame = create_empty_audio_frame()
+	target_vision_frame = read_static_image(target_path)
+	output_vision_frame = process_frame(
+	{
+		'reference_faces': reference_faces,
+		'source_audio_frame': source_audio_frame,
+		'target_vision_frame': target_vision_frame
+	})
+	write_image(output_path, output_vision_frame)
+
+
+def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
+	source_audio_paths = filter_audio_paths(deepfuze.globals.source_paths)
+	temp_video_fps = restrict_video_fps(deepfuze.globals.target_path, deepfuze.globals.output_video_fps)
+	for source_audio_path in source_audio_paths:
+		read_static_voice(source_audio_path, temp_video_fps)
+	frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames)
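`prepare_audio_frame` above floors the mel energies at 1e-5 (written as `exp(-5 ln 10)`), rescales their log10 into the range the wav2lip model was trained on, and clips to [-4, 4] before adding the batch and channel axes. A standalone numeric sketch with a hypothetical mel chunk:

```python
import numpy

def prepare_audio_frame(temp_audio_frame):
    # floor energies at 1e-5, rescale log10, clip to the model's [-4, 4] range
    temp_audio_frame = numpy.maximum(numpy.exp(-5 * numpy.log(10)), temp_audio_frame)
    temp_audio_frame = numpy.log10(temp_audio_frame) * 1.6 + 3.2
    temp_audio_frame = temp_audio_frame.clip(-4, 4).astype(numpy.float32)
    return numpy.expand_dims(temp_audio_frame, axis = (0, 1))

mel_chunk = numpy.array([[ 0.0, 1e-5, 1.0, 100.0 ]])  # hypothetical mel energies
print(prepare_audio_frame(mel_chunk).shape)     # (1, 1, 1, 4) -> NCHW for the ONNX model
print(prepare_audio_frame(mel_chunk)[0, 0, 0])  # approximately [-4. -4. 3.2 4.]
```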
diff --git a/deepfuze/processors/frame/typings.py b/deepfuze/processors/frame/typings.py
new file mode 100644
index 0000000..b13c67b
--- /dev/null
+++ b/deepfuze/processors/frame/typings.py
@@ -0,0 +1,41 @@
+from typing import Literal, TypedDict
+
+from deepfuze.typing import Face, FaceSet, AudioFrame, VisionFrame
+
+FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender']
+FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus']
+FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256']
+FrameColorizerModel = Literal['ddcolor', 'ddcolor_artistic', 'deoldify', 'deoldify_artistic', 'deoldify_stable']
+FrameEnhancerModel = Literal['clear_reality_x4', 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4', 'ultra_sharp_x4']
+LipSyncerModel = Literal['wav2lip_gan']
+
+FaceDebuggerInputs = TypedDict('FaceDebuggerInputs',
+{
+	'reference_faces' : FaceSet,
+	'target_vision_frame' : VisionFrame
+})
+FaceEnhancerInputs = TypedDict('FaceEnhancerInputs',
+{
+	'reference_faces' : FaceSet,
+	'target_vision_frame' : VisionFrame
+})
+FaceSwapperInputs = TypedDict('FaceSwapperInputs',
+{
+	'reference_faces' : FaceSet,
+	'source_face' : Face,
+	'target_vision_frame' : VisionFrame
+})
+FrameColorizerInputs = TypedDict('FrameColorizerInputs',
+{
+	'target_vision_frame' : VisionFrame
+})
+FrameEnhancerInputs = TypedDict('FrameEnhancerInputs',
+{
+	'target_vision_frame' : VisionFrame
+})
+LipSyncerInputs = TypedDict('LipSyncerInputs',
+{
+	'reference_faces' : FaceSet,
+	'source_audio_frame' : AudioFrame,
+	'target_vision_frame' : VisionFrame
+})
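These `TypedDict`s are ordinary dicts at runtime; only a static type checker enforces the declared keys. A minimal sketch of building a `LipSyncerInputs` payload, assuming the `deepfuze` package is importable and using a hypothetical mel-frame shape:

```python
import numpy
from deepfuze.processors.frame.typings import LipSyncerInputs  # assumes deepfuze is on the path

# Plain dict at runtime; the annotation documents the expected keys and types.
inputs: LipSyncerInputs = {
    'reference_faces': {},                                    # FaceSet: name -> list of Face
    'source_audio_frame': numpy.zeros((80, 16)),              # AudioFrame (shape is illustrative)
    'target_vision_frame': numpy.zeros((512, 512, 3), numpy.uint8)  # VisionFrame in BGR
}
```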
diff --git a/deepfuze/statistics.py b/deepfuze/statistics.py
new file mode 100644
index 0000000..90dbb35
--- /dev/null
+++ b/deepfuze/statistics.py
@@ -0,0 +1,51 @@
+from typing import Any, Dict
+import numpy
+
+import deepfuze.globals
+from deepfuze.face_store import FACE_STORE
+from deepfuze.typing import FaceSet
+from deepfuze import logger
+
+
+def create_statistics(static_faces : FaceSet) -> Dict[str, Any]:
+	face_detector_score_list = []
+	face_landmarker_score_list = []
+	statistics =\
+	{
+		'min_face_detector_score': 0,
+		'min_face_landmarker_score': 0,
+		'max_face_detector_score': 0,
+		'max_face_landmarker_score': 0,
+		'average_face_detector_score': 0,
+		'average_face_landmarker_score': 0,
+		'total_face_landmark_5_fallbacks': 0,
+		'total_frames_with_faces': 0,
+		'total_faces': 0
+	}
+
+	for faces in static_faces.values():
+		statistics['total_frames_with_faces'] = statistics.get('total_frames_with_faces') + 1
+		for face in faces:
+			statistics['total_faces'] = statistics.get('total_faces') + 1
+			face_detector_score_list.append(face.scores.get('detector'))
+			face_landmarker_score_list.append(face.scores.get('landmarker'))
+			if numpy.array_equal(face.landmarks.get('5'), face.landmarks.get('5/68')):
+				statistics['total_face_landmark_5_fallbacks'] = statistics.get('total_face_landmark_5_fallbacks') + 1
+
+	if face_detector_score_list:
+		statistics['min_face_detector_score'] = round(min(face_detector_score_list), 2)
+		statistics['max_face_detector_score'] = round(max(face_detector_score_list), 2)
+		statistics['average_face_detector_score'] = round(numpy.mean(face_detector_score_list), 2)
+	if face_landmarker_score_list:
+		statistics['min_face_landmarker_score'] = round(min(face_landmarker_score_list), 2)
+		statistics['max_face_landmarker_score'] = round(max(face_landmarker_score_list), 2)
+		statistics['average_face_landmarker_score'] = round(numpy.mean(face_landmarker_score_list), 2)
+	return statistics
+
+
+def conditional_log_statistics() -> None:
+	if deepfuze.globals.log_level == 'debug':
+		statistics = create_statistics(FACE_STORE.get('static_faces'))
+
+		for name, value in statistics.items():
+			logger.debug(str(name) + ': ' + str(value), __name__.upper())
diff --git a/deepfuze/thread_helper.py b/deepfuze/thread_helper.py
new file mode 100644
index 0000000..c08c6f1
--- /dev/null
+++ b/deepfuze/thread_helper.py
@@ -0,0 +1,21 @@
+from typing import List, Union, ContextManager
+import threading
+from contextlib import nullcontext
+
+THREAD_LOCK : threading.Lock = threading.Lock()
+THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
+NULL_CONTEXT : ContextManager[None] = nullcontext()
+
+
+def thread_lock() -> threading.Lock:
+	return THREAD_LOCK
+
+
+def thread_semaphore() -> threading.Semaphore:
+	return THREAD_SEMAPHORE
+
+
+def conditional_thread_semaphore(execution_providers : List[str]) -> Union[threading.Semaphore, ContextManager[None]]:
+	if 'DmlExecutionProvider' in execution_providers:
+		return THREAD_SEMAPHORE
+	return NULL_CONTEXT
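`conditional_thread_semaphore` only serializes inference when DirectML is among the execution providers; any other provider receives a shared `nullcontext` and runs unthrottled. A usage sketch, assuming `deepfuze` is importable:

```python
from deepfuze.thread_helper import conditional_thread_semaphore  # assumes deepfuze is on the path

# DirectML sessions are not thread-safe, so inference is funneled
# through the module-level semaphore, one run at a time.
with conditional_thread_semaphore([ 'DmlExecutionProvider' ]):
    pass  # serialized inference would happen here

# Every other provider gets the no-op nullcontext and full parallelism.
with conditional_thread_semaphore([ 'CUDAExecutionProvider' ]):
    pass  # unrestricted inference
```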
diff --git a/deepfuze/typing.py b/deepfuze/typing.py
new file mode 100755
index 0000000..bc05f80
--- /dev/null
+++ b/deepfuze/typing.py
@@ -0,0 +1,122 @@
+from typing import Any, Literal, Callable, List, Tuple, Dict, TypedDict
+from collections import namedtuple
+import numpy
+
+BoundingBox = numpy.ndarray[Any, Any]
+FaceLandmark5 = numpy.ndarray[Any, Any]
+FaceLandmark68 = numpy.ndarray[Any, Any]
+FaceLandmarkSet = TypedDict('FaceLandmarkSet',
+{
+	'5' : FaceLandmark5, #type:ignore[valid-type]
+	'5/68' : FaceLandmark5, #type:ignore[valid-type]
+	'68' : FaceLandmark68, #type:ignore[valid-type]
+	'68/5' : FaceLandmark68 #type:ignore[valid-type]
+})
+Score = float
+FaceScoreSet = TypedDict('FaceScoreSet',
+{
+	'detector' : Score,
+	'landmarker' : Score
+})
+Embedding = numpy.ndarray[Any, Any]
+Face = namedtuple('Face',
+[
+	'bounding_box',
+	'landmarks',
+	'scores',
+	'embedding',
+	'normed_embedding',
+	'gender',
+	'age'
+])
+FaceSet = Dict[str, List[Face]]
+FaceStore = TypedDict('FaceStore',
+{
+	'static_faces' : FaceSet,
+	'reference_faces': FaceSet
+})
+
+VisionFrame = numpy.ndarray[Any, Any]
+Mask = numpy.ndarray[Any, Any]
+Matrix = numpy.ndarray[Any, Any]
+Translation = numpy.ndarray[Any, Any]
+
+AudioBuffer = bytes
+Audio = numpy.ndarray[Any, Any]
+AudioChunk = numpy.ndarray[Any, Any]
+AudioFrame = numpy.ndarray[Any, Any]
+Spectrogram = numpy.ndarray[Any, Any]
+MelFilterBank = numpy.ndarray[Any, Any]
+
+Fps = float
+Padding = Tuple[int, int, int, int]
+Resolution = Tuple[int, int]
+
+ProcessState = Literal['checking', 'processing', 'stopping', 'pending']
+QueuePayload = TypedDict('QueuePayload',
+{
+	'frame_number' : int,
+	'frame_path' : str
+})
+UpdateProgress = Callable[[int], None]
+ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProgress], None]
+
+WarpTemplate = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512']
+WarpTemplateSet = Dict[WarpTemplate, numpy.ndarray[Any, Any]]
+ProcessMode = Literal['output', 'preview', 'stream']
+
+LogLevel = Literal['error', 'warn', 'info', 'debug']
+VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant']
+FaceSelectorMode = Literal['many', 'one', 'reference']
+FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best']
+FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior']
+FaceAnalyserGender = Literal['female', 'male']
+FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yoloface', 'yunet']
+FaceDetectorTweak = Literal['low-luminance', 'high-luminance']
+FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap', 'arcface_uniface']
+FaceMaskType = Literal['box', 'occlusion', 'region']
+FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip']
+TempFrameFormat = Literal['jpg', 'png', 'bmp']
+OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf']
+OutputVideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow']
+
+ModelValue = Dict[str, Any]
+ModelSet = Dict[str, ModelValue]
+OptionsWithModel = TypedDict('OptionsWithModel',
+{
+	'model' : ModelValue
+})
+
+ValueAndUnit = TypedDict('ValueAndUnit',
+{
+	'value' : str,
+	'unit' : str
+})
+ExecutionDeviceFramework = TypedDict('ExecutionDeviceFramework',
+{
+	'name' : str,
+	'version' : str
+})
+ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct',
+{
+	'vendor' : str,
+	'name' : str
+})
+ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory',
+{
+	'total' : ValueAndUnit,
+	'free' : ValueAndUnit
+})
+ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization',
+{
+	'gpu' : ValueAndUnit,
+	'memory' : ValueAndUnit
+})
+ExecutionDevice = TypedDict('ExecutionDevice',
+{
+	'driver_version' : str,
+	'framework' : ExecutionDeviceFramework,
+	'product' : ExecutionDeviceProduct,
+	'video_memory' : ExecutionDeviceVideoMemory,
+	'utilization' : ExecutionDeviceUtilization
+})
diff --git a/deepfuze/uis/__init__.py b/deepfuze/uis/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deepfuze/uis/assets/fixes.css b/deepfuze/uis/assets/fixes.css
new file mode 100644
index 0000000..f65a7cf
--- /dev/null
+++ b/deepfuze/uis/assets/fixes.css
@@ -0,0 +1,7 @@
+:root:root:root button:not([class])
+{
+	border-radius: 0.375rem;
+	float: left;
+	overflow: hidden;
+	width: 100%;
+}
diff --git a/deepfuze/uis/assets/overrides.css b/deepfuze/uis/assets/overrides.css
new file mode 100644
index 0000000..744ed3b
--- /dev/null
+++ b/deepfuze/uis/assets/overrides.css
@@ -0,0 +1,58 @@
+:root:root:root input[type="number"]
+{
+	max-width: 6rem;
+}
+
+:root:root:root [type="checkbox"],
+:root:root:root [type="radio"]
+{
+	border-radius: 50%;
+	height: 1.125rem;
+	width: 1.125rem;
+}
+
+:root:root:root input[type="range"]
+{
+	height: 0.5rem;
+}
+
+:root:root:root input[type="range"]::-moz-range-thumb,
+:root:root:root input[type="range"]::-webkit-slider-thumb
+{
+	background: var(--neutral-300);
+	border: unset;
+	border-radius: 50%;
+	height: 1.125rem;
+	width: 1.125rem;
+}
+
+:root:root:root input[type="range"]::-webkit-slider-thumb
+{
+	margin-top: 0.375rem;
+}
+
+:root:root:root .grid-wrap.fixed-height
+{
+	min-height: unset;
+}
+
+:root:root:root .grid-container
+{
+	grid-auto-rows: minmax(5em, 1fr);
+	grid-template-columns: repeat(var(--grid-cols), minmax(5em, 1fr));
+	grid-template-rows: repeat(var(--grid-rows), minmax(5em, 1fr));
+}
+
+:root:root:root .tab-nav > button
+{
+	border: unset;
+	border-bottom: 0.125rem solid transparent;
+	font-size: 1.125em;
+	margin: 0.5rem 1rem;
+	padding: 0;
+}
+
+:root:root:root .tab-nav > button.selected
+{
+	border-bottom: 0.125rem solid;
+}
diff --git a/deepfuze/uis/choices.py b/deepfuze/uis/choices.py
new file mode 100644
index 0000000..6d49d7e
--- /dev/null
+++ b/deepfuze/uis/choices.py
@@ -0,0 +1,7 @@
+from typing import List
+
+from deepfuze.uis.typing import WebcamMode
+
+common_options : List[str] = [ 'keep-temp', 'skip-audio', 'skip-download' ]
+webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ]
+webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ]
diff --git a/deepfuze/uis/components/__init__.py b/deepfuze/uis/components/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deepfuze/uis/components/about.py b/deepfuze/uis/components/about.py
new file mode 100644
index 0000000..0bd55be
--- /dev/null
+++ b/deepfuze/uis/components/about.py
@@ -0,0 +1,23 @@
+from typing import Optional
+import gradio
+
+from deepfuze import metadata, wording
+
+ABOUT_BUTTON : Optional[gradio.Button] = None
+DONATE_BUTTON : Optional[gradio.Button] = None
+
+
+def render() -> None:
+	global ABOUT_BUTTON
+	global DONATE_BUTTON
+
+	ABOUT_BUTTON = gradio.Button(
+		value = metadata.get('name') + ' ' + metadata.get('version'),
+		variant = 'primary',
+		link = metadata.get('url')
+	)
+	DONATE_BUTTON = gradio.Button(
+		value = wording.get('uis.donate_button'),
+		link = 'https://donate.deepfuze.io',
+		size = 'sm'
+	)
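UI components in this package follow a two-phase convention: `render()` builds the gradio widgets into module-level handles, and a separate `listen()` (where a component reacts to events) wires callbacks once every component exists. A minimal sketch of that pattern with a hypothetical component that is not part of the repository, assuming a recent gradio release:

```python
import gradio

GREET_BUTTON = None  # module-level handle, mirroring the ABOUT_BUTTON pattern


def render() -> None:
    global GREET_BUTTON
    GREET_BUTTON = gradio.Button(value = 'Greet', variant = 'primary', size = 'sm')


def listen() -> None:
    # wiring happens after render(), once the widget handle exists
    GREET_BUTTON.click(lambda: print('clicked'))


with gradio.Blocks() as demo:
    render()
    listen()
# demo.launch() would serve the assembled layout
```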
diff --git a/deepfuze/uis/components/about.py b/deepfuze/uis/components/about.py
new file mode 100644
index 0000000..0bd55be
--- /dev/null
+++ b/deepfuze/uis/components/about.py
@@ -0,0 +1,23 @@
+from typing import Optional
+import gradio
+
+from deepfuze import metadata, wording
+
+ABOUT_BUTTON : Optional[gradio.Button] = None
+DONATE_BUTTON : Optional[gradio.Button] = None
+
+
+def render() -> None:
+	global ABOUT_BUTTON
+	global DONATE_BUTTON
+
+	ABOUT_BUTTON = gradio.Button(
+		value = metadata.get('name') + ' ' + metadata.get('version'),
+		variant = 'primary',
+		link = metadata.get('url')
+	)
+	DONATE_BUTTON = gradio.Button(
+		value = wording.get('uis.donate_button'),
+		link = 'https://donate.deepfuze.io',
+		size = 'sm'
+	)
diff --git a/deepfuze/uis/components/benchmark.py b/deepfuze/uis/components/benchmark.py
new file mode 100644
index 0000000..3a27731
--- /dev/null
+++ b/deepfuze/uis/components/benchmark.py
@@ -0,0 +1,140 @@
+from typing import Any, Optional, List, Dict, Generator
+from time import sleep, perf_counter
+import tempfile
+import statistics
+import gradio
+
+import deepfuze.globals
+from deepfuze import process_manager, wording
+from deepfuze.face_store import clear_static_faces
+from deepfuze.processors.frame.core import get_frame_processors_modules
+from deepfuze.vision import count_video_frame_total, detect_video_resolution, detect_video_fps, pack_resolution
+from deepfuze.core import conditional_process
+from deepfuze.memory import limit_system_memory
+from deepfuze.filesystem import clear_temp
+from deepfuze.uis.core import get_ui_component
+
+BENCHMARK_RESULTS_DATAFRAME : Optional[gradio.Dataframe] = None
+BENCHMARK_START_BUTTON : Optional[gradio.Button] = None
+BENCHMARK_CLEAR_BUTTON : Optional[gradio.Button] = None
+BENCHMARKS : Dict[str, str] =\
+{
+	'240p': '../../models/facefusion/examples/target-240p.mp4',
+	'360p': '../../models/facefusion/examples/target-360p.mp4',
+	'540p': '../../models/facefusion/examples/target-540p.mp4',
+	'720p': '../../models/facefusion/examples/target-720p.mp4',
+	'1080p': '../../models/facefusion/examples/target-1080p.mp4',
+	'1440p': '../../models/facefusion/examples/target-1440p.mp4',
+	'2160p': '../../models/facefusion/examples/target-2160p.mp4'
+}
+
+
+def render() -> None:
+	global BENCHMARK_RESULTS_DATAFRAME
+	global BENCHMARK_START_BUTTON
+	global BENCHMARK_CLEAR_BUTTON
+
+	BENCHMARK_RESULTS_DATAFRAME = gradio.Dataframe(
+		label = wording.get('uis.benchmark_results_dataframe'),
+		headers =
+		[
+			'target_path',
+			'benchmark_cycles',
+			'average_run',
+			'fastest_run',
+			'slowest_run',
+			'relative_fps'
+		],
+		datatype =
+		[
+			'str',
+			'number',
+			'number',
+			'number',
+			'number',
+			'number'
+		]
+	)
+	BENCHMARK_START_BUTTON = gradio.Button(
+		value = wording.get('uis.start_button'),
+		variant = 'primary',
+		size = 'sm'
+	)
+	BENCHMARK_CLEAR_BUTTON = gradio.Button(
+		value = wording.get('uis.clear_button'),
+		size = 'sm'
+	)
+
+
+def listen() -> None:
+	benchmark_runs_checkbox_group = get_ui_component('benchmark_runs_checkbox_group')
+	benchmark_cycles_slider = get_ui_component('benchmark_cycles_slider')
+
+	if benchmark_runs_checkbox_group and benchmark_cycles_slider:
+		BENCHMARK_START_BUTTON.click(start, inputs = [ benchmark_runs_checkbox_group, benchmark_cycles_slider ], outputs = BENCHMARK_RESULTS_DATAFRAME)
+		BENCHMARK_CLEAR_BUTTON.click(clear, outputs = BENCHMARK_RESULTS_DATAFRAME)
+
+
+def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]:
+	deepfuze.globals.source_paths = [ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]
+	deepfuze.globals.output_path = tempfile.gettempdir()
+	deepfuze.globals.face_landmarker_score = 0
+	deepfuze.globals.temp_frame_format = 'bmp'
+	deepfuze.globals.output_video_preset = 'ultrafast'
+	benchmark_results = []
+	target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ]
+
+	if target_paths:
+		pre_process()
+		for target_path in target_paths:
+			deepfuze.globals.target_path = target_path
+			benchmark_results.append(benchmark(benchmark_cycles))
+			yield benchmark_results
+		post_process()
+
+
+def pre_process() -> None:
+	if deepfuze.globals.system_memory_limit > 0:
+		limit_system_memory(deepfuze.globals.system_memory_limit)
+	for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors):
+		frame_processor_module.get_frame_processor()
+
+
+def post_process() -> None:
+	clear_static_faces()
+
+
+def benchmark(benchmark_cycles : int) -> List[Any]:
+	process_times = []
+	video_frame_total = count_video_frame_total(deepfuze.globals.target_path)
+	output_video_resolution = detect_video_resolution(deepfuze.globals.target_path)
+	deepfuze.globals.output_video_resolution = pack_resolution(output_video_resolution)
+	deepfuze.globals.output_video_fps = detect_video_fps(deepfuze.globals.target_path)
+
+	for index in range(benchmark_cycles):
+		start_time = perf_counter()
+		conditional_process()
+		end_time = perf_counter()
+		process_times.append(end_time - start_time)
+	average_run = round(statistics.mean(process_times), 2)
+	fastest_run = round(min(process_times), 2)
+	slowest_run = round(max(process_times), 2)
+	relative_fps = round(video_frame_total * benchmark_cycles / sum(process_times), 2)
+
+	return\
+	[
+		deepfuze.globals.target_path,
+		benchmark_cycles,
+		average_run,
+		fastest_run,
+		slowest_run,
+		relative_fps
+	]
+
+
+def clear() -> gradio.Dataframe:
+	while process_manager.is_processing():
+		sleep(0.5)
+	if deepfuze.globals.target_path:
+		clear_temp(deepfuze.globals.target_path)
+	return gradio.Dataframe(value = None)
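For reference, the arithmetic in `benchmark()` above: `relative_fps` is the total number of frames processed across all cycles divided by the total wall-clock time, while `average_run`, `fastest_run`, and `slowest_run` summarize the per-cycle timings. A minimal, self-contained sketch of the same computation, using made-up measurements rather than real runs:

```python
import statistics

# Hypothetical measurements: a 900-frame clip processed for 3 cycles.
video_frame_total = 900
process_times = [12.4, 11.9, 12.1]  # seconds per cycle

average_run = round(statistics.mean(process_times), 2)  # 12.13
fastest_run = round(min(process_times), 2)              # 11.9
slowest_run = round(max(process_times), 2)              # 12.4

# Frames per second across the whole benchmark, not per cycle.
relative_fps = round(video_frame_total * len(process_times) / sum(process_times), 2)
print(average_run, fastest_run, slowest_run, relative_fps)  # 12.13 11.9 12.4 74.18
```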
diff --git a/deepfuze/uis/components/benchmark_options.py b/deepfuze/uis/components/benchmark_options.py
new file mode 100644
index 0000000..46302a0
--- /dev/null
+++ b/deepfuze/uis/components/benchmark_options.py
@@ -0,0 +1,29 @@
+from typing import Optional
+import gradio
+
+from deepfuze import wording
+from deepfuze.uis.core import register_ui_component
+from deepfuze.uis.components.benchmark import BENCHMARKS
+
+BENCHMARK_RUNS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+BENCHMARK_CYCLES_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global BENCHMARK_RUNS_CHECKBOX_GROUP
+	global BENCHMARK_CYCLES_SLIDER
+
+	BENCHMARK_RUNS_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.benchmark_runs_checkbox_group'),
+		value = list(BENCHMARKS.keys()),
+		choices = list(BENCHMARKS.keys())
+	)
+	BENCHMARK_CYCLES_SLIDER = gradio.Slider(
+		label = wording.get('uis.benchmark_cycles_slider'),
+		value = 5,
+		step = 1,
+		minimum = 1,
+		maximum = 10
+	)
+	register_ui_component('benchmark_runs_checkbox_group', BENCHMARK_RUNS_CHECKBOX_GROUP)
+	register_ui_component('benchmark_cycles_slider', BENCHMARK_CYCLES_SLIDER)
diff --git a/deepfuze/uis/components/common_options.py b/deepfuze/uis/components/common_options.py
new file mode 100644
index 0000000..8227d5a
--- /dev/null
+++ b/deepfuze/uis/components/common_options.py
@@ -0,0 +1,35 @@
+from typing import Optional, List
+import gradio
+
+import deepfuze.globals
+from deepfuze import wording
+from deepfuze.uis import choices as uis_choices
+
+COMMON_OPTIONS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+
+
+def render() -> None:
+	global COMMON_OPTIONS_CHECKBOX_GROUP
+
+	value = []
+	if deepfuze.globals.keep_temp:
+		value.append('keep-temp')
+	if deepfuze.globals.skip_audio:
+		value.append('skip-audio')
+	if deepfuze.globals.skip_download:
+		value.append('skip-download')
+	COMMON_OPTIONS_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.common_options_checkbox_group'),
+		choices = uis_choices.common_options,
+		value = value
+	)
+
+
+def listen() -> None:
+	COMMON_OPTIONS_CHECKBOX_GROUP.change(update, inputs = COMMON_OPTIONS_CHECKBOX_GROUP)
+
+
+def update(common_options : List[str]) -> None:
+	deepfuze.globals.keep_temp = 'keep-temp' in common_options
+	deepfuze.globals.skip_audio = 'skip-audio' in common_options
+	deepfuze.globals.skip_download = 'skip-download' in common_options
diff --git a/deepfuze/uis/components/execution.py b/deepfuze/uis/components/execution.py
new file mode 100644
index 0000000..583a1da
--- /dev/null
+++ b/deepfuze/uis/components/execution.py
@@ -0,0 +1,33 @@
+from typing import List, Optional
+import gradio
+import onnxruntime
+
+import deepfuze.globals
+from deepfuze import wording
+from deepfuze.face_analyser import clear_face_analyser
+from deepfuze.processors.frame.core import clear_frame_processors_modules
+from deepfuze.execution import encode_execution_providers, decode_execution_providers
+
+EXECUTION_PROVIDERS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+
+
+def render() -> None:
+	global EXECUTION_PROVIDERS_CHECKBOX_GROUP
+
+	EXECUTION_PROVIDERS_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.execution_providers_checkbox_group'),
+		choices = encode_execution_providers(onnxruntime.get_available_providers()),
+		value = encode_execution_providers(deepfuze.globals.execution_providers)
+	)
+
+
+def listen() -> None:
+	EXECUTION_PROVIDERS_CHECKBOX_GROUP.change(update_execution_providers, inputs = EXECUTION_PROVIDERS_CHECKBOX_GROUP, outputs = EXECUTION_PROVIDERS_CHECKBOX_GROUP)
+
+
+def update_execution_providers(execution_providers : List[str]) -> gradio.CheckboxGroup:
+	clear_face_analyser()
+	clear_frame_processors_modules()
+	execution_providers = execution_providers or encode_execution_providers(onnxruntime.get_available_providers())
+	deepfuze.globals.execution_providers = decode_execution_providers(execution_providers)
+	return gradio.CheckboxGroup(value = execution_providers)
diff --git a/deepfuze/uis/components/execution_queue_count.py b/deepfuze/uis/components/execution_queue_count.py
new file mode 100644
index 0000000..2cfe3a4
--- /dev/null
+++ b/deepfuze/uis/components/execution_queue_count.py
@@ -0,0 +1,28 @@
+from typing import Optional
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import wording
+
+EXECUTION_QUEUE_COUNT_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global EXECUTION_QUEUE_COUNT_SLIDER
+
+	EXECUTION_QUEUE_COUNT_SLIDER = gradio.Slider(
+		label = wording.get('uis.execution_queue_count_slider'),
+		value = deepfuze.globals.execution_queue_count,
+		step = deepfuze.choices.execution_queue_count_range[1] - deepfuze.choices.execution_queue_count_range[0],
+		minimum = deepfuze.choices.execution_queue_count_range[0],
+		maximum = deepfuze.choices.execution_queue_count_range[-1]
+	)
+
+
+def listen() -> None:
+	EXECUTION_QUEUE_COUNT_SLIDER.release(update_execution_queue_count, inputs = EXECUTION_QUEUE_COUNT_SLIDER)
+
+
+def update_execution_queue_count(execution_queue_count : int = 1) -> None:
+	deepfuze.globals.execution_queue_count = execution_queue_count
diff --git a/deepfuze/uis/components/execution_thread_count.py b/deepfuze/uis/components/execution_thread_count.py
new file mode 100644
index 0000000..be83a96
--- /dev/null
+++ b/deepfuze/uis/components/execution_thread_count.py
@@ -0,0 +1,29 @@
+from typing import Optional
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import wording
+
+EXECUTION_THREAD_COUNT_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global EXECUTION_THREAD_COUNT_SLIDER
+
+	EXECUTION_THREAD_COUNT_SLIDER = gradio.Slider(
+		label = wording.get('uis.execution_thread_count_slider'),
+		value = deepfuze.globals.execution_thread_count,
+		step = deepfuze.choices.execution_thread_count_range[1] - deepfuze.choices.execution_thread_count_range[0],
+		minimum = deepfuze.choices.execution_thread_count_range[0],
+		maximum = deepfuze.choices.execution_thread_count_range[-1]
+	)
+
+
+def listen() -> None:
+	EXECUTION_THREAD_COUNT_SLIDER.release(update_execution_thread_count, inputs = EXECUTION_THREAD_COUNT_SLIDER)
+
+
+def update_execution_thread_count(execution_thread_count : int = 1) -> None:
+	deepfuze.globals.execution_thread_count = execution_thread_count
+
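A pattern that recurs across the slider components in this commit: `step` is derived from the difference between the first two entries of a `*_range` list, and `minimum`/`maximum` come from the ends of the same list, which is only sound when the range is evenly spaced. A small sketch of the idiom with a hypothetical range (the real values live in `deepfuze.choices`):

```python
# Hypothetical evenly spaced range, e.g. an execution_thread_count_range.
execution_thread_count_range = list(range(1, 129))  # 1, 2, ..., 128

# The same three slider arguments the render() functions above compute:
step = execution_thread_count_range[1] - execution_thread_count_range[0]  # 1
minimum = execution_thread_count_range[0]   # 1
maximum = execution_thread_count_range[-1]  # 128
print(step, minimum, maximum)
```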
diff --git a/deepfuze/uis/components/face_analyser.py b/deepfuze/uis/components/face_analyser.py
new file mode 100644
index 0000000..f4bfd2f
--- /dev/null
+++ b/deepfuze/uis/components/face_analyser.py
@@ -0,0 +1,123 @@
+from typing import Optional, Dict, Any, Tuple
+
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import face_analyser, wording
+from deepfuze.typing import FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceDetectorModel
+from deepfuze.uis.core import register_ui_component
+
+FACE_ANALYSER_ORDER_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_ANALYSER_AGE_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_ANALYSER_GENDER_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_DETECTOR_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_DETECTOR_SIZE_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_DETECTOR_SCORE_SLIDER : Optional[gradio.Slider] = None
+FACE_LANDMARKER_SCORE_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global FACE_ANALYSER_ORDER_DROPDOWN
+	global FACE_ANALYSER_AGE_DROPDOWN
+	global FACE_ANALYSER_GENDER_DROPDOWN
+	global FACE_DETECTOR_MODEL_DROPDOWN
+	global FACE_DETECTOR_SIZE_DROPDOWN
+	global FACE_DETECTOR_SCORE_SLIDER
+	global FACE_LANDMARKER_SCORE_SLIDER
+
+	face_detector_size_dropdown_args : Dict[str, Any] =\
+	{
+		'label': wording.get('uis.face_detector_size_dropdown'),
+		'value': deepfuze.globals.face_detector_size
+	}
+	if deepfuze.globals.face_detector_size in deepfuze.choices.face_detector_set[deepfuze.globals.face_detector_model]:
+		face_detector_size_dropdown_args['choices'] = deepfuze.choices.face_detector_set[deepfuze.globals.face_detector_model]
+	with gradio.Row():
+		FACE_ANALYSER_ORDER_DROPDOWN = gradio.Dropdown(
+			label = wording.get('uis.face_analyser_order_dropdown'),
+			choices = deepfuze.choices.face_analyser_orders,
+			value = deepfuze.globals.face_analyser_order
+		)
+		FACE_ANALYSER_AGE_DROPDOWN = gradio.Dropdown(
+			label = wording.get('uis.face_analyser_age_dropdown'),
+			choices = [ 'none' ] + deepfuze.choices.face_analyser_ages,
+			value = deepfuze.globals.face_analyser_age or 'none'
+		)
+		FACE_ANALYSER_GENDER_DROPDOWN = gradio.Dropdown(
+			label = wording.get('uis.face_analyser_gender_dropdown'),
+			choices = [ 'none' ] + deepfuze.choices.face_analyser_genders,
+			value = deepfuze.globals.face_analyser_gender or 'none'
+		)
+		FACE_DETECTOR_MODEL_DROPDOWN = gradio.Dropdown(
+			label = wording.get('uis.face_detector_model_dropdown'),
+			choices = deepfuze.choices.face_detector_set.keys(),
+			value = deepfuze.globals.face_detector_model
+		)
+		FACE_DETECTOR_SIZE_DROPDOWN = gradio.Dropdown(**face_detector_size_dropdown_args)
+	with gradio.Row():
+		FACE_DETECTOR_SCORE_SLIDER = gradio.Slider(
+			label = wording.get('uis.face_detector_score_slider'),
+			value = deepfuze.globals.face_detector_score,
+			step = deepfuze.choices.face_detector_score_range[1] - deepfuze.choices.face_detector_score_range[0],
+			minimum = deepfuze.choices.face_detector_score_range[0],
+			maximum = deepfuze.choices.face_detector_score_range[-1]
+		)
+		FACE_LANDMARKER_SCORE_SLIDER = gradio.Slider(
+			label = wording.get('uis.face_landmarker_score_slider'),
+			value = deepfuze.globals.face_landmarker_score,
+			step = deepfuze.choices.face_landmarker_score_range[1] - deepfuze.choices.face_landmarker_score_range[0],
+			minimum = deepfuze.choices.face_landmarker_score_range[0],
+			maximum = deepfuze.choices.face_landmarker_score_range[-1]
+		)
+	register_ui_component('face_analyser_order_dropdown', FACE_ANALYSER_ORDER_DROPDOWN)
+	register_ui_component('face_analyser_age_dropdown', FACE_ANALYSER_AGE_DROPDOWN)
+	register_ui_component('face_analyser_gender_dropdown', FACE_ANALYSER_GENDER_DROPDOWN)
+	register_ui_component('face_detector_model_dropdown', FACE_DETECTOR_MODEL_DROPDOWN)
+	register_ui_component('face_detector_size_dropdown', FACE_DETECTOR_SIZE_DROPDOWN)
+	register_ui_component('face_detector_score_slider', FACE_DETECTOR_SCORE_SLIDER)
+	register_ui_component('face_landmarker_score_slider', FACE_LANDMARKER_SCORE_SLIDER)
+
+
+def listen() -> None:
+	FACE_ANALYSER_ORDER_DROPDOWN.change(update_face_analyser_order, inputs = FACE_ANALYSER_ORDER_DROPDOWN)
+	FACE_ANALYSER_AGE_DROPDOWN.change(update_face_analyser_age, inputs = FACE_ANALYSER_AGE_DROPDOWN)
+	FACE_ANALYSER_GENDER_DROPDOWN.change(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN)
+	FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = FACE_DETECTOR_MODEL_DROPDOWN, outputs = [ FACE_DETECTOR_MODEL_DROPDOWN, FACE_DETECTOR_SIZE_DROPDOWN ])
+	FACE_DETECTOR_SIZE_DROPDOWN.change(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN)
+	FACE_DETECTOR_SCORE_SLIDER.release(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER)
+	FACE_LANDMARKER_SCORE_SLIDER.release(update_face_landmarker_score, inputs = FACE_LANDMARKER_SCORE_SLIDER)
+
+
+def update_face_analyser_order(face_analyser_order : FaceAnalyserOrder) -> None:
+	deepfuze.globals.face_analyser_order = face_analyser_order if face_analyser_order != 'none' else None
+
+
+def update_face_analyser_age(face_analyser_age : FaceAnalyserAge) -> None:
+	deepfuze.globals.face_analyser_age = face_analyser_age if face_analyser_age != 'none' else None
+
+
+def update_face_analyser_gender(face_analyser_gender : FaceAnalyserGender) -> None:
+	deepfuze.globals.face_analyser_gender = face_analyser_gender if face_analyser_gender != 'none' else None
+
+
+def update_face_detector_model(face_detector_model : FaceDetectorModel) -> Tuple[gradio.Dropdown, gradio.Dropdown]:
+	deepfuze.globals.face_detector_model = face_detector_model
+	update_face_detector_size('640x640')
+	if face_analyser.pre_check():
+		if deepfuze.globals.face_detector_size in deepfuze.choices.face_detector_set[face_detector_model]:
+			return gradio.Dropdown(value = deepfuze.globals.face_detector_model), gradio.Dropdown(value = deepfuze.globals.face_detector_size, choices = deepfuze.choices.face_detector_set[face_detector_model])
+		return gradio.Dropdown(value = deepfuze.globals.face_detector_model), gradio.Dropdown(value = deepfuze.globals.face_detector_size, choices = [ deepfuze.globals.face_detector_size ])
+	return gradio.Dropdown(), gradio.Dropdown()
+
+
+def update_face_detector_size(face_detector_size : str) -> None:
+	deepfuze.globals.face_detector_size = face_detector_size
+
+
+def update_face_detector_score(face_detector_score : float) -> None:
+	deepfuze.globals.face_detector_score = face_detector_score
+
+
+def update_face_landmarker_score(face_landmarker_score : float) -> None:
+	deepfuze.globals.face_landmarker_score = face_landmarker_score
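`update_face_detector_model()` above resets the detector size to `'640x640'` and only keeps the full size list when the stored size is valid for the newly selected model; otherwise the size dropdown collapses to a single entry. A toy sketch of that validation, using a made-up `face_detector_set` mapping (the real one lives in `deepfuze.choices`):

```python
# Hypothetical model -> supported sizes mapping, for illustration only.
face_detector_set = {
	'retinaface': ['320x320', '640x640'],
	'yoloface': ['640x640'],
}

def size_choices(model: str, current_size: str) -> list:
	# Mirror the dropdown behaviour: keep the full choice list when the
	# current size is supported, otherwise pin the dropdown to one value.
	if current_size in face_detector_set[model]:
		return face_detector_set[model]
	return [current_size]

print(size_choices('retinaface', '320x320'))  # ['320x320', '640x640']
print(size_choices('yoloface', '320x320'))    # ['320x320']
```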
diff --git a/deepfuze/uis/components/face_masker.py b/deepfuze/uis/components/face_masker.py
new file mode 100755
index 0000000..7805f99
--- /dev/null
+++ b/deepfuze/uis/components/face_masker.py
@@ -0,0 +1,119 @@
+from typing import Optional, Tuple, List
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import wording
+from deepfuze.typing import FaceMaskType, FaceMaskRegion
+from deepfuze.uis.core import register_ui_component
+
+FACE_MASK_TYPES_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+FACE_MASK_BLUR_SLIDER : Optional[gradio.Slider] = None
+FACE_MASK_BOX_GROUP : Optional[gradio.Group] = None
+FACE_MASK_REGION_GROUP : Optional[gradio.Group] = None
+FACE_MASK_PADDING_TOP_SLIDER : Optional[gradio.Slider] = None
+FACE_MASK_PADDING_RIGHT_SLIDER : Optional[gradio.Slider] = None
+FACE_MASK_PADDING_BOTTOM_SLIDER : Optional[gradio.Slider] = None
+FACE_MASK_PADDING_LEFT_SLIDER : Optional[gradio.Slider] = None
+FACE_MASK_REGION_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+
+
+def render() -> None:
+	global FACE_MASK_TYPES_CHECKBOX_GROUP
+	global FACE_MASK_BLUR_SLIDER
+	global FACE_MASK_BOX_GROUP
+	global FACE_MASK_REGION_GROUP
+	global FACE_MASK_PADDING_TOP_SLIDER
+	global FACE_MASK_PADDING_RIGHT_SLIDER
+	global FACE_MASK_PADDING_BOTTOM_SLIDER
+	global FACE_MASK_PADDING_LEFT_SLIDER
+	global FACE_MASK_REGION_CHECKBOX_GROUP
+
+	has_box_mask = 'box' in deepfuze.globals.face_mask_types
+	has_region_mask = 'region' in deepfuze.globals.face_mask_types
+	FACE_MASK_TYPES_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.face_mask_types_checkbox_group'),
+		choices = deepfuze.choices.face_mask_types,
+		value = deepfuze.globals.face_mask_types
+	)
+	with gradio.Group(visible = has_box_mask) as FACE_MASK_BOX_GROUP:
+		FACE_MASK_BLUR_SLIDER = gradio.Slider(
+			label = wording.get('uis.face_mask_blur_slider'),
+			step = deepfuze.choices.face_mask_blur_range[1] - deepfuze.choices.face_mask_blur_range[0],
+			minimum = deepfuze.choices.face_mask_blur_range[0],
+			maximum = deepfuze.choices.face_mask_blur_range[-1],
+			value = deepfuze.globals.face_mask_blur
+		)
+		with gradio.Row():
+			FACE_MASK_PADDING_TOP_SLIDER = gradio.Slider(
+				label = wording.get('uis.face_mask_padding_top_slider'),
+				step = deepfuze.choices.face_mask_padding_range[1] - deepfuze.choices.face_mask_padding_range[0],
+				minimum = deepfuze.choices.face_mask_padding_range[0],
+				maximum = deepfuze.choices.face_mask_padding_range[-1],
+				value = deepfuze.globals.face_mask_padding[0]
+			)
+			FACE_MASK_PADDING_RIGHT_SLIDER = gradio.Slider(
+				label = wording.get('uis.face_mask_padding_right_slider'),
+				step = deepfuze.choices.face_mask_padding_range[1] - deepfuze.choices.face_mask_padding_range[0],
+				minimum = deepfuze.choices.face_mask_padding_range[0],
+				maximum = deepfuze.choices.face_mask_padding_range[-1],
+				value = deepfuze.globals.face_mask_padding[1]
+			)
+		with gradio.Row():
+			FACE_MASK_PADDING_BOTTOM_SLIDER = gradio.Slider(
+				label = wording.get('uis.face_mask_padding_bottom_slider'),
+				step = deepfuze.choices.face_mask_padding_range[1] - deepfuze.choices.face_mask_padding_range[0],
+				minimum = deepfuze.choices.face_mask_padding_range[0],
+				maximum = deepfuze.choices.face_mask_padding_range[-1],
+				value = deepfuze.globals.face_mask_padding[2]
+			)
+			FACE_MASK_PADDING_LEFT_SLIDER = gradio.Slider(
+				label = wording.get('uis.face_mask_padding_left_slider'),
+				step = deepfuze.choices.face_mask_padding_range[1] - deepfuze.choices.face_mask_padding_range[0],
+				minimum = deepfuze.choices.face_mask_padding_range[0],
+				maximum = deepfuze.choices.face_mask_padding_range[-1],
+				value = deepfuze.globals.face_mask_padding[3]
+			)
+	with gradio.Row():
+		FACE_MASK_REGION_CHECKBOX_GROUP = gradio.CheckboxGroup(
+			label = wording.get('uis.face_mask_region_checkbox_group'),
+			choices = deepfuze.choices.face_mask_regions,
+			value = deepfuze.globals.face_mask_regions,
+			visible = has_region_mask
+		)
+	register_ui_component('face_mask_types_checkbox_group', FACE_MASK_TYPES_CHECKBOX_GROUP)
+	register_ui_component('face_mask_blur_slider', FACE_MASK_BLUR_SLIDER)
+	register_ui_component('face_mask_padding_top_slider', FACE_MASK_PADDING_TOP_SLIDER)
+	register_ui_component('face_mask_padding_right_slider', FACE_MASK_PADDING_RIGHT_SLIDER)
+	register_ui_component('face_mask_padding_bottom_slider', FACE_MASK_PADDING_BOTTOM_SLIDER)
+	register_ui_component('face_mask_padding_left_slider', FACE_MASK_PADDING_LEFT_SLIDER)
+	register_ui_component('face_mask_region_checkbox_group', FACE_MASK_REGION_CHECKBOX_GROUP)
+
+
+def listen() -> None:
+	FACE_MASK_TYPES_CHECKBOX_GROUP.change(update_face_mask_type, inputs = FACE_MASK_TYPES_CHECKBOX_GROUP, outputs = [ FACE_MASK_TYPES_CHECKBOX_GROUP, FACE_MASK_BOX_GROUP, FACE_MASK_REGION_CHECKBOX_GROUP ])
+	FACE_MASK_BLUR_SLIDER.release(update_face_mask_blur, inputs = FACE_MASK_BLUR_SLIDER)
+	FACE_MASK_REGION_CHECKBOX_GROUP.change(update_face_mask_regions, inputs = FACE_MASK_REGION_CHECKBOX_GROUP, outputs = FACE_MASK_REGION_CHECKBOX_GROUP)
+	face_mask_padding_sliders = [ FACE_MASK_PADDING_TOP_SLIDER, FACE_MASK_PADDING_RIGHT_SLIDER, FACE_MASK_PADDING_BOTTOM_SLIDER, FACE_MASK_PADDING_LEFT_SLIDER ]
+	for face_mask_padding_slider in face_mask_padding_sliders:
+		face_mask_padding_slider.release(update_face_mask_padding, inputs = face_mask_padding_sliders)
+
+
+def update_face_mask_type(face_mask_types : List[FaceMaskType]) -> Tuple[gradio.CheckboxGroup, gradio.Group, gradio.CheckboxGroup]:
+	deepfuze.globals.face_mask_types = face_mask_types or deepfuze.choices.face_mask_types
+	has_box_mask = 'box' in face_mask_types
+	has_region_mask = 'region' in face_mask_types
+	return gradio.CheckboxGroup(value = deepfuze.globals.face_mask_types), gradio.Group(visible = has_box_mask), gradio.CheckboxGroup(visible = has_region_mask)
+
+
+def update_face_mask_blur(face_mask_blur : float) -> None:
+	deepfuze.globals.face_mask_blur = face_mask_blur
+
+
+def update_face_mask_padding(face_mask_padding_top : int, face_mask_padding_right : int, face_mask_padding_bottom : int, face_mask_padding_left : int) -> None:
+	deepfuze.globals.face_mask_padding = (face_mask_padding_top, face_mask_padding_right, face_mask_padding_bottom, face_mask_padding_left)
+
+
+def update_face_mask_regions(face_mask_regions : List[FaceMaskRegion]) -> gradio.CheckboxGroup:
+	deepfuze.globals.face_mask_regions = face_mask_regions or deepfuze.choices.face_mask_regions
+	return gradio.CheckboxGroup(value = deepfuze.globals.face_mask_regions)
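Note that the four padding sliders above feed a single `face_mask_padding` tuple in top, right, bottom, left order, the same clockwise order CSS uses. A tiny sketch with hypothetical values:

```python
# Hypothetical padding values in the (top, right, bottom, left) order
# written by update_face_mask_padding() above.
face_mask_padding = (10, 0, 15, 0)

top, right, bottom, left = face_mask_padding
print(f"top={top} right={right} bottom={bottom} left={left}")
```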
diff --git a/deepfuze/uis/components/face_selector.py b/deepfuze/uis/components/face_selector.py
new file mode 100644
index 0000000..eea2920
--- /dev/null
+++ b/deepfuze/uis/components/face_selector.py
@@ -0,0 +1,165 @@
+from typing import List, Optional, Tuple, Any, Dict
+
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import wording
+from deepfuze.face_store import clear_static_faces, clear_reference_faces
+from deepfuze.vision import get_video_frame, read_static_image, normalize_frame_color
+from deepfuze.filesystem import is_image, is_video
+from deepfuze.face_analyser import get_many_faces
+from deepfuze.typing import VisionFrame, FaceSelectorMode
+from deepfuze.uis.core import get_ui_component, get_ui_components, register_ui_component
+
+FACE_SELECTOR_MODE_DROPDOWN : Optional[gradio.Dropdown] = None
+REFERENCE_FACE_POSITION_GALLERY : Optional[gradio.Gallery] = None
+REFERENCE_FACE_DISTANCE_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global FACE_SELECTOR_MODE_DROPDOWN
+	global REFERENCE_FACE_POSITION_GALLERY
+	global REFERENCE_FACE_DISTANCE_SLIDER
+
+	reference_face_gallery_args : Dict[str, Any] =\
+	{
+		'label': wording.get('uis.reference_face_gallery'),
+		'object_fit': 'cover',
+		'columns': 8,
+		'allow_preview': False,
+		'visible': 'reference' in deepfuze.globals.face_selector_mode
+	}
+	if is_image(deepfuze.globals.target_path):
+		reference_frame = read_static_image(deepfuze.globals.target_path)
+		reference_face_gallery_args['value'] = extract_gallery_frames(reference_frame)
+	if is_video(deepfuze.globals.target_path):
+		reference_frame = get_video_frame(deepfuze.globals.target_path, deepfuze.globals.reference_frame_number)
+		reference_face_gallery_args['value'] = extract_gallery_frames(reference_frame)
+	FACE_SELECTOR_MODE_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.face_selector_mode_dropdown'),
+		choices = deepfuze.choices.face_selector_modes,
+		value = deepfuze.globals.face_selector_mode
+	)
+	REFERENCE_FACE_POSITION_GALLERY = gradio.Gallery(**reference_face_gallery_args)
+	REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider(
+		label = wording.get('uis.reference_face_distance_slider'),
+		value = deepfuze.globals.reference_face_distance,
+		step = deepfuze.choices.reference_face_distance_range[1] - deepfuze.choices.reference_face_distance_range[0],
+		minimum = deepfuze.choices.reference_face_distance_range[0],
+		maximum = deepfuze.choices.reference_face_distance_range[-1],
+		visible = 'reference' in deepfuze.globals.face_selector_mode
+	)
+	register_ui_component('face_selector_mode_dropdown', FACE_SELECTOR_MODE_DROPDOWN)
+	register_ui_component('reference_face_position_gallery', REFERENCE_FACE_POSITION_GALLERY)
+	register_ui_component('reference_face_distance_slider', REFERENCE_FACE_DISTANCE_SLIDER)
+
+
+def listen() -> None:
+	FACE_SELECTOR_MODE_DROPDOWN.change(update_face_selector_mode, inputs = FACE_SELECTOR_MODE_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ])
+	REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_reference_face_position)
+	REFERENCE_FACE_DISTANCE_SLIDER.release(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER)
+
+	for ui_component in get_ui_components(
+	[
+		'target_image',
+		'target_video'
+	]):
+		for method in [ 'upload', 'change', 'clear' ]:
+			getattr(ui_component, method)(update_reference_face_position)
+			getattr(ui_component, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY)
+
+	for ui_component in get_ui_components(
+	[
+		'face_analyser_order_dropdown',
+		'face_analyser_age_dropdown',
+		'face_analyser_gender_dropdown'
+	]):
+		ui_component.change(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY)
+
+	for ui_component in get_ui_components(
+	[
+		'face_detector_model_dropdown',
+		'face_detector_size_dropdown'
+	]):
+		ui_component.change(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY)
+
+	for ui_component in get_ui_components(
+	[
+		'face_detector_score_slider',
+		'face_landmarker_score_slider'
+	]):
+		ui_component.release(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY)
+
+	preview_frame_slider = get_ui_component('preview_frame_slider')
+	if preview_frame_slider:
+		preview_frame_slider.change(update_reference_frame_number, inputs = preview_frame_slider)
+		preview_frame_slider.release(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY)
+
+
+def update_face_selector_mode(face_selector_mode : FaceSelectorMode) -> Tuple[gradio.Gallery, gradio.Slider]:
+	if face_selector_mode == 'many':
+		deepfuze.globals.face_selector_mode = face_selector_mode
+		return gradio.Gallery(visible = False), gradio.Slider(visible = False)
+	if face_selector_mode == 'one':
+		deepfuze.globals.face_selector_mode = face_selector_mode
+		return gradio.Gallery(visible = False), gradio.Slider(visible = False)
+	if face_selector_mode == 'reference':
+		deepfuze.globals.face_selector_mode = face_selector_mode
+		return gradio.Gallery(visible = True), gradio.Slider(visible = True)
+
+
+def clear_and_update_reference_face_position(event : gradio.SelectData) -> gradio.Gallery:
+	clear_reference_faces()
+	clear_static_faces()
+	update_reference_face_position(event.index)
+	return update_reference_position_gallery()
+
+
+def update_reference_face_position(reference_face_position : int = 0) -> None:
+	deepfuze.globals.reference_face_position = reference_face_position
+
+
+def update_reference_face_distance(reference_face_distance : float) -> None:
+	deepfuze.globals.reference_face_distance = reference_face_distance
+
+
+def update_reference_frame_number(reference_frame_number : int) -> None:
+	deepfuze.globals.reference_frame_number = reference_frame_number
+
+
+def clear_and_update_reference_position_gallery() -> gradio.Gallery:
+	clear_reference_faces()
+	clear_static_faces()
+	return update_reference_position_gallery()
+
+
+def update_reference_position_gallery() -> gradio.Gallery:
+	gallery_vision_frames = []
+	if is_image(deepfuze.globals.target_path):
+		temp_vision_frame = read_static_image(deepfuze.globals.target_path)
+		gallery_vision_frames = extract_gallery_frames(temp_vision_frame)
+	if is_video(deepfuze.globals.target_path):
+		temp_vision_frame = get_video_frame(deepfuze.globals.target_path, deepfuze.globals.reference_frame_number)
+		gallery_vision_frames = extract_gallery_frames(temp_vision_frame)
+	if gallery_vision_frames:
+		return gradio.Gallery(value = gallery_vision_frames)
+	return gradio.Gallery(value = None)
+
+
+def extract_gallery_frames(temp_vision_frame : VisionFrame) -> List[VisionFrame]:
+	gallery_vision_frames = []
+	faces = get_many_faces(temp_vision_frame)
+
+	for face in faces:
+		start_x, start_y, end_x, end_y = map(int, face.bounding_box)
+		padding_x = int((end_x - start_x) * 0.25)
+		padding_y = int((end_y - start_y) * 0.25)
+		start_x = max(0, start_x - padding_x)
+		start_y = max(0, start_y - padding_y)
+		end_x = max(0, end_x + padding_x)
+		end_y = max(0, end_y + padding_y)
+		crop_vision_frame = temp_vision_frame[start_y:end_y, start_x:end_x]
+		crop_vision_frame = normalize_frame_color(crop_vision_frame)
+		gallery_vision_frames.append(crop_vision_frame)
+	return gallery_vision_frames
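`extract_gallery_frames()` above widens each detected bounding box by 25% per side before cropping; only the top-left corner genuinely needs the `max(0, ...)` clamp, since NumPy slicing tolerates end indices past the frame edge and the clamp on the bottom-right coordinates is effectively a no-op. A standalone sketch of the same crop math on a dummy frame:

```python
import numpy

# Dummy 480x640 BGR frame and a hypothetical face bounding box.
frame = numpy.zeros((480, 640, 3), dtype = numpy.uint8)
start_x, start_y, end_x, end_y = 200, 100, 300, 240

padding_x = int((end_x - start_x) * 0.25)  # 25
padding_y = int((end_y - start_y) * 0.25)  # 35
start_x = max(0, start_x - padding_x)      # 175
start_y = max(0, start_y - padding_y)      # 65
end_x = end_x + padding_x                  # 325
end_y = end_y + padding_y                  # 275

crop = frame[start_y:end_y, start_x:end_x]
print(crop.shape)  # (210, 150, 3)
```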
diff --git a/deepfuze/uis/components/frame_processors.py b/deepfuze/uis/components/frame_processors.py
new file mode 100644
index 0000000..185fe11
--- /dev/null
+++ b/deepfuze/uis/components/frame_processors.py
@@ -0,0 +1,40 @@
+from typing import List, Optional
+import gradio
+
+import deepfuze.globals
+from deepfuze import wording
+from deepfuze.processors.frame.core import load_frame_processor_module, clear_frame_processors_modules
+from deepfuze.filesystem import list_directory
+from deepfuze.uis.core import register_ui_component
+
+FRAME_PROCESSORS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+
+
+def render() -> None:
+	global FRAME_PROCESSORS_CHECKBOX_GROUP
+
+	FRAME_PROCESSORS_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.frame_processors_checkbox_group'),
+		choices = sort_frame_processors(deepfuze.globals.frame_processors),
+		value = deepfuze.globals.frame_processors
+	)
+	register_ui_component('frame_processors_checkbox_group', FRAME_PROCESSORS_CHECKBOX_GROUP)
+
+
+def listen() -> None:
+	FRAME_PROCESSORS_CHECKBOX_GROUP.change(update_frame_processors, inputs = FRAME_PROCESSORS_CHECKBOX_GROUP, outputs = FRAME_PROCESSORS_CHECKBOX_GROUP)
+
+
+def update_frame_processors(frame_processors : List[str]) -> gradio.CheckboxGroup:
+	deepfuze.globals.frame_processors = frame_processors
+	clear_frame_processors_modules()
+	for frame_processor in frame_processors:
+		frame_processor_module = load_frame_processor_module(frame_processor)
+		if not frame_processor_module.pre_check():
+			return gradio.CheckboxGroup()
+	return gradio.CheckboxGroup(value = deepfuze.globals.frame_processors, choices = sort_frame_processors(deepfuze.globals.frame_processors))
+
+
+def sort_frame_processors(frame_processors : List[str]) -> List[str]:
+	available_frame_processors = list_directory('deepfuze/processors/frame/modules')
+	return sorted(available_frame_processors, key = lambda frame_processor : frame_processors.index(frame_processor) if frame_processor in frame_processors else len(frame_processors))
diff --git a/deepfuze/uis/components/frame_processors_options.py b/deepfuze/uis/components/frame_processors_options.py
new file mode 100755
index 0000000..759eb76
--- /dev/null
+++ b/deepfuze/uis/components/frame_processors_options.py
@@ -0,0 +1,216 @@
+from typing import List, Optional, Tuple
+import gradio
+
+import deepfuze.globals
+from deepfuze import face_analyser, wording
+from deepfuze.processors.frame.core import load_frame_processor_module
+from deepfuze.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices
+from deepfuze.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel
+from deepfuze.uis.core import get_ui_component, register_ui_component
+
+FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
+FACE_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+FACE_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None
+FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+FRAME_COLORIZER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+FRAME_COLORIZER_BLEND_SLIDER : Optional[gradio.Slider] = None
+FRAME_COLORIZER_SIZE_DROPDOWN : Optional[gradio.Dropdown] = None
+FRAME_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+FRAME_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None
+LIP_SYNCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None
+
+
+def render() -> None:
+	global FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP
+	global FACE_ENHANCER_MODEL_DROPDOWN
+	global FACE_ENHANCER_BLEND_SLIDER
+	global FACE_SWAPPER_MODEL_DROPDOWN
+	global FRAME_COLORIZER_MODEL_DROPDOWN
+	global FRAME_COLORIZER_BLEND_SLIDER
+	global FRAME_COLORIZER_SIZE_DROPDOWN
+	global FRAME_ENHANCER_MODEL_DROPDOWN
+	global FRAME_ENHANCER_BLEND_SLIDER
+	global LIP_SYNCER_MODEL_DROPDOWN
+
+	FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP = gradio.CheckboxGroup(
+		label = wording.get('uis.face_debugger_items_checkbox_group'),
+		choices = frame_processors_choices.face_debugger_items,
+		value = frame_processors_globals.face_debugger_items,
+		visible = 'face_debugger' in deepfuze.globals.frame_processors
+	)
+	FACE_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.face_enhancer_model_dropdown'),
+		choices = frame_processors_choices.face_enhancer_models,
+		value = frame_processors_globals.face_enhancer_model,
+		visible = 'face_enhancer' in deepfuze.globals.frame_processors
+	)
+	FACE_ENHANCER_BLEND_SLIDER = gradio.Slider(
+		label = wording.get('uis.face_enhancer_blend_slider'),
+		value = frame_processors_globals.face_enhancer_blend,
+		step = frame_processors_choices.face_enhancer_blend_range[1] - frame_processors_choices.face_enhancer_blend_range[0],
+		minimum = frame_processors_choices.face_enhancer_blend_range[0],
+		maximum = frame_processors_choices.face_enhancer_blend_range[-1],
+		visible = 'face_enhancer' in deepfuze.globals.frame_processors
+	)
+	FACE_SWAPPER_MODEL_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.face_swapper_model_dropdown'),
+		choices = frame_processors_choices.face_swapper_models,
+		value = frame_processors_globals.face_swapper_model,
+		visible = 'face_swapper' in deepfuze.globals.frame_processors
+	)
+	FRAME_COLORIZER_MODEL_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.frame_colorizer_model_dropdown'),
+		choices = frame_processors_choices.frame_colorizer_models,
+		value = frame_processors_globals.frame_colorizer_model,
+		visible = 'frame_colorizer' in deepfuze.globals.frame_processors
+	)
+	FRAME_COLORIZER_BLEND_SLIDER = gradio.Slider(
+		label = wording.get('uis.frame_colorizer_blend_slider'),
+		value = frame_processors_globals.frame_colorizer_blend,
+		step = frame_processors_choices.frame_colorizer_blend_range[1] - frame_processors_choices.frame_colorizer_blend_range[0],
+		minimum = frame_processors_choices.frame_colorizer_blend_range[0],
+		maximum = frame_processors_choices.frame_colorizer_blend_range[-1],
+		visible = 'frame_colorizer' in deepfuze.globals.frame_processors
+	)
+	FRAME_COLORIZER_SIZE_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.frame_colorizer_size_dropdown'),
+		choices = frame_processors_choices.frame_colorizer_sizes,
+		value = frame_processors_globals.frame_colorizer_size,
+		visible = 'frame_colorizer' in deepfuze.globals.frame_processors
+	)
+	FRAME_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.frame_enhancer_model_dropdown'),
+		choices = frame_processors_choices.frame_enhancer_models,
+		value = frame_processors_globals.frame_enhancer_model,
+		visible = 'frame_enhancer' in deepfuze.globals.frame_processors
+	)
+	FRAME_ENHANCER_BLEND_SLIDER = gradio.Slider(
+		label = wording.get('uis.frame_enhancer_blend_slider'),
+		value = frame_processors_globals.frame_enhancer_blend,
+		step = frame_processors_choices.frame_enhancer_blend_range[1] - frame_processors_choices.frame_enhancer_blend_range[0],
+		minimum = frame_processors_choices.frame_enhancer_blend_range[0],
+		maximum = frame_processors_choices.frame_enhancer_blend_range[-1],
+		visible = 'frame_enhancer' in deepfuze.globals.frame_processors
+	)
+	LIP_SYNCER_MODEL_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.lip_syncer_model_dropdown'),
+		choices = frame_processors_choices.lip_syncer_models,
+		value = frame_processors_globals.lip_syncer_model,
+		visible = 'lip_syncer' in deepfuze.globals.frame_processors
+	)
+	register_ui_component('face_debugger_items_checkbox_group', FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP)
+	register_ui_component('face_enhancer_model_dropdown', FACE_ENHANCER_MODEL_DROPDOWN)
+	register_ui_component('face_enhancer_blend_slider', FACE_ENHANCER_BLEND_SLIDER)
+	register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN)
+	register_ui_component('frame_colorizer_model_dropdown', FRAME_COLORIZER_MODEL_DROPDOWN)
+	register_ui_component('frame_colorizer_blend_slider', FRAME_COLORIZER_BLEND_SLIDER)
+	register_ui_component('frame_colorizer_size_dropdown', FRAME_COLORIZER_SIZE_DROPDOWN)
+	register_ui_component('frame_enhancer_model_dropdown', FRAME_ENHANCER_MODEL_DROPDOWN)
+	register_ui_component('frame_enhancer_blend_slider', FRAME_ENHANCER_BLEND_SLIDER)
+	register_ui_component('lip_syncer_model_dropdown', LIP_SYNCER_MODEL_DROPDOWN)
+
+
+def listen() -> None:
+	FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP.change(update_face_debugger_items, inputs = FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP)
+	FACE_ENHANCER_MODEL_DROPDOWN.change(update_face_enhancer_model, inputs = FACE_ENHANCER_MODEL_DROPDOWN, outputs = FACE_ENHANCER_MODEL_DROPDOWN)
+	FACE_ENHANCER_BLEND_SLIDER.release(update_face_enhancer_blend, inputs = FACE_ENHANCER_BLEND_SLIDER)
+	FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = FACE_SWAPPER_MODEL_DROPDOWN)
+	FRAME_COLORIZER_MODEL_DROPDOWN.change(update_frame_colorizer_model, inputs = FRAME_COLORIZER_MODEL_DROPDOWN, outputs = FRAME_COLORIZER_MODEL_DROPDOWN)
+	FRAME_COLORIZER_BLEND_SLIDER.release(update_frame_colorizer_blend, inputs = FRAME_COLORIZER_BLEND_SLIDER)
+	FRAME_COLORIZER_SIZE_DROPDOWN.change(update_frame_colorizer_size, inputs = FRAME_COLORIZER_SIZE_DROPDOWN, outputs = FRAME_COLORIZER_SIZE_DROPDOWN)
+	FRAME_ENHANCER_MODEL_DROPDOWN.change(update_frame_enhancer_model, inputs = FRAME_ENHANCER_MODEL_DROPDOWN, outputs = FRAME_ENHANCER_MODEL_DROPDOWN)
+	FRAME_ENHANCER_BLEND_SLIDER.release(update_frame_enhancer_blend, inputs = FRAME_ENHANCER_BLEND_SLIDER)
+	LIP_SYNCER_MODEL_DROPDOWN.change(update_lip_syncer_model, inputs = LIP_SYNCER_MODEL_DROPDOWN, outputs = LIP_SYNCER_MODEL_DROPDOWN)
+	frame_processors_checkbox_group = get_ui_component('frame_processors_checkbox_group')
+	if frame_processors_checkbox_group:
+		frame_processors_checkbox_group.change(update_frame_processors, inputs = frame_processors_checkbox_group, outputs = [ FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FACE_SWAPPER_MODEL_DROPDOWN, FRAME_COLORIZER_MODEL_DROPDOWN, FRAME_COLORIZER_BLEND_SLIDER, FRAME_COLORIZER_SIZE_DROPDOWN, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, LIP_SYNCER_MODEL_DROPDOWN ])
+
+
+def update_frame_processors(frame_processors : List[str]) -> Tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown]:
+	has_face_debugger = 'face_debugger' in frame_processors
+	has_face_enhancer = 'face_enhancer' in frame_processors
+	has_face_swapper = 'face_swapper' in frame_processors
+	has_frame_colorizer = 'frame_colorizer' in frame_processors
+	has_frame_enhancer = 'frame_enhancer' in frame_processors
+	has_lip_syncer = 'lip_syncer' in frame_processors
+	return gradio.CheckboxGroup(visible = has_face_debugger), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_frame_colorizer), gradio.Slider(visible = has_frame_colorizer), gradio.Dropdown(visible = has_frame_colorizer), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.Dropdown(visible = has_lip_syncer)
+
+
+def update_face_debugger_items(face_debugger_items : List[FaceDebuggerItem]) -> None:
+	frame_processors_globals.face_debugger_items = face_debugger_items
+
+
+def update_face_enhancer_model(face_enhancer_model : FaceEnhancerModel) -> gradio.Dropdown:
+	frame_processors_globals.face_enhancer_model = face_enhancer_model
+	face_enhancer_module = load_frame_processor_module('face_enhancer')
+	face_enhancer_module.clear_frame_processor()
+	face_enhancer_module.set_options('model', face_enhancer_module.MODELS[face_enhancer_model])
+	if face_enhancer_module.pre_check():
+		return gradio.Dropdown(value = frame_processors_globals.face_enhancer_model)
+	return gradio.Dropdown()
+
+
+def update_face_enhancer_blend(face_enhancer_blend : int) -> None:
+	frame_processors_globals.face_enhancer_blend = face_enhancer_blend
+
+
+def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.Dropdown:
+	frame_processors_globals.face_swapper_model = face_swapper_model
+	if face_swapper_model == 'blendswap_256':
+		deepfuze.globals.face_recognizer_model = 'arcface_blendswap'
+	if face_swapper_model == 'inswapper_128' or face_swapper_model == 'inswapper_128_fp16':
+		deepfuze.globals.face_recognizer_model = 'arcface_inswapper'
+	if face_swapper_model == 'simswap_256' or face_swapper_model == 'simswap_512_unofficial':
+		deepfuze.globals.face_recognizer_model = 'arcface_simswap'
+	if face_swapper_model == 'uniface_256':
+		deepfuze.globals.face_recognizer_model = 'arcface_uniface'
+	face_swapper_module = load_frame_processor_module('face_swapper')
+	face_swapper_module.clear_model_initializer()
+	face_swapper_module.clear_frame_processor()
+	face_swapper_module.set_options('model', face_swapper_module.MODELS[face_swapper_model])
+	if face_analyser.pre_check() and face_swapper_module.pre_check():
+		return gradio.Dropdown(value = frame_processors_globals.face_swapper_model)
+	return gradio.Dropdown()
+
+
+def update_frame_colorizer_model(frame_colorizer_model : FrameColorizerModel) -> gradio.Dropdown:
+	frame_processors_globals.frame_colorizer_model = frame_colorizer_model
+	frame_colorizer_module = load_frame_processor_module('frame_colorizer')
+	frame_colorizer_module.clear_frame_processor()
+	frame_colorizer_module.set_options('model', frame_colorizer_module.MODELS[frame_colorizer_model])
+	if frame_colorizer_module.pre_check():
+		return gradio.Dropdown(value = frame_processors_globals.frame_colorizer_model)
+	return gradio.Dropdown()
+
+
+def update_frame_colorizer_blend(frame_colorizer_blend : int) -> None:
+	frame_processors_globals.frame_colorizer_blend = frame_colorizer_blend
+
+
+def update_frame_colorizer_size(frame_colorizer_size : str) -> gradio.Dropdown:
+	frame_processors_globals.frame_colorizer_size = frame_colorizer_size
+	return gradio.Dropdown(value = frame_processors_globals.frame_colorizer_size)
+
+
+def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gradio.Dropdown:
+	frame_processors_globals.frame_enhancer_model = frame_enhancer_model
+	frame_enhancer_module = load_frame_processor_module('frame_enhancer')
+	frame_enhancer_module.clear_frame_processor()
+	frame_enhancer_module.set_options('model', frame_enhancer_module.MODELS[frame_enhancer_model])
+	if frame_enhancer_module.pre_check():
+		return gradio.Dropdown(value = frame_processors_globals.frame_enhancer_model)
+	return gradio.Dropdown()
+
+
+def update_frame_enhancer_blend(frame_enhancer_blend : int) -> None:
+	frame_processors_globals.frame_enhancer_blend = frame_enhancer_blend
+
+
+def update_lip_syncer_model(lip_syncer_model : LipSyncerModel) -> gradio.Dropdown:
+	frame_processors_globals.lip_syncer_model = lip_syncer_model
+	lip_syncer_module = load_frame_processor_module('lip_syncer')
+	lip_syncer_module.clear_frame_processor()
+	lip_syncer_module.set_options('model', lip_syncer_module.MODELS[lip_syncer_model])
+	if lip_syncer_module.pre_check():
+		return gradio.Dropdown(value = frame_processors_globals.lip_syncer_model)
+	return gradio.Dropdown()
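`update_frame_processors()` above drives the visibility of ten option widgets from simple membership tests. The processor-to-widget mapping is easier to read as a table; a compact sketch (the widget names here are illustrative strings, not the Gradio objects themselves):

```python
# Which option widgets each frame processor reveals, following the
# return order of update_frame_processors() above.
PROCESSOR_WIDGETS = {
	'face_debugger': ['face_debugger_items_checkbox_group'],
	'face_enhancer': ['face_enhancer_model_dropdown', 'face_enhancer_blend_slider'],
	'face_swapper': ['face_swapper_model_dropdown'],
	'frame_colorizer': ['frame_colorizer_model_dropdown', 'frame_colorizer_blend_slider', 'frame_colorizer_size_dropdown'],
	'frame_enhancer': ['frame_enhancer_model_dropdown', 'frame_enhancer_blend_slider'],
	'lip_syncer': ['lip_syncer_model_dropdown'],
}

def visible_widgets(frame_processors):
	# Flatten the widgets revealed by the selected processors.
	return [widget for processor in frame_processors for widget in PROCESSOR_WIDGETS.get(processor, [])]

print(visible_widgets(['face_swapper', 'lip_syncer']))
```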
diff --git a/deepfuze/uis/components/memory.py b/deepfuze/uis/components/memory.py
new file mode 100644
index 0000000..4446ae0
--- /dev/null
+++ b/deepfuze/uis/components/memory.py
@@ -0,0 +1,41 @@
+from typing import Optional
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze.typing import VideoMemoryStrategy
+from deepfuze import wording
+
+VIDEO_MEMORY_STRATEGY_DROPDOWN : Optional[gradio.Dropdown] = None
+SYSTEM_MEMORY_LIMIT_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global VIDEO_MEMORY_STRATEGY_DROPDOWN
+	global SYSTEM_MEMORY_LIMIT_SLIDER
+
+	VIDEO_MEMORY_STRATEGY_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.video_memory_strategy_dropdown'),
+		choices = deepfuze.choices.video_memory_strategies,
+		value = deepfuze.globals.video_memory_strategy
+	)
+	SYSTEM_MEMORY_LIMIT_SLIDER = gradio.Slider(
+		label = wording.get('uis.system_memory_limit_slider'),
+		step = deepfuze.choices.system_memory_limit_range[1] - deepfuze.choices.system_memory_limit_range[0],
+		minimum = deepfuze.choices.system_memory_limit_range[0],
+		maximum = deepfuze.choices.system_memory_limit_range[-1],
+		value = deepfuze.globals.system_memory_limit
+	)
+
+
+def listen() -> None:
+	VIDEO_MEMORY_STRATEGY_DROPDOWN.change(update_video_memory_strategy, inputs = VIDEO_MEMORY_STRATEGY_DROPDOWN)
+	SYSTEM_MEMORY_LIMIT_SLIDER.release(update_system_memory_limit, inputs = SYSTEM_MEMORY_LIMIT_SLIDER)
+
+
+def update_video_memory_strategy(video_memory_strategy : VideoMemoryStrategy) -> None:
+	deepfuze.globals.video_memory_strategy = video_memory_strategy
+
+
+def update_system_memory_limit(system_memory_limit : int) -> None:
+	deepfuze.globals.system_memory_limit = system_memory_limit
diff --git a/deepfuze/uis/components/output.py b/deepfuze/uis/components/output.py
new file mode 100644
index 0000000..b2005c8
--- /dev/null
+++ b/deepfuze/uis/components/output.py
@@ -0,0 +1,88 @@
+from typing import Tuple, Optional
+from time import sleep
+import gradio
+
+import deepfuze.globals
+from deepfuze import process_manager, wording
+from deepfuze.core import conditional_process
+from deepfuze.memory import limit_system_memory
+from deepfuze.normalizer import normalize_output_path
+from deepfuze.uis.core import get_ui_component
+from deepfuze.filesystem import clear_temp, is_image, is_video
+
+OUTPUT_IMAGE : Optional[gradio.Image] = None
+OUTPUT_VIDEO : Optional[gradio.Video] = None
+OUTPUT_START_BUTTON : Optional[gradio.Button] = None
+OUTPUT_CLEAR_BUTTON : Optional[gradio.Button] = None
+OUTPUT_STOP_BUTTON : Optional[gradio.Button] = None
+
+
+def render() -> None:
+	global OUTPUT_IMAGE
+	global OUTPUT_VIDEO
+	global OUTPUT_START_BUTTON
+	global OUTPUT_STOP_BUTTON
+	global OUTPUT_CLEAR_BUTTON
+
+	OUTPUT_IMAGE = gradio.Image(
+		label = wording.get('uis.output_image_or_video'),
+		visible = False
+	)
+	OUTPUT_VIDEO = gradio.Video(
+		label = wording.get('uis.output_image_or_video')
+	)
+	OUTPUT_START_BUTTON = gradio.Button(
+		value = wording.get('uis.start_button'),
+		variant = 'primary',
+		size = 'sm'
+	)
+	OUTPUT_STOP_BUTTON = gradio.Button(
+		value = wording.get('uis.stop_button'),
+		variant = 'primary',
+		size = 'sm',
+		visible = False
+	)
+	OUTPUT_CLEAR_BUTTON = gradio.Button(
+		value = wording.get('uis.clear_button'),
+		size = 'sm'
+	)
+
+
+def listen() -> None:
+	output_path_textbox = get_ui_component('output_path_textbox')
+	if output_path_textbox:
+		OUTPUT_START_BUTTON.click(start, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ])
+		OUTPUT_START_BUTTON.click(process, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO, OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ])
+		OUTPUT_STOP_BUTTON.click(stop, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ])
+		OUTPUT_CLEAR_BUTTON.click(clear, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ])
+
+
+def start() -> Tuple[gradio.Button, gradio.Button]:
+	while not process_manager.is_processing():
+		sleep(0.5)
+	return gradio.Button(visible = False), gradio.Button(visible = True)
+
+
+def process() -> Tuple[gradio.Image, gradio.Video, gradio.Button, gradio.Button]:
+	normed_output_path = normalize_output_path(deepfuze.globals.target_path, deepfuze.globals.output_path)
+	if deepfuze.globals.system_memory_limit > 0:
+		limit_system_memory(deepfuze.globals.system_memory_limit)
+	conditional_process()
+	if is_image(normed_output_path):
+		return gradio.Image(value = normed_output_path, visible = True), gradio.Video(value = None, visible = False), gradio.Button(visible = True), gradio.Button(visible = False)
+	if is_video(normed_output_path):
+		return gradio.Image(value = None, visible = False), gradio.Video(value = normed_output_path, visible = True), gradio.Button(visible = True), gradio.Button(visible = False)
+	return gradio.Image(value = None), gradio.Video(value = None), gradio.Button(visible = True), gradio.Button(visible = False)
+
+
+def stop() -> Tuple[gradio.Button, gradio.Button]:
+	process_manager.stop()
+	return gradio.Button(visible = True), gradio.Button(visible = False)
+
+
+def clear() -> Tuple[gradio.Image, gradio.Video]:
+	while process_manager.is_processing():
+		sleep(0.5)
+	if deepfuze.globals.target_path:
+		clear_temp(deepfuze.globals.target_path)
+	return gradio.Image(value = None), gradio.Video(value = None)
diff --git a/deepfuze/uis/components/output_options.py b/deepfuze/uis/components/output_options.py
new file mode 100644
index 0000000..2a1e97f
--- /dev/null
+++ b/deepfuze/uis/components/output_options.py
@@ -0,0 +1,161 @@
+from typing import Optional, Tuple
+import gradio
+
+import deepfuze.globals
+import deepfuze.choices
+from deepfuze import wording
+from deepfuze.typing import OutputVideoEncoder, OutputVideoPreset, Fps
+from deepfuze.filesystem import is_image, is_video
+from deepfuze.uis.core import get_ui_components, register_ui_component
+from deepfuze.vision import detect_image_resolution, create_image_resolutions, detect_video_fps, detect_video_resolution, create_video_resolutions, pack_resolution
+
+OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None
+OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None
+OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
+OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None
+OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None
+OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
+OUTPUT_VIDEO_QUALITY_SLIDER : Optional[gradio.Slider] = None
+OUTPUT_VIDEO_FPS_SLIDER : Optional[gradio.Slider] = None
+
+
+def render() -> None:
+	global OUTPUT_PATH_TEXTBOX
+	global OUTPUT_IMAGE_QUALITY_SLIDER
+	global OUTPUT_IMAGE_RESOLUTION_DROPDOWN
+	global OUTPUT_VIDEO_ENCODER_DROPDOWN
+	global OUTPUT_VIDEO_PRESET_DROPDOWN
+	global OUTPUT_VIDEO_RESOLUTION_DROPDOWN
+	global OUTPUT_VIDEO_QUALITY_SLIDER
+	global OUTPUT_VIDEO_FPS_SLIDER
+
+	output_image_resolutions = []
+	output_video_resolutions = []
+	if is_image(deepfuze.globals.target_path):
+		output_image_resolution = detect_image_resolution(deepfuze.globals.target_path)
+		output_image_resolutions = create_image_resolutions(output_image_resolution)
+	if is_video(deepfuze.globals.target_path):
+		output_video_resolution = detect_video_resolution(deepfuze.globals.target_path)
+		output_video_resolutions = create_video_resolutions(output_video_resolution)
+	deepfuze.globals.output_path = deepfuze.globals.output_path or '.'
+	OUTPUT_PATH_TEXTBOX = gradio.Textbox(
+		label = wording.get('uis.output_path_textbox'),
+		value = deepfuze.globals.output_path,
+		max_lines = 1
+	)
+	OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider(
+		label = wording.get('uis.output_image_quality_slider'),
+		value = deepfuze.globals.output_image_quality,
+		step = deepfuze.choices.output_image_quality_range[1] - deepfuze.choices.output_image_quality_range[0],
+		minimum = deepfuze.choices.output_image_quality_range[0],
+		maximum = deepfuze.choices.output_image_quality_range[-1],
+		visible = is_image(deepfuze.globals.target_path)
+	)
+	OUTPUT_IMAGE_RESOLUTION_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.output_image_resolution_dropdown'),
+		choices = output_image_resolutions,
+		value = deepfuze.globals.output_image_resolution,
+		visible = is_image(deepfuze.globals.target_path)
+	)
+	OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.output_video_encoder_dropdown'),
+		choices = deepfuze.choices.output_video_encoders,
+		value = deepfuze.globals.output_video_encoder,
+		visible = is_video(deepfuze.globals.target_path)
+	)
+	OUTPUT_VIDEO_PRESET_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.output_video_preset_dropdown'),
+		choices = deepfuze.choices.output_video_presets,
+		value = deepfuze.globals.output_video_preset,
+		visible = is_video(deepfuze.globals.target_path)
+	)
+	OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider(
+		label = wording.get('uis.output_video_quality_slider'),
+		value = deepfuze.globals.output_video_quality,
+		step = deepfuze.choices.output_video_quality_range[1] - deepfuze.choices.output_video_quality_range[0],
+		minimum = deepfuze.choices.output_video_quality_range[0],
+		maximum = deepfuze.choices.output_video_quality_range[-1],
+		visible = is_video(deepfuze.globals.target_path)
+	)
+	OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown(
+		label = wording.get('uis.output_video_resolution_dropdown'),
+		choices = output_video_resolutions,
+		value = deepfuze.globals.output_video_resolution,
+		visible = is_video(deepfuze.globals.target_path)
+	)
+	OUTPUT_VIDEO_FPS_SLIDER = gradio.Slider(
+		label = wording.get('uis.output_video_fps_slider'),
+		value = deepfuze.globals.output_video_fps,
+		step = 0.01,
+		minimum = 1,
+		maximum = 60,
+		visible = is_video(deepfuze.globals.target_path)
+	)
+	register_ui_component('output_path_textbox', OUTPUT_PATH_TEXTBOX)
+	register_ui_component('output_video_fps_slider', OUTPUT_VIDEO_FPS_SLIDER)
+
+
+def listen() -> None:
+	OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX)
+	OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER)
+	OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN)
+	OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN)
+	OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN)
+	OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER)
+	OUTPUT_VIDEO_RESOLUTION_DROPDOWN.change(update_output_video_resolution, inputs = OUTPUT_VIDEO_RESOLUTION_DROPDOWN)
+	OUTPUT_VIDEO_FPS_SLIDER.release(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER)
+
+	for ui_component in get_ui_components(
+	[
+		'target_image',
+		'target_video'
+	]):
+		for method in [ 'upload', 'change', 'clear' ]:
+			getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ])
+
+
+def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]:
+	if is_image(deepfuze.globals.target_path):
+		output_image_resolution = detect_image_resolution(deepfuze.globals.target_path)
+		output_image_resolutions = create_image_resolutions(output_image_resolution)
+		deepfuze.globals.output_image_resolution = pack_resolution(output_image_resolution)
+		return gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = deepfuze.globals.output_image_resolution, choices = output_image_resolutions), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None)
+	if is_video(deepfuze.globals.target_path):
+		output_video_resolution = detect_video_resolution(deepfuze.globals.target_path)
+		output_video_resolutions = create_video_resolutions(output_video_resolution)
+		deepfuze.globals.output_video_resolution = pack_resolution(output_video_resolution)
+		deepfuze.globals.output_video_fps = detect_video_fps(deepfuze.globals.target_path)
+		return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = deepfuze.globals.output_video_resolution, choices = output_video_resolutions), gradio.Slider(visible = True, value = deepfuze.globals.output_video_fps)
+	return gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None)
+
+
+def update_output_path(output_path : str) -> None:
+	deepfuze.globals.output_path = output_path
+
+
+def update_output_image_quality(output_image_quality : int) -> None:
+	deepfuze.globals.output_image_quality = output_image_quality
+
+
+def update_output_image_resolution(output_image_resolution : str) -> None:
+	deepfuze.globals.output_image_resolution = output_image_resolution
+
+
+def update_output_video_encoder(output_video_encoder : OutputVideoEncoder) -> None:
+	deepfuze.globals.output_video_encoder = output_video_encoder
+
+
+def update_output_video_preset(output_video_preset : OutputVideoPreset) -> None:
+	deepfuze.globals.output_video_preset = output_video_preset
+
+
+def update_output_video_quality(output_video_quality : int) -> None:
+	deepfuze.globals.output_video_quality = output_video_quality
+
+
+def update_output_video_resolution(output_video_resolution : str) -> None:
+	deepfuze.globals.output_video_resolution = output_video_resolution
+
+
+def update_output_video_fps(output_video_fps : Fps) -> None:
+	deepfuze.globals.output_video_fps = output_video_fps
deepfuze.globals.output_video_fps, deepfuze.globals.reference_frame_number) + if numpy.any(temp_audio_frame): + source_audio_frame = temp_audio_frame + if is_image(deepfuze.globals.target_path): + target_vision_frame = read_static_image(deepfuze.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) + if is_video(deepfuze.globals.target_path): + temp_vision_frame = get_video_frame(deepfuze.globals.target_path, deepfuze.globals.reference_frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) + preview_image_args['visible'] = True + preview_frame_slider_args['value'] = deepfuze.globals.reference_frame_number + preview_frame_slider_args['maximum'] = count_video_frame_total(deepfuze.globals.target_path) + preview_frame_slider_args['visible'] = True + PREVIEW_IMAGE = gradio.Image(**preview_image_args) + PREVIEW_FRAME_SLIDER = gradio.Slider(**preview_frame_slider_args) + register_ui_component('preview_frame_slider', PREVIEW_FRAME_SLIDER) + + +def listen() -> None: + PREVIEW_FRAME_SLIDER.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + reference_face_position_gallery = get_ui_component('reference_face_position_gallery') + if reference_face_position_gallery: + reference_face_position_gallery.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'source_audio', + 'source_image', + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) + + for ui_component in get_ui_components( + [ + 'face_debugger_items_checkbox_group', + 'frame_colorizer_size_dropdown', + 'face_selector_mode_dropdown', + 'face_mask_types_checkbox_group', + 'face_mask_region_checkbox_group', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown' + ]): + ui_component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'face_enhancer_blend_slider', + 'frame_colorizer_blend_slider', + 'frame_enhancer_blend_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', + 'reference_face_distance_slider', + 'face_mask_blur_slider', + 'face_mask_padding_top_slider', + 'face_mask_padding_bottom_slider', + 'face_mask_padding_left_slider', + 'face_mask_padding_right_slider', + 'output_video_fps_slider' + ]): + ui_component.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'frame_processors_checkbox_group', + 'face_enhancer_model_dropdown', + 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', + 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', + 'face_detector_model_dropdown', + 'face_detector_size_dropdown' + ]): + ui_component.change(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = 
PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'face_detector_score_slider', + 'face_landmarker_score_slider' + ]): + ui_component.release(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + +def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image: + clear_face_analyser() + clear_reference_faces() + clear_static_faces() + return update_preview_image(frame_number) + + +def update_preview_image(frame_number : int = 0) -> gradio.Image: + for frame_processor in deepfuze.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + conditional_append_reference_faces() + reference_faces = get_reference_faces() if 'reference' in deepfuze.globals.face_selector_mode else None + source_frames = read_static_images(deepfuze.globals.source_paths) + source_face = get_average_face(source_frames) + source_audio_path = get_first(filter_audio_paths(deepfuze.globals.source_paths)) + source_audio_frame = create_empty_audio_frame() + if source_audio_path and deepfuze.globals.output_video_fps and deepfuze.globals.reference_frame_number: + reference_audio_frame_number = deepfuze.globals.reference_frame_number + if deepfuze.globals.trim_frame_start: + reference_audio_frame_number -= deepfuze.globals.trim_frame_start + temp_audio_frame = get_audio_frame(source_audio_path, deepfuze.globals.output_video_fps, reference_audio_frame_number) + if numpy.any(temp_audio_frame): + source_audio_frame = temp_audio_frame + if is_image(deepfuze.globals.target_path): + target_vision_frame = read_static_image(deepfuze.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) + if is_video(deepfuze.globals.target_path): + temp_vision_frame = get_video_frame(deepfuze.globals.target_path, frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) + return gradio.Image(value = None) + + +def update_preview_frame_slider() -> gradio.Slider: + if is_video(deepfuze.globals.target_path): + video_frame_total = count_video_frame_total(deepfuze.globals.target_path) + return gradio.Slider(maximum = video_frame_total, visible = True) + return gradio.Slider(value = None, maximum = None, visible = False) + + +def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame: + target_vision_frame = resize_frame_resolution(target_vision_frame, (640, 640)) + if analyse_frame(target_vision_frame): + return cv2.GaussianBlur(target_vision_frame, (99, 99), 0) + for frame_processor in deepfuze.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + logger.disable() + if frame_processor_module.pre_process('preview'): + logger.enable() + target_vision_frame = frame_processor_module.process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame diff --git 
a/deepfuze/uis/components/source.py b/deepfuze/uis/components/source.py new file mode 100644 index 0000000..6e1868e --- /dev/null +++ b/deepfuze/uis/components/source.py @@ -0,0 +1,67 @@ +from typing import Optional, List, Tuple +import gradio + +import deepfuze.globals +from deepfuze import wording +from deepfuze.uis.typing import File +from deepfuze.common_helper import get_first +from deepfuze.filesystem import has_audio, has_image, filter_audio_paths, filter_image_paths +from deepfuze.uis.core import register_ui_component + +SOURCE_FILE : Optional[gradio.File] = None +SOURCE_AUDIO : Optional[gradio.Audio] = None +SOURCE_IMAGE : Optional[gradio.Image] = None + + +def render() -> None: + global SOURCE_FILE + global SOURCE_AUDIO + global SOURCE_IMAGE + + has_source_audio = has_audio(deepfuze.globals.source_paths) + has_source_image = has_image(deepfuze.globals.source_paths) + SOURCE_FILE = gradio.File( + file_count = 'multiple', + file_types = + [ + '.mp3', + '.wav', + '.png', + '.jpg', + '.webp' + ], + label = wording.get('uis.source_file'), + value = deepfuze.globals.source_paths if has_source_audio or has_source_image else None + ) + source_file_names = [ source_file_value['name'] for source_file_value in SOURCE_FILE.value ] if SOURCE_FILE.value else None + source_audio_path = get_first(filter_audio_paths(source_file_names)) + source_image_path = get_first(filter_image_paths(source_file_names)) + SOURCE_AUDIO = gradio.Audio( + value = source_audio_path if has_source_audio else None, + visible = has_source_audio, + show_label = False + ) + SOURCE_IMAGE = gradio.Image( + value = source_image_path if has_source_image else None, + visible = has_source_image, + show_label = False + ) + register_ui_component('source_audio', SOURCE_AUDIO) + register_ui_component('source_image', SOURCE_IMAGE) + + +def listen() -> None: + SOURCE_FILE.change(update, inputs = SOURCE_FILE, outputs = [ SOURCE_AUDIO, SOURCE_IMAGE ]) + + +def update(files : List[File]) -> Tuple[gradio.Audio, gradio.Image]: + file_names = [ file.name for file in files ] if files else None + has_source_audio = has_audio(file_names) + has_source_image = has_image(file_names) + if has_source_audio or has_source_image: + source_audio_path = get_first(filter_audio_paths(file_names)) + source_image_path = get_first(filter_image_paths(file_names)) + deepfuze.globals.source_paths = file_names + return gradio.Audio(value = source_audio_path, visible = has_source_audio), gradio.Image(value = source_image_path, visible = has_source_image) + deepfuze.globals.source_paths = None + return gradio.Audio(value = None, visible = False), gradio.Image(value = None, visible = False) diff --git a/deepfuze/uis/components/target.py b/deepfuze/uis/components/target.py new file mode 100644 index 0000000..f9d9d0a --- /dev/null +++ b/deepfuze/uis/components/target.py @@ -0,0 +1,83 @@ +from typing import Tuple, Optional +import gradio + +import deepfuze.globals +from deepfuze import wording +from deepfuze.face_store import clear_static_faces, clear_reference_faces +from deepfuze.uis.typing import File +from deepfuze.filesystem import get_file_size, is_image, is_video +from deepfuze.uis.core import register_ui_component +from deepfuze.vision import get_video_frame, normalize_frame_color + +FILE_SIZE_LIMIT = 512 * 1024 * 1024 + +TARGET_FILE : Optional[gradio.File] = None +TARGET_IMAGE : Optional[gradio.Image] = None +TARGET_VIDEO : Optional[gradio.Video] = None + + +def render() -> None: + global TARGET_FILE + global TARGET_IMAGE + global TARGET_VIDEO + + 
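+	# Videos larger than FILE_SIZE_LIMIT (512 MB) are previewed below as a
+	# single normalized frame in the gradio.Image slot instead of being
+	# loaded into a playable gradio.Video component.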
is_target_image = is_image(deepfuze.globals.target_path) + is_target_video = is_video(deepfuze.globals.target_path) + TARGET_FILE = gradio.File( + label = wording.get('uis.target_file'), + file_count = 'single', + file_types = + [ + '.png', + '.jpg', + '.webp', + '.webm', + '.mp4' + ], + value = deepfuze.globals.target_path if is_target_image or is_target_video else None + ) + target_image_args =\ + { + 'show_label': False, + 'visible': False + } + target_video_args =\ + { + 'show_label': False, + 'visible': False + } + if is_target_image: + target_image_args['value'] = TARGET_FILE.value['name'] + target_image_args['visible'] = True + if is_target_video: + if get_file_size(deepfuze.globals.target_path) > FILE_SIZE_LIMIT: + preview_vision_frame = normalize_frame_color(get_video_frame(deepfuze.globals.target_path)) + target_image_args['value'] = preview_vision_frame + target_image_args['visible'] = True + else: + target_video_args['value'] = TARGET_FILE.value['name'] + target_video_args['visible'] = True + TARGET_IMAGE = gradio.Image(**target_image_args) + TARGET_VIDEO = gradio.Video(**target_video_args) + register_ui_component('target_image', TARGET_IMAGE) + register_ui_component('target_video', TARGET_VIDEO) + + +def listen() -> None: + TARGET_FILE.change(update, inputs = TARGET_FILE, outputs = [ TARGET_IMAGE, TARGET_VIDEO ]) + + +def update(file : File) -> Tuple[gradio.Image, gradio.Video]: + clear_reference_faces() + clear_static_faces() + if file and is_image(file.name): + deepfuze.globals.target_path = file.name + return gradio.Image(value = file.name, visible = True), gradio.Video(value = None, visible = False) + if file and is_video(file.name): + deepfuze.globals.target_path = file.name + if get_file_size(file.name) > FILE_SIZE_LIMIT: + preview_vision_frame = normalize_frame_color(get_video_frame(file.name)) + return gradio.Image(value = preview_vision_frame, visible = True), gradio.Video(value = None, visible = False) + return gradio.Image(value = None, visible = False), gradio.Video(value = file.name, visible = True) + deepfuze.globals.target_path = None + return gradio.Image(value = None, visible = False), gradio.Video(value = None, visible = False) diff --git a/deepfuze/uis/components/temp_frame.py b/deepfuze/uis/components/temp_frame.py new file mode 100644 index 0000000..2610bbf --- /dev/null +++ b/deepfuze/uis/components/temp_frame.py @@ -0,0 +1,41 @@ +from typing import Optional +import gradio + +import deepfuze.globals +import deepfuze.choices +from deepfuze import wording +from deepfuze.typing import TempFrameFormat +from deepfuze.filesystem import is_video +from deepfuze.uis.core import get_ui_component + +TEMP_FRAME_FORMAT_DROPDOWN : Optional[gradio.Dropdown] = None + + +def render() -> None: + global TEMP_FRAME_FORMAT_DROPDOWN + + TEMP_FRAME_FORMAT_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.temp_frame_format_dropdown'), + choices = deepfuze.choices.temp_frame_formats, + value = deepfuze.globals.temp_frame_format, + visible = is_video(deepfuze.globals.target_path) + ) + + +def listen() -> None: + TEMP_FRAME_FORMAT_DROPDOWN.change(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN) + target_video = get_ui_component('target_video') + if target_video: + for method in [ 'upload', 'change', 'clear' ]: + getattr(target_video, method)(remote_update, outputs = TEMP_FRAME_FORMAT_DROPDOWN) + + +def remote_update() -> gradio.Dropdown: + if is_video(deepfuze.globals.target_path): + return gradio.Dropdown(visible = True) + return gradio.Dropdown(visible = False) 
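+# remote_update only toggles the dropdown visibility when the target changes;
+# the selected value itself is kept in sync via update_temp_frame_format.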
+ + +def update_temp_frame_format(temp_frame_format : TempFrameFormat) -> None: + deepfuze.globals.temp_frame_format = temp_frame_format + diff --git a/deepfuze/uis/components/trim_frame.py b/deepfuze/uis/components/trim_frame.py new file mode 100644 index 0000000..3584eaa --- /dev/null +++ b/deepfuze/uis/components/trim_frame.py @@ -0,0 +1,79 @@ +from typing import Any, Dict, Tuple, Optional +import gradio + +import deepfuze.globals +from deepfuze import wording +from deepfuze.face_store import clear_static_faces +from deepfuze.vision import count_video_frame_total +from deepfuze.filesystem import is_video +from deepfuze.uis.core import get_ui_components, register_ui_component + +TRIM_FRAME_START_SLIDER : Optional[gradio.Slider] = None +TRIM_FRAME_END_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global TRIM_FRAME_START_SLIDER + global TRIM_FRAME_END_SLIDER + + trim_frame_start_slider_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.trim_frame_start_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 'visible': False + } + trim_frame_end_slider_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.trim_frame_end_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 'visible': False + } + if is_video(deepfuze.globals.target_path): + video_frame_total = count_video_frame_total(deepfuze.globals.target_path) + trim_frame_start_slider_args['value'] = deepfuze.globals.trim_frame_start or 0 + trim_frame_start_slider_args['maximum'] = video_frame_total + trim_frame_start_slider_args['visible'] = True + trim_frame_end_slider_args['value'] = deepfuze.globals.trim_frame_end or video_frame_total + trim_frame_end_slider_args['maximum'] = video_frame_total + trim_frame_end_slider_args['visible'] = True + with gradio.Row(): + TRIM_FRAME_START_SLIDER = gradio.Slider(**trim_frame_start_slider_args) + TRIM_FRAME_END_SLIDER = gradio.Slider(**trim_frame_end_slider_args) + register_ui_component('trim_frame_start_slider', TRIM_FRAME_START_SLIDER) + register_ui_component('trim_frame_end_slider', TRIM_FRAME_END_SLIDER) + + +def listen() -> None: + TRIM_FRAME_START_SLIDER.release(update_trim_frame_start, inputs = TRIM_FRAME_START_SLIDER) + TRIM_FRAME_END_SLIDER.release(update_trim_frame_end, inputs = TRIM_FRAME_END_SLIDER) + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(remote_update, outputs = [ TRIM_FRAME_START_SLIDER, TRIM_FRAME_END_SLIDER ]) + + +def remote_update() -> Tuple[gradio.Slider, gradio.Slider]: + if is_video(deepfuze.globals.target_path): + video_frame_total = count_video_frame_total(deepfuze.globals.target_path) + deepfuze.globals.trim_frame_start = None + deepfuze.globals.trim_frame_end = None + return gradio.Slider(value = 0, maximum = video_frame_total, visible = True), gradio.Slider(value = video_frame_total, maximum = video_frame_total, visible = True) + return gradio.Slider(value = None, maximum = None, visible = False), gradio.Slider(value = None, maximum = None, visible = False) + + +def update_trim_frame_start(trim_frame_start : int) -> None: + clear_static_faces() + deepfuze.globals.trim_frame_start = trim_frame_start if trim_frame_start > 0 else None + + +def update_trim_frame_end(trim_frame_end : int) -> None: + clear_static_faces() + video_frame_total = count_video_frame_total(deepfuze.globals.target_path) + deepfuze.globals.trim_frame_end = trim_frame_end if trim_frame_end < video_frame_total else None diff --git 
a/deepfuze/uis/components/webcam.py b/deepfuze/uis/components/webcam.py new file mode 100644 index 0000000..da993eb --- /dev/null +++ b/deepfuze/uis/components/webcam.py @@ -0,0 +1,180 @@ +from typing import Optional, Generator, Deque +import os +import subprocess +import cv2 +import gradio +from time import sleep +from concurrent.futures import ThreadPoolExecutor +from collections import deque +from tqdm import tqdm + +import deepfuze.globals +from deepfuze import logger, wording +from deepfuze.audio import create_empty_audio_frame +from deepfuze.common_helper import is_windows +from deepfuze.content_analyser import analyse_stream +from deepfuze.filesystem import filter_image_paths +from deepfuze.typing import VisionFrame, Face, Fps +from deepfuze.face_analyser import get_average_face +from deepfuze.processors.frame.core import get_frame_processors_modules, load_frame_processor_module +from deepfuze.ffmpeg import open_ffmpeg +from deepfuze.vision import normalize_frame_color, read_static_images, unpack_resolution +from deepfuze.uis.typing import StreamMode, WebcamMode +from deepfuze.uis.core import get_ui_component, get_ui_components + +WEBCAM_CAPTURE : Optional[cv2.VideoCapture] = None +WEBCAM_IMAGE : Optional[gradio.Image] = None +WEBCAM_START_BUTTON : Optional[gradio.Button] = None +WEBCAM_STOP_BUTTON : Optional[gradio.Button] = None + + +def get_webcam_capture() -> Optional[cv2.VideoCapture]: + global WEBCAM_CAPTURE + + if WEBCAM_CAPTURE is None: + if is_windows(): + webcam_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW) + else: + webcam_capture = cv2.VideoCapture(0) + if webcam_capture and webcam_capture.isOpened(): + WEBCAM_CAPTURE = webcam_capture + return WEBCAM_CAPTURE + + +def clear_webcam_capture() -> None: + global WEBCAM_CAPTURE + + if WEBCAM_CAPTURE: + WEBCAM_CAPTURE.release() + WEBCAM_CAPTURE = None + + +def render() -> None: + global WEBCAM_IMAGE + global WEBCAM_START_BUTTON + global WEBCAM_STOP_BUTTON + + WEBCAM_IMAGE = gradio.Image( + label = wording.get('uis.webcam_image') + ) + WEBCAM_START_BUTTON = gradio.Button( + value = wording.get('uis.start_button'), + variant = 'primary', + size = 'sm' + ) + WEBCAM_STOP_BUTTON = gradio.Button( + value = wording.get('uis.stop_button'), + size = 'sm' + ) + + +def listen() -> None: + start_event = None + webcam_mode_radio = get_ui_component('webcam_mode_radio') + webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown') + webcam_fps_slider = get_ui_component('webcam_fps_slider') + if webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: + start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) + WEBCAM_STOP_BUTTON.click(stop, cancels = start_event) + + for ui_component in get_ui_components( + [ + 'frame_processors_checkbox_group', + 'face_swapper_model_dropdown', + 'face_enhancer_model_dropdown', + 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', + 'source_image' + ]): + ui_component.change(update, cancels = start_event) + + +def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: + deepfuze.globals.face_selector_mode = 'one' + deepfuze.globals.face_analyser_order = 'large-small' + source_image_paths = filter_image_paths(deepfuze.globals.source_paths) + source_frames = read_static_images(source_image_paths) + source_face = get_average_face(source_frames) + stream = None + + if webcam_mode in [ 'udp', 'v4l2' ]: + stream = 
open_stream(webcam_mode, webcam_resolution, webcam_fps) #type:ignore[arg-type] + webcam_width, webcam_height = unpack_resolution(webcam_resolution) + webcam_capture = get_webcam_capture() + if webcam_capture and webcam_capture.isOpened(): + webcam_capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) #type:ignore[attr-defined] + webcam_capture.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width) + webcam_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height) + webcam_capture.set(cv2.CAP_PROP_FPS, webcam_fps) + for capture_frame in multi_process_capture(source_face, webcam_capture, webcam_fps): + if webcam_mode == 'inline': + yield normalize_frame_color(capture_frame) + else: + try: + stream.stdin.write(capture_frame.tobytes()) + except Exception: + clear_webcam_capture() + yield None + + +def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: + with tqdm(desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = deepfuze.globals.log_level in [ 'warn', 'error' ]) as progress: + with ThreadPoolExecutor(max_workers = deepfuze.globals.execution_thread_count) as executor: + futures = [] + deque_capture_frames : Deque[VisionFrame] = deque() + while webcam_capture and webcam_capture.isOpened(): + _, capture_frame = webcam_capture.read() + if analyse_stream(capture_frame, webcam_fps): + return + future = executor.submit(process_stream_frame, source_face, capture_frame) + futures.append(future) + for future_done in [ future for future in futures if future.done() ]: + capture_frame = future_done.result() + deque_capture_frames.append(capture_frame) + futures.remove(future_done) + while deque_capture_frames: + progress.update() + yield deque_capture_frames.popleft() + + +def update() -> None: + for frame_processor in deepfuze.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + + +def stop() -> gradio.Image: + clear_webcam_capture() + return gradio.Image(value = None) + + +def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) -> VisionFrame: + source_audio_frame = create_empty_audio_frame() + for frame_processor_module in get_frame_processors_modules(deepfuze.globals.frame_processors): + logger.disable() + if frame_processor_module.pre_process('stream'): + logger.enable() + target_vision_frame = frame_processor_module.process_frame( + { + 'source_face': source_face, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame + + +def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: + commands = [ '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-s', stream_resolution, '-r', str(stream_fps), '-i', '-'] + if stream_mode == 'udp': + commands.extend([ '-b:v', '2000k', '-f', 'mpegts', 'udp://localhost:27000?pkt_size=1316' ]) + if stream_mode == 'v4l2': + try: + device_name = os.listdir('/sys/devices/virtual/video4linux')[0] + if device_name: + commands.extend([ '-f', 'v4l2', '/dev/' + device_name ]) + except FileNotFoundError: + logger.error(wording.get('stream_not_loaded').format(stream_mode = stream_mode), __name__.upper()) + return open_ffmpeg(commands) diff --git a/deepfuze/uis/components/webcam_options.py b/deepfuze/uis/components/webcam_options.py new file mode 100644 index 0000000..b662ad8 --- /dev/null +++ 
b/deepfuze/uis/components/webcam_options.py @@ -0,0 +1,37 @@ +from typing import Optional +import gradio + +from deepfuze import wording +from deepfuze.uis import choices as uis_choices +from deepfuze.uis.core import register_ui_component + +WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None +WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global WEBCAM_MODE_RADIO + global WEBCAM_RESOLUTION_DROPDOWN + global WEBCAM_FPS_SLIDER + + WEBCAM_MODE_RADIO = gradio.Radio( + label = wording.get('uis.webcam_mode_radio'), + choices = uis_choices.webcam_modes, + value = 'inline' + ) + WEBCAM_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.webcam_resolution_dropdown'), + choices = uis_choices.webcam_resolutions, + value = uis_choices.webcam_resolutions[0] + ) + WEBCAM_FPS_SLIDER = gradio.Slider( + label = wording.get('uis.webcam_fps_slider'), + value = 25, + step = 1, + minimum = 1, + maximum = 60 + ) + register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO) + register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN) + register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER) diff --git a/deepfuze/uis/core.py b/deepfuze/uis/core.py new file mode 100644 index 0000000..881c6c3 --- /dev/null +++ b/deepfuze/uis/core.py @@ -0,0 +1,156 @@ +from typing import Dict, Optional, Any, List +from types import ModuleType +import os +import importlib +import sys +import gradio + +import deepfuze.globals +from deepfuze.uis import overrides +from deepfuze import metadata, logger, wording +from deepfuze.uis.typing import Component, ComponentName +from deepfuze.filesystem import resolve_relative_path + +os.environ['GRADIO_ANALYTICS_ENABLED'] = '0' + +gradio.processing_utils.encode_array_to_base64 = overrides.encode_array_to_base64 +gradio.processing_utils.encode_pil_to_base64 = overrides.encode_pil_to_base64 + +UI_COMPONENTS: Dict[ComponentName, Component] = {} +UI_LAYOUT_MODULES : List[ModuleType] = [] +UI_LAYOUT_METHODS =\ +[ + 'pre_check', + 'pre_render', + 'render', + 'listen', + 'run' +] + + +def load_ui_layout_module(ui_layout : str) -> Any: + try: + ui_layout_module = importlib.import_module('deepfuze.uis.layouts.' 
+ ui_layout) + for method_name in UI_LAYOUT_METHODS: + if not hasattr(ui_layout_module, method_name): + raise NotImplementedError + except ModuleNotFoundError as exception: + logger.error(wording.get('ui_layout_not_loaded').format(ui_layout = ui_layout), __name__.upper()) + logger.debug(exception.msg, __name__.upper()) + sys.exit(1) + except NotImplementedError: + logger.error(wording.get('ui_layout_not_implemented').format(ui_layout = ui_layout), __name__.upper()) + sys.exit(1) + return ui_layout_module + + +def get_ui_layouts_modules(ui_layouts : List[str]) -> List[ModuleType]: + global UI_LAYOUT_MODULES + + if not UI_LAYOUT_MODULES: + for ui_layout in ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + UI_LAYOUT_MODULES.append(ui_layout_module) + return UI_LAYOUT_MODULES + + +def get_ui_component(component_name : ComponentName) -> Optional[Component]: + if component_name in UI_COMPONENTS: + return UI_COMPONENTS[component_name] + return None + + +def get_ui_components(component_names : List[ComponentName]) -> Optional[List[Component]]: + ui_components = [] + + for component_name in component_names: + component = get_ui_component(component_name) + if component: + ui_components.append(component) + return ui_components + + +def register_ui_component(component_name : ComponentName, component: Component) -> None: + UI_COMPONENTS[component_name] = component + + +def launch() -> None: + ui_layouts_total = len(deepfuze.globals.ui_layouts) + with gradio.Blocks(theme = get_theme(), css = get_css(), title = metadata.get('name') + ' ' + metadata.get('version')) as ui: + for ui_layout in deepfuze.globals.ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + if ui_layout_module.pre_render(): + if ui_layouts_total > 1: + with gradio.Tab(ui_layout): + ui_layout_module.render() + ui_layout_module.listen() + else: + ui_layout_module.render() + ui_layout_module.listen() + + for ui_layout in deepfuze.globals.ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + ui_layout_module.run(ui) + + +def get_theme() -> gradio.Theme: + return gradio.themes.Base( + primary_hue = gradio.themes.colors.red, + secondary_hue = gradio.themes.colors.neutral, + font = gradio.themes.GoogleFont('Open Sans') + ).set( + background_fill_primary = '*neutral_100', + block_background_fill = 'white', + block_border_width = '0', + block_label_background_fill = '*primary_100', + block_label_background_fill_dark = '*primary_600', + block_label_border_width = 'none', + block_label_margin = '0.5rem', + block_label_radius = '*radius_md', + block_label_text_color = '*primary_500', + block_label_text_color_dark = 'white', + block_label_text_weight = '600', + block_title_background_fill = '*primary_100', + block_title_background_fill_dark = '*primary_600', + block_title_padding = '*block_label_padding', + block_title_radius = '*block_label_radius', + block_title_text_color = '*primary_500', + block_title_text_size = '*text_sm', + block_title_text_weight = '600', + block_padding = '0.5rem', + border_color_primary = 'transparent', + border_color_primary_dark = 'transparent', + button_large_padding = '2rem 0.5rem', + button_large_text_weight = 'normal', + button_primary_background_fill = '*primary_500', + button_primary_text_color = 'white', + button_secondary_background_fill = 'white', + button_secondary_border_color = 'transparent', + button_secondary_border_color_dark = 'transparent', + button_secondary_border_color_hover = 'transparent', + button_secondary_border_color_hover_dark = 'transparent', + 
button_secondary_text_color = '*neutral_800', + button_small_padding = '0.75rem', + checkbox_background_color = '*neutral_200', + checkbox_background_color_selected = '*primary_600', + checkbox_background_color_selected_dark = '*primary_700', + checkbox_border_color_focus = '*primary_500', + checkbox_border_color_focus_dark = '*primary_600', + checkbox_border_color_selected = '*primary_600', + checkbox_border_color_selected_dark = '*primary_700', + checkbox_label_background_fill = '*neutral_50', + checkbox_label_background_fill_hover = '*neutral_50', + checkbox_label_background_fill_selected = '*primary_500', + checkbox_label_background_fill_selected_dark = '*primary_600', + checkbox_label_text_color_selected = 'white', + input_background_fill = '*neutral_50', + shadow_drop = 'none', + slider_color = '*primary_500', + slider_color_dark = '*primary_600' + ) + + +def get_css() -> str: + fixes_css_path = resolve_relative_path('uis/assets/fixes.css') + overrides_css_path = resolve_relative_path('uis/assets/overrides.css') + return open(fixes_css_path, 'r').read() + open(overrides_css_path, 'r').read() diff --git a/deepfuze/uis/layouts/__pycache__/default.cpython-310.pyc b/deepfuze/uis/layouts/__pycache__/default.cpython-310.pyc new file mode 100644 index 0000000..5f621e5 Binary files /dev/null and b/deepfuze/uis/layouts/__pycache__/default.cpython-310.pyc differ diff --git a/deepfuze/uis/layouts/__pycache__/default.cpython-311.pyc b/deepfuze/uis/layouts/__pycache__/default.cpython-311.pyc new file mode 100644 index 0000000..07a62e0 Binary files /dev/null and b/deepfuze/uis/layouts/__pycache__/default.cpython-311.pyc differ diff --git a/deepfuze/uis/layouts/benchmark.py b/deepfuze/uis/layouts/benchmark.py new file mode 100644 index 0000000..92f6885 --- /dev/null +++ b/deepfuze/uis/layouts/benchmark.py @@ -0,0 +1,67 @@ +import multiprocessing +import gradio + +import deepfuze.globals +from deepfuze.download import conditional_download +from deepfuze.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, benchmark_options, benchmark + + +def pre_check() -> bool: + if not deepfuze.globals.skip_download: + conditional_download('../../models/facefusion/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-540p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-720p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1440p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-2160p.mp4' + ]) + return True + return False + + +def pre_render() -> bool: + return True + + +def render() -> gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + 
execution_thread_count.render() + execution_queue_count.render() + with gradio.Blocks(): + memory.render() + with gradio.Blocks(): + benchmark_options.render() + with gradio.Column(scale = 5): + with gradio.Blocks(): + benchmark.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + execution_queue_count.listen() + memory.listen() + benchmark.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(2, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True, inbrowser = deepfuze.globals.open_browser) diff --git a/deepfuze/uis/layouts/default.py b/deepfuze/uis/layouts/default.py new file mode 100755 index 0000000..c80dc57 --- /dev/null +++ b/deepfuze/uis/layouts/default.py @@ -0,0 +1,81 @@ +import multiprocessing +import gradio + +import deepfuze.globals +from deepfuze.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, temp_frame, output_options, common_options, source, target, output, preview, trim_frame, face_analyser, face_selector, face_masker + + +def pre_check() -> bool: + return True + + +def pre_render() -> bool: + return True + + +def render() -> gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + execution_thread_count.render() + execution_queue_count.render() + with gradio.Blocks(): + memory.render() + with gradio.Blocks(): + temp_frame.render() + with gradio.Blocks(): + output_options.render() + with gradio.Column(scale = 2): + with gradio.Blocks(): + source.render() + with gradio.Blocks(): + target.render() + with gradio.Blocks(): + output.render() + with gradio.Column(scale = 3): + with gradio.Blocks(): + preview.render() + with gradio.Blocks(): + trim_frame.render() + with gradio.Blocks(): + face_selector.render() + with gradio.Blocks(): + face_masker.render() + with gradio.Blocks(): + face_analyser.render() + with gradio.Blocks(): + common_options.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + execution_queue_count.listen() + memory.listen() + temp_frame.listen() + output_options.listen() + source.listen() + target.listen() + output.listen() + preview.listen() + trim_frame.listen() + face_selector.listen() + face_masker.listen() + face_analyser.listen() + common_options.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(8, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True, inbrowser = deepfuze.globals.open_browser) diff --git a/deepfuze/uis/layouts/webcam.py b/deepfuze/uis/layouts/webcam.py new file mode 100644 index 0000000..0809699 --- /dev/null +++ b/deepfuze/uis/layouts/webcam.py @@ -0,0 +1,50 @@ +import multiprocessing +import gradio + +import deepfuze.globals +from deepfuze.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, webcam_options, source, webcam + + +def pre_check() -> bool: + return True + + +def pre_render() -> bool: + return True + + +def render() -> 
gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + execution_thread_count.render() + with gradio.Blocks(): + webcam_options.render() + with gradio.Blocks(): + source.render() + with gradio.Column(scale = 5): + with gradio.Blocks(): + webcam.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + source.listen() + webcam.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(2, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True, inbrowser = deepfuze.globals.open_browser) diff --git a/deepfuze/uis/overrides.py b/deepfuze/uis/overrides.py new file mode 100644 index 0000000..7f3c470 --- /dev/null +++ b/deepfuze/uis/overrides.py @@ -0,0 +1,13 @@ +from typing import Any +import cv2 +import numpy +import base64 + + +def encode_array_to_base64(array : numpy.ndarray[Any, Any]) -> str: + buffer = cv2.imencode('.jpg', array[:, :, ::-1])[1] + return 'data:image/jpeg;base64,' + base64.b64encode(buffer.tobytes()).decode('utf-8') + + +def encode_pil_to_base64(image : Any) -> str: + return encode_array_to_base64(numpy.asarray(image)[:, :, ::-1]) diff --git a/deepfuze/uis/typing.py b/deepfuze/uis/typing.py new file mode 100644 index 0000000..59d06f5 --- /dev/null +++ b/deepfuze/uis/typing.py @@ -0,0 +1,53 @@ +from typing import Literal, Any, IO, Union +import gradio + +File = IO[Any] +Component = Union[gradio.File, gradio.Image, gradio.Video, gradio.Slider] +ComponentName = Literal\ +[ + 'source_audio', + 'source_image', + 'target_image', + 'target_video', + 'preview_frame_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', + 'face_selector_mode_dropdown', + 'reference_face_position_gallery', + 'reference_face_distance_slider', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown', + 'face_detector_model_dropdown', + 'face_detector_size_dropdown', + 'face_detector_score_slider', + 'face_landmarker_score_slider', + 'face_mask_types_checkbox_group', + 'face_mask_blur_slider', + 'face_mask_padding_top_slider', + 'face_mask_padding_bottom_slider', + 'face_mask_padding_left_slider', + 'face_mask_padding_right_slider', + 'face_mask_region_checkbox_group', + 'frame_processors_checkbox_group', + 'face_debugger_items_checkbox_group', + 'face_enhancer_model_dropdown', + 'face_enhancer_blend_slider', + 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', + 'frame_colorizer_blend_slider', + 'frame_colorizer_size_dropdown', + 'frame_enhancer_model_dropdown', + 'frame_enhancer_blend_slider', + 'lip_syncer_model_dropdown', + 'output_path_textbox', + 'output_video_fps_slider', + 'benchmark_runs_checkbox_group', + 'benchmark_cycles_slider', + 'webcam_mode_radio', + 'webcam_resolution_dropdown', + 'webcam_fps_slider' +] + +WebcamMode = Literal['inline', 'udp', 'v4l2'] +StreamMode = Literal['udp', 'v4l2'] diff --git a/deepfuze/vision.py b/deepfuze/vision.py new file mode 100644 index 0000000..117a2ef --- /dev/null +++ b/deepfuze/vision.py @@ -0,0 +1,231 @@ +from typing import Optional, List, Tuple +from functools import lru_cache +import cv2 +import numpy +from cv2.typing import Size + +from 
deepfuze.common_helper import is_windows +from deepfuze.typing import VisionFrame, Resolution, Fps +from deepfuze.choices import image_template_sizes, video_template_sizes +from deepfuze.filesystem import is_image, is_video, sanitize_path_for_windows + + +@lru_cache(maxsize = 128) +def read_static_image(image_path : str) -> Optional[VisionFrame]: + return read_image(image_path) + + +def read_static_images(image_paths : List[str]) -> Optional[List[VisionFrame]]: + frames = [] + if image_paths: + for image_path in image_paths: + frames.append(read_static_image(image_path)) + return frames + + +def read_image(image_path : str) -> Optional[VisionFrame]: + if is_image(image_path): + if is_windows(): + image_path = sanitize_path_for_windows(image_path) + return cv2.imread(image_path) + return None + + +def write_image(image_path : str, vision_frame : VisionFrame) -> bool: + if image_path: + if is_windows(): + image_path = sanitize_path_for_windows(image_path) + return cv2.imwrite(image_path, vision_frame) + return False + + +def detect_image_resolution(image_path : str) -> Optional[Resolution]: + if is_image(image_path): + image = read_image(image_path) + height, width = image.shape[:2] + return width, height + return None + + +def restrict_image_resolution(image_path : str, resolution : Resolution) -> Resolution: + if is_image(image_path): + image_resolution = detect_image_resolution(image_path) + if image_resolution < resolution: + return image_resolution + return resolution + + +def create_image_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) + for template_size in image_template_sizes: + temp_resolutions.append(normalize_resolution((width * template_size, height * template_size))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions + + +def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: + if is_video(video_path): + if is_windows(): + video_path = sanitize_path_for_windows(video_path) + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT) + video_capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1)) + has_vision_frame, vision_frame = video_capture.read() + video_capture.release() + if has_vision_frame: + return vision_frame + return None + + +def count_video_frame_total(video_path : str) -> int: + if is_video(video_path): + if is_windows(): + video_path = sanitize_path_for_windows(video_path) + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + video_frame_total = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) + video_capture.release() + return video_frame_total + return 0 + + +def detect_video_fps(video_path : str) -> Optional[float]: + if is_video(video_path): + if is_windows(): + video_path = sanitize_path_for_windows(video_path) + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + video_fps = video_capture.get(cv2.CAP_PROP_FPS) + video_capture.release() + return video_fps + return None + + +def restrict_video_fps(video_path : str, fps : Fps) -> Fps: + if is_video(video_path): + video_fps = detect_video_fps(video_path) + if video_fps < fps: + return video_fps + return fps + + +def detect_video_resolution(video_path : str) -> 
Optional[Resolution]: + if is_video(video_path): + if is_windows(): + video_path = sanitize_path_for_windows(video_path) + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) + height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) + video_capture.release() + return int(width), int(height) + return None + + +def restrict_video_resolution(video_path : str, resolution : Resolution) -> Resolution: + if is_video(video_path): + video_resolution = detect_video_resolution(video_path) + if video_resolution < resolution: + return video_resolution + return resolution + + +def create_video_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) + for template_size in video_template_sizes: + if width > height: + temp_resolutions.append(normalize_resolution((template_size * width / height, template_size))) + else: + temp_resolutions.append(normalize_resolution((template_size, template_size * height / width))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions + + +def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: + width, height = resolution + + if width and height: + normalize_width = round(width / 2) * 2 + normalize_height = round(height / 2) * 2 + return normalize_width, normalize_height + return 0, 0 + + +def pack_resolution(resolution : Resolution) -> str: + width, height = normalize_resolution(resolution) + return str(width) + 'x' + str(height) + + +def unpack_resolution(resolution : str) -> Resolution: + width, height = map(int, resolution.split('x')) + return width, height + + +def resize_frame_resolution(vision_frame : VisionFrame, max_resolution : Resolution) -> VisionFrame: + height, width = vision_frame.shape[:2] + max_width, max_height = max_resolution + + if height > max_height or width > max_width: + scale = min(max_height / height, max_width / width) + new_width = int(width * scale) + new_height = int(height * scale) + return cv2.resize(vision_frame, (new_width, new_height)) + return vision_frame + + +def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame: + return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB) + + +def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]: + vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0))) + tile_width = size[0] - 2 * size[2] + pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width + pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width + pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0))) + pad_height, pad_width = pad_vision_frame.shape[:2] + row_range = range(size[2], pad_height - size[2], tile_width) + col_range = range(size[2], pad_width - size[2], tile_width) + tile_vision_frames = [] + + for row_vision_frame in row_range: + top = row_vision_frame - size[2] + bottom = row_vision_frame + size[2] + tile_width + for column_vision_frame in col_range: + left = column_vision_frame - size[2] + right = column_vision_frame + size[2] + tile_width + tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :]) + return tile_vision_frames, pad_width, pad_height + + +def 
merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int, temp_height : int, pad_width : int, pad_height : int, size : Size) -> VisionFrame: + merge_vision_frame = numpy.zeros((pad_height, pad_width, 3)).astype(numpy.uint8) + tile_width = tile_vision_frames[0].shape[1] - 2 * size[2] + tiles_per_row = min(pad_width // tile_width, len(tile_vision_frames)) + + for index, tile_vision_frame in enumerate(tile_vision_frames): + tile_vision_frame = tile_vision_frame[size[2]:-size[2], size[2]:-size[2]] + row_index = index // tiles_per_row + col_index = index % tiles_per_row + top = row_index * tile_vision_frame.shape[0] + bottom = top + tile_vision_frame.shape[0] + left = col_index * tile_vision_frame.shape[1] + right = left + tile_vision_frame.shape[1] + merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame + merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1]: size[1] + temp_width, :] + return merge_vision_frame diff --git a/deepfuze/voice_extractor.py b/deepfuze/voice_extractor.py new file mode 100644 index 0000000..222d193 --- /dev/null +++ b/deepfuze/voice_extractor.py @@ -0,0 +1,129 @@ +from typing import Any, Tuple +from time import sleep +import scipy +import numpy +import onnxruntime + +import deepfuze.globals +from deepfuze import process_manager +from deepfuze.thread_helper import thread_lock, thread_semaphore +from deepfuze.typing import ModelSet, AudioChunk, Audio +from deepfuze.execution import apply_execution_provider_options +from deepfuze.filesystem import resolve_relative_path, is_file +from deepfuze.download import conditional_download + +VOICE_EXTRACTOR = None +MODELS : ModelSet =\ +{ + 'voice_extractor': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/voice_extractor.onnx', + 'path': resolve_relative_path('../../../models/deepfuze/voice_extractor.onnx') + } +} + + +def get_voice_extractor() -> Any: + global VOICE_EXTRACTOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if VOICE_EXTRACTOR is None: + model_path = MODELS.get('voice_extractor').get('path') + VOICE_EXTRACTOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(deepfuze.globals.execution_device_id, deepfuze.globals.execution_providers)) + return VOICE_EXTRACTOR + + +def clear_voice_extractor() -> None: + global VOICE_EXTRACTOR + + VOICE_EXTRACTOR = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../../../models/deepfuze') + model_url = MODELS.get('voice_extractor').get('url') + model_path = MODELS.get('voice_extractor').get('path') + + if not deepfuze.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def batch_extract_voice(audio : Audio, chunk_size : int, step_size : int) -> Audio: + temp_audio = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + temp_chunk = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + + for start in range(0, audio.shape[0], step_size): + end = min(start + chunk_size, audio.shape[0]) + temp_audio[start:end, ...] += extract_voice(audio[start:end, ...]) + temp_chunk[start:end, ...] 
+= 1 + audio = temp_audio / temp_chunk + return audio + + +def extract_voice(temp_audio_chunk : AudioChunk) -> AudioChunk: + voice_extractor = get_voice_extractor() + chunk_size = 1024 * (voice_extractor.get_inputs()[0].shape[3] - 1) + trim_size = 3840 + temp_audio_chunk, pad_size = prepare_audio_chunk(temp_audio_chunk.T, chunk_size, trim_size) + temp_audio_chunk = decompose_audio_chunk(temp_audio_chunk, trim_size) + with thread_semaphore(): + temp_audio_chunk = voice_extractor.run(None, + { + voice_extractor.get_inputs()[0].name: temp_audio_chunk + })[0] + temp_audio_chunk = compose_audio_chunk(temp_audio_chunk, trim_size) + temp_audio_chunk = normalize_audio_chunk(temp_audio_chunk, chunk_size, trim_size, pad_size) + return temp_audio_chunk + + +def prepare_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int) -> Tuple[AudioChunk, int]: + step_size = chunk_size - 2 * trim_size + pad_size = step_size - temp_audio_chunk.shape[1] % step_size + audio_chunk_size = temp_audio_chunk.shape[1] + pad_size + temp_audio_chunk = temp_audio_chunk.astype(numpy.float32) / numpy.iinfo(numpy.int16).max + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (trim_size, trim_size + pad_size))) + temp_audio_chunks = [] + + for index in range(0, audio_chunk_size, step_size): + temp_audio_chunks.append(temp_audio_chunk[:, index:index + chunk_size]) + temp_audio_chunk = numpy.concatenate(temp_audio_chunks, axis = 0) + temp_audio_chunk = temp_audio_chunk.reshape((-1, chunk_size)) + return temp_audio_chunk, pad_size + + +def decompose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = scipy.signal.stft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[2] + temp_audio_chunk = numpy.stack((numpy.real(temp_audio_chunk), numpy.imag(temp_audio_chunk)), axis = -1).transpose((0, 3, 1, 2)) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, 2, trim_size + 1, voice_extractor_shape[3]).reshape(-1, voice_extractor_shape[1], trim_size + 1, voice_extractor_shape[3]) + temp_audio_chunk = temp_audio_chunk[:, :, :voice_extractor_shape[2]] + temp_audio_chunk /= numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def compose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (0, 0), (0, trim_size + 1 - voice_extractor_shape[2]), (0, 0))) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, trim_size + 1, voice_extractor_shape[3]).transpose((0, 2, 3, 1)) + temp_audio_chunk = temp_audio_chunk[:, :, :, 0] + 1j * temp_audio_chunk[:, :, :, 1] + temp_audio_chunk = scipy.signal.istft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[1] + temp_audio_chunk *= numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def normalize_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int, pad_size : int) -> AudioChunk: + temp_audio_chunk = temp_audio_chunk.reshape((-1, 2, chunk_size)) + temp_audio_chunk = temp_audio_chunk[:, :, trim_size:-trim_size].transpose(1, 0, 2) + temp_audio_chunk = temp_audio_chunk.reshape(2, -1)[:, :-pad_size].T + return temp_audio_chunk 
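
The chunked loop in `batch_extract_voice` above overlaps its windows (`step_size` is smaller than `chunk_size`) and divides the accumulated output by a per-sample hit count, so every sample ends up as the average of all chunks that covered it, which hides seams at chunk borders. Below is a minimal sketch of that overlap-averaging idea, assuming a mono signal; the real code operates on stereo `(n, 2)` arrays, and `identity_extract` here is a purely illustrative stand-in for the ONNX voice extractor:

```python
import numpy


def identity_extract(chunk : numpy.ndarray) -> numpy.ndarray:
	# stand-in for extract_voice(); a real model would return the isolated voice
	return chunk


def overlap_average(audio : numpy.ndarray, chunk_size : int, step_size : int) -> numpy.ndarray:
	temp_audio = numpy.zeros_like(audio, dtype = numpy.float32)
	temp_count = numpy.zeros_like(audio, dtype = numpy.float32)
	for start in range(0, audio.shape[0], step_size):
		end = min(start + chunk_size, audio.shape[0])
		temp_audio[start:end] += identity_extract(audio[start:end])
		temp_count[start:end] += 1
	# every sample is covered by at least one chunk, so the division is safe
	return temp_audio / temp_count


audio = numpy.random.rand(16).astype(numpy.float32)
# with an identity extractor, averaging the overlapping chunks reproduces the input
assert numpy.allclose(overlap_average(audio, chunk_size = 4, step_size = 2), audio)
```

With a real extractor the per-chunk outputs disagree slightly near their edges, and this averaging is what keeps those transitions inaudible.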
diff --git a/deepfuze/wording.py b/deepfuze/wording.py
new file mode 100755
index 0000000..1401e44
--- /dev/null
+++ b/deepfuze/wording.py
@@ -0,0 +1,220 @@
+from typing import Any, Dict, Optional
+
+WORDING : Dict[str, Any] =\
+{
+	'conda_not_activated': 'Conda is not activated',
+	'python_not_supported': 'Python version is not supported, upgrade to {version} or higher',
+	'ffmpeg_not_installed': 'FFMpeg is not installed',
+	'creating_temp': 'Creating temporary resources',
+	'extracting_frames': 'Extracting frames with a resolution of {resolution} and {fps} frames per second',
+	'extracting_frames_succeed': 'Extracting frames succeeded',
+	'extracting_frames_failed': 'Extracting frames failed',
+	'analysing': 'Analysing',
+	'processing': 'Processing',
+	'downloading': 'Downloading',
+	'temp_frames_not_found': 'Temporary frames not found',
+	'copying_image': 'Copying image with a resolution of {resolution}',
+	'copying_image_succeed': 'Copying image succeeded',
+	'copying_image_failed': 'Copying image failed',
+	'finalizing_image': 'Finalizing image with a resolution of {resolution}',
+	'finalizing_image_succeed': 'Finalizing image succeeded',
+	'finalizing_image_skipped': 'Finalizing image skipped',
+	'merging_video': 'Merging video with a resolution of {resolution} and {fps} frames per second',
+	'merging_video_succeed': 'Merging video succeeded',
+	'merging_video_failed': 'Merging video failed',
+	'skipping_audio': 'Skipping audio',
+	'restoring_audio_succeed': 'Restoring audio succeeded',
+	'restoring_audio_skipped': 'Restoring audio skipped',
+	'clearing_temp': 'Clearing temporary resources',
+	'processing_stopped': 'Processing stopped',
+	'processing_image_succeed': 'Processing to image succeeded in {seconds} seconds',
+	'processing_image_failed': 'Processing to image failed',
+	'processing_video_succeed': 'Processing to video succeeded in {seconds} seconds',
+	'processing_video_failed': 'Processing to video failed',
+	'model_download_not_done': 'Download of the model is not done',
+	'model_file_not_present': 'File of the model is not present',
+	'select_image_source': 'Select an image for source path',
+	'select_audio_source': 'Select an audio for source path',
+	'select_video_target': 'Select a video for target path',
+	'select_image_or_video_target': 'Select an image or video for target path',
+	'select_file_or_directory_output': 'Select a file or directory for output path',
+	'no_source_face_detected': 'No source face detected',
+	'frame_processor_not_loaded': 'Frame processor {frame_processor} could not be loaded',
+	'frame_processor_not_implemented': 'Frame processor {frame_processor} not implemented correctly',
+	'ui_layout_not_loaded': 'UI layout {ui_layout} could not be loaded',
+	'ui_layout_not_implemented': 'UI layout {ui_layout} not implemented correctly',
+	'stream_not_loaded': 'Stream {stream_mode} could not be loaded',
+	'point': '.',
+	'comma': ',',
+	'colon': ':',
+	'question_mark': '?',
+	'exclamation_mark': '!',
+	'help':
+	{
+		# installer
+		'install_dependency': 'select the variant of {dependency} to install',
+		'skip_conda': 'skip the conda environment check',
+		# general
+		'config': 'choose the config file to override defaults',
+		'source': 'choose single or multiple source images or audios',
+		'target': 'choose single target image or video',
+		'output': 'specify the output file or directory',
+		# misc
+		'force_download': 'force automatic downloads and exit',
+		'skip_download': 'omit automatic downloads and remote lookups',
+		'headless': 'run the program without a user interface',
+		'log_level': 'adjust the message severity displayed in the terminal',
+		# execution
+		'execution_device_id': 'specify the device used for processing',
+		'execution_providers': 'accelerate the model inference using different providers (choices: {choices}, ...)',
+		'execution_thread_count': 'specify the number of parallel threads while processing',
+		'execution_queue_count': 'specify the number of frames each thread is processing',
+		# memory
+		'video_memory_strategy': 'balance fast frame processing and low VRAM usage',
+		'system_memory_limit': 'limit the available RAM that can be used while processing',
+		# face analyser
+		'face_analyser_order': 'specify the order in which the face analyser detects faces',
+		'face_analyser_age': 'filter the detected faces based on their age',
+		'face_analyser_gender': 'filter the detected faces based on their gender',
+		'face_detector_model': 'choose the model responsible for detecting the face',
+		'face_detector_size': 'specify the size of the frame provided to the face detector',
+		'face_detector_score': 'filter the detected faces based on the confidence score',
+		'face_landmarker_score': 'filter the detected landmarks based on the confidence score',
+		# face selector
+		'face_selector_mode': 'use reference based tracking or simple matching',
+		'reference_face_position': 'specify the position used to create the reference face',
+		'reference_face_distance': 'specify the desired similarity between the reference face and target face',
+		'reference_frame_number': 'specify the frame used to create the reference face',
+		# face mask
+		'face_mask_types': 'mix and match different face mask types (choices: {choices})',
+		'face_mask_blur': 'specify the degree of blur applied to the box mask',
+		'face_mask_padding': 'apply top, right, bottom and left padding to the box mask',
+		'face_mask_regions': 'choose the facial features used for the region mask (choices: {choices})',
+		# frame extraction
+		'trim_frame_start': 'specify the start frame of the target video',
+		'trim_frame_end': 'specify the end frame of the target video',
+		'temp_frame_format': 'specify the temporary resources format',
+		'keep_temp': 'keep the temporary resources after processing',
+		# output creation
+		'output_image_quality': 'specify the image quality which translates to the compression factor',
+		'output_image_resolution': 'specify the image output resolution based on the target image',
+		'output_video_encoder': 'specify the encoder used for the video compression',
+		'output_video_preset': 'balance fast video processing and video file size',
+		'output_video_quality': 'specify the video quality which translates to the compression factor',
+		'output_video_resolution': 'specify the video output resolution based on the target video',
+		'output_video_fps': 'specify the video output fps based on the target video',
+		'skip_audio': 'omit the audio from the target video',
+		# frame processors
+		'frame_processors': 'load a single or multiple frame processors. (choices: {choices}, ...)',
+		'face_debugger_items': 'load a single or multiple frame processors (choices: {choices})',
+		'face_enhancer_model': 'choose the model responsible for enhancing the face',
+		'face_enhancer_blend': 'blend the enhanced face into the previous face',
+		'face_swapper_model': 'choose the model responsible for swapping the face',
+		'frame_colorizer_model': 'choose the model responsible for colorizing the frame',
+		'frame_colorizer_blend': 'blend the colorized frame into the previous frame',
+		'frame_colorizer_size': 'specify the size of the frame provided to the frame colorizer',
+		'frame_enhancer_model': 'choose the model responsible for enhancing the frame',
+		'frame_enhancer_blend': 'blend the enhanced frame into the previous frame',
+		'lip_syncer_model': 'choose the model responsible for syncing the lips',
+		# uis
+		'open_browser': 'open the browser once the program is ready',
+		'ui_layouts': 'launch a single or multiple UI layouts (choices: {choices}, ...)'
+	},
+	'uis':
+	{
+		# general
+		'start_button': 'START',
+		'stop_button': 'STOP',
+		'clear_button': 'CLEAR',
+		# about
+		'donate_button': 'DONATE',
+		# benchmark
+		'benchmark_results_dataframe': 'BENCHMARK RESULTS',
+		# benchmark options
+		'benchmark_runs_checkbox_group': 'BENCHMARK RUNS',
+		'benchmark_cycles_slider': 'BENCHMARK CYCLES',
+		# common options
+		'common_options_checkbox_group': 'OPTIONS',
+		# execution
+		'execution_providers_checkbox_group': 'EXECUTION PROVIDERS',
+		# execution queue count
+		'execution_queue_count_slider': 'EXECUTION QUEUE COUNT',
+		# execution thread count
+		'execution_thread_count_slider': 'EXECUTION THREAD COUNT',
+		# face analyser
+		'face_analyser_order_dropdown': 'FACE ANALYSER ORDER',
+		'face_analyser_age_dropdown': 'FACE ANALYSER AGE',
+		'face_analyser_gender_dropdown': 'FACE ANALYSER GENDER',
+		'face_detector_model_dropdown': 'FACE DETECTOR MODEL',
+		'face_detector_size_dropdown': 'FACE DETECTOR SIZE',
+		'face_detector_score_slider': 'FACE DETECTOR SCORE',
+		'face_landmarker_score_slider': 'FACE LANDMARKER SCORE',
+		# face masker
+		'face_mask_types_checkbox_group': 'FACE MASK TYPES',
+		'face_mask_blur_slider': 'FACE MASK BLUR',
+		'face_mask_padding_top_slider': 'FACE MASK PADDING TOP',
+		'face_mask_padding_right_slider': 'FACE MASK PADDING RIGHT',
+		'face_mask_padding_bottom_slider': 'FACE MASK PADDING BOTTOM',
+		'face_mask_padding_left_slider': 'FACE MASK PADDING LEFT',
+		'face_mask_region_checkbox_group': 'FACE MASK REGIONS',
+		# face selector
+		'face_selector_mode_dropdown': 'FACE SELECTOR MODE',
+		'reference_face_gallery': 'REFERENCE FACE',
+		'reference_face_distance_slider': 'REFERENCE FACE DISTANCE',
+		# frame processors
+		'frame_processors_checkbox_group': 'FRAME PROCESSORS',
+		# frame processors options
+		'face_debugger_items_checkbox_group': 'FACE DEBUGGER ITEMS',
+		'face_enhancer_model_dropdown': 'FACE ENHANCER MODEL',
+		'face_enhancer_blend_slider': 'FACE ENHANCER BLEND',
+		'face_swapper_model_dropdown': 'FACE SWAPPER MODEL',
+		'frame_colorizer_model_dropdown': 'FRAME COLORIZER MODEL',
+		'frame_colorizer_blend_slider': 'FRAME COLORIZER BLEND',
+		'frame_colorizer_size_dropdown': 'FRAME COLORIZER SIZE',
+		'frame_enhancer_model_dropdown': 'FRAME ENHANCER MODEL',
+		'frame_enhancer_blend_slider': 'FRAME ENHANCER BLEND',
+		'lip_syncer_model_dropdown': 'LIP SYNCER MODEL',
+		# memory
+		'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY',
+		'system_memory_limit_slider': 'SYSTEM MEMORY LIMIT',
+		# output
+		'output_image_or_video': 'OUTPUT',
+		# output options
+		'output_path_textbox': 'OUTPUT PATH',
+		'output_image_quality_slider': 'OUTPUT IMAGE QUALITY',
+		'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION',
+		'output_video_encoder_dropdown': 'OUTPUT VIDEO ENCODER',
+		'output_video_preset_dropdown': 'OUTPUT VIDEO PRESET',
+		'output_video_quality_slider': 'OUTPUT VIDEO QUALITY',
+		'output_video_resolution_dropdown': 'OUTPUT VIDEO RESOLUTION',
+		'output_video_fps_slider': 'OUTPUT VIDEO FPS',
+		# preview
+		'preview_image': 'PREVIEW',
+		'preview_frame_slider': 'PREVIEW FRAME',
+		# source
+		'source_file': 'SOURCE',
+		# target
+		'target_file': 'TARGET',
+		# temp frame
+		'temp_frame_format_dropdown': 'TEMP FRAME FORMAT',
+		# trim frame
+		'trim_frame_start_slider': 'TRIM FRAME START',
+		'trim_frame_end_slider': 'TRIM FRAME END',
+		# webcam
+		'webcam_image': 'WEBCAM',
+		# webcam options
+		'webcam_mode_radio': 'WEBCAM MODE',
+		'webcam_resolution_dropdown': 'WEBCAM RESOLUTION',
+		'webcam_fps_slider': 'WEBCAM FPS'
+	}
+}
+
+
+def get(key : str) -> Optional[str]:
+	if '.' in key:
+		section, name = key.split('.')
+		if section in WORDING and name in WORDING[section]:
+			return WORDING[section][name]
+	if key in WORDING:
+		return WORDING[key]
+	return None
diff --git a/install.py b/install.py
new file mode 100755
index 0000000..9569a1f
--- /dev/null
+++ b/install.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import os
+import subprocess
+
+os.environ['PIP_BREAK_SYSTEM_PACKAGES'] = '1'
+subprocess.call([ 'pip', 'install', 'inquirer', '-q' ])
+
+from deepfuze import installer
+
+if __name__ == '__main__':
+	installer.cli()
diff --git a/llm_node.py b/llm_node.py
new file mode 100644
index 0000000..9082295
--- /dev/null
+++ b/llm_node.py
@@ -0,0 +1,39 @@
+from openai import OpenAI
+
+class LLM_node:
+
+    @classmethod
+    def INPUT_TYPES(self):
+        return {
+            "required":{
+                "system_prompt" : ("STRING",{"default":"","multiline": True,},),
+                "user_query": ("STRING", {"default":"","multiline": True,},),
+                "model_name": (["gpt-3.5-turbo","gpt-4o","gpt-4-turbo","gpt-4"],),
+                "api_key": ("STRING",{"default":""},)
+            },
+            "optional":{
+                "max_tokens" : ("INT", {"default":250,"min":10,"max":2000,"step":10},),
+                "temperature" : ("FLOAT", {"default":0,"min":0,"max":1,"step":0.1}),
+                "timeout": ("INT", {"default":10,"min":1,"max":200,"step":1},),
+            }
+        }
+
+    CATEGORY = "LLM"
+    RETURN_TYPES = ("STRING",)
+    RETURN_NAMES = ("text",)
+    FUNCTION = "run_llm"
+
+    def run_llm(self, system_prompt, user_query, model_name, temperature, api_key, max_tokens, timeout):
+        client = OpenAI(api_key=api_key)
+        response = client.chat.completions.create(
+            model=model_name,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_query}
+            ],
+            # stop = stop.split(","),
+            temperature=temperature,
+            max_tokens=max_tokens,
+            timeout=timeout
+        )
+        # extract the completion text before returning it
+        text = response.choices[0].message.content
+        return (text,)
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..64218bc
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,7 @@
+[mypy]
+check_untyped_defs = True
+disallow_any_generics = True
+disallow_untyped_calls = True
+disallow_untyped_defs = True
+ignore_missing_imports = True
+strict_optional = False
diff --git a/nodes.py b/nodes.py
new file mode 100644
index 0000000..b7764a3
--- /dev/null
+++ b/nodes.py
@@ -0,0 +1,1110 @@
+
+import os
+import sys
+import json
+import subprocess
+import numpy as np
+import re
+import cv2
+import time
+import itertools
+import datetime
+from typing import List
+import torch
+import psutil
+
+from PIL import Image, ExifTags, ImageOps
+from PIL.PngImagePlugin import PngInfo
+from pathlib import Path
+from string import Template
+from pydub import AudioSegment
+from scipy.io.wavfile import write
+import folder_paths
+from comfy.utils import common_upscale, ProgressBar
+from .utils import BIGMAX, DIMMAX, calculate_file_hash, get_sorted_dir_files_from_directory, get_audio, lazy_eval, hash_path, validate_path, strip_path, ffmpeg_path, requeue_workflow, gifski_path
+from .llm_node import LLM_node
+from .audio_playback import PlayBackAudio
+
+
+# folder_paths.folder_names_and_paths["VHS_video_formats"] = (
+#     [
+#         os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "video_formats"),
+#     ],
+#     [".json"]
+# )
+
+result_dir = os.path.join(folder_paths.get_output_directory(),"deepfuze")
+audio_dir = os.path.join(folder_paths.get_input_directory(),"audio")
+
+os.makedirs(result_dir, exist_ok=True)
+os.makedirs(audio_dir, exist_ok=True)
+
+audio_extensions = ['mp3', 'mp4', 'wav', 'ogg']
+video_extensions = ['webm', 'mp4', 'mkv', 'gif']
+
+
+def is_gif(filename) -> bool:
+    file_parts = filename.split('.')
+    return len(file_parts) > 1 and file_parts[-1] == "gif"
+
+
+def target_size(width, height, force_size, custom_width, custom_height) -> tuple[int, int]:
+    if force_size == "Custom":
+        return (custom_width, custom_height)
+    elif force_size == "Custom Height":
+        force_size = "?x"+str(custom_height)
+    elif force_size == "Custom Width":
+        force_size = str(custom_width)+"x?"
+
+    if force_size != "Disabled":
+        force_size = force_size.split("x")
+        if force_size[0] == "?":
+            width = (width*int(force_size[1]))//height
+            #Limit to a multiple of 8 for latent conversion
+            width = int(width)+4 & ~7
+            height = int(force_size[1])
+        elif force_size[1] == "?":
+            height = (height*int(force_size[0]))//width
+            height = int(height)+4 & ~7
+            width = int(force_size[0])
+        else:
+            width = int(force_size[0])
+            height = int(force_size[1])
+    return (width, height)
+
+def cv_frame_generator(video, force_rate, frame_load_cap, skip_first_frames,
+                       select_every_nth, meta_batch=None, unique_id=None):
+    video_cap = cv2.VideoCapture(strip_path(video))
+    if not video_cap.isOpened():
+        raise ValueError(f"{video} could not be loaded with cv.")
+    pbar = ProgressBar(frame_load_cap) if frame_load_cap > 0 else None
+
+    # extract video metadata
+    fps = video_cap.get(cv2.CAP_PROP_FPS)
+    width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    total_frames = int(video_cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration = total_frames / fps
+
+    # set video_cap to look at start_index frame
+    total_frame_count = 0
+    total_frames_evaluated = -1
+    frames_added = 0
+    base_frame_time = 1 / fps
+    prev_frame = None
+
+    if force_rate == 0:
+        target_frame_time = base_frame_time
+    else:
+        target_frame_time = 1/force_rate
+
+    yield (width, height, fps, duration, total_frames, target_frame_time)
+
+    # resample by accumulating source-frame time until a target frame is due
+    time_offset=target_frame_time - base_frame_time
+    while video_cap.isOpened():
+        if time_offset < target_frame_time:
+            is_returned = video_cap.grab()
+            # if didn't return frame, video has ended
+            if not is_returned:
+                break
+            time_offset += base_frame_time
+        if time_offset < target_frame_time:
+            continue
+        time_offset -= target_frame_time
+        # if not at start_index, skip doing anything with frame
+        total_frame_count += 1
+        if total_frame_count <= skip_first_frames:
+            continue
+        else:
+            total_frames_evaluated += 1
+
+        # if should not be selected, skip doing anything with frame
+        if total_frames_evaluated%select_every_nth != 0:
+            continue
+
+        # opencv loads images in BGR format (yuck), so need to convert to RGB for ComfyUI use
+        # follow up: can videos ever have an alpha channel?
+        # To my testing: No. opencv has no support for alpha
+        unused, frame = video_cap.retrieve()
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        # convert frame to comfyui's expected format
+        # TODO: frame contains no exif information. Check if opencv2 has already applied
+        frame = np.array(frame, dtype=np.float32)
+        torch.from_numpy(frame).div_(255)
+        if prev_frame is not None:
+            inp = yield prev_frame
+            if inp is not None:
+                #ensure the finally block is called
+                return
+        prev_frame = frame
+        frames_added += 1
+        if pbar is not None:
+            pbar.update_absolute(frames_added, frame_load_cap)
+        # if cap exists and we've reached it, stop processing frames
+        if frame_load_cap > 0 and frames_added >= frame_load_cap:
+            break
+    if meta_batch is not None:
+        meta_batch.inputs.pop(unique_id)
+        meta_batch.has_closed_inputs = True
+    if prev_frame is not None:
+        yield prev_frame
+
+def load_video_cv(video: str, force_rate: int, force_size: str,
+                  custom_width: int, custom_height: int, frame_load_cap: int,
+                  skip_first_frames: int, select_every_nth: int,
+                  meta_batch=None, unique_id=None, memory_limit_mb=None):
+    if meta_batch is None or unique_id not in meta_batch.inputs:
+        gen = cv_frame_generator(video, force_rate, frame_load_cap, skip_first_frames,
+                                 select_every_nth, meta_batch, unique_id)
+        (width, height, fps, duration, total_frames, target_frame_time) = next(gen)
+
+        if meta_batch is not None:
+            meta_batch.inputs[unique_id] = (gen, width, height, fps, duration, total_frames, target_frame_time)
+
+    else:
+        (gen, width, height, fps, duration, total_frames, target_frame_time) = meta_batch.inputs[unique_id]
+
+    if memory_limit_mb is not None:
+        # convert the caller-supplied limit from megabytes to bytes
+        memory_limit = memory_limit_mb * 2 ** 20
+    else:
+        #TODO: verify if garbage collection should be performed here.
+        #leaves ~128 MB unreserved for safety
+        memory_limit = (psutil.virtual_memory().available + psutil.swap_memory().free) - 2 ** 27
+    #space required to load as f32, exist as latent with wiggle room, decode to f32
+    max_loadable_frames = int(memory_limit//(width*height*3*(4+4+1/10)))
+    if meta_batch is not None:
+        if meta_batch.frames_per_batch > max_loadable_frames:
+            raise RuntimeError(f"Meta Batch set to {meta_batch.frames_per_batch} frames but only {max_loadable_frames} can fit in memory")
+        gen = itertools.islice(gen, meta_batch.frames_per_batch)
+    else:
+        original_gen = gen
+        gen = itertools.islice(gen, max_loadable_frames)
+
+    #Some minor wizardry to eliminate a copy and reduce max memory by a factor of ~2
+    images = torch.from_numpy(np.fromiter(gen, np.dtype((np.float32, (height, width, 3)))))
+    if meta_batch is None:
+        try:
+            next(original_gen)
+            raise RuntimeError(f"Memory limit hit after loading {len(images)} frames. 
Stopping execution.") + except StopIteration: + pass + if len(images) == 0: + raise RuntimeError("No frames generated") + if force_size != "Disabled": + new_size = target_size(width, height, force_size, custom_width, custom_height) + if new_size[0] != width or new_size[1] != height: + s = images.movedim(-1,1) + s = common_upscale(s, new_size[0], new_size[1], "lanczos", "center") + images = s.movedim(1,-1) + + #Setup lambda for lazy audio capture + audio = lambda : get_audio(video, skip_first_frames * target_frame_time, + frame_load_cap*target_frame_time*select_every_nth) + #Adjust target_frame_time for select_every_nth + target_frame_time *= select_every_nth + video_info = { + "source_fps": fps, + "source_frame_count": total_frames, + "source_duration": duration, + "source_width": width, + "source_height": height, + "loaded_fps": 1/target_frame_time, + "loaded_frame_count": len(images), + "loaded_duration": len(images) * target_frame_time, + "loaded_width": images.shape[2], + "loaded_height": images.shape[1], + } + print("images", type(images)) + return (images, len(images), lazy_eval(audio), video_info) + + + +class AudioData: + def __init__(self, audio_file) -> None: + + # Extract the sample rate + sample_rate = audio_file.frame_rate + + # Get the number of audio channels + num_channels = audio_file.channels + + # Extract the audio data as a NumPy array + audio_data = np.array(audio_file.get_array_of_samples()) + self.audio_data = audio_data + self.sample_rate = sample_rate + self.num_channels = num_channels + + def get_channel_audio_data(self, channel: int): + if channel < 0 or channel >= self.num_channels: + raise IndexError(f"Channel '{channel}' out of range. total channels is '{self.num_channels}'.") + return self.audio_data[channel::self.num_channels] + + def get_channel_fft(self, channel: int): + audio_data = self.get_channel_audio_data(channel) + return fft(audio_data) + + +def gen_format_widgets(video_format): + for k in video_format: + if k.endswith("_pass"): + for i in range(len(video_format[k])): + if isinstance(video_format[k][i], list): + item = [video_format[k][i]] + yield item + video_format[k][i] = item[0] + else: + if isinstance(video_format[k], list): + item = [video_format[k]] + yield item + video_format[k] = item[0] + +def get_video_formats(): + formats = [] + for format_name in folder_paths.get_filename_list("VHS_video_formats"): + format_name = format_name[:-5] + video_format_path = folder_paths.get_full_path("VHS_video_formats", format_name + ".json") + with open(video_format_path, 'r') as stream: + video_format = json.load(stream) + if "gifski_pass" in video_format and gifski_path is None: + #Skip format + continue + widgets = [w[0] for w in gen_format_widgets(video_format)] + if (len(widgets) > 0): + formats.append(["video/" + format_name, widgets]) + else: + formats.append("video/" + format_name) + return formats + +def get_format_widget_defaults(format_name): + video_format_path = folder_paths.get_full_path("VHS_video_formats", format_name + ".json") + with open(video_format_path, 'r') as stream: + video_format = json.load(stream) + results = {} + for w in gen_format_widgets(video_format): + if len(w[0]) > 2 and 'default' in w[0][2]: + default = w[0][2]['default'] + else: + if type(w[0][1]) is list: + default = w[0][1][0] + else: + #NOTE: This doesn't respect max/min, but should be good enough as a fallback to a fallback to a fallback + default = {"BOOLEAN": False, "INT": 0, "FLOAT": 0, "STRING": ""}[w[0][1]] + results[w[0][0]] = default + return results + + 
+def apply_format_widgets(format_name, kwargs): + video_format_path = folder_paths.get_full_path("VHS_video_formats", format_name + ".json") + print(video_format_path) + with open(video_format_path, 'r') as stream: + video_format = json.load(stream) + for w in gen_format_widgets(video_format): + print(w[0][0]) + assert(w[0][0] in kwargs) + if len(w[0]) > 3: + w[0] = Template(w[0][3]).substitute(val=kwargs[w[0][0]]) + else: + w[0] = str(kwargs[w[0][0]]) + return video_format + +def tensor_to_int(tensor, bits): + #TODO: investigate benefit of rounding by adding 0.5 before clip/cast + tensor = tensor.cpu().numpy() * (2**bits-1) + return np.clip(tensor, 0, (2**bits-1)) +def tensor_to_shorts(tensor): + return tensor_to_int(tensor, 16).astype(np.uint16) +def tensor_to_bytes(tensor): + return tensor_to_int(tensor, 8).astype(np.uint8) + +def ffmpeg_process(args, video_format, video_metadata, file_path, env): + + res = None + frame_data = yield + total_frames_output = 0 + if video_format.get('save_metadata', 'False') != 'False': + os.makedirs(folder_paths.get_temp_directory(), exist_ok=True) + metadata = json.dumps(video_metadata) + metadata_path = os.path.join(folder_paths.get_temp_directory(), "metadata.txt") + #metadata from file should escape = ; # \ and newline + metadata = metadata.replace("\\","\\\\") + metadata = metadata.replace(";","\\;") + metadata = metadata.replace("#","\\#") + metadata = metadata.replace("=","\\=") + metadata = metadata.replace("\n","\\\n") + metadata = "comment=" + metadata + with open(metadata_path, "w") as f: + f.write(";FFMETADATA1\n") + f.write(metadata) + m_args = args[:1] + ["-i", metadata_path] + args[1:] + ["-metadata", "creation_time=now"] + with subprocess.Popen(m_args + [file_path], stderr=subprocess.PIPE, + stdin=subprocess.PIPE, env=env) as proc: + try: + while frame_data is not None: + proc.stdin.write(frame_data) + #TODO: skip flush for increased speed + frame_data = yield + total_frames_output+=1 + proc.stdin.flush() + proc.stdin.close() + res = proc.stderr.read() + except BrokenPipeError as e: + err = proc.stderr.read() + #Check if output file exists. If it does, the re-execution + #will also fail. 
This obscures the cause of the error
+                #and seems to never occur concurrent to the metadata issue
+                if os.path.exists(file_path):
+                    raise Exception("An error occurred in the ffmpeg subprocess:\n" \
+                            + err.decode("utf-8"))
+                #Res was not set
+                print(err.decode("utf-8"), end="", file=sys.stderr)
+                print("An error occurred when saving with metadata")
+    if res != b'':
+        with subprocess.Popen(args + [file_path], stderr=subprocess.PIPE,
+                              stdin=subprocess.PIPE, env=env) as proc:
+            try:
+                while frame_data is not None:
+                    proc.stdin.write(frame_data)
+                    frame_data = yield
+                    total_frames_output+=1
+                proc.stdin.flush()
+                proc.stdin.close()
+                res = proc.stderr.read()
+            except BrokenPipeError as e:
+                res = proc.stderr.read()
+                raise Exception("An error occurred in the ffmpeg subprocess:\n" \
+                        + res.decode("utf-8"))
+    yield total_frames_output
+    if len(res) > 0:
+        print(res.decode("utf-8"), end="", file=sys.stderr)
+
+def gifski_process(args, video_format, file_path, env):
+    frame_data = yield
+    with subprocess.Popen(args + video_format['main_pass'] + ['-f', 'yuv4mpegpipe', '-'],
+                          stderr=subprocess.PIPE, stdin=subprocess.PIPE,
+                          stdout=subprocess.PIPE, env=env) as procff:
+        with subprocess.Popen([gifski_path] + video_format['gifski_pass']
+                              + ['-q', '-o', file_path, '-'], stderr=subprocess.PIPE,
+                              stdin=procff.stdout, stdout=subprocess.PIPE,
+                              env=env) as procgs:
+            try:
+                while frame_data is not None:
+                    procff.stdin.write(frame_data)
+                    frame_data = yield
+                procff.stdin.flush()
+                procff.stdin.close()
+                resff = procff.stderr.read()
+                resgs = procgs.stderr.read()
+                outgs = procgs.stdout.read()
+            except BrokenPipeError as e:
+                procff.stdin.close()
+                resff = procff.stderr.read()
+                resgs = procgs.stderr.read()
+                raise Exception("An error occurred while creating gifski output\n" \
+                        + "Make sure you are using gifski --version >=1.32.0\nffmpeg: " \
+                        + resff.decode("utf-8") + '\ngifski: ' + resgs.decode("utf-8"))
+    if len(resff) > 0:
+        print(resff.decode("utf-8"), end="", file=sys.stderr)
+    if len(resgs) > 0:
+        print(resgs.decode("utf-8"), end="", file=sys.stderr)
+    #should always be empty as the quiet flag is passed
+    if len(outgs) > 0:
+        print(outgs.decode("utf-8"))
+
+def to_pingpong(inp):
+    if not hasattr(inp, "__getitem__"):
+        inp = list(inp)
+    yield from inp
+    for i in range(len(inp)-2,0,-1):
+        yield inp[i]
+
+
+class DeepFuzeAdavance:
+    @classmethod
+    def INPUT_TYPES(s):
+        ffmpeg_formats = get_video_formats()
+        return {
+            "required": {
+                "images": ("IMAGE",),
+                "audio": ("AUDIO",),
+                "enhancer": ("None,codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,gpen_bfr_1024,gpen_bfr_2048,restoreformer_plus_plus".split(","),{"default":'None'}),
+                "frame_enhancer": ("None,clear_reality_x4,lsdir_x4,nomos8k_sc_x4,real_esrgan_x2,real_esrgan_x2_fp16,real_esrgan_x4,real_esrgan_x4_fp16,real_hatgan_x4,span_kendata_x4,ultra_sharp_x4".split(","),{"default":'None'}),
+                "face_mask_padding_left": ("FLOAT",{"default":0,"min":0,"max":3,"step":0.1}),
+                "face_mask_padding_right": ("FLOAT",{"default":0,"min":0,"max":3,"step":0.1}),
+                "face_mask_padding_bottom": ("FLOAT",{"default":0,"min":0,"max":3,"step":0.1}),
+                "face_mask_padding_top": ("FLOAT",{"default":0,"min":0,"max":3,"step":0.1}),
+                "trim_frame_start": ("INT",{"default":0,"max":2000},),
+                "trim_frame_end": ("INT",{"default":0,"max":2000},),
+                "device" : (["cpu","gpu"],{"default":"cpu"}),
+                "frame_rate": (
+                    "FLOAT",
+                    {"default": 25, "min": 1, "step": 1},
+                ),
+            },
+            "optional": {
+                "meta_batch": ("VHS_BatchManager",),
+                "loop_count": ("INT", {"default": 0, "min": 0, "max": 100, "step": 1}),
+                "filename_prefix": ("STRING", {"default": "deepfuze"}),
+                "pingpong": ("BOOLEAN", {"default": False}),
+                "save_output": ("BOOLEAN", {"default": True}),
+            },
+            "hidden": {
+                "prompt": "PROMPT",
+                "format": (["image/gif", "image/webp"] + ffmpeg_formats,{"default":"video/h265-mp4"}),
+                "extra_pnginfo": "EXTRA_PNGINFO",
+                "unique_id": "UNIQUE_ID"
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE", "INT", "VHS_AUDIO", "VHS_VIDEOINFO",)
+    RETURN_NAMES = ("IMAGE", "frame_count", "audio", "video_info",)
+
+    # RETURN_TYPES = ("VHS_FILENAMES",)
+    # RETURN_NAMES = ("Filenames",)
+    # OUTPUT_NODE = True
+    CATEGORY = "DeepFuze"
+    FUNCTION = "lipsyncgenerate"
+
+    def lipsyncgenerate(
+        self,
+        images,
+        audio,
+        enhancer,
+        frame_enhancer,
+        face_mask_padding_left,
+        face_mask_padding_right,
+        face_mask_padding_bottom,
+        face_mask_padding_top,
+        trim_frame_start,
+        trim_frame_end,
+        device,
+        frame_rate: int,
+        loop_count: int,
+        filename_prefix="deepfuze",
+        format="video/h265-mp4",
+        pingpong=False,
+        save_output=True,
+        prompt=None,
+        extra_pnginfo=None,
+        unique_id=None,
+        manual_format_widgets=None,
+        meta_batch=None
+    ):
+        print(enhancer,frame_rate,format)
+        if 
isinstance(images, torch.Tensor) and images.size(0) == 0: + return ("",) + pbar = ProgressBar(len(images)) + trim_frame_end = len(images)-trim_frame_end + + first_image = images[0] + # get output information + output_dir = ( + folder_paths.get_output_directory() + if save_output + else folder_paths.get_temp_directory() + ) + ( + full_output_folder, + filename, + _, + subfolder, + _, + ) = folder_paths.get_save_image_path(filename_prefix, output_dir) + output_files = [] + + metadata = PngInfo() + video_metadata = {} + if prompt is not None: + metadata.add_text("prompt", json.dumps(prompt)) + video_metadata["prompt"] = prompt + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata.add_text(x, json.dumps(extra_pnginfo[x])) + video_metadata[x] = extra_pnginfo[x] + metadata.add_text("CreationTime", datetime.datetime.now().isoformat(" ")[:19]) + + if meta_batch is not None and unique_id in meta_batch.outputs: + (counter, output_process) = meta_batch.outputs[unique_id] + else: + # comfy counter workaround + max_counter = 0 + + # Loop through the existing files + matcher = re.compile(f"{re.escape(filename)}_(\\d+)\\D*\\..+", re.IGNORECASE) + for existing_file in os.listdir(full_output_folder): + # Check if the file matches the expected format + match = matcher.fullmatch(existing_file) + if match: + # Extract the numeric portion of the filename + file_counter = int(match.group(1)) + # Update the maximum counter value if necessary + if file_counter > max_counter: + max_counter = file_counter + + # Increment the counter by 1 to get the next available value + counter = max_counter + 1 + output_process = None + + # save first frame as png to keep metadata + file = f"{filename}_{counter:05}.png" + file_path = os.path.join(full_output_folder, file) + Image.fromarray(tensor_to_bytes(first_image)).save( + file_path, + pnginfo=metadata, + compress_level=4, + ) + output_files.append(file_path) + + format_type, format_ext = format.split("/") + print(format_type, format_ext) + if format_type == "image": + if meta_batch is not None: + raise Exception("Pillow('image/') formats are not compatible with batched output") + image_kwargs = {} + if format_ext == "gif": + image_kwargs['disposal'] = 2 + if format_ext == "webp": + #Save timestamp information + exif = Image.Exif() + exif[ExifTags.IFD.Exif] = {36867: datetime.datetime.now().isoformat(" ")[:19]} + image_kwargs['exif'] = exif + file = f"{filename}_{counter:05}.{format_ext}" + file_path = os.path.join(full_output_folder, file) + if pingpong: + images = to_pingpong(images) + frames = map(lambda x : Image.fromarray(tensor_to_bytes(x)), images) + # Use pillow directly to save an animated image + next(frames).save( + file_path, + format=format_ext.upper(), + save_all=True, + append_images=frames, + duration=round(1000 / frame_rate), + loop=loop_count, + compress_level=4, + **image_kwargs + ) + output_files.append(file_path) + else: + # Use ffmpeg to save a video + if ffmpeg_path is None: + raise ProcessLookupError(f"ffmpeg is required for video outputs and could not be found.\nIn order to use video outputs, you must either:\n- Install imageio-ffmpeg with pip,\n- Place a ffmpeg executable in {os.path.abspath('')}, or\n- Install ffmpeg and add it to the system path.") + + #Acquire additional format_widget values + kwargs = None + if manual_format_widgets is None: + if prompt is not None: + kwargs = prompt[unique_id]['inputs'] + else: + manual_format_widgets = {} + if kwargs is None: + kwargs = get_format_widget_defaults(format_ext) + missing = {} + for 
k in kwargs.keys(): + if k in manual_format_widgets: + kwargs[k] = manual_format_widgets[k] + else: + missing[k] = kwargs[k] + if len(missing) > 0: + print("Extra format values were not provided, the following defaults will be used: " + str(kwargs) + "\nThis is likely due to usage of ComfyUI-to-python. These values can be manually set by supplying a manual_format_widgets argument") + kwargs["format"] = format + kwargs['pix_fmt'] = 'yuv420p10le' + kwargs['crf'] = 22 + kwargs["save_metadata"] = ["save_metadata", "BOOLEAN", {"default": True}] + print(kwargs) + video_format = apply_format_widgets(format_ext, kwargs) + has_alpha = first_image.shape[-1] == 4 + dim_alignment = video_format.get("dim_alignment", 8) + if (first_image.shape[1] % dim_alignment) or (first_image.shape[0] % dim_alignment): + #output frames must be padded + to_pad = (-first_image.shape[1] % dim_alignment, + -first_image.shape[0] % dim_alignment) + padding = (to_pad[0]//2, to_pad[0] - to_pad[0]//2, + to_pad[1]//2, to_pad[1] - to_pad[1]//2) + padfunc = torch.nn.ReplicationPad2d(padding) + def pad(image): + image = image.permute((2,0,1))#HWC to CHW + padded = padfunc(image.to(dtype=torch.float32)) + return padded.permute((1,2,0)) + images = map(pad, images) + new_dims = (-first_image.shape[1] % dim_alignment + first_image.shape[1], + -first_image.shape[0] % dim_alignment + first_image.shape[0]) + dimensions = f"{new_dims[0]}x{new_dims[1]}" + print("Output images were not of valid resolution and have had padding applied") + else: + dimensions = f"{first_image.shape[1]}x{first_image.shape[0]}" + if loop_count > 0: + loop_args = ["-vf", "loop=loop=" + str(loop_count)+":size=" + str(len(images))] + else: + loop_args = [] + if pingpong: + if meta_batch is not None: + print("pingpong is incompatible with batched output") + images = to_pingpong(images) + if video_format.get('input_color_depth', '8bit') == '16bit': + images = map(tensor_to_shorts, images) + if has_alpha: + i_pix_fmt = 'rgba64' + else: + i_pix_fmt = 'rgb48' + else: + images = map(tensor_to_bytes, images) + if has_alpha: + i_pix_fmt = 'rgba' + else: + i_pix_fmt = 'rgb24' + file = f"{filename}_{counter:05}.{video_format['extension']}" + file_path = os.path.join(full_output_folder, file) + if loop_count > 0: + loop_args = ["-vf", "loop=loop=" + str(loop_count)+":size=" + str(len(images))] + else: + loop_args = [] + bitrate_arg = [] + bitrate = video_format.get('bitrate') + if bitrate is not None: + bitrate_arg = ["-b:v", str(bitrate) + "M" if video_format.get('megabit') == 'True' else str(bitrate) + "K"] + args = [ffmpeg_path, "-v", "error", "-f", "rawvideo", "-pix_fmt", i_pix_fmt, + "-s", dimensions, "-r", str(frame_rate), "-i", "-"] \ + + loop_args + + images = map(lambda x: x.tobytes(), images) + env=os.environ.copy() + if "environment" in video_format: + env.update(video_format["environment"]) + + if "pre_pass" in video_format: + if meta_batch is not None: + #Performing a prepass requires keeping access to all frames. 
+ #Potential solutions include keeping just output frames in + #memory or using 3 passes with intermediate file, but + #very long gifs probably shouldn't be encouraged + raise Exception("Formats which require a pre_pass are incompatible with Batch Manager.") + images = [b''.join(images)] + os.makedirs(folder_paths.get_temp_directory(), exist_ok=True) + pre_pass_args = args[:13] + video_format['pre_pass'] + try: + subprocess.run(pre_pass_args, input=images[0], env=env, + capture_output=True, check=True) + except subprocess.CalledProcessError as e: + raise Exception("An error occurred in the ffmpeg prepass:\n" \ + + e.stderr.decode("utf-8")) + if "inputs_main_pass" in video_format: + args = args[:13] + video_format['inputs_main_pass'] + args[13:] + + if output_process is None: + if 'gifski_pass' in video_format: + output_process = gifski_process(args, video_format, file_path, env) + else: + args += video_format['main_pass'] + bitrate_arg + output_process = ffmpeg_process(args, video_format, video_metadata, file_path, env) + #Proceed to first yield + output_process.send(None) + if meta_batch is not None: + meta_batch.outputs[unique_id] = (counter, output_process) + + for image in images: + pbar.update(1) + output_process.send(image) + if meta_batch is not None: + requeue_workflow((meta_batch.unique_id, not meta_batch.has_closed_inputs)) + if meta_batch is None or meta_batch.has_closed_inputs: + #Close pipe and wait for termination. + try: + total_frames_output = output_process.send(None) + output_process.send(None) + except StopIteration: + pass + if meta_batch is not None: + meta_batch.outputs.pop(unique_id) + if len(meta_batch.outputs) == 0: + meta_batch.reset() + else: + #batch is unfinished + #TODO: Check if empty output breaks other custom nodes + return {"ui": {"unfinished_batch": [True]}, "result": ((save_output, []),)} + + output_files.append(file_path) + + audio_file = os.path.join(audio_dir,str(time.time()).replace(".","")+".wav") + write(audio_file,audio.sample_rate,audio.audio_data) + print(audio_file) + filename = os.path.join(result_dir,f"{str(time.time()).replace('.','')}.mp4") + enhanced_filename = os.path.join(result_dir,f"enhanced_{str(time.time()).replace('.','')}.mp4") + command = [ + 'python', + './run.py', # Script to run + '--frame-processors', + "lip_syncer", + "-s", + audio_file, + '-t', # Argument: segmentation path + output_files[-1], + '-o', + filename, + '--trim-frame-start', + str(trim_frame_start), + '--trim-frame-end', + str(trim_frame_end), + # '--face-mask-padding', + # [str(face_mask_padding_top),str(face_mask_padding_bottom),str(face_mask_padding_left),str(face_mask_padding_right)], + '--headless' + ] + if device=="gpu": + command.extend(['--execution-providers',"coreml"]) + print(command) + result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) + # print(result.stdout.splitlines()[-1]) + if enhancer!="None": + command = [ + 'python', + './run.py', # Script to run + '--frame-processors', + "face_enhancer", + "-t", + filename, + '-o', + enhanced_filename, + '--headless' + ] + print(command) + result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) + filename = enhanced_filename + + if frame_enhancer!="None": + command = [ + 'python', + './run.py', # Script to run + '--frame-processors', + "frame_enhancer", + "-t", + filename, + '-o', + enhanced_filename, + '--headless' + ] + print(command) + result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) + 
filename = enhanced_filename
+
+        print(result.stderr)
+        try:
+            os.remove(audio_file)
+        except OSError:
+            pass
+        return load_video_cv(filename,0,'Disabled',512,512,0,0,1)
+
+
+from TTS.api import TTS
+from scipy.fft import fft
+
+if torch.backends.mps.is_available():
+    device = "mps"
+elif torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
+
+checkpoint_path_voice = os.path.join(folder_paths.models_dir,"deepfuze")
+print(checkpoint_path_voice)
+
+audio_path = os.path.join(folder_paths.get_input_directory(),"audio")
+
+tts = TTS()
+tts.load_tts_model_by_path(model_path=checkpoint_path_voice,config_path=os.path.join(checkpoint_path_voice,"config.json"))
+os.makedirs(audio_path,exist_ok=True)
+
+class TTS_generation:
+
+    @classmethod
+    def INPUT_TYPES(self):
+        return {
+            "required": {
+                "audio": ("AUDIO",),
+                "text": ("STRING",{
+                    "multiline": True,
+                    "default": "Uploaded Audio and text should be in same language"
+                }),
+                "device": (["cpu","cuda","mps"],),
+                "supported_language": ("English (en), Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Turkish (tr), Russian (ru), Dutch (nl), Czech (cs), Arabic (ar), Chinese (zh-cn), Japanese (ja), Hungarian (hu), Korean (ko), Hindi (hi)".split(","),),
+            }
+        }
+
+    RETURN_TYPES = ("AUDIO",)  # Output type(s) of the node
+    FUNCTION = "generate_audio"  # Entry-point method name
+
+    CATEGORY = "DeepFuze"  # Category for the node in the UI
+
+    def generate_audio(self, audio, text, device, supported_language):
+        print(text)
+        try:
+            tts.to(device)
+        except: pass
+        # derive the language code from the selected "Name (code)" choice
+        language = re.search(r"\((.+?)\)", supported_language).group(1)
+        file_path = os.path.join(audio_path,str(time.time()).replace(".","")+".wav")
+        write(file_path,audio.sample_rate,audio.audio_data)
+        tts.tts_to_file(text=text, speaker_wav=file_path, language=language, file_path=file_path)
+        audio_file = AudioSegment.from_file(file_path, format="wav")
+        audio_data = AudioData(audio_file)
+        try:
+            tts.to("cpu")
+        except: pass
+        return (audio_data,)
+
+
+NODE_CLASS_MAPPINGS = {
+    "DeepFuzeAdavance": DeepFuzeAdavance,
+    "TTS_generation": TTS_generation,
+    "LLM_node": LLM_node,
+    "PlayBackAudio": PlayBackAudio
+}
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "DeepFuzeAdavance": "DeepFuze Lipsync",
+    "TTS_generation": "DeepFuze TTS",
+    "LLM_node": "Openai LLM",
+    "PlayBackAudio": "Play Audio"
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..001a7cc
--- 
/dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +filetype==1.2.0 +gradio==3.50.2 +numpy==1.26.4 +onnx==1.16.0 +onnxruntime==1.17.3 +opencv-python==4.9.0.80 +psutil==5.9.8 +tqdm==4.66.4 +scipy==1.13.0 +openai +sounddevice +pydub +TTS \ No newline at end of file diff --git a/run.py b/run.py new file mode 100755 index 0000000..b26c475 --- /dev/null +++ b/run.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from deepfuze import core + +if __name__ == '__main__': + core.cli() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000..148f01b --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,26 @@ +import subprocess +import pytest + +from deepfuze.audio import get_audio_frame, read_static_audio +from deepfuze.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('../../models/facefusion/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3' + ]) + subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.wav' ]) + + +def test_get_audio_frame() -> None: + assert get_audio_frame('../../models/facefusion/examples/source.mp3', 25) is not None + assert get_audio_frame('../../models/facefusion/examples/source.wav', 25) is not None + assert get_audio_frame('invalid', 25) is None + + +def test_read_static_audio() -> None: + assert len(read_static_audio('../../models/facefusion/examples/source.mp3', 25)) == 280 + assert len(read_static_audio('../../models/facefusion/examples/source.wav', 25)) == 280 + assert read_static_audio('invalid', 25) is None diff --git a/tests/test_cli_face_debugger.py b/tests/test_cli_face_debugger.py new file mode 100644 index 0000000..694fb9e --- /dev/null +++ b/tests/test_cli_face_debugger.py @@ -0,0 +1,31 @@ +import subprocess +import sys +import pytest + +from deepfuze.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('../../models/facefusion/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) + + +def test_debug_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_debug_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_debug_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_face_enhancer.py b/tests/test_cli_face_enhancer.py new 
file mode 100644 index 0000000..6def546 --- /dev/null +++ b/tests/test_cli_face_enhancer.py @@ -0,0 +1,32 @@ +import subprocess +import sys +import pytest + +from deepfuze.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('../../models/facefusion/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) + + +def test_enhance_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_enhance_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() + diff --git a/tests/test_cli_face_swapper.py b/tests/test_cli_face_swapper.py new file mode 100644 index 0000000..d269422 --- /dev/null +++ b/tests/test_cli_face_swapper.py @@ -0,0 +1,31 @@ +import subprocess +import sys +import pytest + +from deepfuze.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('../../models/facefusion/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) + + +def test_swap_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_swap_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_swap_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_frame_colorizer.py b/tests/test_cli_frame_colorizer.py new file mode 100644 index 0000000..0f254fe --- /dev/null +++ 
diff --git a/tests/test_cli_frame_colorizer.py b/tests/test_cli_frame_colorizer.py
new file mode 100644
index 0000000..0f254fe
--- /dev/null
+++ b/tests/test_cli_frame_colorizer.py
@@ -0,0 +1,32 @@
+import subprocess
+import sys
+import pytest
+
+from deepfuze.download import conditional_download
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.mp4' ])
+
+
+def test_colorize_frame_to_image() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.jpg', '-o', '../../models/facefusion/examples/test_colorize_frame_to_image.jpg', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'image succeed' in run.stdout.decode()
+
+
+def test_colorize_frame_to_video() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.mp4', '-o', '../../models/facefusion/examples/test_colorize_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'video succeed' in run.stdout.decode()
diff --git a/tests/test_cli_frame_enhancer.py b/tests/test_cli_frame_enhancer.py
new file mode 100644
index 0000000..b2b34bc
--- /dev/null
+++ b/tests/test_cli_frame_enhancer.py
@@ -0,0 +1,31 @@
+import subprocess
+import sys
+import pytest
+
+from deepfuze.download import conditional_download
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ])
+
+
+def test_enhance_frame_to_image() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_frame_to_image.jpg', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'image succeed' in run.stdout.decode()
+
+
+def test_enhance_frame_to_video() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'video succeed' in run.stdout.decode()
diff --git a/tests/test_cli_lip_syncer.py b/tests/test_cli_lip_syncer.py
new file mode 100644
index 0000000..29991d2
--- /dev/null
+++ b/tests/test_cli_lip_syncer.py
@@ -0,0 +1,32 @@
+import subprocess
+import sys
+import pytest
+
+from deepfuze.download import conditional_download
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ])
+
+
+def test_sync_lip_to_image() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '../../models/facefusion/examples/source.mp3', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_sync_lip_to_image.jpg', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'image succeed' in run.stdout.decode()
+
+
+def test_sync_lip_to_video() -> None:
+    commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '../../models/facefusion/examples/source.mp3', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_sync_lip_to_video.mp4', '--trim-frame-end', '10', '--headless' ]
+    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+
+    assert run.returncode == 0
+    assert 'video succeed' in run.stdout.decode()
diff --git a/tests/test_common_helper.py b/tests/test_common_helper.py
new file mode 100644
index 0000000..a83f540
--- /dev/null
+++ b/tests/test_common_helper.py
@@ -0,0 +1,15 @@
+from deepfuze.common_helper import create_metavar, create_int_range, create_float_range
+
+
+def test_create_metavar() -> None:
+    assert create_metavar([ 1, 2, 3, 4, 5 ]) == '[1-5]'
+
+
+def test_create_int_range() -> None:
+    assert create_int_range(0, 2, 1) == [ 0, 1, 2 ]
+    assert create_float_range(0, 1, 1) == [ 0, 1 ]
+
+
+def test_create_float_range() -> None:
+    assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ]
+    assert create_float_range(0.0, 1.0, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0 ]
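# [editor's sketch] A plausible implementation of create_float_range consistent
# with the assertions above (inclusive endpoints, rounding to dodge float drift).
# This is inferred from the tests, not the function shipped in this commit.
def create_float_range(start, stop, step):
    steps = int(round((stop - start) / step))
    # round to two decimals so 0.30000000000000004 compares equal to 0.30
    return [ round(start + index * step, 2) for index in range(steps + 1) ]

assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ]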
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..f16bcaa
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,96 @@
+from configparser import ConfigParser
+import pytest
+
+from deepfuze import config
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    config.CONFIG = ConfigParser()
+    config.CONFIG.read_dict(
+    {
+        'str':
+        {
+            'valid': 'a',
+            'unset': ''
+        },
+        'int':
+        {
+            'valid': '1',
+            'unset': ''
+        },
+        'float':
+        {
+            'valid': '1.0',
+            'unset': ''
+        },
+        'bool':
+        {
+            'valid': 'True',
+            'unset': ''
+        },
+        'str_list':
+        {
+            'valid': 'a b c',
+            'unset': ''
+        },
+        'int_list':
+        {
+            'valid': '1 2 3',
+            'unset': ''
+        },
+        'float_list':
+        {
+            'valid': '1.0 2.0 3.0',
+            'unset': ''
+        }
+    })
+
+
+def test_get_str_value() -> None:
+    assert config.get_str_value('str.valid') == 'a'
+    assert config.get_str_value('str.unset', 'b') == 'b'
+    assert config.get_str_value('str.unset') is None
+    assert config.get_str_value('str.invalid') is None
+
+
+def test_get_int_value() -> None:
+    assert config.get_int_value('int.valid') == 1
+    assert config.get_int_value('int.unset', '1') == 1
+    assert config.get_int_value('int.unset') is None
+    assert config.get_int_value('int.invalid') is None
+
+
+def test_get_float_value() -> None:
+    assert config.get_float_value('float.valid') == 1.0
+    assert config.get_float_value('float.unset', '1.0') == 1.0
+    assert config.get_float_value('float.unset') is None
+    assert config.get_float_value('float.invalid') is None
+
+
+def test_get_bool_value() -> None:
+    assert config.get_bool_value('bool.valid') is True
+    assert config.get_bool_value('bool.unset', 'False') is False
+    assert config.get_bool_value('bool.unset') is None
+    assert config.get_bool_value('bool.invalid') is None
+
+
+def test_get_str_list() -> None:
+    assert config.get_str_list('str_list.valid') == [ 'a', 'b', 'c' ]
+    assert config.get_str_list('str_list.unset', 'c b a') == [ 'c', 'b', 'a' ]
+    assert config.get_str_list('str_list.unset') is None
+    assert config.get_str_list('str_list.invalid') is None
+
+
+def test_get_int_list() -> None:
+    assert config.get_int_list('int_list.valid') == [ 1, 2, 3 ]
+    assert config.get_int_list('int_list.unset', '3 2 1') == [ 3, 2, 1 ]
+    assert config.get_int_list('int_list.unset') is None
+    assert config.get_int_list('int_list.invalid') is None
+
+
+def test_get_float_list() -> None:
+    assert config.get_float_list('float_list.valid') == [ 1.0, 2.0, 3.0 ]
+    assert config.get_float_list('float_list.unset', '3.0 2.0 1.0') == [ 3.0, 2.0, 1.0 ]
+    assert config.get_float_list('float_list.unset') is None
+    assert config.get_float_list('float_list.invalid') is None
diff --git a/tests/test_download.py b/tests/test_download.py
new file mode 100644
index 0000000..69b8abb
--- /dev/null
+++ b/tests/test_download.py
@@ -0,0 +1,23 @@
+import pytest
+
+from deepfuze.download import conditional_download, get_download_size, is_download_done
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+
+
+def test_get_download_size() -> None:
+    assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4') == 191675
+    assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4') == 370732
+    assert get_download_size('invalid') == 0
+
+
+def test_is_download_done() -> None:
+    assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', '../../models/facefusion/examples/target-240p.mp4') is True
+    assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'invalid') is False
+    assert is_download_done('invalid', 'invalid') is False
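# [editor's sketch] The config tests above imply that keys are 'section.option'
# paths into a ConfigParser and that empty values fall back to the supplied
# default. A minimal sketch of that lookup, assuming the same module-level
# CONFIG object; this is inferred behavior, not the shipped implementation.
def get_str_value(key, fallback = None):
    section, _, option = key.partition('.')
    if CONFIG.has_option(section, option) and CONFIG.get(section, option):
        return CONFIG.get(section, option)
    return fallback if fallback else None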
diff --git a/tests/test_execution.py b/tests/test_execution.py
new file mode 100644
index 0000000..7ad7fcc
--- /dev/null
+++ b/tests/test_execution.py
@@ -0,0 +1,27 @@
+from deepfuze.execution import encode_execution_providers, decode_execution_providers, has_execution_provider, apply_execution_provider_options
+
+
+def test_encode_execution_providers() -> None:
+    assert encode_execution_providers([ 'CPUExecutionProvider' ]) == [ 'cpu' ]
+
+
+def test_decode_execution_providers() -> None:
+    assert decode_execution_providers([ 'cpu' ]) == [ 'CPUExecutionProvider' ]
+
+
+def test_has_execution_provider() -> None:
+    assert has_execution_provider('CPUExecutionProvider') is True
+    assert has_execution_provider('InvalidExecutionProvider') is False
+
+
+def test_multiple_execution_providers() -> None:
+    execution_provider_with_options =\
+    [
+        'CPUExecutionProvider',
+        ('CUDAExecutionProvider',
+        {
+            'device_id': '1',
+            'cudnn_conv_algo_search': 'DEFAULT'
+        })
+    ]
+    assert apply_execution_provider_options('1', [ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == execution_provider_with_options
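# [editor's sketch] The (name, options) tuples produced above are the format
# onnxruntime expects for its providers argument. A minimal usage sketch,
# assuming a local 'model.onnx' file (onnxruntime itself is pinned in
# requirements.txt):
import onnxruntime

from deepfuze.execution import apply_execution_provider_options

providers = apply_execution_provider_options('1', [ 'CPUExecutionProvider', 'CUDAExecutionProvider' ])
session = onnxruntime.InferenceSession('model.onnx', providers = providers)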
diff --git a/tests/test_face_analyser.py b/tests/test_face_analyser.py
new file mode 100644
index 0000000..62126eb
--- /dev/null
+++ b/tests/test_face_analyser.py
@@ -0,0 +1,103 @@
+import subprocess
+import pytest
+
+import deepfuze.globals
+from deepfuze.download import conditional_download
+from deepfuze.face_analyser import pre_check, clear_face_analyser, get_one_face
+from deepfuze.typing import Face
+from deepfuze.vision import read_static_image
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.8:ih*0.8', '../../models/facefusion/examples/source-80crop.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.7:ih*0.7', '../../models/facefusion/examples/source-70crop.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.6:ih*0.6', '../../models/facefusion/examples/source-60crop.jpg' ])
+
+
+@pytest.fixture(autouse = True)
+def before_each() -> None:
+    deepfuze.globals.face_detector_score = 0.5
+    deepfuze.globals.face_landmarker_score = 0.5
+    deepfuze.globals.face_recognizer_model = 'arcface_inswapper'
+    clear_face_analyser()
+
+
+def test_get_one_face_with_retinaface() -> None:
+    deepfuze.globals.face_detector_model = 'retinaface'
+    deepfuze.globals.face_detector_size = '320x320'
+
+    pre_check()
+    source_paths =\
+    [
+        '../../models/facefusion/examples/source.jpg',
+        '../../models/facefusion/examples/source-80crop.jpg',
+        '../../models/facefusion/examples/source-70crop.jpg',
+        '../../models/facefusion/examples/source-60crop.jpg'
+    ]
+    for source_path in source_paths:
+        source_frame = read_static_image(source_path)
+        face = get_one_face(source_frame)
+
+        assert isinstance(face, Face)
+
+
+def test_get_one_face_with_scrfd() -> None:
+    deepfuze.globals.face_detector_model = 'scrfd'
+    deepfuze.globals.face_detector_size = '640x640'
+
+    pre_check()
+    source_paths =\
+    [
+        '../../models/facefusion/examples/source.jpg',
+        '../../models/facefusion/examples/source-80crop.jpg',
+        '../../models/facefusion/examples/source-70crop.jpg',
+        '../../models/facefusion/examples/source-60crop.jpg'
+    ]
+    for source_path in source_paths:
+        source_frame = read_static_image(source_path)
+        face = get_one_face(source_frame)
+
+        assert isinstance(face, Face)
+
+
+def test_get_one_face_with_yoloface() -> None:
+    deepfuze.globals.face_detector_model = 'yoloface'
+    deepfuze.globals.face_detector_size = '640x640'
+
+    pre_check()
+    source_paths =\
+    [
+        '../../models/facefusion/examples/source.jpg',
+        '../../models/facefusion/examples/source-80crop.jpg',
+        '../../models/facefusion/examples/source-70crop.jpg',
+        '../../models/facefusion/examples/source-60crop.jpg'
+    ]
+    for source_path in source_paths:
+        source_frame = read_static_image(source_path)
+        face = get_one_face(source_frame)
+
+        assert isinstance(face, Face)
+
+
+def test_get_one_face_with_yunet() -> None:
+    deepfuze.globals.face_detector_model = 'yunet'
+    deepfuze.globals.face_detector_size = '640x640'
+
+    pre_check()
+    source_paths =\
+    [
+        '../../models/facefusion/examples/source.jpg',
+        '../../models/facefusion/examples/source-80crop.jpg',
+        '../../models/facefusion/examples/source-70crop.jpg',
+        '../../models/facefusion/examples/source-60crop.jpg'
+    ]
+    for source_path in source_paths:
+        source_frame = read_static_image(source_path)
+        face = get_one_face(source_frame)
+
+        assert isinstance(face, Face)
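# [editor's sketch] The four detector tests above differ only in model name and
# detector size; a sketch of the same coverage via pytest.mark.parametrize
# (a refactor suggestion, not code from this commit):
import pytest

@pytest.mark.parametrize('model, size',
[
    ('retinaface', '320x320'),
    ('scrfd', '640x640'),
    ('yoloface', '640x640'),
    ('yunet', '640x640')
])
def test_get_one_face(model, size) -> None:
    deepfuze.globals.face_detector_model = model
    deepfuze.globals.face_detector_size = size
    pre_check()
    face = get_one_face(read_static_image('../../models/facefusion/examples/source.jpg'))
    assert isinstance(face, Face)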
diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py
new file mode 100644
index 0000000..2a428d8
--- /dev/null
+++ b/tests/test_ffmpeg.py
@@ -0,0 +1,113 @@
+import glob
+import subprocess
+import pytest
+
+import deepfuze.globals
+from deepfuze import process_manager
+from deepfuze.filesystem import get_temp_directory_path, create_temp, clear_temp
+from deepfuze.download import conditional_download
+from deepfuze.ffmpeg import extract_frames, read_audio_buffer
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    process_manager.start()
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.wav' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=25', '../../models/facefusion/examples/target-240p-25fps.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=30', '../../models/facefusion/examples/target-240p-30fps.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=60', '../../models/facefusion/examples/target-240p-60fps.mp4' ])
+
+
+@pytest.fixture(scope = 'function', autouse = True)
+def before_each() -> None:
+    deepfuze.globals.trim_frame_start = None
+    deepfuze.globals.trim_frame_end = None
+    deepfuze.globals.temp_frame_format = 'jpg'
+
+
+def test_extract_frames() -> None:
+    target_paths =\
+    [
+        '../../models/facefusion/examples/target-240p-25fps.mp4',
+        '../../models/facefusion/examples/target-240p-30fps.mp4',
+        '../../models/facefusion/examples/target-240p-60fps.mp4'
+    ]
+
+    for target_path in target_paths:
+        temp_directory_path = get_temp_directory_path(target_path)
+        create_temp(target_path)
+
+        assert extract_frames(target_path, '452x240', 30.0) is True
+        assert len(glob.glob1(temp_directory_path, '*.jpg')) == 324
+
+        clear_temp(target_path)
+
+
+def test_extract_frames_with_trim_start() -> None:
+    deepfuze.globals.trim_frame_start = 224
+    data_provider =\
+    [
+        ('../../models/facefusion/examples/target-240p-25fps.mp4', 55),
+        ('../../models/facefusion/examples/target-240p-30fps.mp4', 100),
+        ('../../models/facefusion/examples/target-240p-60fps.mp4', 212)
+    ]
+
+    for target_path, frame_total in data_provider:
+        temp_directory_path = get_temp_directory_path(target_path)
+        create_temp(target_path)
+
+        assert extract_frames(target_path, '452x240', 30.0) is True
+        assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total
+
+        clear_temp(target_path)
+
+
+def test_extract_frames_with_trim_start_and_trim_end() -> None:
+    deepfuze.globals.trim_frame_start = 124
+    deepfuze.globals.trim_frame_end = 224
+    data_provider =\
+    [
+        ('../../models/facefusion/examples/target-240p-25fps.mp4', 120),
+        ('../../models/facefusion/examples/target-240p-30fps.mp4', 100),
+        ('../../models/facefusion/examples/target-240p-60fps.mp4', 50)
+    ]
+
+    for target_path, frame_total in data_provider:
+        temp_directory_path = get_temp_directory_path(target_path)
+        create_temp(target_path)
+
+        assert extract_frames(target_path, '452x240', 30.0) is True
+        assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total
+
+        clear_temp(target_path)
+
+
+def test_extract_frames_with_trim_end() -> None:
+    deepfuze.globals.trim_frame_end = 100
+    data_provider =\
+    [
+        ('../../models/facefusion/examples/target-240p-25fps.mp4', 120),
+        ('../../models/facefusion/examples/target-240p-30fps.mp4', 100),
+        ('../../models/facefusion/examples/target-240p-60fps.mp4', 50)
+    ]
+
+    for target_path, frame_total in data_provider:
+        temp_directory_path = get_temp_directory_path(target_path)
+        create_temp(target_path)
+
+        assert extract_frames(target_path, '426x240', 30.0) is True
+        assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total
+
+        clear_temp(target_path)
+
+
+def test_read_audio_buffer() -> None:
+    assert isinstance(read_audio_buffer('../../models/facefusion/examples/source.mp3', 1, 1), bytes)
+    assert isinstance(read_audio_buffer('../../models/facefusion/examples/source.wav', 1, 1), bytes)
+    assert read_audio_buffer('../../models/facefusion/examples/invalid.mp3', 1, 1) is None
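# [editor's note] The expected frame totals above follow from re-encoding to
# 30 fps: remaining_source_frames / source_fps * 30. A quick check of the 60 fps
# case in test_extract_frames_with_trim_start (648 total frames, trim at 224):
source_fps = 60.0
remaining_frames = 648 - 224
assert round(remaining_frames / source_fps * 30.0) == 212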
diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py
new file mode 100644
index 0000000..dde07aa
--- /dev/null
+++ b/tests/test_filesystem.py
@@ -0,0 +1,90 @@
+import shutil
+import pytest
+
+from deepfuze.common_helper import is_windows
+from deepfuze.download import conditional_download
+from deepfuze.filesystem import get_file_size, is_file, is_directory, is_audio, has_audio, is_image, has_image, is_video, filter_audio_paths, filter_image_paths, list_directory, sanitize_path_for_windows
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
+    ])
+    shutil.copyfile('../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/söurce.jpg')
+
+
+def test_get_file_size() -> None:
+    assert get_file_size('../../models/facefusion/examples/source.jpg') > 0
+    assert get_file_size('invalid') == 0
+
+
+def test_is_file() -> None:
+    assert is_file('../../models/facefusion/examples/source.jpg') is True
+    assert is_file('../../models/facefusion/examples') is False
+    assert is_file('invalid') is False
+
+
+def test_is_directory() -> None:
+    assert is_directory('../../models/facefusion/examples') is True
+    assert is_directory('../../models/facefusion/examples/source.jpg') is False
+    assert is_directory('invalid') is False
+
+
+def test_is_audio() -> None:
+    assert is_audio('../../models/facefusion/examples/source.mp3') is True
+    assert is_audio('../../models/facefusion/examples/source.jpg') is False
+    assert is_audio('invalid') is False
+
+
+def test_has_audio() -> None:
+    assert has_audio([ '../../models/facefusion/examples/source.mp3' ]) is True
+    assert has_audio([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.jpg' ]) is True
+    assert has_audio([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.jpg' ]) is False
+    assert has_audio([ 'invalid' ]) is False
+
+
+def test_is_image() -> None:
+    assert is_image('../../models/facefusion/examples/source.jpg') is True
+    assert is_image('../../models/facefusion/examples/target-240p.mp4') is False
+    assert is_image('invalid') is False
+
+
+def test_has_image() -> None:
+    assert has_image([ '../../models/facefusion/examples/source.jpg' ]) is True
+    assert has_image([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) is True
+    assert has_image([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.mp3' ]) is False
+    assert has_image([ 'invalid' ]) is False
+
+
+def test_is_video() -> None:
+    assert is_video('../../models/facefusion/examples/target-240p.mp4') is True
+    assert is_video('../../models/facefusion/examples/source.jpg') is False
+    assert is_video('invalid') is False
+
+
+def test_filter_audio_paths() -> None:
+    assert filter_audio_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) == [ '../../models/facefusion/examples/source.mp3' ]
+    assert filter_audio_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.jpg' ]) == []
+    assert filter_audio_paths([ 'invalid' ]) == []
+
+
+def test_filter_image_paths() -> None:
+    assert filter_image_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) == [ '../../models/facefusion/examples/source.jpg' ]
+    assert filter_image_paths([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.mp3' ]) == []
+    assert filter_image_paths([ 'invalid' ]) == []
+
+
+def test_list_directory() -> None:
+    assert list_directory('../../models/facefusion/examples')
+    assert list_directory('../../models/facefusion/examples/source.jpg') is None
+    assert list_directory('invalid') is None
+
+
+def test_sanitize_path_for_windows() -> None:
+    if is_windows():
+        assert sanitize_path_for_windows('../../models/facefusion/examples/söurce.jpg') == 'ASSETS~1/examples/SURCE~1.JPG'
+        assert sanitize_path_for_windows('invalid') is None
diff --git a/tests/test_memory.py b/tests/test_memory.py
new file mode 100644
index 0000000..36a3619
--- /dev/null
+++ b/tests/test_memory.py
@@ -0,0 +1,8 @@
+from deepfuze.common_helper import is_linux, is_macos
+from deepfuze.memory import limit_system_memory
+
+
+def test_limit_system_memory() -> None:
+    assert limit_system_memory(4) is True
+    if is_linux() or is_macos():
+        assert limit_system_memory(1024) is False
diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py
new file mode 100644
index 0000000..8c7aa5c
--- /dev/null
+++ b/tests/test_normalizer.py
@@ -0,0 +1,30 @@
+from deepfuze.common_helper import is_linux, is_macos
+from deepfuze.normalizer import normalize_output_path, normalize_padding, normalize_fps
+
+
+def test_normalize_output_path() -> None:
+    if is_linux() or is_macos():
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/target-240p.mp4') == '../../models/facefusion/examples/target-240p.mp4'
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples').startswith('../../models/facefusion/examples/target-240p')
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples').endswith('.mp4')
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/output.mp4') == '../../models/facefusion/examples/output.mp4'
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/invalid') is None
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/invalid/output.mp4') is None
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', 'invalid') is None
+        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', None) is None
+        assert normalize_output_path(None, '../../models/facefusion/examples/output.mp4') is None
+
+
+def test_normalize_padding() -> None:
+    assert normalize_padding([ 0, 0, 0, 0 ]) == (0, 0, 0, 0)
+    assert normalize_padding([ 1 ]) == (1, 1, 1, 1)
+    assert normalize_padding([ 1, 2 ]) == (1, 2, 1, 2)
+    assert normalize_padding([ 1, 2, 3 ]) == (1, 2, 3, 2)
+    assert normalize_padding(None) is None
+
+
+def test_normalize_fps() -> None:
+    assert normalize_fps(0.0) == 1.0
+    assert normalize_fps(25.0) == 25.0
+    assert normalize_fps(61.0) == 60.0
+    assert normalize_fps(None) is None
diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py
new file mode 100644
index 0000000..b5f783d
--- /dev/null
+++ b/tests/test_process_manager.py
@@ -0,0 +1,22 @@
+from deepfuze.process_manager import set_process_state, is_processing, is_stopping, is_pending, start, stop, end
+
+
+def test_start() -> None:
+    set_process_state('pending')
+    start()
+
+    assert is_processing()
+
+
+def test_stop() -> None:
+    set_process_state('processing')
+    stop()
+
+    assert is_stopping()
+
+
+def test_end() -> None:
+    set_process_state('processing')
+    end()
+
+    assert is_pending()
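# [editor's sketch] normalize_padding above behaves like CSS shorthand
# expansion (top, right, bottom, left). A sketch consistent with the
# assertions, inferred from the tests rather than taken from this commit:
def normalize_padding(padding):
    if padding and len(padding) == 1:
        return (padding[0],) * 4
    if padding and len(padding) == 2:
        return (padding[0], padding[1], padding[0], padding[1])
    if padding and len(padding) == 3:
        return (padding[0], padding[1], padding[2], padding[1])
    if padding and len(padding) == 4:
        return tuple(padding)
    return None

assert normalize_padding([ 1, 2, 3 ]) == (1, 2, 3, 2)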
diff --git a/tests/test_vision.py b/tests/test_vision.py
new file mode 100644
index 0000000..1faf6b5
--- /dev/null
+++ b/tests/test_vision.py
@@ -0,0 +1,109 @@
+import subprocess
+import pytest
+
+from deepfuze.download import conditional_download
+from deepfuze.vision import detect_image_resolution, restrict_image_resolution, create_image_resolutions, get_video_frame, count_video_frame_total, detect_video_fps, restrict_video_fps, detect_video_resolution, restrict_video_resolution, create_video_resolutions, normalize_resolution, pack_resolution, unpack_resolution
+
+
+@pytest.fixture(scope = 'module', autouse = True)
+def before_all() -> None:
+    conditional_download('../../models/facefusion/examples',
+    [
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4',
+        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4'
+    ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-1080p.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '-vf', 'transpose=0', '../../models/facefusion/examples/target-240p-90deg.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vframes', '1', '-vf', 'transpose=0', '../../models/facefusion/examples/target-1080p-90deg.jpg' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=25', '../../models/facefusion/examples/target-240p-25fps.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=30', '../../models/facefusion/examples/target-240p-30fps.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=60', '../../models/facefusion/examples/target-240p-60fps.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'transpose=0', '../../models/facefusion/examples/target-240p-90deg.mp4' ])
+    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vf', 'transpose=0', '../../models/facefusion/examples/target-1080p-90deg.mp4' ])
+
+
+def test_detect_image_resolution() -> None:
+    assert detect_image_resolution('../../models/facefusion/examples/target-240p.jpg') == (426, 226)
+    assert detect_image_resolution('../../models/facefusion/examples/target-240p-90deg.jpg') == (226, 426)
+    assert detect_image_resolution('../../models/facefusion/examples/target-1080p.jpg') == (2048, 1080)
+    assert detect_image_resolution('../../models/facefusion/examples/target-1080p-90deg.jpg') == (1080, 2048)
+    assert detect_image_resolution('invalid') is None
+
+
+def test_restrict_image_resolution() -> None:
+    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (426, 226)) == (426, 226)
+    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (2048, 1080)) == (2048, 1080)
+    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (4096, 2160)) == (2048, 1080)
+
+
+def test_create_image_resolutions() -> None:
+    assert create_image_resolutions((426, 226)) == [ '106x56', '212x112', '320x170', '426x226', '640x340', '852x452', '1064x564', '1278x678', '1492x792', '1704x904' ]
+    assert create_image_resolutions((226, 426)) == [ '56x106', '112x212', '170x320', '226x426', '340x640', '452x852', '564x1064', '678x1278', '792x1492', '904x1704' ]
+    assert create_image_resolutions((2048, 1080)) == [ '512x270', '1024x540', '1536x810', '2048x1080', '3072x1620', '4096x2160', '5120x2700', '6144x3240', '7168x3780', '8192x4320' ]
+    assert create_image_resolutions((1080, 2048)) == [ '270x512', '540x1024', '810x1536', '1080x2048', '1620x3072', '2160x4096', '2700x5120', '3240x6144', '3780x7168', '4320x8192' ]
+    assert create_image_resolutions(None) == []
+
+
+def test_get_video_frame() -> None:
+    assert get_video_frame('../../models/facefusion/examples/target-240p-25fps.mp4') is not None
+    assert get_video_frame('invalid') is None
+
+
+def test_count_video_frame_total() -> None:
+    assert count_video_frame_total('../../models/facefusion/examples/target-240p-25fps.mp4') == 270
+    assert count_video_frame_total('../../models/facefusion/examples/target-240p-30fps.mp4') == 324
+    assert count_video_frame_total('../../models/facefusion/examples/target-240p-60fps.mp4') == 648
+    assert count_video_frame_total('invalid') == 0
+
+
+def test_detect_video_fps() -> None:
+    assert detect_video_fps('../../models/facefusion/examples/target-240p-25fps.mp4') == 25.0
+    assert detect_video_fps('../../models/facefusion/examples/target-240p-30fps.mp4') == 30.0
+    assert detect_video_fps('../../models/facefusion/examples/target-240p-60fps.mp4') == 60.0
+    assert detect_video_fps('invalid') is None
+
+
+def test_restrict_video_fps() -> None:
+    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 20.0) == 20.0
+    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 25.0) == 25.0
+    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 60.0) == 25.0
+
+
+def test_detect_video_resolution() -> None:
+    assert detect_video_resolution('../../models/facefusion/examples/target-240p.mp4') == (426, 226)
+    assert detect_video_resolution('../../models/facefusion/examples/target-240p-90deg.mp4') == (226, 426)
+    assert detect_video_resolution('../../models/facefusion/examples/target-1080p.mp4') == (2048, 1080)
+    assert detect_video_resolution('../../models/facefusion/examples/target-1080p-90deg.mp4') == (1080, 2048)
+    assert detect_video_resolution('invalid') is None
+
+
+def test_restrict_video_resolution() -> None:
+    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (426, 226)) == (426, 226)
+    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (2048, 1080)) == (2048, 1080)
+    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (4096, 2160)) == (2048, 1080)
+
+
+def test_create_video_resolutions() -> None:
+    assert create_video_resolutions((426, 226)) == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160', '8144x4320' ]
+    assert create_video_resolutions((226, 426)) == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072', '4320x8144' ]
+    assert create_video_resolutions((2048, 1080)) == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160', '8192x4320' ]
+    assert create_video_resolutions((1080, 2048)) == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096', '4320x8192' ]
+    assert create_video_resolutions(None) == []
+
+
+def test_normalize_resolution() -> None:
+    assert normalize_resolution((2.5, 2.5)) == (2, 2)
+    assert normalize_resolution((3.0, 3.0)) == (4, 4)
+    assert normalize_resolution((6.5, 6.5)) == (6, 6)
+
+
+def test_pack_resolution() -> None:
+    assert pack_resolution((1, 1)) == '0x0'
+    assert pack_resolution((2, 2)) == '2x2'
+
+
+def test_unpack_resolution() -> None:
+    assert unpack_resolution('0x0') == (0, 0)
+    assert unpack_resolution('2x2') == (2, 2)
diff --git a/tests/test_wording.py b/tests/test_wording.py
new file mode 100644
index 0000000..7813345
--- /dev/null
+++ b/tests/test_wording.py
@@ -0,0 +1,7 @@
+from deepfuze import wording
+
+
+def test_get() -> None:
+    assert wording.get('python_not_supported')
+    assert wording.get('help.source')
+    assert wording.get('invalid') is None
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..5d7c284
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,236 @@
+import hashlib
+import logging
+import os
+from typing import Iterable
+import shutil
+import subprocess
+import re
+
+import server
+
+logger = logging.getLogger(__name__)  # the original referenced an undefined `logger`; a stdlib logger is assumed here
+
+BIGMIN = -(2**53-1)
+BIGMAX = (2**53-1)
+
+DIMMAX = 8192
+
+def ffmpeg_suitability(path):
+    try:
+        version = subprocess.run([path, "-version"], check=True,
+                                 capture_output=True).stdout.decode("utf-8")
+    except:
+        return 0
+    score = 0
+    #rough layout of the importance of various features
+    simple_criterion = [("libvpx", 20),("264",10), ("265",3),
+                        ("svtav1",5),("libopus", 1)]
+    for criterion in simple_criterion:
+        if version.find(criterion[0]) >= 0:
+            score += criterion[1]
+    #obtain rough compile year from copyright information
+    copyright_index = version.find('2000-2')
+    if copyright_index >= 0:
+        copyright_year = version[copyright_index+6:copyright_index+9]
+        if copyright_year.isnumeric():
+            score += int(copyright_year)
+    return score
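# [editor's sketch] How ffmpeg_suitability ranks candidate binaries; the paths
# here are hypothetical. A build advertising libvpx, x264/x265, svt-av1 and
# libopus with a recent copyright year scores highest; unrunnable paths score 0.
candidates = [ "/usr/bin/ffmpeg", "/opt/homebrew/bin/ffmpeg" ]
best = max(candidates, key = ffmpeg_suitability)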
+
+if "VHS_FORCE_FFMPEG_PATH" in os.environ:
+    ffmpeg_path = os.environ.get("VHS_FORCE_FFMPEG_PATH")
+else:
+    ffmpeg_paths = []
+    try:
+        from imageio_ffmpeg import get_ffmpeg_exe
+        imageio_ffmpeg_path = get_ffmpeg_exe()
+        ffmpeg_paths.append(imageio_ffmpeg_path)
+    except:
+        if "VHS_USE_IMAGEIO_FFMPEG" in os.environ:
+            raise
+        logger.warn("Failed to import imageio_ffmpeg")
+    if "VHS_USE_IMAGEIO_FFMPEG" in os.environ:
+        ffmpeg_path = imageio_ffmpeg_path
+    else:
+        system_ffmpeg = shutil.which("ffmpeg")
+        if system_ffmpeg is not None:
+            ffmpeg_paths.append(system_ffmpeg)
+        if os.path.isfile("ffmpeg"):
+            ffmpeg_paths.append(os.path.abspath("ffmpeg"))
+        if os.path.isfile("ffmpeg.exe"):
+            ffmpeg_paths.append(os.path.abspath("ffmpeg.exe"))
+        if len(ffmpeg_paths) == 0:
+            logger.error("No valid ffmpeg found.")
+            ffmpeg_path = None
+        elif len(ffmpeg_paths) == 1:
+            #Evaluation of suitability isn't required, can take sole option
+            #to reduce startup time
+            ffmpeg_path = ffmpeg_paths[0]
+        else:
+            ffmpeg_path = max(ffmpeg_paths, key=ffmpeg_suitability)
+gifski_path = os.environ.get("VHS_GIFSKI", None)
+if gifski_path is None:
+    gifski_path = os.environ.get("JOV_GIFSKI", None)
+    if gifski_path is None:
+        gifski_path = shutil.which("gifski")
+
+def is_safe_path(path):
+    if "VHS_STRICT_PATHS" not in os.environ:
+        return True
+    basedir = os.path.abspath('.')
+    try:
+        common_path = os.path.commonpath([basedir, path])
+    except:
+        #Different drive on windows
+        return False
+    return common_path == basedir
+
+def get_sorted_dir_files_from_directory(directory: str, skip_first_images: int=0, select_every_nth: int=1, extensions: Iterable=None):
+    directory = strip_path(directory)
+    dir_files = os.listdir(directory)
+    dir_files = sorted(dir_files)
+    dir_files = [os.path.join(directory, x) for x in dir_files]
+    dir_files = list(filter(lambda filepath: os.path.isfile(filepath), dir_files))
+    # filter by extension, if needed
+    if extensions is not None:
+        extensions = list(extensions)
+        new_dir_files = []
+        for filepath in dir_files:
+            ext = "." + filepath.split(".")[-1]
+            if ext.lower() in extensions:
+                new_dir_files.append(filepath)
+        dir_files = new_dir_files
+    # start at skip_first_images
+    dir_files = dir_files[skip_first_images:]
+    dir_files = dir_files[0::select_every_nth]
+    return dir_files
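# [editor's note] Example call: take every 2nd image from a folder after
# skipping the first 10 frames (the folder path is hypothetical):
frames = get_sorted_dir_files_from_directory('input/frames', skip_first_images=10, select_every_nth=2, extensions=['.png', '.jpg'])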
+
+
+# modified from https://stackoverflow.com/questions/22058048/hashing-a-file-in-python
+def calculate_file_hash(filename: str, hash_every_n: int = 1):
+    #Larger video files were taking >.5 seconds to hash even when cached,
+    #so instead the modified time from the filesystem is used as a hash
+    h = hashlib.sha256()
+    h.update(filename.encode())
+    h.update(str(os.path.getmtime(filename)).encode())
+    return h.hexdigest()
+
+prompt_queue = server.PromptServer.instance.prompt_queue
+def requeue_workflow_unchecked():
+    """Requeues the current workflow without checking for multiple requeues"""
+    currently_running = prompt_queue.currently_running
+    (_, _, prompt, extra_data, outputs_to_execute) = next(iter(currently_running.values()))
+
+    #Ensure batch_managers are marked stale
+    prompt = prompt.copy()
+    for uid in prompt:
+        if prompt[uid]['class_type'] == 'VHS_BatchManager':
+            prompt[uid]['inputs']['requeue'] = prompt[uid]['inputs'].get('requeue',0)+1
+
+    #execution.py has guards for concurrency, but server doesn't.
+    #TODO: Check that this won't be an issue
+    number = -server.PromptServer.instance.number
+    server.PromptServer.instance.number += 1
+    prompt_id = str(server.uuid.uuid4())
+    prompt_queue.put((number, prompt_id, prompt, extra_data, outputs_to_execute))
+
+requeue_guard = [None, 0, 0, {}]
+def requeue_workflow(requeue_required=(-1,True)):
+    assert(len(prompt_queue.currently_running) == 1)
+    global requeue_guard
+    (run_number, _, prompt, _, _) = next(iter(prompt_queue.currently_running.values()))
+    if requeue_guard[0] != run_number:
+        #Calculate a count of how many outputs are managed by a batch manager
+        managed_outputs=0
+        for bm_uid in prompt:
+            if prompt[bm_uid]['class_type'] == 'VHS_BatchManager':
+                for output_uid in prompt:
+                    if prompt[output_uid]['class_type'] in ["VHS_VideoCombine"]:
+                        for inp in prompt[output_uid]['inputs'].values():
+                            if inp == [bm_uid, 0]:
+                                managed_outputs+=1
+        requeue_guard = [run_number, 0, managed_outputs, {}]
+    requeue_guard[1] = requeue_guard[1]+1
+    requeue_guard[3][requeue_required[0]] = requeue_required[1]
+    if requeue_guard[1] == requeue_guard[2] and max(requeue_guard[3].values()):
+        requeue_workflow_unchecked()
+
+def get_audio(file, start_time=0, duration=0):
+    args = [ffmpeg_path, "-v", "error", "-i", file]
+    if start_time > 0:
+        args += ["-ss", str(start_time)]
+    if duration > 0:
+        args += ["-t", str(duration)]
+    try:
+        res = subprocess.run(args + ["-f", "wav", "-"],
+                             stdout=subprocess.PIPE, check=True).stdout
+    except subprocess.CalledProcessError as e:
+        logger.warning(f"Failed to extract audio from: {file}")
+        return False
+    return res
+
+
+def lazy_eval(func):
+    class Cache:
+        def __init__(self, func):
+            self.res = None
+            self.func = func
+        def get(self):
+            if self.res is None:
+                self.res = self.func()
+            return self.res
+    cache = Cache(func)
+    return lambda : cache.get()
+
+
+def is_url(url):
+    return url.split("://")[0] in ["http", "https"]
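# [editor's note] lazy_eval defers a costly computation until first use and
# then caches the result. A self-contained example:
import time

slow_value = lazy_eval(lambda: time.sleep(1) or "ready")
# nothing has run yet; the first call sleeps, later calls return instantly
assert slow_value() == "ready"
assert slow_value() == "ready"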
+
+def validate_sequence(path):
+    #Check if path is a valid ffmpeg sequence that points to at least one file
+    (path, file) = os.path.split(path)
+    if not os.path.isdir(path):
+        return False
+    match = re.search('%0?\\d+d', file)
+    if not match:
+        return False
+    seq = match.group()
+    if seq == '%d':
+        seq = '\\\\d+'
+    else:
+        seq = '\\\\d{%s}' % seq[1:-1]
+    file_matcher = re.compile(re.sub('%0?\\d+d', seq, file))
+    for file in os.listdir(path):
+        if file_matcher.fullmatch(file):
+            return True
+    return False
+
+def strip_path(path):
+    #This leaves whitespace inside quotes and only a single "
+    #thus ' ""test"' -> '"test'
+    #consider path.strip(string.whitespace+"\"")
+    #or weightier re.fullmatch("[\\s\"]*(.+?)[\\s\"]*", path).group(1)
+    path = path.strip()
+    if path.startswith("\""):
+        path = path[1:]
+    if path.endswith("\""):
+        path = path[:-1]
+    return path
+
+def hash_path(path):
+    if path is None:
+        return "input"
+    if is_url(path):
+        return "url"
+    return calculate_file_hash(strip_path(path))
+
+
+def validate_path(path, allow_none=False, allow_url=True):
+    if path is None:
+        return allow_none
+    if is_url(path):
+        #Probably not feasible to check if url resolves here
+        if not allow_url:
+            return "URLs are unsupported for this path"
+        return is_safe_path(path)
+    if not os.path.isfile(strip_path(path)):
+        return "Invalid file path: {}".format(path)
+    return is_safe_path(path)
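# [editor's note] validate_sequence accepts ffmpeg-style printf patterns. Given
# frames frame_001.png ... frame_120.png in ./frames (a hypothetical layout),
# '%03d' is rewritten to the regex \d{3}, so the pattern matches the zero-padded
# numbering; a pattern with no %d token returns False.
print(validate_sequence('./frames/frame_%03d.png'))  # True if matching files exist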