Rename audio extractor to voice extractor

This commit is contained in:
henryruhs
2024-04-02 11:23:26 +02:00
parent f73c00acf5
commit 79d7e2acea
4 changed files with 23 additions and 22 deletions
+4 -4
View File
@@ -6,7 +6,7 @@ import scipy
from facefusion.filesystem import is_audio
from facefusion.ffmpeg import read_audio_buffer
from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame
from facefusion.audio_extractor import batch_extract_voice
from facefusion.voice_extractor import batch_extract_voice
def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]:
@@ -19,9 +19,9 @@ def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Opti
def get_voice_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]:
if is_audio(audio_path):
audio_frames = read_static_voice(audio_path, fps)
if frame_number in range(len(audio_frames)):
return audio_frames[frame_number]
voice_frames = read_static_voice(audio_path, fps)
if frame_number in range(len(voice_frames)):
return voice_frames[frame_number]
return None
+3 -3
View File
@@ -15,7 +15,7 @@ import facefusion.choices
import facefusion.globals
from facefusion.face_analyser import get_one_face, get_average_face
from facefusion.face_store import get_reference_faces, append_reference_face
from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording, audio_extractor
from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording, voice_extractor
from facefusion.content_analyser import analyse_image, analyse_video
from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module
from facefusion.common_helper import create_metavar, get_first
@@ -193,7 +193,7 @@ def run(program : ArgumentParser) -> None:
if facefusion.globals.force_download:
force_download()
return
if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check() or not audio_extractor.pre_check():
if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check() or not voice_extractor.pre_check():
return
for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors):
if not frame_processor_module.pre_check():
@@ -271,7 +271,7 @@ def force_download() -> None:
content_analyser.MODELS,
face_analyser.MODELS,
face_masker.MODELS,
audio_extractor.MODELS
voice_extractor.MODELS
]
for frame_processor_module in get_frame_processors_modules(available_frame_processors):
@@ -24,7 +24,7 @@ from facefusion.filesystem import is_image, is_video, filter_audio_paths
from facefusion.common_helper import get_first
from facefusion.vision import read_image, write_image, read_static_image
from facefusion.processors.frame.typings import LipSyncerInputs
from facefusion.audio_extractor import clear_voice_extractor
from facefusion.voice_extractor import clear_voice_extractor
from facefusion.processors.frame import globals as frame_processors_globals
from facefusion.processors.frame import choices as frame_processors_choices
@@ -10,17 +10,18 @@ import facefusion.globals
from facefusion import process_manager
from facefusion.typing import ModelSet, AudioChunk, Audio
from facefusion.execution import apply_execution_provider_options
from facefusion.filesystem import resolve_relative_path
from facefusion.filesystem import resolve_relative_path, is_file
from facefusion.download import conditional_download
VOICE_EXTRACTOR = None
THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
THREAD_LOCK : threading.Lock = threading.Lock()
MODELS : ModelSet =\
{
'voice_extractor':
{
'url': 'https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/Kim_Vocal_2.onnx',
'path': resolve_relative_path('../.assets/models/Kim_Vocal_2.onnx')
'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/voice_extractor.onnx',
'path': resolve_relative_path('../.assets/models/voice_extractor.onnx')
}
}
@@ -44,16 +45,15 @@ def clear_voice_extractor() -> None:
def pre_check() -> bool:
download_directory_path = resolve_relative_path('../.assets/models')
model_url = MODELS.get('voice_extractor').get('url')
model_path = MODELS.get('voice_extractor').get('path')
if not facefusion.globals.skip_download:
download_directory_path = resolve_relative_path('../.assets/models')
model_urls =\
[
MODELS.get('voice_extractor').get('url'),
]
process_manager.check()
conditional_download(download_directory_path, model_urls)
conditional_download(download_directory_path, [ model_url ])
process_manager.end()
return True
return is_file(model_path)
@lru_cache(maxsize = None)
@@ -84,10 +84,11 @@ def extract_voice(audio_chunk : AudioChunk) -> AudioChunk:
chunk_size = hop_length * (extractor_shape[2] - 1)
audio_chunk, pad_size = prepare_audio_chunk(audio_chunk, chunk_size, trim_size)
audio_chunk = decompose_audio_chunk(audio_chunk, filter_size, hop_length, frequency_bins, extractor_shape)
audio_chunk = voice_extractor.run(None,
{
voice_extractor.get_inputs()[0].name: audio_chunk
})[0]
with THREAD_SEMAPHORE:
audio_chunk = voice_extractor.run(None,
{
voice_extractor.get_inputs()[0].name: audio_chunk
})[0]
audio_chunk = compose_audio_chunk(audio_chunk, filter_size, hop_length, frequency_bins, extractor_shape)
audio_chunk = normalize_audio_chunk(audio_chunk, chunk_size, trim_size, pad_size)
return audio_chunk