Patch summary (free text before the first "diff --git" header; patch tools skip it).

facefusion/audio.py
  * Moves "from functools import lru_cache" up next to the typing import and
    shifts the blank separator line below the numpy/scipy imports.
  * Normalises spacing to "name : type" in the signatures of filter_audio and
    extract_audio_frames, and pads the first coefficient list passed to
    scipy.signal.lfilter. Only whitespace changes; no tokens differ.

facefusion/ffmpeg.py
  * Relocates read_audio_buffer from below restore_audio to above it.
  * NOTE(review): the relocated read_audio_buffer no longer contains the
    "logger.debug(error.decode().strip(), __name__.upper())" line, so a
    non-zero ffmpeg exit now returns None without reporting stderr, and the
    "error" local is captured but unused. Confirm this is intentional.
  * replace_audio now passes "-hwaccel auto" ahead of the inputs.
    NOTE(review): the video stream is still "-c:v copy"; presumably hardware
    decoding has no effect on a stream-copied track. Confirm the flag is wanted.

NOTE(review): extract_audio_frames (context lines only, not modified here)
casts its indices to numpy.int16; presumably spectrogram widths stay below
32768. Verify against callers.

NOTE(review): indentation inside the hunks is reconstructed as tabs, which is
an assumption. Confirm against the target files before applying.

diff --git a/facefusion/audio.py b/facefusion/audio.py
index c529236e..d15a2b9d 100644
--- a/facefusion/audio.py
+++ b/facefusion/audio.py
@@ -1,8 +1,8 @@
 from typing import Optional, Any, List
-
+from functools import lru_cache
 import numpy
 import scipy
-from functools import lru_cache
+
 from facefusion.ffmpeg import read_audio_buffer
 from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame
 
@@ -33,8 +33,8 @@ def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio:
 	return audio
 
 
-def filter_audio(audio : Audio, filter_coefficient: float) -> Audio:
-	audio = scipy.signal.lfilter([1.0, filter_coefficient], [1.0], audio)
+def filter_audio(audio : Audio, filter_coefficient : float) -> Audio:
+	audio = scipy.signal.lfilter([ 1.0, filter_coefficient ], [1.0], audio)
 	return audio
 
 
@@ -63,7 +63,7 @@ def create_spectrogram(audio : Audio, sample_rate : int, filter_total : int, fil
 	return spectrogram
 
 
-def extract_audio_frames(spectrogram: Spectrogram, filter_total: int, audio_frame_step: int, fps: Fps) -> List[AudioFrame]:
+def extract_audio_frames(spectrogram : Spectrogram, filter_total : int, audio_frame_step : int, fps : Fps) -> List[AudioFrame]:
 	indices = numpy.arange(0, spectrogram.shape[1], filter_total / fps).astype(numpy.int16)
 	indices = indices[indices >= audio_frame_step]
 	audio_frames = []
diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py
index 68cafcbe..aee3239d 100644
--- a/facefusion/ffmpeg.py
+++ b/facefusion/ffmpeg.py
@@ -65,6 +65,15 @@ def merge_video(target_path : str, video_fps : Fps) -> bool:
 	return run_ffmpeg(commands)
 
 
+def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]:
+	commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
+	process = open_ffmpeg(commands)
+	audio_buffer, error = process.communicate()
+	if process.returncode == 0:
+		return audio_buffer
+	return None
+
+
 def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool:
 	trim_frame_start = facefusion.globals.trim_frame_start
 	trim_frame_end = facefusion.globals.trim_frame_end
@@ -80,19 +89,9 @@ def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool
 	return run_ffmpeg(commands)
 
 
-def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]:
-	commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
-	process = open_ffmpeg(commands)
-	audio_buffer, error = process.communicate()
-	if process.returncode == 0:
-		return audio_buffer
-	logger.debug(error.decode().strip(), __name__.upper())
-	return None
-
-
 def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool:
 	temp_output_path = get_temp_output_video_path(target_path)
-	commands = [ '-i', temp_output_path, '-i', audio_path, '-c:v', 'copy', '-af', 'apad', '-shortest', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ]
+	commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', audio_path, '-c:v', 'copy', '-af', 'apad', '-shortest', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ]
 	return run_ffmpeg(commands)
 
 