From bc408a91027e0176ca7edada64513c19405236ed Mon Sep 17 00:00:00 2001
From: Harisreedhar <46858047+harisreedhar@users.noreply.github.com>
Date: Tue, 30 Jan 2024 15:11:36 +0530
Subject: [PATCH] include audio file functions

---
 facefusion/audio.py  | 22 +++++++++++++---------
 tests/test_audio.py  | 16 ++++++++++------
 tests/test_ffmpeg.py | 11 ++++-------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/facefusion/audio.py b/facefusion/audio.py
index d15a2b9d..cd4a08b1 100644
--- a/facefusion/audio.py
+++ b/facefusion/audio.py
@@ -3,12 +3,13 @@ from functools import lru_cache
 import numpy
 import scipy
 
+from facefusion.filesystem import is_audio
 from facefusion.ffmpeg import read_audio_buffer
 from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame
 
 
 def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]:
-	if audio_path:
+	if is_audio(audio_path):
 		audio_frames = read_static_audio(audio_path, fps)
 		if frame_number < len(audio_frames):
 			return audio_frames[frame_number]
@@ -16,14 +17,16 @@ def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Opti
 
 
 @lru_cache(maxsize = None)
-def read_static_audio(audio_path : str, fps : Fps) -> List[AudioFrame]:
-	audio_buffer = read_audio_buffer(audio_path, 16000, 2)
-	audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
-	audio = normalize_audio(audio)
-	audio = filter_audio(audio, -0.97)
-	spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
-	audio_frames = extract_audio_frames(spectrogram, 80, 16, fps)
-	return audio_frames
+def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]:
+	if is_audio(audio_path):
+		audio_buffer = read_audio_buffer(audio_path, 16000, 2)
+		audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
+		audio = normalize_audio(audio)
+		audio = filter_audio(audio, -0.97)
+		spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
+		audio_frames = extract_audio_frames(spectrogram, 80, 16, fps)
+		return audio_frames
+	return None
 
 
 def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio:
@@ -48,6 +51,7 @@ def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, An
 
 @lru_cache(maxsize = None)
 def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> numpy.ndarray[Any, Any]:
+	frequency_maximum = min(sample_rate / 2, frequency_maximum)
 	mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1))
 	mel_bins = numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2)
 	indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16)
diff --git a/tests/test_audio.py b/tests/test_audio.py
index fb437aff..0836692c 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -1,7 +1,7 @@
 import subprocess
 
 import pytest
 
-#from facefusion.audio import get_audio_frame
+from facefusion.audio import get_audio_frame, read_static_audio
 from facefusion.download import conditional_download
 
 
@@ -15,8 +15,12 @@ def before_all() -> None:
 
 
 def test_get_audio_frame() -> None:
-	pass
-	# todo: testing
-	#assert get_audio_frame('.assets/examples/source.mp3', 25) is not None
-	# assert get_audio_frame('.assets/examples/source.wav', 25) is not None
-	#assert get_audio_frame('invalid', 25) is None
+	assert get_audio_frame('.assets/examples/source.mp3', 25) is not None
+	assert get_audio_frame('.assets/examples/source.wav', 25) is not None
+	assert get_audio_frame('invalid', 25) is None
+
+
+def test_read_static_audio() -> None:
+	assert len(read_static_audio('.assets/examples/source.mp3', 25)) == 91
+	assert len(read_static_audio('.assets/examples/source.wav', 25)) == 91
+	assert read_static_audio('invalid', 25) is None
diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py
index 6f179357..3f142681 100644
--- a/tests/test_ffmpeg.py
+++ b/tests/test_ffmpeg.py
@@ -5,7 +5,7 @@ import pytest
 
 import facefusion.globals
 from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp
 from facefusion.download import conditional_download
-from facefusion.ffmpeg import extract_frames# #read_audio_buffer, replace_audio
+from facefusion.ffmpeg import extract_frames, read_audio_buffer
 
@@ -103,12 +103,9 @@ def test_extract_frames_with_trim_end() -> None:
 
 
 def test_read_audio_buffer() -> None:
-	# todo: testing
-	pass
-	# todo: if you can use different values, then test not just against None or not None
-	#assert read_audio_buffer('.assets/examples/source.mp3', 1, 1) is not None
-	#assert read_audio_buffer('.assets/examples/source.wav', 1, 1) is not None
-	#assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None
+	assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes)
+	assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes)
+	assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None
 
 
 def test_replace_audio() -> None: