include audio file functions

This commit is contained in:
Harisreedhar
2024-01-30 15:11:36 +05:30
parent 4b56086fd0
commit bc408a9102
3 changed files with 27 additions and 22 deletions
+13 -9
View File
@@ -3,12 +3,13 @@ from functools import lru_cache
import numpy
import scipy
from facefusion.filesystem import is_audio
from facefusion.ffmpeg import read_audio_buffer
from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame
def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]:
if audio_path:
if is_audio(audio_path):
audio_frames = read_static_audio(audio_path, fps)
if frame_number < len(audio_frames):
return audio_frames[frame_number]
@@ -16,14 +17,16 @@ def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Opti
@lru_cache(maxsize = None)
def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]:
    """
    Read an audio file and convert it into a list of audio frames.

    Results are memoized per (audio_path, fps) pair through lru_cache.

    :param audio_path: path of the audio file to read
    :param fps: frame rate forwarded to extract_audio_frames
    :return: the extracted audio frames, or None when the path is not an
        audio file or no audio buffer could be read
    """
    if not is_audio(audio_path):
        return None
    # NOTE(review): 16000 and 2 are presumably the sample rate and channel count - confirm against read_audio_buffer.
    audio_buffer = read_audio_buffer(audio_path, 16000, 2)
    # read_audio_buffer can return None for an unreadable file; numpy.frombuffer would raise on it.
    if audio_buffer is None:
        return None
    # Interpret the raw bytes as 16-bit integers arranged in rows of two values.
    audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
    audio = normalize_audio(audio)
    audio = filter_audio(audio, -0.97)
    spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
    return extract_audio_frames(spectrogram, 80, 16, fps)
def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio:
@@ -48,6 +51,7 @@ def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, An
@lru_cache(maxsize = None)
def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> numpy.ndarray[Any, Any]:
frequency_maximum = min(sample_rate / 2, frequency_maximum)
mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1))
mel_bins = numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2)
indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16)
+10 -6
View File
@@ -1,7 +1,7 @@
import subprocess
import pytest
#from facefusion.audio import get_audio_frame
from facefusion.audio import get_audio_frame, read_static_audio
from facefusion.download import conditional_download
@@ -15,8 +15,12 @@ def before_all() -> None:
def test_get_audio_frame() -> None:
    # Both example assets are expected to produce a frame for the default frame number.
    assert get_audio_frame('.assets/examples/source.mp3', 25) is not None
    assert get_audio_frame('.assets/examples/source.wav', 25) is not None
    # An invalid path is expected to produce no frame.
    assert get_audio_frame('invalid', 25) is None
def test_read_static_audio() -> None:
    # Both example assets are expected to yield 91 audio frames at 25 fps.
    for audio_path in ('.assets/examples/source.mp3', '.assets/examples/source.wav'):
        audio_frames = read_static_audio(audio_path, 25)
        assert len(audio_frames) == 91
    # An invalid path is expected to produce None instead of a list.
    assert read_static_audio('invalid', 25) is None
+4 -7
View File
@@ -5,7 +5,7 @@ import pytest
import facefusion.globals
from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp
from facefusion.download import conditional_download
from facefusion.ffmpeg import extract_frames# #read_audio_buffer, replace_audio
from facefusion.ffmpeg import extract_frames, read_audio_buffer
@pytest.fixture(scope = 'module', autouse = True)
@@ -103,12 +103,9 @@ def test_extract_frames_with_trim_end() -> None:
def test_read_audio_buffer() -> None:
    # Readable example assets are expected to come back as raw bytes.
    assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes)
    assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes)
    # A path that cannot be read is expected to yield None rather than a buffer.
    assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None
def test_replace_audio() -> None: