mirror of
https://github.com/facefusion/facefusion.git
synced 2026-04-24 02:16:12 +02:00
include audio file functions
This commit is contained in:
+13
-9
@@ -3,12 +3,13 @@ from functools import lru_cache
|
||||
import numpy
|
||||
import scipy
|
||||
|
||||
from facefusion.filesystem import is_audio
|
||||
from facefusion.ffmpeg import read_audio_buffer
|
||||
from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame
|
||||
|
||||
|
||||
def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]:
|
||||
if audio_path:
|
||||
if is_audio(audio_path):
|
||||
audio_frames = read_static_audio(audio_path, fps)
|
||||
if frame_number < len(audio_frames):
|
||||
return audio_frames[frame_number]
|
||||
@@ -16,14 +17,16 @@ def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Opti
|
||||
|
||||
|
||||
@lru_cache(maxsize = None)
def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]:
    """
    Read an audio file and turn it into a list of audio frames.

    Results are memoized per (audio_path, fps) through lru_cache, so a
    repeated call with the same arguments returns the cached value
    (including a cached None).

    :param audio_path: path handed to is_audio() and read_audio_buffer()
    :param fps: forwarded to extract_audio_frames()
    :return: the extracted audio frames, or None when audio_path is not
        accepted by is_audio() or no audio buffer could be read
    """
    if not is_audio(audio_path):
        return None
    # presumably 16000 is the sample rate and 2 the channel count - the
    # two-column int16 reshape below relies on that layout
    audio_buffer = read_audio_buffer(audio_path, 16000, 2)
    # read_audio_buffer() can hand back None; stop here instead of letting
    # numpy.frombuffer() raise on a non-buffer argument
    if audio_buffer is None:
        return None
    audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
    audio = normalize_audio(audio)
    audio = filter_audio(audio, -0.97)
    spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
    return extract_audio_frames(spectrogram, 80, 16, fps)
|
||||
|
||||
|
||||
def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio:
|
||||
@@ -48,6 +51,7 @@ def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, An
|
||||
|
||||
@lru_cache(maxsize = None)
|
||||
def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> numpy.ndarray[Any, Any]:
|
||||
frequency_maximum = min(sample_rate / 2, frequency_maximum)
|
||||
mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1))
|
||||
mel_bins = numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2)
|
||||
indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16)
|
||||
|
||||
+10
-6
@@ -1,7 +1,7 @@
|
||||
import subprocess
|
||||
import pytest
|
||||
|
||||
#from facefusion.audio import get_audio_frame
|
||||
from facefusion.audio import get_audio_frame, read_static_audio
|
||||
from facefusion.download import conditional_download
|
||||
|
||||
|
||||
@@ -15,8 +15,12 @@ def before_all() -> None:
|
||||
|
||||
|
||||
def test_get_audio_frame() -> None:
    # both example files are expected to provide a frame for the default frame number
    assert get_audio_frame('.assets/examples/source.mp3', 25) is not None
    assert get_audio_frame('.assets/examples/source.wav', 25) is not None
    # a path that is not an audio file yields no frame
    assert get_audio_frame('invalid', 25) is None
|
||||
|
||||
|
||||
def test_read_static_audio() -> None:
    # each example file is expected to produce 91 audio frames at 25 fps
    mp3_audio_frames = read_static_audio('.assets/examples/source.mp3', 25)
    assert len(mp3_audio_frames) == 91
    wav_audio_frames = read_static_audio('.assets/examples/source.wav', 25)
    assert len(wav_audio_frames) == 91
    # a path that is not an audio file produces no result at all
    invalid_audio_frames = read_static_audio('invalid', 25)
    assert invalid_audio_frames is None
|
||||
|
||||
@@ -5,7 +5,7 @@ import pytest
|
||||
import facefusion.globals
|
||||
from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp
|
||||
from facefusion.download import conditional_download
|
||||
from facefusion.ffmpeg import extract_frames# #read_audio_buffer, replace_audio
|
||||
from facefusion.ffmpeg import extract_frames, read_audio_buffer
|
||||
|
||||
|
||||
@pytest.fixture(scope = 'module', autouse = True)
|
||||
@@ -103,12 +103,9 @@ def test_extract_frames_with_trim_end() -> None:
|
||||
|
||||
|
||||
def test_read_audio_buffer() -> None:
    # readable example files come back as raw bytes
    assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes)
    assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes)
    # an unreadable path yields None rather than an empty buffer
    assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None
|
||||
|
||||
|
||||
def test_replace_audio() -> None:
|
||||
|
||||
Reference in New Issue
Block a user