Cosmetic changes

This commit is contained in:
henryruhs
2024-04-02 12:56:40 +02:00
parent 79d7e2acea
commit bc57669da0
2 changed files with 5 additions and 3 deletions
+3 -1
View File
@@ -83,6 +83,7 @@ def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size
mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1))
mel_bins = numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2)
indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16)
for index in range(filter_total):
mel_filter[index, indices[index]: indices[index + 1]] = scipy.signal.windows.triang(indices[index + 1] - indices[index])
return mel_filter
@@ -96,9 +97,10 @@ def create_spectrogram(audio : Audio, sample_rate : int, filter_total : int, fil
def extract_audio_frames(spectrogram : Spectrogram, filter_total : int, audio_frame_step : int, fps : Fps) -> List[AudioFrame]:
    """
    Cut a spectrogram into fixed-width windows, one per sampled position.

    Positions are sampled along axis 1 of ``spectrogram`` every
    ``filter_total / fps`` columns (truncated to integers). Each position that
    is at least ``audio_frame_step`` yields the slice of ``audio_frame_step``
    columns ending right before it.

    :param spectrogram: 2-D array; windows are taken along its second axis.
    :param filter_total: numerator of the sampling stride (``filter_total / fps``).
    :param audio_frame_step: width of every returned window, in columns.
    :param fps: denominator of the sampling stride.
    :return: list of ``spectrogram[:, index - audio_frame_step : index]`` slices,
        empty when no sampled position reaches ``audio_frame_step``.
    """
    # 64-bit indices: an int16 cast overflows once the spectrogram has more
    # than 32767 columns, which silently dropped or corrupted later frames.
    indices = numpy.arange(0, spectrogram.shape[1], filter_total / fps).astype(numpy.int64)
    # Skip positions that cannot provide a full window of preceding columns.
    indices = indices[indices >= audio_frame_step]
    # After the filter above, index - audio_frame_step is never negative,
    # so no clamping of the start offset is required.
    return [ spectrogram[:, index - audio_frame_step : index] for index in indices ]
+2 -2
View File
@@ -100,8 +100,8 @@ def prepare_audio_chunk(audio_chunk : AudioChunk, chunk_size : int, trim_size :
pad_size = step_size - audio_chunk.shape[1] % step_size
audio_chunk_size = audio_chunk.shape[1] + pad_size
audio_chunk = audio_chunk.astype(numpy.float32) / numpy.iinfo(numpy.int16).max
audio_chunk = numpy.pad(audio_chunk, ((0, 0), (trim_size, trim_size + pad_size)), mode='constant', constant_values = 0)
audio_chunk = numpy.concatenate([audio_chunk[:,i:i + chunk_size] for i in range(0, audio_chunk_size, step_size)], axis = 0)
audio_chunk = numpy.pad(audio_chunk, ((0, 0), (trim_size, trim_size + pad_size)), mode = 'constant', constant_values = 0)
audio_chunk = numpy.concatenate([ audio_chunk[:,i:i + chunk_size ] for i in range(0, audio_chunk_size, step_size)], axis = 0)
audio_chunk = audio_chunk.reshape((-1, chunk_size))
return audio_chunk, pad_size