def batch_extract_voice(audio : Audio, chunk_size : int, overlap_size : float) -> Audio:
	"""
	Run extract_voice over overlapping chunks of the audio and average the overlaps.

	The audio is walked along its first axis in windows of chunk_size rows. The
	result of extract_voice for every window is accumulated and each row is
	finally divided by the number of windows that covered it.

	:param audio: input audio, chunked along axis 0
	:param chunk_size: number of rows handed to extract_voice per chunk
	:param overlap_size: fraction of chunk_size shared by consecutive chunks,
		the step between chunk starts is chunk_size * (1 - overlap_size)
	:return: float32 array of shape (audio.shape[0], 2)

	NOTE(review): assumes extract_voice returns an array that can be added to a
	(rows, 2) slice - confirm against its definition.
	"""
	# A zero step makes range() raise ValueError and a negative step skips the loop
	# entirely (leaving 0 / 0), so always advance by at least one row.
	step_size = max(1, int(chunk_size * (1 - overlap_size)))
	# Allocate as float32 directly instead of creating float64 and converting.
	audio_total = numpy.zeros((audio.shape[0], 2), dtype = numpy.float32)
	chunk_total = numpy.zeros((audio.shape[0], 2), dtype = numpy.float32)

	for start in range(0, audio.shape[0], step_size):
		# The last chunk is truncated at the end of the audio.
		end = min(start + chunk_size, audio.shape[0])
		audio_total[start:end, ...] += extract_voice(audio[start:end, ...])
		chunk_total[start:end, ...] += 1
	# Average the rows that were processed by more than one chunk.
	audio = audio_total / chunk_total
	return audio