diff --git a/facefusion/apis/stream_helper.py b/facefusion/apis/stream_helper.py index b3eb982a..ec2281ea 100644 --- a/facefusion/apis/stream_helper.py +++ b/facefusion/apis/stream_helper.py @@ -1,5 +1,4 @@ import asyncio -import ctypes from collections import deque from collections.abc import AsyncIterator from typing import Tuple @@ -144,8 +143,7 @@ async def handle_video_stream(websocket : WebSocket) -> None: while len(audio_temp) >= 1920: audio_chunk = audio_temp[:1920] audio_temp = audio_temp[1920:] - pcm_pointer = audio_chunk.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) - audio_buffer = encode_opus_buffer(opus_encoder, pcm_pointer, 960) + audio_buffer = encode_opus_buffer(opus_encoder, audio_chunk.tobytes(), 960) if audio_buffer: rtc_store.send_rtc_audio(session_id, audio_buffer, audio_timestamp) diff --git a/facefusion/audio_encoder.py b/facefusion/audio_encoder.py index 26521039..c9dfcde3 100644 --- a/facefusion/audio_encoder.py +++ b/facefusion/audio_encoder.py @@ -14,13 +14,14 @@ def create_opus_encoder(sample_rate : int, channel_total : int) -> Optional[Opus return None -def encode_opus_buffer(opus_encoder : OpusEncoder, pcm_pointer : ctypes.c_void_p, frame_size : int) -> bytes: +def encode_opus_buffer(opus_encoder : OpusEncoder, input_buffer : bytes, frame_size : int) -> bytes: opus_library = opus_module.create_static_library() output_buffer = b'' if opus_library: temp_buffer = ctypes.create_string_buffer(4000) - encode_length = opus_library.opus_encode_float(opus_encoder, pcm_pointer, frame_size, temp_buffer, 4000) + encode_buffer = ctypes.cast(ctypes.create_string_buffer(input_buffer), ctypes.POINTER(ctypes.c_float)) + encode_length = opus_library.opus_encode_float(opus_encoder, encode_buffer, frame_size, temp_buffer, 4000) if encode_length: output_buffer = temp_buffer.raw[:encode_length] diff --git a/tests/test_audio_encoder.py b/tests/test_audio_encoder.py index 051c701d..bff9d19d 100644 --- a/tests/test_audio_encoder.py +++ b/tests/test_audio_encoder.py @@ -1,4 +1,3 @@ -import ctypes from unittest.mock import patch import numpy @@ -30,15 +29,15 @@ def test_create_opus_encoder() -> None: def test_encode_opus_buffer() -> None: audio_buffer = read_audio_buffer(get_test_example_file('source.mp3'), 48000, 16, 2) - pcm_samples = numpy.frombuffer(audio_buffer, dtype = numpy.int16).astype(numpy.float32) / 32768.0 - pcm_pointer = pcm_samples[:1920].ctypes.data_as(ctypes.POINTER(ctypes.c_float)) + input_samples = numpy.frombuffer(audio_buffer, dtype = numpy.int16).astype(numpy.float32) / 32768.0 + input_buffer = input_samples[:1920].tobytes() opus_encoder = create_opus_encoder(48000, 2) if is_linux() or is_windows(): - assert create_hash(encode_opus_buffer(opus_encoder, pcm_pointer, 960)) == '8abe71cf' + assert create_hash(encode_opus_buffer(opus_encoder, input_buffer, 960)) == '8abe71cf' if is_macos(): - assert create_hash(encode_opus_buffer(opus_encoder, pcm_pointer, 960)) == '8ecd1108' + assert create_hash(encode_opus_buffer(opus_encoder, input_buffer, 960)) == '8ecd1108' def test_destroy_opus_encoder() -> None: