try to unify structure of encode_opus_buffer and encode_vpx_buffer (#1107)

This commit is contained in:
Harisreedhar
2026-05-13 16:26:30 +05:30
committed by GitHub
parent 9453a042a1
commit bff222a12f
4 changed files with 41 additions and 36 deletions
+4 -5
View File
@@ -44,22 +44,21 @@ async def receive_vision_frames(websocket : WebSocket) -> AsyncIterator[VisionFr
# TODO: move to facefusion/vpx_encoder.py, throttle loop to avoid spinning on same frame
def run_video_encode_loop(vision_frame_deque : deque[VisionFrame], session_id : SessionId, initial_resolution : Resolution, keyframe_interval : int) -> None:
vpx_encoder = create_vpx_encoder(initial_resolution[0], initial_resolution[1], 4500, 8, 16)
vpx_encoder = create_vpx_encoder(initial_resolution, 4500, 8, 16)
current_resolution = initial_resolution
pts = 0
while vision_frame_deque:
vision_frame = vision_frame_deque[-1]
output_frame = process_vision_frame(vision_frame)
height, width = output_frame.shape[:2]
frame_resolution = (width, height)
frame_resolution = (output_frame.shape[1], output_frame.shape[0])
if frame_resolution[0] != current_resolution[0] or frame_resolution[1] != current_resolution[1]:
if vpx_encoder:
destroy_vpx_encoder(vpx_encoder)
current_resolution = frame_resolution
vpx_encoder = create_vpx_encoder(current_resolution[0], current_resolution[1], 4500, 8, 16)
vpx_encoder = create_vpx_encoder(current_resolution, 4500, 8, 16)
pts = 0
if vpx_encoder:
@@ -69,7 +68,7 @@ def run_video_encode_loop(vision_frame_deque : deque[VisionFrame], session_id :
if pts % keyframe_interval == 0:
vpx_flags = 1
frame_buffer = encode_vpx_buffer(vpx_encoder, yuv_frame.tobytes(), width, height, pts, vpx_flags)
frame_buffer = encode_vpx_buffer(vpx_encoder, yuv_frame.tobytes(), frame_resolution, pts, vpx_flags)
if frame_buffer:
rtc_store.send_rtc_video(session_id, frame_buffer)
+3 -3
View File
@@ -21,10 +21,10 @@ def encode_opus_buffer(opus_encoder : OpusEncoder, pcm_pointer : ctypes.c_void_p
if opus_library:
output_buffer = ctypes.create_string_buffer(4000)
encoded_length = opus_library.opus_encode_float(opus_encoder, pcm_pointer, frame_size, output_buffer, 4000)
encode_length = opus_library.opus_encode_float(opus_encoder, pcm_pointer, frame_size, output_buffer, 4000)
if encoded_length > 0:
audio_buffer = output_buffer.raw[:encoded_length]
if encode_length > 0:
audio_buffer = output_buffer.raw[:encode_length]
return audio_buffer
+26 -20
View File
@@ -3,10 +3,10 @@ import struct
from typing import Optional
from facefusion.libraries import vpx as vpx_module
from facefusion.types import BitRate, VpxEncoder
from facefusion.types import BitRate, Resolution, VpxEncoder
def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_count : int, cpu_count : int) -> Optional[VpxEncoder]:
def create_vpx_encoder(frame_resolution : Resolution, bitrate : BitRate, thread_count : int, cpu_count : int) -> Optional[VpxEncoder]:
vpx_library = vpx_module.create_static_library()
if vpx_library:
@@ -17,8 +17,8 @@ def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_coun
if vpx_library.vpx_codec_enc_config_default(ctypes.byref(vp8_codec), config_buffer, 0) == 0:
struct.pack_into('I', config_buffer, 4, thread_count)
struct.pack_into('I', config_buffer, 12, width)
struct.pack_into('I', config_buffer, 16, height)
struct.pack_into('I', config_buffer, 12, frame_resolution[0])
struct.pack_into('I', config_buffer, 16, frame_resolution[1])
struct.pack_into('I', config_buffer, 28, 1)
struct.pack_into('I', config_buffer, 36, 0)
struct.pack_into('I', config_buffer, 72, 0)
@@ -37,27 +37,33 @@ def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_coun
return None
# TODO this method needs refinement
def encode_vpx_buffer(vpx_encoder : VpxEncoder, yuv_buffer : bytes, width : int, height : int, presentation_timestamp : int, flags : int) -> bytes:
def collect_vpx_frame_buffer(vpx_encoder : VpxEncoder) -> bytes:
    """
    Drain the packets currently pending on the encoder and return their payloads as one bytes object.

    Packets are pulled with vpx_codec_get_cx_data until it returns a null
    pointer. Only packets whose leading int field equals 0 contribute to the
    result; every other packet is skipped. Returns b'' when the vpx library is
    unavailable or when no matching packet is pending.
    """
    vpx_library = vpx_module.create_static_library()
    frame_chunks = []

    # Same availability guard the sibling create/encode functions use, so a
    # missing library yields an empty result instead of an AttributeError.
    if vpx_library:
        iterator = ctypes.c_void_p(0)
        packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))

        while packet:
            # NOTE(review): 0 is presumably VPX_CODEC_CX_FRAME_PKT and the fixed
            # offsets 8 / 16 presumably match the 64-bit layout of
            # vpx_codec_cx_pkt_t (buffer pointer, then size) - confirm.
            if ctypes.c_int.from_address(packet).value == 0:
                buffer_pointer = ctypes.c_void_p.from_address(packet + 8).value
                buffer_size = ctypes.c_size_t.from_address(packet + 16).value
                frame_chunks.append(ctypes.string_at(buffer_pointer, buffer_size))
            packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))

    # Join once at the end rather than growing a bytes object with += per packet.
    return b''.join(frame_chunks)
def encode_vpx_buffer(vpx_encoder : VpxEncoder, raw_frame_buffer : bytes, frame_resolution : Resolution, presentation_timestamp : int, flags : int) -> bytes:
vpx_library = vpx_module.create_static_library()
frame_buffer = b''
if vpx_library:
image_buffer = ctypes.create_string_buffer(512)
yuv_string_buffer = ctypes.create_string_buffer(yuv_buffer)
output_buffer = ctypes.create_string_buffer(512)
encode_string_buffer = ctypes.create_string_buffer(raw_frame_buffer)
if vpx_library.vpx_img_wrap(image_buffer, 0x102, width, height, 1, yuv_string_buffer):
if vpx_library.vpx_codec_encode(vpx_encoder, image_buffer, presentation_timestamp, 1, flags, 1) == 0:
iterator = ctypes.c_void_p(0)
packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))
while packet:
if ctypes.c_int.from_address(packet).value == 0:
buffer_pointer = ctypes.c_void_p.from_address(packet + 8).value
buffer_size = ctypes.c_size_t.from_address(packet + 16).value
frame_buffer += ctypes.string_at(buffer_pointer, buffer_size)
packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))
if vpx_library.vpx_img_wrap(output_buffer, 0x102, frame_resolution[0], frame_resolution[1], 1, encode_string_buffer) and vpx_library.vpx_codec_encode(vpx_encoder, output_buffer, presentation_timestamp, 1, flags, 1) == 0:
frame_buffer = collect_vpx_frame_buffer(vpx_encoder)
return frame_buffer
+8 -8
View File
@@ -23,29 +23,29 @@ def before_all() -> None:
def test_create_vpx_encoder() -> None:
assert create_vpx_encoder(320, 240, 1000, 8, 16)
assert create_vpx_encoder(0, 0, 0, 0, 0) is None
assert create_vpx_encoder((320, 240), 1000, 8, 16)
assert create_vpx_encoder((0, 0), 0, 0, 0) is None
def test_encode_vpx_buffer() -> None:
vision_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
height, width = vision_frame.shape[:2]
vpx_encoder = create_vpx_encoder(width, height, 1000, 1, 0)
frame_resolution = (vision_frame.shape[1], vision_frame.shape[0])
vpx_encoder = create_vpx_encoder(frame_resolution, 1000, 1, 0)
buffer_valid = cv2.cvtColor(vision_frame, cv2.COLOR_BGR2YUV_I420).tobytes()
buffer_invalid = bytes(0)
if is_linux() or is_windows():
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, width, height, 3, 1)) == 'ce133a1f'
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, frame_resolution, 3, 1)) == 'ce133a1f'
if is_macos():
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, width, height, 3, 1)) == '21c36925'
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, frame_resolution, 3, 1)) == '21c36925'
assert encode_vpx_buffer(vpx_encoder, buffer_invalid, width, height, 0, 0) == b''
assert encode_vpx_buffer(vpx_encoder, buffer_invalid, frame_resolution, 0, 0) == b''
def test_destroy_vpx_encoder() -> None:
vpx_encoder = create_vpx_encoder(320, 240, 1000, 8, 16)
vpx_encoder = create_vpx_encoder((320, 240), 1000, 8, 16)
with patch.object(vpx_module.create_static_library(), 'vpx_codec_destroy') as mock:
destroy_vpx_encoder(vpx_encoder)