mirror of
https://github.com/facefusion/facefusion.git
synced 2026-06-02 10:51:39 +02:00
try to unify structure of encode_opus_buffer and encode_vpx_buffer (#1107)
This commit is contained in:
@@ -44,22 +44,21 @@ async def receive_vision_frames(websocket : WebSocket) -> AsyncIterator[VisionFr
|
||||
|
||||
# TODO: move to facefusion/vpx_encoder.py, throttle loop to avoid spinning on same frame
|
||||
def run_video_encode_loop(vision_frame_deque : deque[VisionFrame], session_id : SessionId, initial_resolution : Resolution, keyframe_interval : int) -> None:
|
||||
vpx_encoder = create_vpx_encoder(initial_resolution[0], initial_resolution[1], 4500, 8, 16)
|
||||
vpx_encoder = create_vpx_encoder(initial_resolution, 4500, 8, 16)
|
||||
current_resolution = initial_resolution
|
||||
pts = 0
|
||||
|
||||
while vision_frame_deque:
|
||||
vision_frame = vision_frame_deque[-1]
|
||||
output_frame = process_vision_frame(vision_frame)
|
||||
height, width = output_frame.shape[:2]
|
||||
frame_resolution = (width, height)
|
||||
frame_resolution = (output_frame.shape[1], output_frame.shape[0])
|
||||
|
||||
if frame_resolution[0] != current_resolution[0] or frame_resolution[1] != current_resolution[1]:
|
||||
if vpx_encoder:
|
||||
destroy_vpx_encoder(vpx_encoder)
|
||||
|
||||
current_resolution = frame_resolution
|
||||
vpx_encoder = create_vpx_encoder(current_resolution[0], current_resolution[1], 4500, 8, 16)
|
||||
vpx_encoder = create_vpx_encoder(current_resolution, 4500, 8, 16)
|
||||
pts = 0
|
||||
|
||||
if vpx_encoder:
|
||||
@@ -69,7 +68,7 @@ def run_video_encode_loop(vision_frame_deque : deque[VisionFrame], session_id :
|
||||
if pts % keyframe_interval == 0:
|
||||
vpx_flags = 1
|
||||
|
||||
frame_buffer = encode_vpx_buffer(vpx_encoder, yuv_frame.tobytes(), width, height, pts, vpx_flags)
|
||||
frame_buffer = encode_vpx_buffer(vpx_encoder, yuv_frame.tobytes(), frame_resolution, pts, vpx_flags)
|
||||
|
||||
if frame_buffer:
|
||||
rtc_store.send_rtc_video(session_id, frame_buffer)
|
||||
|
||||
@@ -21,10 +21,10 @@ def encode_opus_buffer(opus_encoder : OpusEncoder, pcm_pointer : ctypes.c_void_p
|
||||
|
||||
if opus_library:
|
||||
output_buffer = ctypes.create_string_buffer(4000)
|
||||
encoded_length = opus_library.opus_encode_float(opus_encoder, pcm_pointer, frame_size, output_buffer, 4000)
|
||||
encode_length = opus_library.opus_encode_float(opus_encoder, pcm_pointer, frame_size, output_buffer, 4000)
|
||||
|
||||
if encoded_length > 0:
|
||||
audio_buffer = output_buffer.raw[:encoded_length]
|
||||
if encode_length > 0:
|
||||
audio_buffer = output_buffer.raw[:encode_length]
|
||||
|
||||
return audio_buffer
|
||||
|
||||
|
||||
+26
-20
@@ -3,10 +3,10 @@ import struct
|
||||
from typing import Optional
|
||||
|
||||
from facefusion.libraries import vpx as vpx_module
|
||||
from facefusion.types import BitRate, VpxEncoder
|
||||
from facefusion.types import BitRate, Resolution, VpxEncoder
|
||||
|
||||
|
||||
def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_count : int, cpu_count : int) -> Optional[VpxEncoder]:
|
||||
def create_vpx_encoder(frame_resolution : Resolution, bitrate : BitRate, thread_count : int, cpu_count : int) -> Optional[VpxEncoder]:
|
||||
vpx_library = vpx_module.create_static_library()
|
||||
|
||||
if vpx_library:
|
||||
@@ -17,8 +17,8 @@ def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_coun
|
||||
|
||||
if vpx_library.vpx_codec_enc_config_default(ctypes.byref(vp8_codec), config_buffer, 0) == 0:
|
||||
struct.pack_into('I', config_buffer, 4, thread_count)
|
||||
struct.pack_into('I', config_buffer, 12, width)
|
||||
struct.pack_into('I', config_buffer, 16, height)
|
||||
struct.pack_into('I', config_buffer, 12, frame_resolution[0])
|
||||
struct.pack_into('I', config_buffer, 16, frame_resolution[1])
|
||||
struct.pack_into('I', config_buffer, 28, 1)
|
||||
struct.pack_into('I', config_buffer, 36, 0)
|
||||
struct.pack_into('I', config_buffer, 72, 0)
|
||||
@@ -37,27 +37,33 @@ def create_vpx_encoder(width : int, height : int, bitrate : BitRate, thread_coun
|
||||
return None
|
||||
|
||||
|
||||
# TODO this method needs refinement
|
||||
def encode_vpx_buffer(vpx_encoder : VpxEncoder, yuv_buffer : bytes, width : int, height : int, presentation_timestamp : int, flags : int) -> bytes:
|
||||
def collect_vpx_frame_buffer(vpx_encoder : VpxEncoder) -> bytes:
    """
    Drain the packets currently queued on the encoder and return their payload.

    Repeatedly calls vpx_codec_get_cx_data until it yields a falsy packet and
    concatenates the data of every packet whose leading int field equals 0.

    :param vpx_encoder: encoder handle passed through to vpx_codec_get_cx_data
    :return: the concatenated packet bytes, or b'' when the vpx library is
        unavailable or no matching packet is queued
    """
    vpx_library = vpx_module.create_static_library()
    frame_chunks = []

    # create_vpx_encoder and encode_vpx_buffer both check the library handle
    # before using it; do the same here instead of failing on a missing library
    if vpx_library:
        iterator = ctypes.c_void_p(0)
        packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))

        while packet:
            # NOTE(review): kind 0 is presumably VPX_CODEC_CX_FRAME_PKT and the
            # offsets 8 / 16 presumably match the 64-bit layout of
            # vpx_codec_cx_pkt_t (data pointer, data size) - confirm against libvpx
            if ctypes.c_int.from_address(packet).value == 0:
                buffer_pointer = ctypes.c_void_p.from_address(packet + 8).value
                buffer_size = ctypes.c_size_t.from_address(packet + 16).value
                # copy the bytes out now and join once at the end
                frame_chunks.append(ctypes.string_at(buffer_pointer, buffer_size))

            # advance on every packet kind, otherwise the loop would never end
            packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))

    return b''.join(frame_chunks)
|
||||
|
||||
|
||||
def encode_vpx_buffer(vpx_encoder : VpxEncoder, raw_frame_buffer : bytes, frame_resolution : Resolution, presentation_timestamp : int, flags : int) -> bytes:
|
||||
vpx_library = vpx_module.create_static_library()
|
||||
frame_buffer = b''
|
||||
|
||||
if vpx_library:
|
||||
image_buffer = ctypes.create_string_buffer(512)
|
||||
yuv_string_buffer = ctypes.create_string_buffer(yuv_buffer)
|
||||
output_buffer = ctypes.create_string_buffer(512)
|
||||
encode_string_buffer = ctypes.create_string_buffer(raw_frame_buffer)
|
||||
|
||||
if vpx_library.vpx_img_wrap(image_buffer, 0x102, width, height, 1, yuv_string_buffer):
|
||||
if vpx_library.vpx_codec_encode(vpx_encoder, image_buffer, presentation_timestamp, 1, flags, 1) == 0:
|
||||
iterator = ctypes.c_void_p(0)
|
||||
packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))
|
||||
|
||||
while packet:
|
||||
if ctypes.c_int.from_address(packet).value == 0:
|
||||
buffer_pointer = ctypes.c_void_p.from_address(packet + 8).value
|
||||
buffer_size = ctypes.c_size_t.from_address(packet + 16).value
|
||||
frame_buffer += ctypes.string_at(buffer_pointer, buffer_size)
|
||||
|
||||
packet = vpx_library.vpx_codec_get_cx_data(vpx_encoder, ctypes.byref(iterator))
|
||||
if vpx_library.vpx_img_wrap(output_buffer, 0x102, frame_resolution[0], frame_resolution[1], 1, encode_string_buffer) and vpx_library.vpx_codec_encode(vpx_encoder, output_buffer, presentation_timestamp, 1, flags, 1) == 0:
|
||||
frame_buffer = collect_vpx_frame_buffer(vpx_encoder)
|
||||
|
||||
return frame_buffer
|
||||
|
||||
|
||||
@@ -23,29 +23,29 @@ def before_all() -> None:
|
||||
|
||||
|
||||
def test_create_vpx_encoder() -> None:
|
||||
assert create_vpx_encoder(320, 240, 1000, 8, 16)
|
||||
assert create_vpx_encoder(0, 0, 0, 0, 0) is None
|
||||
assert create_vpx_encoder((320, 240), 1000, 8, 16)
|
||||
assert create_vpx_encoder((0, 0), 0, 0, 0) is None
|
||||
|
||||
|
||||
def test_encode_vpx_buffer() -> None:
|
||||
vision_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
|
||||
height, width = vision_frame.shape[:2]
|
||||
vpx_encoder = create_vpx_encoder(width, height, 1000, 1, 0)
|
||||
frame_resolution = (vision_frame.shape[1], vision_frame.shape[0])
|
||||
vpx_encoder = create_vpx_encoder(frame_resolution, 1000, 1, 0)
|
||||
|
||||
buffer_valid = cv2.cvtColor(vision_frame, cv2.COLOR_BGR2YUV_I420).tobytes()
|
||||
buffer_invalid = bytes(0)
|
||||
|
||||
if is_linux() or is_windows():
|
||||
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, width, height, 3, 1)) == 'ce133a1f'
|
||||
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, frame_resolution, 3, 1)) == 'ce133a1f'
|
||||
|
||||
if is_macos():
|
||||
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, width, height, 3, 1)) == '21c36925'
|
||||
assert create_hash(encode_vpx_buffer(vpx_encoder, buffer_valid, frame_resolution, 3, 1)) == '21c36925'
|
||||
|
||||
assert encode_vpx_buffer(vpx_encoder, buffer_invalid, width, height, 0, 0) == b''
|
||||
assert encode_vpx_buffer(vpx_encoder, buffer_invalid, frame_resolution, 0, 0) == b''
|
||||
|
||||
|
||||
def test_destroy_vpx_encoder() -> None:
|
||||
vpx_encoder = create_vpx_encoder(320, 240, 1000, 8, 16)
|
||||
vpx_encoder = create_vpx_encoder((320, 240), 1000, 8, 16)
|
||||
|
||||
with patch.object(vpx_module.create_static_library(), 'vpx_codec_destroy') as mock:
|
||||
destroy_vpx_encoder(vpx_encoder)
|
||||
|
||||
Reference in New Issue
Block a user