From 99fa2875a97fcfe17741610e8864678e64f7a4a8 Mon Sep 17 00:00:00 2001 From: Harisreedhar <46858047+harisreedhar@users.noreply.github.com> Date: Thu, 7 May 2026 21:34:57 +0530 Subject: [PATCH] Fix SDP line endings and media description builder (#1085) * SDP \r\n fix + build_media_description * improve tests and remove default arguments * improve tests * Refine RTC types and tests with loopback SDP validation * cleanup --- facefusion/rtc.py | 77 +++++++++++++++++++++----------------- facefusion/rtc_bindings.py | 6 +++ facefusion/rtc_store.py | 4 +- facefusion/types.py | 6 ++- tests/stream_helper.py | 28 ++------------ tests/test_rtc.py | 67 +++++++++++++++++++++++++++++++++ 6 files changed, 125 insertions(+), 63 deletions(-) create mode 100644 tests/test_rtc.py diff --git a/facefusion/rtc.py b/facefusion/rtc.py index b5baee25..08386a15 100644 --- a/facefusion/rtc.py +++ b/facefusion/rtc.py @@ -1,5 +1,4 @@ import ctypes -import os import time from functools import lru_cache from typing import Dict, List, Optional @@ -8,7 +7,7 @@ from facefusion.common_helper import is_linux, is_macos, is_windows from facefusion.download import conditional_download_hashes, conditional_download_sources from facefusion.filesystem import resolve_relative_path from facefusion.rtc_bindings import RTC_CONFIGURATION, RTC_PACKETIZER_INIT, init_ctypes -from facefusion.types import DownloadSet, RtcAudioTrack, RtcPeer, RtcSdpAnswer, RtcSdpOffer, RtcVideoTrack +from facefusion.types import DownloadSet, MediaDirection, PeerConnection, RtcAudioTrack, RtcPeer, RtcVideoTrack, SdpAnswer, SdpOffer def resolve_binary_file() -> Optional[str]: @@ -77,7 +76,7 @@ def create_peer_connection( port_range_begin : int = 0, port_range_end : int = 0, max_packet_size : int = 0, - max_message_size : int = 0) -> int: + max_message_size : int = 0) -> PeerConnection: rtc_library = create_static_rtc_library() rtc_configuration = RTC_CONFIGURATION() @@ -100,25 +99,29 @@ def create_peer_connection( return rtc_library.rtcCreatePeerConnection(ctypes.byref(rtc_configuration)) -def add_audio_track(peer_connection : int, sync_source_id : int = 43, canonical_name : bytes = b'audio', payload_type : int = 111, clock_rate : int = 48000) -> RtcAudioTrack: - rtc_library = create_static_rtc_library() - media_description = os.linesep.join( +def build_media_description(media_type : str, payload_type : int, rtp_codec : str, media_direction : MediaDirection, media_id : int) -> bytes: + return '\r\n'.join( [ - 'm=audio 9 UDP/TLS/RTP/SAVPF 111', - 'a=rtpmap:111 opus/48000/2', - 'a=sendonly', - 'a=mid:1', + 'm=' + media_type + ' 9 UDP/TLS/RTP/SAVPF ' + str(payload_type), + 'a=rtpmap:' + str(payload_type) + ' ' + rtp_codec, + 'a=' + media_direction, + 'a=mid:' + str(media_id), 'a=rtcp-mux', '' ]).encode() + +def add_audio_track(peer_connection : PeerConnection, media_direction : MediaDirection) -> RtcAudioTrack: + rtc_library = create_static_rtc_library() + media_description = build_media_description('audio', 111, 'opus/48000/2', media_direction, 1) + audio_track = rtc_library.rtcAddTrack(peer_connection, media_description) audio_packetizer = RTC_PACKETIZER_INIT() - audio_packetizer.ssrc = sync_source_id - audio_packetizer.cname = canonical_name - audio_packetizer.payloadType = payload_type - audio_packetizer.clockRate = clock_rate + audio_packetizer.ssrc = 43 + audio_packetizer.cname = b'audio' + audio_packetizer.payloadType = 111 + audio_packetizer.clockRate = 48000 rtc_library.rtcSetOpusPacketizer(audio_track, ctypes.byref(audio_packetizer)) rtc_library.rtcChainRtcpSrReporter(audio_track) @@ -126,37 +129,41 @@ def add_audio_track(peer_connection : int, sync_source_id : int = 43, canonical_ return audio_track -def add_video_track(peer_connection : int, sync_source_id : int = 42, canonical_name : bytes = b'video', payload_type : int = 96, clock_rate : int = 90000, max_fragment_size : int = 1200, nack_buffer_size : int = 512) -> RtcVideoTrack: +def add_video_track(peer_connection : PeerConnection, media_direction : MediaDirection) -> RtcVideoTrack: rtc_library = create_static_rtc_library() - media_description = os.linesep.join( - [ - 'm=video 9 UDP/TLS/RTP/SAVPF 96', - 'a=rtpmap:96 VP8/90000', - 'a=sendonly', - 'a=mid:0', - 'a=rtcp-mux', - '' - ]).encode() + media_description = build_media_description('video', 96, 'VP8/90000', media_direction, 0) video_track = rtc_library.rtcAddTrack(peer_connection, media_description) video_packetizer = RTC_PACKETIZER_INIT() - video_packetizer.ssrc = sync_source_id - video_packetizer.cname = canonical_name - video_packetizer.payloadType = payload_type - video_packetizer.clockRate = clock_rate - video_packetizer.maxFragmentSize = max_fragment_size + video_packetizer.ssrc = 42 + video_packetizer.cname = b'video' + video_packetizer.payloadType = 96 + video_packetizer.clockRate = 90000 + video_packetizer.maxFragmentSize = 1200 rtc_library.rtcSetVP8Packetizer(video_track, ctypes.byref(video_packetizer)) rtc_library.rtcChainRtcpSrReporter(video_track) - rtc_library.rtcChainRtcpNackResponder(video_track, nack_buffer_size) + rtc_library.rtcChainRtcpNackResponder(video_track, 512) return video_track -def negotiate_sdp(peer_connection : int, sdp_offer : str) -> Optional[str]: +def create_sdp(peer_connection : PeerConnection) -> Optional[SdpOffer]: rtc_library = create_static_rtc_library() - rtc_library.rtcSetRemoteDescription(peer_connection, sdp_offer.encode('utf-8'), b'offer') + rtc_library.rtcSetLocalDescription(peer_connection, b'offer') + buffer_size = 16384 + buffer_string = ctypes.create_string_buffer(buffer_size) + + if rtc_library.rtcGetLocalDescription(peer_connection, buffer_string, buffer_size) > 0: + return buffer_string.value.decode() + + return None + + +def negotiate_sdp(peer_connection : PeerConnection, sdp_offer : SdpOffer) -> Optional[SdpAnswer]: + rtc_library = create_static_rtc_library() + rtc_library.rtcSetRemoteDescription(peer_connection, sdp_offer.encode(), b'offer') buffer_size = 16384 buffer_string = ctypes.create_string_buffer(buffer_size) wait_limit = time.monotonic() + 5 @@ -169,10 +176,10 @@ def negotiate_sdp(peer_connection : int, sdp_offer : str) -> Optional[str]: return None -def handle_whep_offer(peers : List[RtcPeer], sdp_offer : RtcSdpOffer) -> Optional[RtcSdpAnswer]: +def handle_whep_offer(peers : List[RtcPeer], sdp_offer : SdpOffer) -> Optional[SdpAnswer]: peer_connection = create_peer_connection() - audio_track = add_audio_track(peer_connection) - video_track = add_video_track(peer_connection) + audio_track = add_audio_track(peer_connection, 'sendonly') + video_track = add_video_track(peer_connection, 'sendonly') local_sdp = negotiate_sdp(peer_connection, sdp_offer) if local_sdp: diff --git a/facefusion/rtc_bindings.py b/facefusion/rtc_bindings.py index 44e2045c..81621e51 100644 --- a/facefusion/rtc_bindings.py +++ b/facefusion/rtc_bindings.py @@ -52,6 +52,9 @@ def init_ctypes(rtc_library : ctypes.CDLL) -> ctypes.CDLL: rtc_library.rtcDeletePeerConnection.argtypes = [ ctypes.c_int ] rtc_library.rtcDeletePeerConnection.restype = ctypes.c_int + rtc_library.rtcSetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p ] + rtc_library.rtcSetLocalDescription.restype = ctypes.c_int + rtc_library.rtcSetRemoteDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p ] rtc_library.rtcSetRemoteDescription.restype = ctypes.c_int @@ -79,6 +82,9 @@ def init_ctypes(rtc_library : ctypes.CDLL) -> ctypes.CDLL: rtc_library.rtcGetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p, ctypes.c_int ] rtc_library.rtcGetLocalDescription.restype = ctypes.c_int + rtc_library.rtcSetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p ] + rtc_library.rtcSetLocalDescription.restype = ctypes.c_int + rtc_library.rtcSetOpusPacketizer.argtypes = [ ctypes.c_int, ctypes.POINTER(RTC_PACKETIZER_INIT) ] rtc_library.rtcSetOpusPacketizer.restype = ctypes.c_int diff --git a/facefusion/rtc_store.py b/facefusion/rtc_store.py index 4be9e271..ceb0a1a7 100644 --- a/facefusion/rtc_store.py +++ b/facefusion/rtc_store.py @@ -1,7 +1,7 @@ from typing import List, Optional from facefusion import rtc -from facefusion.types import RtcPeer, RtcSdpAnswer, RtcSdpOffer, RtcStreamStore, SessionId +from facefusion.types import RtcPeer, RtcStreamStore, SdpAnswer, SdpOffer, SessionId RTC_STREAMS : RtcStreamStore = {} @@ -21,7 +21,7 @@ def destroy_rtc_stream(session_id : SessionId) -> None: rtc.delete_peers(peers) -def add_rtc_viewer(session_id : SessionId, sdp_offer : RtcSdpOffer) -> Optional[RtcSdpAnswer]: +def add_rtc_viewer(session_id : SessionId, sdp_offer : SdpOffer) -> Optional[SdpAnswer]: if session_id in RTC_STREAMS: return rtc.handle_whep_offer(RTC_STREAMS.get(session_id), sdp_offer) diff --git a/facefusion/types.py b/facefusion/types.py index 165d1856..f6082ed0 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -281,8 +281,10 @@ RtcPeer = TypedDict('RtcPeer', RtcVideoTrack : TypeAlias = int RtcAudioTrack : TypeAlias = int -RtcSdpOffer : TypeAlias = str -RtcSdpAnswer : TypeAlias = str +PeerConnection : TypeAlias = int +SdpOffer : TypeAlias = str +SdpAnswer : TypeAlias = str +MediaDirection : TypeAlias = Literal['sendonly', 'recvonly', 'sendrecv', 'inactive'] RtcStreamStore : TypeAlias = Dict[str, List[RtcPeer]] ModelOptions : TypeAlias = Dict[str, Any] diff --git a/tests/stream_helper.py b/tests/stream_helper.py index c22c927d..6947bd99 100644 --- a/tests/stream_helper.py +++ b/tests/stream_helper.py @@ -1,5 +1,4 @@ import ctypes -import os import threading import time from typing import Optional @@ -7,33 +6,15 @@ from typing import Optional from starlette.testclient import TestClient from facefusion import rtc -from facefusion.types import RtcSdpOffer +from facefusion.types import SdpOffer -# TODO: reuse media description building from rtc.py -def create_sdp_offer() -> Optional[RtcSdpOffer]: +def create_sdp_offer() -> Optional[SdpOffer]: rtc_library = rtc.create_static_rtc_library() peer_connection = rtc.create_peer_connection(disable_auto_negotiation = True) - media_video = os.linesep.join( - [ - 'm=video 9 UDP/TLS/RTP/SAVPF 96', - 'a=rtpmap:96 VP8/90000', - 'a=recvonly', - 'a=mid:0', - '' - ]).encode() - media_audio = os.linesep.join( - [ - 'm=audio 9 UDP/TLS/RTP/SAVPF 111', - 'a=rtpmap:111 opus/48000/2', - 'a=recvonly', - 'a=mid:1', - '' - ]).encode() - - rtc_library.rtcAddTrack(peer_connection, media_video) - rtc_library.rtcAddTrack(peer_connection, media_audio) + rtc_library.rtcAddTrack(peer_connection, rtc.build_media_description('video', 96, 'VP8/90000', 'recvonly', 0)) + rtc_library.rtcAddTrack(peer_connection, rtc.build_media_description('audio', 111, 'opus/48000/2', 'recvonly', 1)) rtc_library.rtcSetLocalDescription(peer_connection, b'offer') buffer_size = 16384 @@ -44,7 +25,6 @@ def create_sdp_offer() -> Optional[RtcSdpOffer]: if rtc_library.rtcGetLocalDescription(peer_connection, buffer_string, buffer_size) > 0: sdp = buffer_string.value.decode() rtc_library.rtcDeletePeerConnection(peer_connection) - #TODO: use return buffer_string.value.decode() return sdp time.sleep(0.05) diff --git a/tests/test_rtc.py b/tests/test_rtc.py new file mode 100644 index 00000000..c0704795 --- /dev/null +++ b/tests/test_rtc.py @@ -0,0 +1,67 @@ +import pytest + +from facefusion import rtc + + +@pytest.fixture(scope = 'module') +def before_all() -> None: + rtc.pre_check() + + +def test_build_media_description() -> None: + assert rtc.build_media_description('audio', 111, 'opus/48000/2', 'sendonly', 1) == b'm=audio 9 UDP/TLS/RTP/SAVPF 111\r\na=rtpmap:111 opus/48000/2\r\na=sendonly\r\na=mid:1\r\na=rtcp-mux\r\n' + assert rtc.build_media_description('video', 96, 'VP8/90000', 'recvonly', 0) == b'm=video 9 UDP/TLS/RTP/SAVPF 96\r\na=rtpmap:96 VP8/90000\r\na=recvonly\r\na=mid:0\r\na=rtcp-mux\r\n' + + +def test_create_peer_connection() -> None: + peer_connection = rtc.create_peer_connection() + rtc_library = rtc.create_static_rtc_library() + + assert peer_connection > 0 + assert rtc_library.rtcDeletePeerConnection(peer_connection) == 0 + + +def test_add_audio_track() -> None: + rtc_library = rtc.create_static_rtc_library() + + sender_connection = rtc.create_peer_connection() + sender_audio_track = rtc.add_audio_track(sender_connection, 'sendonly') + sdp_offer = rtc.create_sdp(sender_connection) + + receiver_connection = rtc.create_peer_connection() + receiver_audio_track = rtc.add_audio_track(receiver_connection, 'recvonly') + sdp_answer = rtc.negotiate_sdp(receiver_connection, sdp_offer) + + assert sender_audio_track > 0 + assert receiver_audio_track > 0 + + assert 'm=audio' in sdp_offer + assert 'm=audio' in sdp_answer + assert 'opus/48000/2' in sdp_offer + assert 'opus/48000/2' in sdp_answer + + assert rtc_library.rtcDeletePeerConnection(sender_connection) == 0 + assert rtc_library.rtcDeletePeerConnection(receiver_connection) == 0 + + +def test_add_video_track() -> None: + rtc_library = rtc.create_static_rtc_library() + + sender_connection = rtc.create_peer_connection() + sender_video_track = rtc.add_video_track(sender_connection, 'sendonly') + sdp_offer = rtc.create_sdp(sender_connection) + + receiver_connection = rtc.create_peer_connection() + receiver_video_track = rtc.add_video_track(receiver_connection, 'recvonly') + sdp_answer = rtc.negotiate_sdp(receiver_connection, sdp_offer) + + assert sender_video_track > 0 + assert receiver_video_track > 0 + + assert 'm=video' in sdp_offer + assert 'm=video' in sdp_answer + assert 'VP8/90000' in sdp_offer + assert 'VP8/90000' in sdp_answer + + assert rtc_library.rtcDeletePeerConnection(sender_connection) == 0 + assert rtc_library.rtcDeletePeerConnection(receiver_connection) == 0