Fix SDP line endings and media description builder (#1085)

* SDP \r\n fix + build_media_description

* improve tests and remove default arguments

* improve tests

* Refine RTC types and tests with loopback SDP validation

* cleanup
This commit is contained in:
Harisreedhar
2026-05-07 21:34:57 +05:30
committed by henryruhs
parent b9445fd3a4
commit 99fa2875a9
6 changed files with 125 additions and 63 deletions
+42 -35
View File
@@ -1,5 +1,4 @@
import ctypes
import os
import time
from functools import lru_cache
from typing import Dict, List, Optional
@@ -8,7 +7,7 @@ from facefusion.common_helper import is_linux, is_macos, is_windows
from facefusion.download import conditional_download_hashes, conditional_download_sources
from facefusion.filesystem import resolve_relative_path
from facefusion.rtc_bindings import RTC_CONFIGURATION, RTC_PACKETIZER_INIT, init_ctypes
from facefusion.types import DownloadSet, RtcAudioTrack, RtcPeer, RtcSdpAnswer, RtcSdpOffer, RtcVideoTrack
from facefusion.types import DownloadSet, MediaDirection, PeerConnection, RtcAudioTrack, RtcPeer, RtcVideoTrack, SdpAnswer, SdpOffer
def resolve_binary_file() -> Optional[str]:
@@ -77,7 +76,7 @@ def create_peer_connection(
port_range_begin : int = 0,
port_range_end : int = 0,
max_packet_size : int = 0,
max_message_size : int = 0) -> int:
max_message_size : int = 0) -> PeerConnection:
rtc_library = create_static_rtc_library()
rtc_configuration = RTC_CONFIGURATION()
@@ -100,25 +99,29 @@ def create_peer_connection(
return rtc_library.rtcCreatePeerConnection(ctypes.byref(rtc_configuration))
def add_audio_track(peer_connection : int, sync_source_id : int = 43, canonical_name : bytes = b'audio', payload_type : int = 111, clock_rate : int = 48000) -> RtcAudioTrack:
rtc_library = create_static_rtc_library()
media_description = os.linesep.join(
def build_media_description(media_type : str, payload_type : int, rtp_codec : str, media_direction : MediaDirection, media_id : int) -> bytes:
return '\r\n'.join(
[
'm=audio 9 UDP/TLS/RTP/SAVPF 111',
'a=rtpmap:111 opus/48000/2',
'a=sendonly',
'a=mid:1',
'm=' + media_type + ' 9 UDP/TLS/RTP/SAVPF ' + str(payload_type),
'a=rtpmap:' + str(payload_type) + ' ' + rtp_codec,
'a=' + media_direction,
'a=mid:' + str(media_id),
'a=rtcp-mux',
''
]).encode()
def add_audio_track(peer_connection : PeerConnection, media_direction : MediaDirection) -> RtcAudioTrack:
rtc_library = create_static_rtc_library()
media_description = build_media_description('audio', 111, 'opus/48000/2', media_direction, 1)
audio_track = rtc_library.rtcAddTrack(peer_connection, media_description)
audio_packetizer = RTC_PACKETIZER_INIT()
audio_packetizer.ssrc = sync_source_id
audio_packetizer.cname = canonical_name
audio_packetizer.payloadType = payload_type
audio_packetizer.clockRate = clock_rate
audio_packetizer.ssrc = 43
audio_packetizer.cname = b'audio'
audio_packetizer.payloadType = 111
audio_packetizer.clockRate = 48000
rtc_library.rtcSetOpusPacketizer(audio_track, ctypes.byref(audio_packetizer))
rtc_library.rtcChainRtcpSrReporter(audio_track)
@@ -126,37 +129,41 @@ def add_audio_track(peer_connection : int, sync_source_id : int = 43, canonical_
return audio_track
def add_video_track(peer_connection : int, sync_source_id : int = 42, canonical_name : bytes = b'video', payload_type : int = 96, clock_rate : int = 90000, max_fragment_size : int = 1200, nack_buffer_size : int = 512) -> RtcVideoTrack:
def add_video_track(peer_connection : PeerConnection, media_direction : MediaDirection) -> RtcVideoTrack:
rtc_library = create_static_rtc_library()
media_description = os.linesep.join(
[
'm=video 9 UDP/TLS/RTP/SAVPF 96',
'a=rtpmap:96 VP8/90000',
'a=sendonly',
'a=mid:0',
'a=rtcp-mux',
''
]).encode()
media_description = build_media_description('video', 96, 'VP8/90000', media_direction, 0)
video_track = rtc_library.rtcAddTrack(peer_connection, media_description)
video_packetizer = RTC_PACKETIZER_INIT()
video_packetizer.ssrc = sync_source_id
video_packetizer.cname = canonical_name
video_packetizer.payloadType = payload_type
video_packetizer.clockRate = clock_rate
video_packetizer.maxFragmentSize = max_fragment_size
video_packetizer.ssrc = 42
video_packetizer.cname = b'video'
video_packetizer.payloadType = 96
video_packetizer.clockRate = 90000
video_packetizer.maxFragmentSize = 1200
rtc_library.rtcSetVP8Packetizer(video_track, ctypes.byref(video_packetizer))
rtc_library.rtcChainRtcpSrReporter(video_track)
rtc_library.rtcChainRtcpNackResponder(video_track, nack_buffer_size)
rtc_library.rtcChainRtcpNackResponder(video_track, 512)
return video_track
def negotiate_sdp(peer_connection : int, sdp_offer : str) -> Optional[str]:
def create_sdp(peer_connection : PeerConnection) -> Optional[SdpOffer]:
rtc_library = create_static_rtc_library()
rtc_library.rtcSetRemoteDescription(peer_connection, sdp_offer.encode('utf-8'), b'offer')
rtc_library.rtcSetLocalDescription(peer_connection, b'offer')
buffer_size = 16384
buffer_string = ctypes.create_string_buffer(buffer_size)
if rtc_library.rtcGetLocalDescription(peer_connection, buffer_string, buffer_size) > 0:
return buffer_string.value.decode()
return None
def negotiate_sdp(peer_connection : PeerConnection, sdp_offer : SdpOffer) -> Optional[SdpAnswer]:
rtc_library = create_static_rtc_library()
rtc_library.rtcSetRemoteDescription(peer_connection, sdp_offer.encode(), b'offer')
buffer_size = 16384
buffer_string = ctypes.create_string_buffer(buffer_size)
wait_limit = time.monotonic() + 5
@@ -169,10 +176,10 @@ def negotiate_sdp(peer_connection : int, sdp_offer : str) -> Optional[str]:
return None
def handle_whep_offer(peers : List[RtcPeer], sdp_offer : RtcSdpOffer) -> Optional[RtcSdpAnswer]:
def handle_whep_offer(peers : List[RtcPeer], sdp_offer : SdpOffer) -> Optional[SdpAnswer]:
peer_connection = create_peer_connection()
audio_track = add_audio_track(peer_connection)
video_track = add_video_track(peer_connection)
audio_track = add_audio_track(peer_connection, 'sendonly')
video_track = add_video_track(peer_connection, 'sendonly')
local_sdp = negotiate_sdp(peer_connection, sdp_offer)
if local_sdp:
+6
View File
@@ -52,6 +52,9 @@ def init_ctypes(rtc_library : ctypes.CDLL) -> ctypes.CDLL:
rtc_library.rtcDeletePeerConnection.argtypes = [ ctypes.c_int ]
rtc_library.rtcDeletePeerConnection.restype = ctypes.c_int
rtc_library.rtcSetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p ]
rtc_library.rtcSetLocalDescription.restype = ctypes.c_int
rtc_library.rtcSetRemoteDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p ]
rtc_library.rtcSetRemoteDescription.restype = ctypes.c_int
@@ -79,6 +82,9 @@ def init_ctypes(rtc_library : ctypes.CDLL) -> ctypes.CDLL:
rtc_library.rtcGetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p, ctypes.c_int ]
rtc_library.rtcGetLocalDescription.restype = ctypes.c_int
rtc_library.rtcSetLocalDescription.argtypes = [ ctypes.c_int, ctypes.c_char_p ]
rtc_library.rtcSetLocalDescription.restype = ctypes.c_int
rtc_library.rtcSetOpusPacketizer.argtypes = [ ctypes.c_int, ctypes.POINTER(RTC_PACKETIZER_INIT) ]
rtc_library.rtcSetOpusPacketizer.restype = ctypes.c_int
+2 -2
View File
@@ -1,7 +1,7 @@
from typing import List, Optional
from facefusion import rtc
from facefusion.types import RtcPeer, RtcSdpAnswer, RtcSdpOffer, RtcStreamStore, SessionId
from facefusion.types import RtcPeer, RtcStreamStore, SdpAnswer, SdpOffer, SessionId
RTC_STREAMS : RtcStreamStore = {}
@@ -21,7 +21,7 @@ def destroy_rtc_stream(session_id : SessionId) -> None:
rtc.delete_peers(peers)
def add_rtc_viewer(session_id : SessionId, sdp_offer : RtcSdpOffer) -> Optional[RtcSdpAnswer]:
def add_rtc_viewer(session_id : SessionId, sdp_offer : SdpOffer) -> Optional[SdpAnswer]:
if session_id in RTC_STREAMS:
return rtc.handle_whep_offer(RTC_STREAMS.get(session_id), sdp_offer)
+4 -2
View File
@@ -281,8 +281,10 @@ RtcPeer = TypedDict('RtcPeer',
RtcVideoTrack : TypeAlias = int
RtcAudioTrack : TypeAlias = int
RtcSdpOffer : TypeAlias = str
RtcSdpAnswer : TypeAlias = str
PeerConnection : TypeAlias = int
SdpOffer : TypeAlias = str
SdpAnswer : TypeAlias = str
MediaDirection : TypeAlias = Literal['sendonly', 'recvonly', 'sendrecv', 'inactive']
RtcStreamStore : TypeAlias = Dict[str, List[RtcPeer]]
ModelOptions : TypeAlias = Dict[str, Any]
+4 -24
View File
@@ -1,5 +1,4 @@
import ctypes
import os
import threading
import time
from typing import Optional
@@ -7,33 +6,15 @@ from typing import Optional
from starlette.testclient import TestClient
from facefusion import rtc
from facefusion.types import RtcSdpOffer
from facefusion.types import SdpOffer
# TODO: reuse media description building from rtc.py
def create_sdp_offer() -> Optional[RtcSdpOffer]:
def create_sdp_offer() -> Optional[SdpOffer]:
rtc_library = rtc.create_static_rtc_library()
peer_connection = rtc.create_peer_connection(disable_auto_negotiation = True)
media_video = os.linesep.join(
[
'm=video 9 UDP/TLS/RTP/SAVPF 96',
'a=rtpmap:96 VP8/90000',
'a=recvonly',
'a=mid:0',
''
]).encode()
media_audio = os.linesep.join(
[
'm=audio 9 UDP/TLS/RTP/SAVPF 111',
'a=rtpmap:111 opus/48000/2',
'a=recvonly',
'a=mid:1',
''
]).encode()
rtc_library.rtcAddTrack(peer_connection, media_video)
rtc_library.rtcAddTrack(peer_connection, media_audio)
rtc_library.rtcAddTrack(peer_connection, rtc.build_media_description('video', 96, 'VP8/90000', 'recvonly', 0))
rtc_library.rtcAddTrack(peer_connection, rtc.build_media_description('audio', 111, 'opus/48000/2', 'recvonly', 1))
rtc_library.rtcSetLocalDescription(peer_connection, b'offer')
buffer_size = 16384
@@ -44,7 +25,6 @@ def create_sdp_offer() -> Optional[RtcSdpOffer]:
if rtc_library.rtcGetLocalDescription(peer_connection, buffer_string, buffer_size) > 0:
sdp = buffer_string.value.decode()
rtc_library.rtcDeletePeerConnection(peer_connection)
#TODO: use return buffer_string.value.decode()
return sdp
time.sleep(0.05)
+67
View File
@@ -0,0 +1,67 @@
import pytest
from facefusion import rtc
@pytest.fixture(scope = 'module', autouse = True)
def before_all() -> None:
    """Run the RTC pre-check once before the tests of this module execute."""
    # autouse is required here: no test requests this fixture by name, so
    # without it pytest would never invoke the pre-check at all.
    # NOTE(review): pre_check presumably makes the RTC binary available - confirm.
    rtc.pre_check()
def test_build_media_description() -> None:
    """Pin the exact bytes built for one audio and one video media section."""
    audio_description = rtc.build_media_description('audio', 111, 'opus/48000/2', 'sendonly', 1)
    assert audio_description == b'm=audio 9 UDP/TLS/RTP/SAVPF 111\r\na=rtpmap:111 opus/48000/2\r\na=sendonly\r\na=mid:1\r\na=rtcp-mux\r\n'

    video_description = rtc.build_media_description('video', 96, 'VP8/90000', 'recvonly', 0)
    assert video_description == b'm=video 9 UDP/TLS/RTP/SAVPF 96\r\na=rtpmap:96 VP8/90000\r\na=recvonly\r\na=mid:0\r\na=rtcp-mux\r\n'
def test_create_peer_connection() -> None:
    """A fresh peer connection yields a positive handle and deletes with status 0."""
    connection_handle = rtc.create_peer_connection()
    library = rtc.create_static_rtc_library()

    assert connection_handle > 0

    delete_status = library.rtcDeletePeerConnection(connection_handle)
    assert delete_status == 0
def test_add_audio_track() -> None:
    """Negotiate an audio-only loopback between a sender and a receiver connection.

    The sender adds a 'sendonly' audio track and creates the offer, the receiver
    adds a 'recvonly' audio track and answers it. Both descriptions must carry
    the audio media section with the opus codec, and both connections must be
    deletable afterwards.
    """
    rtc_library = rtc.create_static_rtc_library()
    sender_connection = rtc.create_peer_connection()
    sender_audio_track = rtc.add_audio_track(sender_connection, 'sendonly')
    sdp_offer = rtc.create_sdp(sender_connection)

    # create_sdp returns an Optional value: fail with a clear assertion here
    # instead of an AttributeError from sdp_offer.encode() inside negotiate_sdp.
    assert sdp_offer is not None

    receiver_connection = rtc.create_peer_connection()
    receiver_audio_track = rtc.add_audio_track(receiver_connection, 'recvonly')
    sdp_answer = rtc.negotiate_sdp(receiver_connection, sdp_offer)

    # negotiate_sdp returns an Optional value as well; a substring check
    # against None would raise TypeError rather than fail as an assertion.
    assert sdp_answer is not None

    assert sender_audio_track > 0
    assert receiver_audio_track > 0

    assert 'm=audio' in sdp_offer
    assert 'm=audio' in sdp_answer
    assert 'opus/48000/2' in sdp_offer
    assert 'opus/48000/2' in sdp_answer

    assert rtc_library.rtcDeletePeerConnection(sender_connection) == 0
    assert rtc_library.rtcDeletePeerConnection(receiver_connection) == 0
def test_add_video_track() -> None:
    """Negotiate a video-only loopback between a sender and a receiver connection.

    The sender adds a 'sendonly' video track and creates the offer, the receiver
    adds a 'recvonly' video track and answers it. Both descriptions must carry
    the video media section with the VP8 codec, and both connections must be
    deletable afterwards.
    """
    rtc_library = rtc.create_static_rtc_library()
    sender_connection = rtc.create_peer_connection()
    sender_video_track = rtc.add_video_track(sender_connection, 'sendonly')
    sdp_offer = rtc.create_sdp(sender_connection)

    # create_sdp returns an Optional value: fail with a clear assertion here
    # instead of an AttributeError from sdp_offer.encode() inside negotiate_sdp.
    assert sdp_offer is not None

    receiver_connection = rtc.create_peer_connection()
    receiver_video_track = rtc.add_video_track(receiver_connection, 'recvonly')
    sdp_answer = rtc.negotiate_sdp(receiver_connection, sdp_offer)

    # negotiate_sdp returns an Optional value as well; a substring check
    # against None would raise TypeError rather than fail as an assertion.
    assert sdp_answer is not None

    assert sender_video_track > 0
    assert receiver_video_track > 0

    assert 'm=video' in sdp_offer
    assert 'm=video' in sdp_answer
    assert 'VP8/90000' in sdp_offer
    assert 'VP8/90000' in sdp_answer

    assert rtc_library.rtcDeletePeerConnection(sender_connection) == 0
    assert rtc_library.rtcDeletePeerConnection(receiver_connection) == 0