From 95435f842c12b598a6dc39bbda2cb9723c08ba3d Mon Sep 17 00:00:00 2001 From: Henry Ruhs Date: Fri, 15 May 2026 19:14:36 +0200 Subject: [PATCH] use datachannel to create proper rtc_track_init (#1117) * use datachannel to create proper rtc_track_init * fix lint * fix lint --- facefusion/libraries/datachannel.py | 20 ++++++++ facefusion/rtc.py | 72 ++++++++++++++--------------- facefusion/types.py | 4 +- tests/test_rtc.py | 13 ++---- 4 files changed, 61 insertions(+), 48 deletions(-) diff --git a/facefusion/libraries/datachannel.py b/facefusion/libraries/datachannel.py index 59618926..0d67ec4b 100644 --- a/facefusion/libraries/datachannel.py +++ b/facefusion/libraries/datachannel.py @@ -182,6 +182,8 @@ def init_ctypes(library : ctypes.CDLL) -> ctypes.CDLL: library.rtcAddTrack.argtypes = [ ctypes.c_int, ctypes.c_char_p ] library.rtcAddTrack.restype = ctypes.c_int + library.rtcAddTrackEx.restype = ctypes.c_int + library.rtcSendMessage.argtypes = [ ctypes.c_int, ctypes.c_void_p, ctypes.c_int ] library.rtcSendMessage.restype = ctypes.c_int @@ -246,6 +248,24 @@ def define_rtc_configuration() -> ctypes.Structure: })() +def define_rtc_track_init() -> ctypes.Structure: + return type('RTC_TRACK_INIT', (ctypes.Structure,), + { + '_fields_': + [ + ('direction', ctypes.c_int), + ('codec', ctypes.c_int), + ('payloadType', ctypes.c_int), + ('ssrc', ctypes.c_uint32), + ('mid', ctypes.c_char_p), + ('name', ctypes.c_char_p), + ('msid', ctypes.c_char_p), + ('trackId', ctypes.c_char_p), + ('profile', ctypes.c_char_p) + ] + })() + + def define_rtc_packetizer_init() -> ctypes.Structure: return type('RTC_PACKETIZER_INIT', (ctypes.Structure,), { diff --git a/facefusion/rtc.py b/facefusion/rtc.py index 856af9c2..47e31edd 100644 --- a/facefusion/rtc.py +++ b/facefusion/rtc.py @@ -3,7 +3,7 @@ import time from typing import Dict, List, Optional from facefusion.libraries import datachannel as datachannel_module -from facefusion.types import AudioCodec, MediaDirection, PeerConnection, RtcAudioTrack, RtcPeer, RtcVideoTrack, SdpAnswer, SdpOffer, VideoCodec +from facefusion.types import AudioCodec, MediaDirection, PeerConnection, RtcAudioTrack, RtcPeer, RtcTrackInit, RtcVideoTrack, SdpAnswer, SdpOffer, VideoCodec def create_peer_connection() -> PeerConnection: @@ -92,8 +92,8 @@ def delete_peers(rtc_peers : List[RtcPeer]) -> None: def add_audio_track(peer_connection : PeerConnection, media_direction : MediaDirection, audio_codec : AudioCodec, payload_type : int) -> RtcAudioTrack: datachannel_library = datachannel_module.create_static_library() - audio_description = create_audio_description(media_direction, audio_codec, payload_type) - audio_track = datachannel_library.rtcAddTrack(peer_connection, audio_description) + audio_track_init = create_audio_track_init(media_direction, audio_codec, payload_type) + audio_track = datachannel_library.rtcAddTrackEx(peer_connection, audio_track_init) audio_packetizer = datachannel_module.define_rtc_packetizer_init() audio_packetizer.ssrc = 43 @@ -111,8 +111,8 @@ def add_audio_track(peer_connection : PeerConnection, media_direction : MediaDir def add_video_track(peer_connection : PeerConnection, media_direction : MediaDirection, video_codec : VideoCodec, payload_type : int) -> RtcVideoTrack: datachannel_library = datachannel_module.create_static_library() - video_description = create_video_description(media_direction, video_codec, payload_type) - video_track = datachannel_library.rtcAddTrack(peer_connection, video_description) + video_track_init = create_video_track_init(media_direction, video_codec, payload_type) + video_track = datachannel_library.rtcAddTrackEx(peer_connection, video_track_init) video_packetizer = datachannel_module.define_rtc_packetizer_init() video_packetizer.ssrc = 42 @@ -134,46 +134,42 @@ def add_video_track(peer_connection : PeerConnection, media_direction : MediaDir return video_track -#TODO: needs revision -def create_audio_description(media_direction : MediaDirection, audio_codec : AudioCodec, payload_type : int) -> bytes: - rtp_codec = 'opus/48000/2' +def create_audio_track_init(media_direction : MediaDirection, audio_codec : AudioCodec, payload_type : int) -> RtcTrackInit: + track_init = datachannel_module.define_rtc_track_init() + + if media_direction == 'sendonly': + track_init.direction = 1 + if media_direction == 'recvonly': + track_init.direction = 2 if audio_codec == 'opus': - rtp_codec = 'opus/48000/2' + track_init.codec = 128 - lines =\ - [ - 'm=audio 9 UDP/TLS/RTP/SAVPF ' + str(payload_type), - 'a=rtpmap:' + str(payload_type) + ' ' + rtp_codec, - 'a=rtcp-fb:' + str(payload_type) + ' nack', - 'a=rtcp-fb:' + str(payload_type) + ' nack pli', - 'a=' + media_direction, - 'a=mid:1', - 'a=rtcp-mux', - '' - ] - return '\r\n'.join(lines).encode() + track_init.payloadType = payload_type + track_init.ssrc = 43 + track_init.name = b'audio' + track_init.mid = b'1' + + return ctypes.byref(track_init) -#TODO: needs revision -def create_video_description(media_direction : MediaDirection, video_codec : VideoCodec, payload_type : int) -> bytes: - rtp_codec = 'AV1/90000' +def create_video_track_init(media_direction : MediaDirection, video_codec : VideoCodec, payload_type : int) -> RtcTrackInit: + track_init = datachannel_module.define_rtc_track_init() + + if media_direction == 'sendonly': + track_init.direction = 1 + if media_direction == 'recvonly': + track_init.direction = 2 if video_codec == 'av1': - rtp_codec = 'AV1/90000' + track_init.codec = 4 if video_codec == 'vp8': - rtp_codec = 'VP8/90000' + track_init.codec = 1 - lines =\ - [ - 'm=video 9 UDP/TLS/RTP/SAVPF ' + str(payload_type), - 'a=rtpmap:' + str(payload_type) + ' ' + rtp_codec, - 'a=rtcp-fb:' + str(payload_type) + ' nack', - 'a=rtcp-fb:' + str(payload_type) + ' nack pli', - 'a=' + media_direction, - 'a=mid:0', - 'a=rtcp-mux', - '' - ] - return '\r\n'.join(lines).encode() + track_init.payloadType = payload_type + track_init.ssrc = 42 + track_init.name = b'video' + track_init.mid = b'0' + + return ctypes.byref(track_init) #TODO: needs revision diff --git a/facefusion/types.py b/facefusion/types.py index 9856e686..4d01b68d 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -274,7 +274,9 @@ StreamMode = Literal['udp', 'v4l2'] PeerConnection : TypeAlias = int SdpOffer : TypeAlias = str SdpAnswer : TypeAlias = str -MediaDirection : TypeAlias = Literal['sendonly', 'recvonly', 'sendrecv', 'inactive'] +MediaDirection : TypeAlias = Literal['sendonly', 'recvonly'] + +RtcTrackInit : TypeAlias = Any RtcVideoTrack : TypeAlias = int RtcAudioTrack : TypeAlias = int diff --git a/tests/test_rtc.py b/tests/test_rtc.py index c09e7f38..63d024b4 100644 --- a/tests/test_rtc.py +++ b/tests/test_rtc.py @@ -30,11 +30,12 @@ def test_create_sdp_offer() -> None: rtc.add_audio_track(peer_connection, 'sendonly', 'opus', 111) sdp_offer = rtc.create_sdp_offer(peer_connection) - assert sdp_offer assert 'm=video' in sdp_offer assert 'VP8/90000' in sdp_offer + assert 'a=ssrc:42 cname:video' in sdp_offer assert 'm=audio' in sdp_offer assert 'opus/48000/2' in sdp_offer + assert 'a=ssrc:43 cname:audio' in sdp_offer datachannel_module.create_static_library().rtcDeletePeerConnection(peer_connection) @@ -52,12 +53,12 @@ def test_negotiate_sdp_answer() -> None: rtc.add_audio_track(receiver_connection, 'recvonly', 'opus', 111) sdp_answer = rtc.negotiate_sdp_answer(receiver_connection, sdp_offer) - assert sdp_answer assert 'm=video' in sdp_answer assert 'VP8/90000' in sdp_answer + assert 'a=ssrc:42 cname:video' in sdp_answer assert 'm=audio' in sdp_answer assert 'opus/48000/2' in sdp_answer - # TODO: review + assert 'a=ssrc:43 cname:audio' in sdp_answer assert 'a=recvonly' in sdp_answer assert datachannel_library.rtcDeletePeerConnection(sender_connection) == 0 @@ -147,9 +148,3 @@ def test_add_video_track() -> None: datachannel_module.create_static_library().rtcDeletePeerConnection(peer_connection) -def test_create_audio_description() -> None: - assert rtc.create_audio_description('sendonly', 'opus', 111) == b'm=audio 9 UDP/TLS/RTP/SAVPF 111\r\na=rtpmap:111 opus/48000/2\r\na=rtcp-fb:111 nack\r\na=rtcp-fb:111 nack pli\r\na=sendonly\r\na=mid:1\r\na=rtcp-mux\r\n' - - -def test_create_video_description() -> None: - assert rtc.create_video_description('recvonly', 'vp8', 96) == b'm=video 9 UDP/TLS/RTP/SAVPF 96\r\na=rtpmap:96 VP8/90000\r\na=rtcp-fb:96 nack\r\na=rtcp-fb:96 nack pli\r\na=recvonly\r\na=mid:0\r\na=rtcp-mux\r\n'