From 036c5c022598ff31672eb1a159ddee0aafd7ac1a Mon Sep 17 00:00:00 2001 From: henryruhs Date: Sat, 6 Jun 2026 11:15:00 +0200 Subject: [PATCH] add vp9 support --- facefusion/apis/stream_manager.py | 3 +++ facefusion/apis/stream_video.py | 26 +++++++++++++++++++- facefusion/libraries/datachannel.py | 2 ++ facefusion/rtc.py | 14 +++++++++++ facefusion/types.py | 2 +- tests/test_api_stream_video.py | 38 +++++++++++++++++++++++------ 6 files changed, 75 insertions(+), 10 deletions(-) diff --git a/facefusion/apis/stream_manager.py b/facefusion/apis/stream_manager.py index 460f5305..d4eb7b31 100644 --- a/facefusion/apis/stream_manager.py +++ b/facefusion/apis/stream_manager.py @@ -43,6 +43,9 @@ async def receive_vision_frames(websocket : WebSocket) -> AsyncIterator[VisionFr def process_video(session_id : SessionId, sdp_offer : SdpOffer) -> Optional[SdpAnswer]: video_codec : VideoCodec = 'vp8' + if rtc.get_payload_type(sdp_offer, 'vp9'): + video_codec = 'vp9' + if rtc.get_payload_type(sdp_offer, 'av1'): video_codec = 'av1' diff --git a/facefusion/apis/stream_video.py b/facefusion/apis/stream_video.py index 52977a9d..dafb72e0 100644 --- a/facefusion/apis/stream_video.py +++ b/facefusion/apis/stream_video.py @@ -12,7 +12,7 @@ import numpy from facefusion import rtc, state_manager, streamer from facefusion.apis.stream_event import create_receive_event from facefusion.audio import create_empty_audio_frame -from facefusion.codecs import aom_decoder, aom_encoder, vpx_decoder, vpx_encoder +from facefusion.codecs import aom_decoder, aom_encoder, vp9_decoder, vp9_encoder, vpx_decoder, vpx_encoder from facefusion.types import AomDecoder, AomEncoder, AomPointer, BitRate, Resolution, RtcPeer, RtcPeerVideo, VideoCodec, VideoPack, VisionFrame, VpxDecoder, VpxEncoder, VpxPointer @@ -122,6 +122,12 @@ def decode_video_frame(video_codec : VideoCodec, video_decoder : VpxDecoder | Ao if vpx_pointer: return normalize_vision_frame(vpx_pointer) + if video_codec == 'vp9': + vpx_pointer = vp9_decoder.decode(video_decoder, input_buffer) + + if vpx_pointer: + return normalize_vision_frame(vpx_pointer) + return None @@ -132,6 +138,9 @@ def encode_video_frame(video_codec : VideoCodec, video_encoder : VpxEncoder | Ao if video_codec == 'vp8': return vpx_encoder.encode(video_encoder, input_buffer, frame_resolution, frame_index) + if video_codec == 'vp9': + return vp9_encoder.encode(video_encoder, input_buffer, frame_resolution, frame_index) + return bytes() @@ -148,6 +157,9 @@ def create_video_decoder(video_codec : VideoCodec) -> Optional[VpxDecoder | AomD if video_codec == 'vp8': return vpx_decoder.create(8) + if video_codec == 'vp9': + return vp9_decoder.create(8) + return None @@ -158,6 +170,9 @@ def create_video_encoder(video_codec : VideoCodec, frame_resolution : Resolution if video_codec == 'vp8': return vpx_encoder.create(frame_resolution, bitrate, 8, 10) + if video_codec == 'vp9': + return vp9_encoder.create(frame_resolution, bitrate, 8, 10) + return None @@ -168,6 +183,9 @@ def destroy_video_decoder(video_codec : VideoCodec, video_decoder : VpxDecoder | if video_codec == 'vp8': vpx_decoder.destroy(video_decoder) + if video_codec == 'vp9': + vp9_decoder.destroy(video_decoder) + def destroy_video_encoder(video_codec : VideoCodec, video_encoder : VpxEncoder | AomEncoder) -> None: if video_codec == 'av1': @@ -176,6 +194,9 @@ def destroy_video_encoder(video_codec : VideoCodec, video_encoder : VpxEncoder | if video_codec == 'vp8': vpx_encoder.destroy(video_encoder) + if video_codec == 'vp9': + vp9_encoder.destroy(video_encoder) + def update_video_encoder_bitrate(video_codec : VideoCodec, video_encoder : VpxEncoder | AomEncoder, bitrate : BitRate) -> bool: if video_codec == 'av1': @@ -184,6 +205,9 @@ def update_video_encoder_bitrate(video_codec : VideoCodec, video_encoder : VpxEn if video_codec == 'vp8': return vpx_encoder.update_bitrate(video_encoder, bitrate) + if video_codec == 'vp9': + return vp9_encoder.update_bitrate(video_encoder, bitrate) + return False diff --git a/facefusion/libraries/datachannel.py b/facefusion/libraries/datachannel.py index 743fd827..15b6dc39 100644 --- a/facefusion/libraries/datachannel.py +++ b/facefusion/libraries/datachannel.py @@ -186,6 +186,7 @@ def init_ctypes(library : ctypes.CDLL) -> ctypes.CDLL: library.rtcSetAV1Packetizer.restype = ctypes.c_int library.rtcSetVP8Packetizer.restype = ctypes.c_int + library.rtcSetVP9Packetizer.restype = ctypes.c_int library.rtcChainRtcpSrReporter.argtypes = [ ctypes.c_int ] library.rtcChainRtcpSrReporter.restype = ctypes.c_int @@ -210,6 +211,7 @@ def init_ctypes(library : ctypes.CDLL) -> ctypes.CDLL: library.rtcSetAV1Depacketizer.argtypes = [ ctypes.c_int, ctypes.c_int ] library.rtcSetAV1Depacketizer.restype = ctypes.c_int library.rtcSetVP8Depacketizer.restype = ctypes.c_int + library.rtcSetVP9Depacketizer.restype = ctypes.c_int library.rtcSetOpusDepacketizer.restype = ctypes.c_int library.rtcChainRtcpReceivingSession.argtypes = [ ctypes.c_int ] diff --git a/facefusion/rtc.py b/facefusion/rtc.py index 12cfc025..0097c46e 100644 --- a/facefusion/rtc.py +++ b/facefusion/rtc.py @@ -139,6 +139,9 @@ def add_video_track(peer_connection : PeerConnection, media_direction : MediaDir if video_codec == 'vp8': datachannel_library.rtcSetVP8Packetizer(video_track, ctypes.byref(video_packetizer)) + if video_codec == 'vp9': + datachannel_library.rtcSetVP9Packetizer(video_track, ctypes.byref(video_packetizer)) + datachannel_library.rtcChainRtcpSrReporter(video_track) datachannel_library.rtcChainRtcpNackResponder(video_track, 512) @@ -154,6 +157,14 @@ def add_video_track(peer_connection : PeerConnection, media_direction : MediaDir video_depacketizer.clockRate = 90000 datachannel_library.rtcSetVP8Depacketizer(video_track, ctypes.byref(video_depacketizer)) + if video_codec == 'vp9': + video_depacketizer = datachannel_module.define_rtc_packetizer_init() + video_depacketizer.ssrc = 0 + video_depacketizer.cname = b'video' + video_depacketizer.payloadType = payload_type + video_depacketizer.clockRate = 90000 + datachannel_library.rtcSetVP9Depacketizer(video_track, ctypes.byref(video_depacketizer)) + datachannel_library.rtcChainRtcpReceivingSession(video_track) return video_track @@ -211,6 +222,9 @@ def create_video_track_init(media_direction : MediaDirection, video_codec : Vide if video_codec == 'vp8': track_init.codec = 1 + if video_codec == 'vp9': + track_init.codec = 2 + return ctypes.byref(track_init) diff --git a/facefusion/types.py b/facefusion/types.py index afd4bf7e..d6a70e5d 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -97,7 +97,7 @@ Orientation = Literal['landscape', 'portrait'] Resolution : TypeAlias = Tuple[int, int] AudioCodec : TypeAlias = Literal['opus'] -VideoCodec : TypeAlias = Literal['av1', 'vp8'] +VideoCodec : TypeAlias = Literal['av1', 'vp8', 'vp9'] FrameHandler : TypeAlias = Callable[..., None] diff --git a/tests/test_api_stream_video.py b/tests/test_api_stream_video.py index 21413d66..486ba673 100644 --- a/tests/test_api_stream_video.py +++ b/tests/test_api_stream_video.py @@ -11,7 +11,7 @@ import pytest from facefusion import rtc, rtc_store, state_manager from facefusion.apis.stream_video import create_video_decoder, create_video_encoder, decode_video_frame, destroy_video_decoder, destroy_video_encoder, encode_video_frame, handle_video_frame, receive_video_frames, run_video_encode_loop, update_video_encoder_bitrate -from facefusion.codecs import aom_encoder, vpx_encoder +from facefusion.codecs import aom_encoder, vp9_encoder, vpx_encoder from facefusion.common_helper import is_linux, is_macos, is_windows from facefusion.download import conditional_download from facefusion.hash_helper import create_hash @@ -46,7 +46,7 @@ def set_ready_event(ready_event : threading.Event, track : int, close_callback : ready_event.set() -@pytest.mark.parametrize('video_codec, payload_type', [ ('av1', 35), ('vp8', 96) ]) +@pytest.mark.parametrize('video_codec, payload_type', [ ('av1', 35), ('vp8', 96), ('vp9', 98) ]) def test_run_video_encode_loop(video_codec : VideoCodec, payload_type : int) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) peer_connection = rtc.create_peer_connection() @@ -88,8 +88,11 @@ def test_run_video_encode_loop(video_codec : VideoCodec, payload_type : int) -> if video_codec == 'vp8': pytest.skip() + if video_codec == 'vp9': + pytest.skip() -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) + +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_receive_video_frames(video_codec : VideoCodec) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) video_queue : Queue[VideoPack] = Queue(maxsize = 30) @@ -122,7 +125,7 @@ def test_receive_video_frames(video_codec : VideoCodec) -> None: assert create_hash(vision_frame.tobytes()) == '38d00e2a' -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes() @@ -138,6 +141,9 @@ def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None: if video_codec == 'vp8': assert create_hash(decode_buffer) == '99ef2c25' + if video_codec == 'vp9': + assert create_hash(decode_buffer) == 'f2d3e3fb' + if is_macos(): if video_codec == 'av1': assert create_hash(decode_buffer) == 'eafd1fab' @@ -145,10 +151,13 @@ def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None: if video_codec == 'vp8': assert create_hash(decode_buffer) == 'ff3ecb43' + if video_codec == 'vp9': + assert create_hash(decode_buffer) == 'ff3ecb43' + assert decode_video_frame(video_codec, video_decoder, bytes()) is None -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes() @@ -159,6 +168,9 @@ def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None: if video_codec == 'vp8': video_encoder = vpx_encoder.create((426, 226), 1000, 1, 0) encode_buffer = vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 0) + if video_codec == 'vp9': + video_encoder = vp9_encoder.create((426, 226), 1000, 1, 0) + encode_buffer = vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 0) video_decoder = create_video_decoder(video_codec) @@ -169,7 +181,7 @@ def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None: assert decode_video_frame(video_codec, video_decoder, encode_buffer) is None -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes() @@ -179,6 +191,8 @@ def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None: assert aom_encoder.encode(video_encoder, input_buffer, (426, 226), 0) if video_codec == 'vp8': assert vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 0) + if video_codec == 'vp9': + assert vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 0) destroy_video_encoder(video_codec, video_encoder) @@ -186,9 +200,11 @@ def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None: assert aom_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes() if video_codec == 'vp8': assert vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes() + if video_codec == 'vp9': + assert vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes() -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None: video_encoder = create_video_encoder(video_codec, (426, 226), 4000) @@ -198,6 +214,9 @@ def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None: if video_codec == 'vp8': assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 4000 + if video_codec == 'vp9': + assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 4000 + assert update_video_encoder_bitrate(video_codec, video_encoder, 6000) if video_codec == 'av1': @@ -206,10 +225,13 @@ def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None: if video_codec == 'vp8': assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 6000 + if video_codec == 'vp9': + assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 6000 + destroy_video_encoder(video_codec, video_encoder) -@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ]) +@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ]) def test_handle_video_frame(video_codec : VideoCodec) -> None: video_frame = read_video_frame(get_test_example_file('target-240p.mp4')) video_decoder = create_video_decoder(video_codec)