add vp9 support

This commit is contained in:
henryruhs
2026-06-06 11:15:00 +02:00
parent 2e884941f8
commit 036c5c0225
6 changed files with 75 additions and 10 deletions
+3
View File
@@ -43,6 +43,9 @@ async def receive_vision_frames(websocket : WebSocket) -> AsyncIterator[VisionFr
def process_video(session_id : SessionId, sdp_offer : SdpOffer) -> Optional[SdpAnswer]:
video_codec : VideoCodec = 'vp8'
if rtc.get_payload_type(sdp_offer, 'vp9'):
video_codec = 'vp9'
if rtc.get_payload_type(sdp_offer, 'av1'):
video_codec = 'av1'
+25 -1
View File
@@ -12,7 +12,7 @@ import numpy
from facefusion import rtc, state_manager, streamer
from facefusion.apis.stream_event import create_receive_event
from facefusion.audio import create_empty_audio_frame
from facefusion.codecs import aom_decoder, aom_encoder, vpx_decoder, vpx_encoder
from facefusion.codecs import aom_decoder, aom_encoder, vp9_decoder, vp9_encoder, vpx_decoder, vpx_encoder
from facefusion.types import AomDecoder, AomEncoder, AomPointer, BitRate, Resolution, RtcPeer, RtcPeerVideo, VideoCodec, VideoPack, VisionFrame, VpxDecoder, VpxEncoder, VpxPointer
@@ -122,6 +122,12 @@ def decode_video_frame(video_codec : VideoCodec, video_decoder : VpxDecoder | Ao
if vpx_pointer:
return normalize_vision_frame(vpx_pointer)
if video_codec == 'vp9':
vpx_pointer = vp9_decoder.decode(video_decoder, input_buffer)
if vpx_pointer:
return normalize_vision_frame(vpx_pointer)
return None
@@ -132,6 +138,9 @@ def encode_video_frame(video_codec : VideoCodec, video_encoder : VpxEncoder | Ao
if video_codec == 'vp8':
return vpx_encoder.encode(video_encoder, input_buffer, frame_resolution, frame_index)
if video_codec == 'vp9':
return vp9_encoder.encode(video_encoder, input_buffer, frame_resolution, frame_index)
return bytes()
@@ -148,6 +157,9 @@ def create_video_decoder(video_codec : VideoCodec) -> Optional[VpxDecoder | AomD
if video_codec == 'vp8':
return vpx_decoder.create(8)
if video_codec == 'vp9':
return vp9_decoder.create(8)
return None
@@ -158,6 +170,9 @@ def create_video_encoder(video_codec : VideoCodec, frame_resolution : Resolution
if video_codec == 'vp8':
return vpx_encoder.create(frame_resolution, bitrate, 8, 10)
if video_codec == 'vp9':
return vp9_encoder.create(frame_resolution, bitrate, 8, 10)
return None
@@ -168,6 +183,9 @@ def destroy_video_decoder(video_codec : VideoCodec, video_decoder : VpxDecoder |
if video_codec == 'vp8':
vpx_decoder.destroy(video_decoder)
if video_codec == 'vp9':
vp9_decoder.destroy(video_decoder)
def destroy_video_encoder(video_codec : VideoCodec, video_encoder : VpxEncoder | AomEncoder) -> None:
if video_codec == 'av1':
@@ -176,6 +194,9 @@ def destroy_video_encoder(video_codec : VideoCodec, video_encoder : VpxEncoder |
if video_codec == 'vp8':
vpx_encoder.destroy(video_encoder)
if video_codec == 'vp9':
vp9_encoder.destroy(video_encoder)
def update_video_encoder_bitrate(video_codec : VideoCodec, video_encoder : VpxEncoder | AomEncoder, bitrate : BitRate) -> bool:
if video_codec == 'av1':
@@ -184,6 +205,9 @@ def update_video_encoder_bitrate(video_codec : VideoCodec, video_encoder : VpxEn
if video_codec == 'vp8':
return vpx_encoder.update_bitrate(video_encoder, bitrate)
if video_codec == 'vp9':
return vp9_encoder.update_bitrate(video_encoder, bitrate)
return False
+2
View File
@@ -186,6 +186,7 @@ def init_ctypes(library : ctypes.CDLL) -> ctypes.CDLL:
library.rtcSetAV1Packetizer.restype = ctypes.c_int
library.rtcSetVP8Packetizer.restype = ctypes.c_int
library.rtcSetVP9Packetizer.restype = ctypes.c_int
library.rtcChainRtcpSrReporter.argtypes = [ ctypes.c_int ]
library.rtcChainRtcpSrReporter.restype = ctypes.c_int
@@ -210,6 +211,7 @@ def init_ctypes(library : ctypes.CDLL) -> ctypes.CDLL:
library.rtcSetAV1Depacketizer.argtypes = [ ctypes.c_int, ctypes.c_int ]
library.rtcSetAV1Depacketizer.restype = ctypes.c_int
library.rtcSetVP8Depacketizer.restype = ctypes.c_int
library.rtcSetVP9Depacketizer.restype = ctypes.c_int
library.rtcSetOpusDepacketizer.restype = ctypes.c_int
library.rtcChainRtcpReceivingSession.argtypes = [ ctypes.c_int ]
+14
View File
@@ -139,6 +139,9 @@ def add_video_track(peer_connection : PeerConnection, media_direction : MediaDir
if video_codec == 'vp8':
datachannel_library.rtcSetVP8Packetizer(video_track, ctypes.byref(video_packetizer))
if video_codec == 'vp9':
datachannel_library.rtcSetVP9Packetizer(video_track, ctypes.byref(video_packetizer))
datachannel_library.rtcChainRtcpSrReporter(video_track)
datachannel_library.rtcChainRtcpNackResponder(video_track, 512)
@@ -154,6 +157,14 @@ def add_video_track(peer_connection : PeerConnection, media_direction : MediaDir
video_depacketizer.clockRate = 90000
datachannel_library.rtcSetVP8Depacketizer(video_track, ctypes.byref(video_depacketizer))
if video_codec == 'vp9':
video_depacketizer = datachannel_module.define_rtc_packetizer_init()
video_depacketizer.ssrc = 0
video_depacketizer.cname = b'video'
video_depacketizer.payloadType = payload_type
video_depacketizer.clockRate = 90000
datachannel_library.rtcSetVP9Depacketizer(video_track, ctypes.byref(video_depacketizer))
datachannel_library.rtcChainRtcpReceivingSession(video_track)
return video_track
@@ -211,6 +222,9 @@ def create_video_track_init(media_direction : MediaDirection, video_codec : Vide
if video_codec == 'vp8':
track_init.codec = 1
if video_codec == 'vp9':
track_init.codec = 2
return ctypes.byref(track_init)
+1 -1
View File
@@ -97,7 +97,7 @@ Orientation = Literal['landscape', 'portrait']
Resolution : TypeAlias = Tuple[int, int]
AudioCodec : TypeAlias = Literal['opus']
VideoCodec : TypeAlias = Literal['av1', 'vp8']
VideoCodec : TypeAlias = Literal['av1', 'vp8', 'vp9']
FrameHandler : TypeAlias = Callable[..., None]
+30 -8
View File
@@ -11,7 +11,7 @@ import pytest
from facefusion import rtc, rtc_store, state_manager
from facefusion.apis.stream_video import create_video_decoder, create_video_encoder, decode_video_frame, destroy_video_decoder, destroy_video_encoder, encode_video_frame, handle_video_frame, receive_video_frames, run_video_encode_loop, update_video_encoder_bitrate
from facefusion.codecs import aom_encoder, vpx_encoder
from facefusion.codecs import aom_encoder, vp9_encoder, vpx_encoder
from facefusion.common_helper import is_linux, is_macos, is_windows
from facefusion.download import conditional_download
from facefusion.hash_helper import create_hash
@@ -46,7 +46,7 @@ def set_ready_event(ready_event : threading.Event, track : int, close_callback :
ready_event.set()
@pytest.mark.parametrize('video_codec, payload_type', [ ('av1', 35), ('vp8', 96) ])
@pytest.mark.parametrize('video_codec, payload_type', [ ('av1', 35), ('vp8', 96), ('vp9', 98) ])
def test_run_video_encode_loop(video_codec : VideoCodec, payload_type : int) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
peer_connection = rtc.create_peer_connection()
@@ -88,8 +88,11 @@ def test_run_video_encode_loop(video_codec : VideoCodec, payload_type : int) ->
if video_codec == 'vp8':
pytest.skip()
if video_codec == 'vp9':
pytest.skip()
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_receive_video_frames(video_codec : VideoCodec) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
video_queue : Queue[VideoPack] = Queue(maxsize = 30)
@@ -122,7 +125,7 @@ def test_receive_video_frames(video_codec : VideoCodec) -> None:
assert create_hash(vision_frame.tobytes()) == '38d00e2a'
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes()
@@ -138,6 +141,9 @@ def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None:
if video_codec == 'vp8':
assert create_hash(decode_buffer) == '99ef2c25'
if video_codec == 'vp9':
assert create_hash(decode_buffer) == 'f2d3e3fb'
if is_macos():
if video_codec == 'av1':
assert create_hash(decode_buffer) == 'eafd1fab'
@@ -145,10 +151,13 @@ def test_encode_and_decode_video_frame(video_codec : VideoCodec) -> None:
if video_codec == 'vp8':
assert create_hash(decode_buffer) == 'ff3ecb43'
if video_codec == 'vp9':
assert create_hash(decode_buffer) == 'ff3ecb43'
assert decode_video_frame(video_codec, video_decoder, bytes()) is None
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes()
@@ -159,6 +168,9 @@ def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None:
if video_codec == 'vp8':
video_encoder = vpx_encoder.create((426, 226), 1000, 1, 0)
encode_buffer = vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 0)
if video_codec == 'vp9':
video_encoder = vp9_encoder.create((426, 226), 1000, 1, 0)
encode_buffer = vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 0)
video_decoder = create_video_decoder(video_codec)
@@ -169,7 +181,7 @@ def test_create_and_destroy_video_decoder(video_codec : VideoCodec) -> None:
assert decode_video_frame(video_codec, video_decoder, encode_buffer) is None
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
input_buffer = cv2.cvtColor(video_frame, cv2.COLOR_BGR2YUV_I420).tobytes()
@@ -179,6 +191,8 @@ def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None:
assert aom_encoder.encode(video_encoder, input_buffer, (426, 226), 0)
if video_codec == 'vp8':
assert vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 0)
if video_codec == 'vp9':
assert vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 0)
destroy_video_encoder(video_codec, video_encoder)
@@ -186,9 +200,11 @@ def test_create_and_destroy_video_encoder(video_codec : VideoCodec) -> None:
assert aom_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes()
if video_codec == 'vp8':
assert vpx_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes()
if video_codec == 'vp9':
assert vp9_encoder.encode(video_encoder, input_buffer, (426, 226), 1) == bytes()
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None:
video_encoder = create_video_encoder(video_codec, (426, 226), 4000)
@@ -198,6 +214,9 @@ def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None:
if video_codec == 'vp8':
assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 4000
if video_codec == 'vp9':
assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 4000
assert update_video_encoder_bitrate(video_codec, video_encoder, 6000)
if video_codec == 'av1':
@@ -206,10 +225,13 @@ def test_update_video_encoder_bitrate(video_codec : VideoCodec) -> None:
if video_codec == 'vp8':
assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 6000
if video_codec == 'vp9':
assert struct.unpack_from('I', video_encoder, 64 + 112)[0] == 6000
destroy_video_encoder(video_codec, video_encoder)
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8' ])
@pytest.mark.parametrize('video_codec', [ 'av1', 'vp8', 'vp9' ])
def test_handle_video_frame(video_codec : VideoCodec) -> None:
video_frame = read_video_frame(get_test_example_file('target-240p.mp4'))
video_decoder = create_video_decoder(video_codec)