From 47b48e0de5e77dc846b58c927682a17f2e495653 Mon Sep 17 00:00:00 2001 From: henryruhs Date: Wed, 25 Mar 2026 13:56:20 +0100 Subject: [PATCH] make it work under macos --- e2e_video_modes.py | 31 +++++++++++++++++++++++++++-- facefusion/apis/core.py | 6 +++--- facefusion/apis/endpoints/stream.py | 26 ++++++++++++++++++++++-- facefusion/mediamtx.py | 9 ++++++--- facefusion/rtc.py | 16 ++++++++------- test_stream.html | 10 +++++++--- 6 files changed, 78 insertions(+), 20 deletions(-) diff --git a/e2e_video_modes.py b/e2e_video_modes.py index 83aa8f7a..75428706 100644 --- a/e2e_video_modes.py +++ b/e2e_video_modes.py @@ -15,8 +15,13 @@ SOURCE_FILE : str = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.a def is_windows() -> bool: return platform.system().lower() == 'windows' +def is_macos() -> bool: + return platform.system().lower() == 'darwin' + if is_windows(): VIDEO_FILE : str = 'C:\\Users\\info\\Downloads\\face8k.mp4' +elif is_macos(): + VIDEO_FILE : str = '/Users/henry/Downloads/copy_face_instant.mp4' else: VIDEO_FILE : str = '/home/henry/Documents/examples/download.mp4' @@ -27,7 +32,7 @@ def safe_print(text : str) -> None: except UnicodeEncodeError: print(text.encode('ascii', errors='replace').decode('ascii')) -MODES =\ +_ALL_MODES =\ [ 'whip-mediamtx', 'whip-python', @@ -38,12 +43,14 @@ MODES =\ 'ws-mjpeg' ] +MODES = [ m for m in _ALL_MODES if not (is_macos() and m == 'whip-mediamtx') ] + def start_api() -> subprocess.Popen: env = os.environ.copy() python_cmd = 'python' if is_windows() else 'python3' - if not is_windows(): + if not is_windows() and not is_macos(): env['LD_LIBRARY_PATH'] = '/home/henry/local/lib:' + env.get('LD_LIBRARY_PATH', '') proc = subprocess.Popen( @@ -103,12 +110,32 @@ def kill_port_windows(port : int) -> None: subprocess.run([ 'taskkill', '/F', '/PID', pid ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) +def kill_port_macos(port : int) -> None: + pids = set() + + for proto in [ 'tcp', 'udp' ]: + result = subprocess.run( + [ 'lsof', '-ti', proto + ':' + str(port) ], + capture_output = True, text = True + ) + + for pid in result.stdout.split(): + if pid.isdigit(): + pids.add(pid) + + for pid in pids: + subprocess.run([ 'kill', '-9', pid ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) + + def kill_stale() -> None: ports = [ API_PORT, 8889, 8189, 9997, 8890, 8891, 8892 ] if is_windows(): for port in ports: kill_port_windows(port) + elif is_macos(): + for port in ports: + kill_port_macos(port) else: subprocess.run([ 'fuser', '-k', str(API_PORT) + '/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) subprocess.run([ 'fuser', '-k', '8889/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) diff --git a/facefusion/apis/core.py b/facefusion/apis/core.py index 5b5afe5c..c5dcaf9b 100644 --- a/facefusion/apis/core.py +++ b/facefusion/apis/core.py @@ -13,14 +13,14 @@ from facefusion.apis.endpoints.metrics import get_metrics, websocket_metrics from facefusion.apis.endpoints.ping import websocket_ping from facefusion.apis.endpoints.session import create_session, destroy_session, get_session, refresh_session from facefusion.apis.endpoints.state import get_state, set_state -from facefusion.common_helper import is_windows +from facefusion.common_helper import is_linux, is_windows from facefusion.apis.endpoints.stream import websocket_stream, websocket_stream_audio, websocket_stream_live, websocket_stream_mjpeg, websocket_stream_rtc, websocket_stream_rtc_relay, websocket_stream_whip, websocket_stream_whip_dc, websocket_stream_whip_py from facefusion.apis.middlewares.session import create_session_guard @asynccontextmanager async def lifespan(app : Starlette) -> AsyncGenerator[None, None]: - if not is_windows(): + if is_linux(): mediamtx.start() mediamtx.wait_for_ready() @@ -45,7 +45,7 @@ async def lifespan(app : Starlette) -> AsyncGenerator[None, None]: yield - if not is_windows(): + if is_linux(): mediamtx.stop() try: diff --git a/facefusion/apis/endpoints/stream.py b/facefusion/apis/endpoints/stream.py index 7007e53d..1ae0610e 100644 --- a/facefusion/apis/endpoints/stream.py +++ b/facefusion/apis/endpoints/stream.py @@ -12,6 +12,7 @@ from starlette.websockets import WebSocket from facefusion import logger, session_context, session_manager, state_manager from facefusion.common_helper import is_windows +from facefusion.apis.stream_helper import STREAM_AUDIO_RATE from facefusion.apis.api_helper import get_sec_websocket_protocol from facefusion.apis.session_helper import extract_access_token from facefusion import mediamtx @@ -158,7 +159,24 @@ async def websocket_stream_whip(websocket : WebSocket) -> None: await websocket.close() -def run_fmp4_pipeline(latest_frame_holder : list, lock : threading.Lock, stop_event : threading.Event, output_chunks : List[bytes], output_lock : threading.Lock, audio_write_fd_holder : list) -> None: +def run_audio_silence_feeder(audio_write_fd_holder : list, stop_event : threading.Event, audio_active_event : threading.Event) -> None: + frame_bytes = STREAM_AUDIO_RATE // 50 * 2 * 2 + silence = b'\x00' * frame_bytes + + while not stop_event.is_set(): + if not audio_active_event.is_set(): + fd = audio_write_fd_holder[0] + + if fd > 0: + try: + _os.write(fd, silence) + except OSError: + break + + time.sleep(0.02) + + +def run_fmp4_pipeline(latest_frame_holder : list, lock : threading.Lock, stop_event : threading.Event, output_chunks : List[bytes], output_lock : threading.Lock, audio_write_fd_holder : list, audio_active_event : threading.Event) -> None: encoder = None audio_write_fd = -1 reader_thread = None @@ -194,6 +212,8 @@ def run_fmp4_pipeline(latest_frame_holder : list, lock : threading.Lock, stop_ev audio_write_fd_holder[0] = audio_write_fd reader_thread = threading.Thread(target = read_fmp4_output, args = (encoder, output_chunks, output_lock), daemon = True) reader_thread.start() + silence_thread = threading.Thread(target = run_audio_silence_feeder, args = (audio_write_fd_holder, stop_event, audio_active_event), daemon = True) + silence_thread.start() logger.info('fmp4 encoder started ' + str(width) + 'x' + str(height), __name__) feed_whip_frame(encoder, temp_vision_frame) @@ -222,7 +242,8 @@ async def websocket_stream_live(websocket : WebSocket) -> None: lock = threading.Lock() output_lock = threading.Lock() stop_event = threading.Event() - worker = threading.Thread(target = run_fmp4_pipeline, args = (latest_frame_holder, lock, stop_event, output_chunks, output_lock, audio_write_fd_holder), daemon = True) + audio_active_event = threading.Event() + worker = threading.Thread(target = run_fmp4_pipeline, args = (latest_frame_holder, lock, stop_event, output_chunks, output_lock, audio_write_fd_holder, audio_active_event), daemon = True) worker.start() try: @@ -245,6 +266,7 @@ async def websocket_stream_live(websocket : WebSocket) -> None: latest_frame_holder[0] = frame if data[:2] != JPEG_MAGIC and audio_write_fd_holder[0] > 0: + audio_active_event.set() feed_whip_audio(audio_write_fd_holder[0], data) except Exception as exception: diff --git a/facefusion/mediamtx.py b/facefusion/mediamtx.py index b44bbc10..cabf4fcc 100644 --- a/facefusion/mediamtx.py +++ b/facefusion/mediamtx.py @@ -6,6 +6,8 @@ from typing import Optional import httpx +from facefusion.common_helper import is_linux + MEDIAMTX_WHIP_PORT : int = 8889 MEDIAMTX_API_PORT : int = 9997 @@ -56,9 +58,10 @@ def stop() -> None: def stop_stale() -> None: - subprocess.run([ 'fuser', '-k', str(MEDIAMTX_WHIP_PORT) + '/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) - subprocess.run([ 'fuser', '-k', '8189/udp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) - subprocess.run([ 'fuser', '-k', str(MEDIAMTX_API_PORT) + '/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) + if is_linux(): + subprocess.run([ 'fuser', '-k', str(MEDIAMTX_WHIP_PORT) + '/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) + subprocess.run([ 'fuser', '-k', '8189/udp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) + subprocess.run([ 'fuser', '-k', str(MEDIAMTX_API_PORT) + '/tcp' ], stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) time.sleep(1) diff --git a/facefusion/rtc.py b/facefusion/rtc.py index 2a1b1888..6ee0495f 100644 --- a/facefusion/rtc.py +++ b/facefusion/rtc.py @@ -7,7 +7,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer from typing import Dict, List, Optional, TypeAlias from facefusion import logger -from facefusion.common_helper import is_windows +from facefusion.common_helper import is_macos, is_windows RtcLib : TypeAlias = ctypes.CDLL WHEP_PORT : int = 8892 @@ -87,6 +87,8 @@ def find_library() -> Optional[str]: if is_windows(): return os.path.join(bin_dir, 'windows-x64-openssl-h264-vp8-av1-opus-datachannel-0.24.1.dll') + if is_macos(): + return os.path.join(bin_dir, 'macos-universal-openssl-h264-vp8-av1-opus-libdatachannel-0.24.1.dylib') return os.path.join(bin_dir, 'linux-x64-openssl-h264-vp8-av1-opus-libdatachannel-0.24.1.so') @@ -206,6 +208,8 @@ next_rtp_port : int = 16000 def create_session(stream_path : str) -> None: + global video_frame_count + video_frame_count = 0 sessions[stream_path] = {'viewers': [], 'tracks': [], 'rtp_port': 0, 'rtp_fd': None} @@ -288,6 +292,7 @@ def send_audio_to_viewers(stream_path : str, opus_data : bytes) -> None: send_start_time : float = 0 +video_frame_count : int = 0 audio_pts : int = 0 opus_enc = None audio_buffer : bytearray = bytearray() @@ -296,7 +301,7 @@ OPUS_FRAME_SAMPLES : int = 960 def send_to_viewers(stream_path : str, data : bytes) -> None: - global send_start_time + global video_frame_count session = sessions.get(stream_path) @@ -308,11 +313,8 @@ def send_to_viewers(stream_path : str, data : bytes) -> None: if not viewers: return - if send_start_time == 0: - send_start_time = _time.monotonic() - - elapsed = _time.monotonic() - send_start_time - timestamp = int(elapsed * 90000) & 0xFFFFFFFF + timestamp = int(video_frame_count * 3000) & 0xFFFFFFFF + video_frame_count += 1 buf = ctypes.create_string_buffer(data) data_len = len(data) diff --git a/test_stream.html b/test_stream.html index bc193d16..ad441f43 100644 --- a/test_stream.html +++ b/test_stream.html @@ -260,7 +260,7 @@
- +
@@ -415,7 +415,7 @@ function startStats() { prevBytes = bytes; prevFrames = frames; prevStatsTime = now; - setStat('statFrames', report.framesReceived || 0); + setStat('statFrames', report.framesDecoded || 0); if (report.frameWidth && report.frameHeight) { setStat('statResolution', report.frameWidth + 'x' + report.frameHeight); @@ -822,7 +822,11 @@ async function connectWhep() { pc.ontrack = function(event) { log('remote track: ' + event.track.kind + ' (' + Math.round(performance.now() - t0) + 'ms)', 'ok'); - document.getElementById('outputVideo').srcObject = event.streams[0]; + var video = document.getElementById('outputVideo'); + video.srcObject = event.streams[0]; + video.play().then(function() { + video.muted = false; + }).catch(function(e) { log('play error: ' + e.message, 'error'); }); }; pc.addTransceiver('video', { direction: 'recvonly' });