From e3d4e101c9b65a37821e0999e371c7955e48f928 Mon Sep 17 00:00:00 2001 From: henryruhs Date: Tue, 7 Apr 2026 14:52:57 +0200 Subject: [PATCH] video support --- facefusion/apis/endpoints/assets.py | 85 ++++- facefusion/apis/endpoints/nodes.py | 132 +++++++- facefusion/apis/static/index.html | 309 ++++++++++++++++-- .../processors/modules/face_debugger/core.py | 6 +- .../processors/modules/face_enhancer/core.py | 6 +- .../processors/modules/face_swapper/core.py | 6 +- 6 files changed, 505 insertions(+), 39 deletions(-) diff --git a/facefusion/apis/endpoints/assets.py b/facefusion/apis/endpoints/assets.py index 621b0e55..1346939f 100644 --- a/facefusion/apis/endpoints/assets.py +++ b/facefusion/apis/endpoints/assets.py @@ -1,15 +1,18 @@ import os +import uuid from typing import List from starlette.requests import Request from starlette.responses import FileResponse, JSONResponse, Response from starlette.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND, HTTP_415_UNSUPPORTED_MEDIA_TYPE -from facefusion import session_context, session_manager +from facefusion import session_context, session_manager, state_manager from facefusion.apis import asset_store from facefusion.apis.asset_helper import save_asset_files, validate_asset_files from facefusion.apis.endpoints.session import extract_access_token -from facefusion.filesystem import remove_file +from facefusion.filesystem import create_directory, remove_file +from facefusion.node import decode_vision_frame, encode_vision_frame +from facefusion.vision import read_video_frame async def upload_asset(request : Request) -> Response: @@ -133,3 +136,81 @@ async def delete_assets(request : Request) -> Response: return Response(status_code = HTTP_200_OK) return Response(status_code = HTTP_404_NOT_FOUND) + + +async def get_asset_frame(request : Request) -> Response: + access_token = extract_access_token(request.scope) + session_id = session_manager.find_session_id(access_token) + asset_id = request.path_params.get('asset_id') + frame_number = int(request.path_params.get('frame_number', 0)) + + if session_id and asset_id: + asset = asset_store.get_asset(session_id, asset_id) + + if asset: + asset_path = asset.get('path') + + if asset_path and os.path.exists(asset_path): + frame = read_video_frame(asset_path, frame_number) + + if frame is not None: + frame_b64 = encode_vision_frame(frame) + return JSONResponse({ 'frame' : frame_b64 }, status_code = HTTP_200_OK) + + return Response(status_code = HTTP_404_NOT_FOUND) + + +async def assemble_video(request : Request) -> Response: + import asyncio + + import cv2 + + access_token = extract_access_token(request.scope) + session_id = session_manager.find_session_id(access_token) + + if not session_id: + return Response(status_code = HTTP_400_BAD_REQUEST) + + session_context.set_session_id(session_id) + body = await request.json() + frames = body.get('frames', []) + fps = body.get('fps', 30) + + if not frames: + return Response(status_code = HTTP_400_BAD_REQUEST) + + temp_path = state_manager.get_temp_path() + create_directory(temp_path) + output_name = uuid.uuid4().hex + '.mp4' + output_path = os.path.join(temp_path, output_name) + + first_frame = decode_vision_frame(frames[0]) + height, width = first_frame.shape[:2] + fourcc = cv2.VideoWriter.fourcc(*'mp4v') + + def write_frames() -> bool: + writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height)) + + if not writer.isOpened(): + return False + + for frame_b64 in frames: + frame = decode_vision_frame(frame_b64) + + if frame is not None: + if frame.shape[:2] != (height, width): + frame = cv2.resize(frame, (width, height)) + writer.write(frame) + + writer.release() + return True + + success = await asyncio.to_thread(write_frames) + + if success and os.path.exists(output_path): + asset = asset_store.create_asset(session_id, 'target', output_path) + + if asset: + return JSONResponse({ 'asset_id' : asset.get('id') }, status_code = HTTP_201_CREATED) + + return Response(status_code = HTTP_400_BAD_REQUEST) diff --git a/facefusion/apis/endpoints/nodes.py b/facefusion/apis/endpoints/nodes.py index 64cf89e5..6483d154 100644 --- a/facefusion/apis/endpoints/nodes.py +++ b/facefusion/apis/endpoints/nodes.py @@ -1,10 +1,19 @@ +import os +import subprocess +import uuid + +import cv2 import numpy from starlette.requests import Request from starlette.responses import JSONResponse from starlette.status import HTTP_200_OK, HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND, HTTP_500_INTERNAL_SERVER_ERROR -from facefusion import state_manager +from facefusion import session_manager, state_manager +from facefusion.apis import asset_store +from facefusion.apis.session_helper import extract_access_token +from facefusion.filesystem import create_directory from facefusion.node import NODE_REGISTRY, NodeContext, decode_vision_frame, encode_vision_frame +from facefusion.vision import count_video_frame_total, detect_video_fps, read_video_frame NODES_LOADED = False @@ -18,6 +27,7 @@ def ensure_nodes_loaded() -> None: NODES_LOADED = True import facefusion.face_analyser + import facefusion.frame_picker processor_names =\ [ @@ -78,14 +88,26 @@ async def execute_node(request : Request) -> JSONResponse: # Decode inputs based on port types decoded_inputs = {} - input_port_types = { p.name: p.type for p in schema.inputs } + input_port_map = {} + + for port in schema.inputs: + if port.name not in input_port_map: + input_port_map[port.name] = [] + input_port_map[port.name].append(port.type) for field_name, value in raw_inputs.items(): - port_type = input_port_types.get(field_name, '') + port_types = input_port_map.get(field_name, []) - if port_type == 'image' and isinstance(value, str): + if isinstance(value, str) and 'video' in port_types and len(value) < 200: + access_token = extract_access_token(request.scope) + session_id = session_manager.find_session_id(access_token) + asset = asset_store.get_asset(session_id, value) + + if asset: + decoded_inputs[field_name] = asset.get('path') + elif isinstance(value, str) and 'image' in port_types: decoded_inputs[field_name] = decode_vision_frame(value) - elif port_type == 'image_list' and isinstance(value, list): + elif isinstance(value, list) and 'image_list' in port_types: decoded_inputs[field_name] = [ decode_vision_frame(v) for v in value ] else: decoded_inputs[field_name] = value @@ -98,18 +120,106 @@ async def execute_node(request : Request) -> JSONResponse: state_manager.set_item(key, value) try: + # Check if any input is a video path + video_input_name = None + video_path = None + + for field_name, value in decoded_inputs.items(): + if 'video' in input_port_map.get(field_name, []) and isinstance(value, str): + video_input_name = field_name + video_path = value + break + + output_port_map = {} + + for port in schema.outputs: + if port.name not in output_port_map: + output_port_map[port.name] = [] + output_port_map[port.name].append(port.type) + + has_video_output = any('video' in types for types in output_port_map.values()) + + # Video processing: loop all frames through the node + if video_path and has_video_output: + frame_total = count_video_frame_total(video_path) + fps = detect_video_fps(video_path) + + if not frame_total or not fps: + return JSONResponse({ 'message' : 'cannot read video' }, status_code = HTTP_400_BAD_REQUEST) + + first_frame = read_video_frame(video_path, 0) + height, width = first_frame.shape[:2] + temp_path = state_manager.get_temp_path() + create_directory(temp_path) + output_path = os.path.join(temp_path, uuid.uuid4().hex + '.mp4') + + ffmpeg_process = subprocess.Popen( + [ + 'ffmpeg', '-y', + '-f', 'rawvideo', '-pix_fmt', 'bgr24', + '-s', str(width) + 'x' + str(height), + '-r', str(fps), + '-i', 'pipe:0', + '-c:v', 'libx264', '-pix_fmt', 'yuv420p', + '-movflags', '+faststart', + '-preset', 'ultrafast', + output_path + ], stdin = subprocess.PIPE, stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) + + frame_result = {} + + for frame_number in range(frame_total): + frame = read_video_frame(video_path, frame_number) + + if frame is None: + continue + + frame_inputs = dict(decoded_inputs) + frame_inputs[video_input_name] = frame + + frame_result = registered.fn(frame_inputs) + + output_frame = frame + + for out_key, out_value in frame_result.items(): + if isinstance(out_value, numpy.ndarray): + output_frame = out_value + break + + if output_frame.shape[:2] != (height, width): + output_frame = cv2.resize(output_frame, (width, height)) + + ffmpeg_process.stdin.write(output_frame.tobytes()) + + ffmpeg_process.stdin.close() + ffmpeg_process.wait() + + access_token = extract_access_token(request.scope) + session_id = session_manager.find_session_id(access_token) + output_asset = asset_store.create_asset(session_id, 'target', output_path) + response = {} + + # Return image output from last frame for preview + for key, value in frame_result.items(): + if isinstance(value, numpy.ndarray): + response[key] = encode_vision_frame(value) + + # Return video asset ID + for port in schema.outputs: + if port.type == 'video' and output_asset: + response[port.name] = output_asset.get('id') + + return JSONResponse(response, status_code = HTTP_200_OK) + + # Single frame processing result = registered.fn(decoded_inputs) - # Encode outputs based on port types - output_port_types = { p.name: p.type for p in schema.outputs } response = {} for key, value in result.items(): - port_type = output_port_types.get(key, '') - - if port_type == 'image' and isinstance(value, numpy.ndarray): + if isinstance(value, numpy.ndarray): response[key] = encode_vision_frame(value) - elif port_type == 'image_list' and isinstance(value, list): + elif isinstance(value, list) and any(isinstance(v, numpy.ndarray) for v in value): response[key] = [ encode_vision_frame(v) for v in value if isinstance(v, numpy.ndarray) ] else: response[key] = value diff --git a/facefusion/apis/static/index.html b/facefusion/apis/static/index.html index 56628493..bd9796b8 100644 --- a/facefusion/apis/static/index.html +++ b/facefusion/apis/static/index.html @@ -36,6 +36,7 @@ .port { width: 14px; height: 14px; border-radius: 50%; border: 2px solid #0f0f14; cursor: crosshair; transition: transform 0.15s, box-shadow 0.15s; z-index: 10; } .port:hover { transform: scale(1.4); box-shadow: 0 0 10px currentColor; } .port-image { background: #38bdf8; color: #38bdf8; } + .port-video { background: #ef4444; color: #ef4444; } .port-json { background: #f59e0b; color: #f59e0b; } .port-image_list { background: #10b981; color: #10b981; } .connection-line { fill: none; stroke-width: 2.5; stroke-linecap: round; pointer-events: stroke; cursor: pointer; } @@ -178,6 +179,56 @@