diff --git a/facefusion.ini b/facefusion.ini index 4923cc28..8cc9d8e0 100644 --- a/facefusion.ini +++ b/facefusion.ini @@ -58,6 +58,7 @@ output_image_scale = output_audio_encoder = output_audio_quality = output_audio_volume = +output_audio_fps = output_video_encoder = output_video_preset = output_video_quality = diff --git a/facefusion/args_helper.py b/facefusion/args_helper.py index ff0a3988..a93176a1 100644 --- a/facefusion/args_helper.py +++ b/facefusion/args_helper.py @@ -52,6 +52,7 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None: apply_state_item('output_audio_encoder', args.get('output_audio_encoder')) apply_state_item('output_audio_quality', args.get('output_audio_quality')) apply_state_item('output_audio_volume', args.get('output_audio_volume')) + apply_state_item('output_audio_fps', normalize_fps(args.get('output_audio_fps'))) apply_state_item('output_video_encoder', args.get('output_video_encoder')) apply_state_item('output_video_preset', args.get('output_video_preset')) apply_state_item('output_video_quality', args.get('output_video_quality')) diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index b05e8d43..e310b7a1 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -235,8 +235,7 @@ def replace_audio(audio_path : str, output_path : str) -> bool: return run_ffmpeg(commands).returncode == 0 -def merge_video(target_path : str, output_path : str, temp_video_fps : Fps, output_video_resolution : Resolution, trim_frame_start : int, trim_frame_end : int) -> bool: - output_video_fps = state_manager.get_item('output_video_fps') +def merge_video(target_path : str, output_path : str, temp_video_fps : Fps, output_video_fps : Fps, output_video_resolution : Resolution, trim_frame_start : int, trim_frame_end : int) -> bool: output_video_encoder = state_manager.get_item('output_video_encoder') output_video_quality = state_manager.get_item('output_video_quality') output_video_preset = state_manager.get_item('output_video_preset') diff --git a/facefusion/locales.py b/facefusion/locales.py index c67a17b9..f025f65c 100644 --- a/facefusion/locales.py +++ b/facefusion/locales.py @@ -145,6 +145,7 @@ LOCALES : Locales =\ 'output_audio_encoder': 'specify the encoder used for the audio', 'output_audio_quality': 'specify the audio quality which translates to the audio compression', 'output_audio_volume': 'specify the audio volume based on the target video', + 'output_audio_fps': 'specify the fps used when converting audio to video frames', 'output_video_encoder': 'specify the encoder used for the video', 'output_video_preset': 'balance fast video processing and video file size', 'output_video_quality': 'specify the video quality which translates to the video compression', diff --git a/facefusion/program.py b/facefusion/program.py index 4ecd1cdb..54340083 100755 --- a/facefusion/program.py +++ b/facefusion/program.py @@ -700,6 +700,17 @@ def create_output_creation_program() -> ArgumentParser: ], scopes = [ 'api', 'cli' ] ) + capability_store.register_capability_set( + [ + group_output_creation.add_argument( + '--output-audio-fps', + help = translator.get('help.output_audio_fps'), + type = float, + default = config.get_float_value('output_creation', 'output_audio_fps', '25.0') + ) + ], + scopes = [ 'api', 'cli' ] + ) return program diff --git a/facefusion/types.py b/facefusion/types.py index a4b6c3e3..21215d5e 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -435,6 +435,7 @@ StateKey = Literal\ 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', + 'output_audio_fps', 'output_video_encoder', 'output_video_preset', 'output_video_quality', @@ -510,6 +511,7 @@ State = TypedDict('State', 'output_video_quality' : int, 'output_video_scale' : Scale, 'output_video_fps' : float, + 'output_audio_fps' : float, 'processors' : List[str], 'execution_device_ids' : List[int], 'execution_providers' : List[ExecutionProvider], diff --git a/facefusion/workflows/as_frames.py b/facefusion/workflows/as_frames.py index 88c39099..505aeb84 100644 --- a/facefusion/workflows/as_frames.py +++ b/facefusion/workflows/as_frames.py @@ -12,13 +12,13 @@ from facefusion.workflows.core import is_process_stopping def create_temp_frames() -> ErrorCode: - state_manager.set_item('output_video_fps', 25.0) # TODO: set default fps value + output_audio_fps = state_manager.get_item('output_audio_fps') source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths'))) output_image_resolution = scale_resolution(detect_image_resolution(state_manager.get_item('target_path')), state_manager.get_item('output_image_scale')) temp_image_resolution = restrict_image_resolution(state_manager.get_item('target_path'), output_image_resolution) - trim_frame_start, trim_frame_end = restrict_trim_audio_frame(source_audio_path, state_manager.get_item('output_video_fps'), state_manager.get_item('trim_frame_start'), state_manager.get_item('trim_frame_end')) + trim_frame_start, trim_frame_end = restrict_trim_audio_frame(source_audio_path, output_audio_fps, state_manager.get_item('trim_frame_start'), state_manager.get_item('trim_frame_end')) - if ffmpeg.spawn_frames(state_manager.get_item('target_path'), state_manager.get_item('output_path'), temp_image_resolution, state_manager.get_item('output_video_fps'), trim_frame_start, trim_frame_end): + if ffmpeg.spawn_frames(state_manager.get_item('target_path'), state_manager.get_item('output_path'), temp_image_resolution, output_audio_fps, trim_frame_start, trim_frame_end): logger.debug(translator.get('spawning_frames_succeeded'), __name__) else: if is_process_stopping(): diff --git a/facefusion/workflows/core.py b/facefusion/workflows/core.py index 1ba61053..dde4460d 100644 --- a/facefusion/workflows/core.py +++ b/facefusion/workflows/core.py @@ -41,11 +41,12 @@ def analyse_image() -> ErrorCode: def conditional_get_source_audio_frame(frame_number : int) -> AudioFrame: if state_manager.get_item('workflow') in [ 'audio-to-image:frames', 'audio-to-image:video', 'image-to-video' ]: source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths'))) - output_video_fps = state_manager.get_item('output_video_fps') + fps = state_manager.get_item('output_audio_fps') if state_manager.get_item('workflow') == 'image-to-video': - output_video_fps = restrict_video_fps(state_manager.get_item('target_path'), output_video_fps) - source_audio_frame = get_audio_frame(source_audio_path, output_video_fps, frame_number) + fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps')) + + source_audio_frame = get_audio_frame(source_audio_path, fps, frame_number) if numpy.any(source_audio_frame): return source_audio_frame @@ -56,11 +57,12 @@ def conditional_get_source_audio_frame(frame_number : int) -> AudioFrame: def conditional_get_source_voice_frame(frame_number: int) -> AudioFrame: if state_manager.get_item('workflow') in [ 'audio-to-image:frames', 'audio-to-image:video', 'image-to-video' ]: source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths'))) - output_video_fps = state_manager.get_item('output_video_fps') + temp_fps = state_manager.get_item('output_audio_fps') if state_manager.get_item('workflow') == 'image-to-video': - output_video_fps = restrict_video_fps(state_manager.get_item('target_path'), output_video_fps) - source_voice_frame = get_voice_frame(source_audio_path, output_video_fps, frame_number) + temp_fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps')) + + source_voice_frame = get_voice_frame(source_audio_path, temp_fps, frame_number) if numpy.any(source_voice_frame): return source_voice_frame diff --git a/facefusion/workflows/to_video.py b/facefusion/workflows/to_video.py index 43366dd8..ddae07a2 100644 --- a/facefusion/workflows/to_video.py +++ b/facefusion/workflows/to_video.py @@ -39,9 +39,10 @@ def merge_frames() -> ErrorCode: trim_frame_start, trim_frame_end = restrict_trim_frame(len(temp_frame_paths), state_manager.get_item('trim_frame_start'), state_manager.get_item('trim_frame_end')) output_video_resolution = conditional_scale_resolution() temp_video_fps = conditional_restrict_video_fps() + output_fps = conditional_get_output_fps() - logger.info(translator.get('merging_video').format(resolution = pack_resolution(output_video_resolution), fps = state_manager.get_item('output_video_fps')), __name__) - if ffmpeg.merge_video(state_manager.get_item('target_path'), state_manager.get_item('output_path'), temp_video_fps, output_video_resolution, trim_frame_start, trim_frame_end): + logger.info(translator.get('merging_video').format(resolution = pack_resolution(output_video_resolution), fps = output_fps), __name__) + if ffmpeg.merge_video(state_manager.get_item('target_path'), state_manager.get_item('output_path'), temp_video_fps, output_fps, output_video_resolution, trim_frame_start, trim_frame_end): logger.debug(translator.get('merging_video_succeeded'), __name__) else: if is_process_stopping(): @@ -100,6 +101,12 @@ def conditional_clear_video_pool() -> None: def conditional_restrict_video_fps() -> Fps: if state_manager.get_item('workflow') == 'image-to-video': return restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps')) + return conditional_get_output_fps() + + +def conditional_get_output_fps() -> Fps: + if state_manager.get_item('workflow') in [ 'audio-to-image:frames', 'audio-to-image:video' ]: + return state_manager.get_item('output_audio_fps') return state_manager.get_item('output_video_fps') diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index 192d96fb..dc48da54 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -132,7 +132,7 @@ def test_merge_video() -> None: create_temp_directory(state_manager.get_temp_path(), output_path) extract_frames(target_path, output_path, (452, 240), 25.0, 0, 1) - assert merge_video(target_path, output_path, 25.0, (452, 240), 0, 1) is True + assert merge_video(target_path, output_path, 25.0, 25.0, (452, 240), 0, 1) is True clear_temp_directory(state_manager.get_temp_path(), output_path)