diff --git a/.gitignore b/.gitignore index 8ee9a7e..52d5fa1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ __pycache__ +.assets +.datasets .idea +.inputs +.exports +.outputs .vscode diff --git a/arcface_converter/README.md b/arcface_converter/README.md index dbc3e1a..49b33d5 100644 --- a/arcface_converter/README.md +++ b/arcface_converter/README.md @@ -20,44 +20,58 @@ pip install -r requirements.txt ``` -Example -------- +Setup +----- -This example utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap. +This `config.ini` utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap. ``` [preparing.dataset] -dataset_path = datasets/megaface/train.rec +dataset_path = .datasets/megaface/train.rec crop_size = 112 process_limit = 650000 +``` +``` [preparing.model] -source_path = models/arcface_w600k_r50.onnx -target_path = models/arcface_simswap.onnx +source_path = .models/arcface_w600k_r50.onnx +target_path = .models/arcface_simswap.onnx +``` +``` [preparing.input] -directory_path = inputs -source_path = inputs/arcface_w600k_r50.npy -target_path = inputs/arcface_simswap.npy +directory_path = .inputs +source_path = .inputs/arcface_w600k_r50.npy +target_path = .inputs/arcface_simswap.npy +``` +``` [training.loader] split_ratio = 0.8 batch_size = 51200 num_workers = 8 +``` +``` [training.trainer] max_epochs = 4096 +``` +``` [training.output] -directory_path = outputs +directory_path = .outputs file_pattern = arcface_converter_simswap_{epoch:02d}_{val_loss:.4f} +``` +``` [exporting] -directory_path = exports -source_path = outputs/last.ckpt -target_path = exports/arcface_converter_simswap.onnx +directory_path = .exports +source_path = .outputs/last.ckpt +target_path = .exports/arcface_converter_simswap.onnx opset_version = 15 +``` +``` [execution] providers = CUDAExecutionProvider ``` diff --git a/arcface_converter/src/exporting.py b/arcface_converter/src/exporting.py index c5d9693..de551c8 100644 --- 
a/arcface_converter/src/exporting.py +++ b/arcface_converter/src/exporting.py @@ -16,7 +16,7 @@ def export() -> None: opset_version = CONFIG.getint('exporting', 'opset_version') makedirs(directory_path, exist_ok = True) - model = ArcFaceConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu') - model.eval() + arcface_converter_trainer = ArcFaceConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu') + arcface_converter_trainer.eval() input_tensor = torch.randn(1, 512) - torch.onnx.export(model, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version) + torch.onnx.export(arcface_converter_trainer, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version) diff --git a/arcface_converter/src/model.py b/arcface_converter/src/networks/arcface_converter.py similarity index 100% rename from arcface_converter/src/model.py rename to arcface_converter/src/networks/arcface_converter.py diff --git a/arcface_converter/src/preparing.py b/arcface_converter/src/preparing.py index 60151d0..6722f74 100644 --- a/arcface_converter/src/preparing.py +++ b/arcface_converter/src/preparing.py @@ -9,14 +9,14 @@ from mxnet.io import ImageRecordIter from onnxruntime import InferenceSession from tqdm import tqdm -from .typing import Embedding, EmbeddingPairs, VisionFrame +from types import Embedding, EmbeddingPairs, VisionFrame CONFIG = configparser.ConfigParser() CONFIG.read('config.ini') def prepare_crop_vision_frame(crop_vision_frame : VisionFrame) -> VisionFrame: - crop_vision_frame = crop_vision_frame.astype(numpy.float32) / 255 + crop_vision_frame = crop_vision_frame.astype(numpy.float32) / 255.0 crop_vision_frame = (crop_vision_frame - 0.5) * 2 return crop_vision_frame diff --git a/arcface_converter/src/training.py b/arcface_converter/src/training.py index 149bf43..2f5cb74 100644 --- a/arcface_converter/src/training.py +++ 
b/arcface_converter/src/training.py @@ -10,8 +10,8 @@ from pytorch_lightning.tuner.tuning import Tuner from torch import Tensor from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split -from .model import ArcFaceConverter -from .typing import Batch, Loader +from networks.arcface_converter import ArcFaceConverter +from types import Batch, Loader CONFIG = configparser.ConfigParser() CONFIG.read('config.ini') @@ -20,12 +20,12 @@ CONFIG.read('config.ini') class ArcFaceConverterTrainer(pytorch_lightning.LightningModule): def __init__(self) -> None: super(ArcFaceConverterTrainer, self).__init__() - self.model = ArcFaceConverter() + self.arcface_converter = ArcFaceConverter() self.loss_fn = torch.nn.MSELoss() self.lr = 0.001 def forward(self, source_embedding : Tensor) -> Tensor: - return self.model(source_embedding) + return self.arcface_converter(source_embedding) def training_step(self, batch : Batch, batch_index : int) -> Tensor: source_embedding, target_embedding = batch @@ -110,7 +110,7 @@ def create_trainer() -> Trainer: def train() -> None: trainer = create_trainer() training_loader, validation_loader = create_loaders() - model = ArcFaceConverterTrainer() + arcface_converter = ArcFaceConverterTrainer() tuner = Tuner(trainer) - tuner.lr_find(model, training_loader, validation_loader) - trainer.fit(model, training_loader, validation_loader) + tuner.lr_find(arcface_converter, training_loader, validation_loader) + trainer.fit(arcface_converter, training_loader, validation_loader) diff --git a/arcface_converter/src/typing.py b/arcface_converter/src/types.py similarity index 100% rename from arcface_converter/src/typing.py rename to arcface_converter/src/types.py diff --git a/face_swapper/LivePortrait b/face_swapper/LivePortrait index 632da74..1b22029 160000 --- a/face_swapper/LivePortrait +++ b/face_swapper/LivePortrait @@ -1 +1 @@ -Subproject commit 632da7486d2c3fb86663fc44190a09aca4e1a8de +Subproject commit 
1b22029ec07a827b48db2a82e2ced36393556c4a diff --git a/face_swapper/README.md b/face_swapper/README.md index 541ca3b..3125040 100644 --- a/face_swapper/README.md +++ b/face_swapper/README.md @@ -1,15 +1,9 @@ Face Swapper -================= +============ -> Swap one face over another face. +> Face shape and feature aware identity transfer. -![License](https://img.shields.io/badge/license-MIT-green) - - -Preview -------- - -![Preview]() +![License](https://img.shields.io/badge/license-ResearchRAIL--M-red) Installation @@ -23,74 +17,92 @@ pip install -r requirements.txt Example ------- -This example utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap. +This example utilizes the MegaFace dataset to train the Face Swapper model. ``` [preparing.dataset] -dataset_path = datasets/train +dataset_path = .datasets/train folder_pattern = {}/* image_pattern = {}/*.*g same_person_probability = 0.2 +``` +``` [training.loader] batch_size = 24 num_workers = 12 +``` +``` [training.model] -id_embedder_path = assets/models/id_embedder.pt -landmarker_path = assets/models/landmarker.pt -motion_extractor_path = assets/models/motion_extractor.pt +id_embedder_path = .models/id_embedder.pt +landmarker_path = .models/landmarker.pt +motion_extractor_path = .models/motion_extractor.pt +``` +``` [training.model.generator] num_blocks = 2 id_channels = 512 +``` +``` [training.model.discriminator] input_channels = 3 num_filters = 64 num_layers = 5 num_discriminators = 3 kernel_size = 4 +``` +``` [training.losses] weight_adversarial = 1 weight_id = 20 weight_attribute = 10 weight_reconstruction = 10 weight_pose = 100 +``` +``` [training.trainer] max_epochs = 50 learning_rate = 0.0004 precision = 16-mixed automatic_optimization = false +``` +``` [training.output] -checkpoint_path = outputs/last.ckpt -directory_path = outputs +directory_path = .outputs +file_path = .outputs/last.ckpt file_pattern = 'checkpoint-{epoch}-{step}-{l_G:.4f}-{l_D:.4f}' preview_frequency = 250 
validation_frequency = 1000 +``` +``` [exporting] -directory_path = export -source_path = outputs/last.ckpt -target_path = export/face_swapper.onnx +directory_path = .exports +source_path = .outputs/last.ckpt +target_path = .exports/face_swapper.onnx opset_version = 15 +``` -[inference] -generator_path = outputs/last.ckpt -id_embedder_path = assets/models/id_embedder.pt -source_path = assets/images/source.jpg -target_path = assets/models/target.jpg -output_path = outputs/output.jpg +``` +[inferencing] +generator_path = .outputs/last.ckpt +id_embedder_path = .models/id_embedder.pt +source_path = .assets/source.jpg +target_path = .assets/target.jpg +output_path = .outputs/output.jpg ``` Training -------- -Train the Face swapper model. +Train the Face Swapper model. ``` python train.py @@ -105,3 +117,13 @@ Export the model to ONNX. ``` python export.py ``` + + +Inferencing +----------- + +Run inference with the model. + +``` +python infer.py +``` diff --git a/face_swapper/config.ini b/face_swapper/config.ini index 66dcdc1..6211385 100644 --- a/face_swapper/config.ini +++ b/face_swapper/config.ini @@ -38,8 +38,8 @@ precision = automatic_optimization = [training.output] -checkpoint_path = directory_path = +file_path = file_pattern = preview_frequency = validation_frequency = @@ -50,7 +50,7 @@ source_path = target_path = opset_version = -[inference] +[inferencing] generator_path = id_embedder_path = source_path = diff --git a/face_swapper/src/data_loader.py b/face_swapper/src/data_loader.py index 31df9ae..d648bdd 100644 --- a/face_swapper/src/data_loader.py +++ b/face_swapper/src/data_loader.py @@ -7,8 +7,8 @@ import torch import torchvision.transforms as transforms from torch.utils.data import TensorDataset -from .helper import read_image -from .typing import Batch, ImagePathList, ImagePathSet +from helper import read_image +from types import Batch, ImagePathList, ImagePathSet class DataLoaderVGG(TensorDataset): @@ -33,9 +33,9 @@ class DataLoaderVGG(TensorDataset): transform = 
transforms.Compose( [ transforms.ToPILImage(), - transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BICUBIC), - transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), - transforms.RandomAffine(4, translate=(0.01, 0.01), scale=(0.98, 1.02), shear=(1, 1), fill=0), + transforms.Resize((256, 256), interpolation = transforms.InterpolationMode.BICUBIC), + transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.1), + transforms.RandomAffine(4, translate = (0.01, 0.01), scale = (0.98, 1.02), shear = (1, 1)), transforms.ToTensor(), transforms.Lambda(lambda temp_tensor : temp_tensor[[2, 1, 0], :, :]), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) diff --git a/face_swapper/src/discriminator.py b/face_swapper/src/discriminator.py index 9e15f37..47c8967 100644 --- a/face_swapper/src/discriminator.py +++ b/face_swapper/src/discriminator.py @@ -6,7 +6,7 @@ import torch.nn import torch.nn as nn from torch import Tensor -from .typing import DiscriminatorOutputs +from types import DiscriminatorOutputs class NLayerDiscriminator(nn.Module): diff --git a/face_swapper/src/generator.py b/face_swapper/src/generator.py index d48b5e1..db312ab 100644 --- a/face_swapper/src/generator.py +++ b/face_swapper/src/generator.py @@ -4,7 +4,7 @@ import torch import torch.nn as nn from torch import Tensor -from .typing import SourceEmbedding, TargetAttributes, VisionTensor +from types import SourceEmbedding, TargetAttributes, VisionTensor class AdaptiveEmbeddingIntegrationNetwork(nn.Module): diff --git a/face_swapper/src/helper.py b/face_swapper/src/helper.py index ce77c42..b8a93c6 100644 --- a/face_swapper/src/helper.py +++ b/face_swapper/src/helper.py @@ -4,7 +4,7 @@ import cv2 import numpy import torch -from .typing import IdEmbedder, IdEmbedding, Padding, Tensor, VisionFrame, VisionTensor +from types import IdEmbedder, IdEmbedding, Padding, Tensor, VisionFrame, VisionTensor def is_windows() -> bool: @@ -20,7 +20,7 @@ 
def read_image(image_path : str) -> VisionFrame: def convert_to_vision_tensor(vision_frame : VisionFrame) -> VisionTensor: vision_tensor = torch.from_numpy(vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32)) - vision_tensor = vision_tensor / 255 + vision_tensor = vision_tensor / 255.0 vision_tensor = (vision_tensor - 0.5) * 2 vision_tensor = vision_tensor.unsqueeze(0) return vision_tensor diff --git a/face_swapper/src/inferencing.py b/face_swapper/src/inferencing.py index 56ae4bc..8284f37 100644 --- a/face_swapper/src/inferencing.py +++ b/face_swapper/src/inferencing.py @@ -3,9 +3,9 @@ import configparser import cv2 import torch -from .generator import AdaptiveEmbeddingIntegrationNetwork -from .helper import calc_id_embedding, convert_to_vision_frame, convert_to_vision_tensor, read_image -from .typing import Generator, IdEmbedder, VisionFrame +from generator import AdaptiveEmbeddingIntegrationNetwork +from helper import calc_id_embedding, convert_to_vision_frame, convert_to_vision_tensor, read_image +from types import Generator, IdEmbedder, VisionFrame CONFIG = configparser.ConfigParser() CONFIG.read('config.ini') @@ -21,17 +21,17 @@ def run_swap(generator : Generator, id_embedder : IdEmbedder, source_vision_fram def infer() -> None: - generator_path = CONFIG.get('inference', 'generator_path') - id_embedder_path = CONFIG.get('inference', 'id_embedder_path') - source_path = CONFIG.get('inference', 'source_path') - target_path = CONFIG.get('inference', 'target_path') - output_path = CONFIG.get('inference', 'output_path') + generator_path = CONFIG.get('inferencing', 'generator_path') + id_embedder_path = CONFIG.get('inferencing', 'id_embedder_path') + source_path = CONFIG.get('inferencing', 'source_path') + target_path = CONFIG.get('inferencing', 'target_path') + output_path = CONFIG.get('inferencing', 'output_path') - state_dict = torch.load(generator_path, map_location='cpu').get('state_dict').get('generator') + state_dict = torch.load(generator_path, 
map_location = 'cpu').get('state_dict').get('generator') generator = AdaptiveEmbeddingIntegrationNetwork(512, 2) generator.load_state_dict(state_dict) generator.eval() - id_embedder = torch.jit.load(id_embedder_path, map_location='cpu') # type:ignore[no-untyped-call] + id_embedder = torch.jit.load(id_embedder_path, map_location = 'cpu') # type:ignore[no-untyped-call] id_embedder.eval() source_vision_frame = read_image(source_path) diff --git a/arcface_converter/src/__init__.py b/face_swapper/src/losses/.gitkeep similarity index 100% rename from arcface_converter/src/__init__.py rename to face_swapper/src/losses/.gitkeep diff --git a/face_swapper/src/networks/.gitkeep b/face_swapper/src/networks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/face_swapper/src/training.py b/face_swapper/src/training.py index 33534aa..1c218f7 100644 --- a/face_swapper/src/training.py +++ b/face_swapper/src/training.py @@ -12,11 +12,11 @@ from pytorch_msssim import ssim from torch import Tensor from torch.utils.data import DataLoader -from .data_loader import DataLoaderVGG -from .discriminator import MultiscaleDiscriminator -from .generator import AdaptiveEmbeddingIntegrationNetwork -from .helper import calc_id_embedding, hinge_fake_loss, hinge_real_loss -from .typing import Batch, DiscriminatorLossSet, DiscriminatorOutputs, FaceLandmark203, GeneratorLossSet, LossTensor, SourceEmbedding, SwapAttributes, TargetAttributes, VisionTensor +from data_loader import DataLoaderVGG +from discriminator import MultiscaleDiscriminator +from generator import AdaptiveEmbeddingIntegrationNetwork +from helper import calc_id_embedding, hinge_fake_loss, hinge_real_loss +from types import Batch, DiscriminatorLossSet, DiscriminatorOutputs, FaceLandmark203, GeneratorLossSet, LossTensor, SourceEmbedding, SwapAttributes, TargetAttributes, VisionTensor CONFIG = configparser.ConfigParser() CONFIG.read('config.ini') @@ -237,16 +237,16 @@ def create_trainer() -> Trainer: def train() -> None: - 
batch_size = CONFIG.getint('training.loader', 'batch_size') - num_workers = CONFIG.getint('training.loader', 'num_workers') - checkpoint_path = CONFIG.get('training.output', 'checkpoint_path') dataset_path = CONFIG.get('preparing.dataset', 'dataset_path') dataset_image_pattern = CONFIG.get('preparing.dataset', 'image_pattern') dataset_directory_pattern = CONFIG.get('preparing.dataset', 'directory_pattern') same_person_probability = CONFIG.getfloat('preparing.dataset', 'same_person_probability') + batch_size = CONFIG.getint('training.loader', 'batch_size') + num_workers = CONFIG.getint('training.loader', 'num_workers') + file_path = CONFIG.get('training.output', 'file_path') dataset = DataLoaderVGG(dataset_path, dataset_image_pattern, dataset_directory_pattern, same_person_probability) data_loader = DataLoader(dataset, batch_size = batch_size, shuffle = True, num_workers = num_workers, drop_last = True, pin_memory = True, persistent_workers = True) face_swap_model = FaceSwapperTrain() trainer = create_trainer() - trainer.fit(face_swap_model, data_loader, ckpt_path = checkpoint_path) + trainer.fit(face_swap_model, data_loader, ckpt_path = file_path) diff --git a/face_swapper/src/typing.py b/face_swapper/src/types.py similarity index 100% rename from face_swapper/src/typing.py rename to face_swapper/src/types.py