Mixed bag of cleanups

This commit is contained in:
henryruhs
2025-02-05 19:00:09 +01:00
parent 5892460c3d
commit 9c15f584aa
19 changed files with 123 additions and 82 deletions
+5
View File
@@ -1,3 +1,8 @@
__pycache__
.assets
.datasets
.idea
.inputs
.exports
.outputs
.vscode
+27 -13
View File
@@ -20,44 +20,58 @@ pip install -r requirements.txt
```
Example
-------
Setup
-----
This example utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap.
This `config.ini` utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap.
```
[preparing.dataset]
dataset_path = datasets/megaface/train.rec
dataset_path = .datasets/megaface/train.rec
crop_size = 112
process_limit = 650000
```
```
[preparing.model]
source_path = models/arcface_w600k_r50.onnx
target_path = models/arcface_simswap.onnx
source_path = .models/arcface_w600k_r50.onnx
target_path = .models/arcface_simswap.onnx
```
```
[preparing.input]
directory_path = inputs
source_path = inputs/arcface_w600k_r50.npy
target_path = inputs/arcface_simswap.npy
directory_path = .inputs
source_path = .inputs/arcface_w600k_r50.npy
target_path = .inputs/arcface_simswap.npy
```
```
[training.loader]
split_ratio = 0.8
batch_size = 51200
num_workers = 8
```
```
[training.trainer]
max_epochs = 4096
```
```
[training.output]
directory_path = outputs
directory_path = .outputs
file_pattern = arcface_converter_simswap_{epoch:02d}_{val_loss:.4f}
```
```
[exporting]
directory_path = exports
source_path = outputs/last.ckpt
target_path = exports/arcface_converter_simswap.onnx
directory_path = .exports
source_path = .outputs/last.ckpt
target_path = .exports/arcface_converter_simswap.onnx
opset_version = 15
```
```
[execution]
providers = CUDAExecutionProvider
```
+3 -3
View File
@@ -16,7 +16,7 @@ def export() -> None:
opset_version = CONFIG.getint('exporting', 'opset_version')
makedirs(directory_path, exist_ok = True)
model = ArcFaceConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu')
model.eval()
arcface_converter_trainer = ArcFaceConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu')
arcface_converter_trainer.eval()
input_tensor = torch.randn(1, 512)
torch.onnx.export(model, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version)
torch.onnx.export(arcface_converter_trainer, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version)
+2 -2
View File
@@ -9,14 +9,14 @@ from mxnet.io import ImageRecordIter
from onnxruntime import InferenceSession
from tqdm import tqdm
from .typing import Embedding, EmbeddingPairs, VisionFrame
from types import Embedding, EmbeddingPairs, VisionFrame
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
def prepare_crop_vision_frame(crop_vision_frame : VisionFrame) -> VisionFrame:
crop_vision_frame = crop_vision_frame.astype(numpy.float32) / 255
crop_vision_frame = crop_vision_frame.astype(numpy.float32) / 255.0
crop_vision_frame = (crop_vision_frame - 0.5) * 2
return crop_vision_frame
+7 -7
View File
@@ -10,8 +10,8 @@ from pytorch_lightning.tuner.tuning import Tuner
from torch import Tensor
from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split
from .model import ArcFaceConverter
from .typing import Batch, Loader
from networks.arcface_converter import ArcFaceConverter
from types import Batch, Loader
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
@@ -20,12 +20,12 @@ CONFIG.read('config.ini')
class ArcFaceConverterTrainer(pytorch_lightning.LightningModule):
def __init__(self) -> None:
super(ArcFaceConverterTrainer, self).__init__()
self.model = ArcFaceConverter()
self.arcface_converter = ArcFaceConverter()
self.loss_fn = torch.nn.MSELoss()
self.lr = 0.001
def forward(self, source_embedding : Tensor) -> Tensor:
return self.model(source_embedding)
return self.arcface_converter(source_embedding)
def training_step(self, batch : Batch, batch_index : int) -> Tensor:
source_embedding, target_embedding = batch
@@ -110,7 +110,7 @@ def create_trainer() -> Trainer:
def train() -> None:
trainer = create_trainer()
training_loader, validation_loader = create_loaders()
model = ArcFaceConverterTrainer()
arcface_converter = ArcFaceConverterTrainer()
tuner = Tuner(trainer)
tuner.lr_find(model, training_loader, validation_loader)
trainer.fit(model, training_loader, validation_loader)
tuner.lr_find(arcface_converter, training_loader, validation_loader)
trainer.fit(arcface_converter, training_loader, validation_loader)
Submodule face_swapper/LivePortrait updated: 632da7486d...1b22029ec0
+48 -26
View File
@@ -1,15 +1,9 @@
Face Swapper
=================
============
> Swap one face over another face.
> Face shape and feature aware identity transfer.
![License](https://img.shields.io/badge/license-MIT-green)
Preview
-------
![Preview]()
![License](https://img.shields.io/badge/license-ResearchRAIL--M-red)
Installation
@@ -23,74 +17,92 @@ pip install -r requirements.txt
Example
-------
This example utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap.
This example utilizes the MegaFace dataset to train the Face Swapper model.
```
[preparing.dataset]
dataset_path = datasets/train
dataset_path = .datasets/train
directory_pattern = {}/*
image_pattern = {}/*.*g
same_person_probability = 0.2
```
```
[training.loader]
batch_size = 24
num_workers = 12
```
```
[training.model]
id_embedder_path = assets/models/id_embedder.pt
landmarker_path = assets/models/landmarker.pt
motion_extractor_path = assets/models/motion_extractor.pt
id_embedder_path = .models/id_embedder.pt
landmarker_path = .models/landmarker.pt
motion_extractor_path = .models/motion_extractor.pt
```
```
[training.model.generator]
num_blocks = 2
id_channels = 512
```
```
[training.model.discriminator]
input_channels = 3
num_filters = 64
num_layers = 5
num_discriminators = 3
kernel_size = 4
```
```
[training.losses]
weight_adversarial = 1
weight_id = 20
weight_attribute = 10
weight_reconstruction = 10
weight_pose = 100
```
```
[training.trainer]
max_epochs = 50
learning_rate = 0.0004
precision = 16-mixed
automatic_optimization = false
```
```
[training.output]
checkpoint_path = outputs/last.ckpt
directory_path = outputs
directory_path = .outputs
file_path = .outputs/last.ckpt
file_pattern = checkpoint-{epoch}-{step}-{l_G:.4f}-{l_D:.4f}
preview_frequency = 250
validation_frequency = 1000
```
```
[exporting]
directory_path = export
source_path = outputs/last.ckpt
target_path = export/face_swapper.onnx
directory_path = .exports
source_path = .outputs/last.ckpt
target_path = .exports/face_swapper.onnx
opset_version = 15
```
[inference]
generator_path = outputs/last.ckpt
id_embedder_path = assets/models/id_embedder.pt
source_path = assets/images/source.jpg
target_path = assets/models/target.jpg
output_path = outputs/output.jpg
```
[inferencing]
generator_path = .outputs/last.ckpt
id_embedder_path = .models/id_embedder.pt
source_path = .assets/source.jpg
target_path = .assets/target.jpg
output_path = .outputs/output.jpg
```
Training
--------
Train the Face swapper model.
Train the Face Swapper model.
```
python train.py
@@ -105,3 +117,13 @@ Export the model to ONNX.
```
python export.py
```
Inferencing
-----------
Run inference with the model.
```
python infer.py
```
+2 -2
View File
@@ -38,8 +38,8 @@ precision =
automatic_optimization =
[training.output]
checkpoint_path =
directory_path =
file_path =
file_pattern =
preview_frequency =
validation_frequency =
@@ -50,7 +50,7 @@ source_path =
target_path =
opset_version =
[inference]
[inferencing]
generator_path =
id_embedder_path =
source_path =
+5 -5
View File
@@ -7,8 +7,8 @@ import torch
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
from .helper import read_image
from .typing import Batch, ImagePathList, ImagePathSet
from helper import read_image
from types import Batch, ImagePathList, ImagePathSet
class DataLoaderVGG(TensorDataset):
@@ -33,9 +33,9 @@ class DataLoaderVGG(TensorDataset):
transform = transforms.Compose(
[
transforms.ToPILImage(),
transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BICUBIC),
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
transforms.RandomAffine(4, translate=(0.01, 0.01), scale=(0.98, 1.02), shear=(1, 1), fill=0),
transforms.Resize((256, 256), interpolation = transforms.InterpolationMode.BICUBIC),
transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.1),
transforms.RandomAffine(4, translate = (0.01, 0.01), scale = (0.98, 1.02), shear = (1, 1)),
transforms.ToTensor(),
transforms.Lambda(lambda temp_tensor : temp_tensor[[2, 1, 0], :, :]),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+1 -1
View File
@@ -6,7 +6,7 @@ import torch.nn
import torch.nn as nn
from torch import Tensor
from .typing import DiscriminatorOutputs
from types import DiscriminatorOutputs
class NLayerDiscriminator(nn.Module):
+1 -1
View File
@@ -4,7 +4,7 @@ import torch
import torch.nn as nn
from torch import Tensor
from .typing import SourceEmbedding, TargetAttributes, VisionTensor
from types import SourceEmbedding, TargetAttributes, VisionTensor
class AdaptiveEmbeddingIntegrationNetwork(nn.Module):
+2 -2
View File
@@ -4,7 +4,7 @@ import cv2
import numpy
import torch
from .typing import IdEmbedder, IdEmbedding, Padding, Tensor, VisionFrame, VisionTensor
from types import IdEmbedder, IdEmbedding, Padding, Tensor, VisionFrame, VisionTensor
def is_windows() -> bool:
@@ -20,7 +20,7 @@ def read_image(image_path : str) -> VisionFrame:
def convert_to_vision_tensor(vision_frame : VisionFrame) -> VisionTensor:
vision_tensor = torch.from_numpy(vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32))
vision_tensor = vision_tensor / 255
vision_tensor = vision_tensor / 255.0
vision_tensor = (vision_tensor - 0.5) * 2
vision_tensor = vision_tensor.unsqueeze(0)
return vision_tensor
+10 -10
View File
@@ -3,9 +3,9 @@ import configparser
import cv2
import torch
from .generator import AdaptiveEmbeddingIntegrationNetwork
from .helper import calc_id_embedding, convert_to_vision_frame, convert_to_vision_tensor, read_image
from .typing import Generator, IdEmbedder, VisionFrame
from generator import AdaptiveEmbeddingIntegrationNetwork
from helper import calc_id_embedding, convert_to_vision_frame, convert_to_vision_tensor, read_image
from types import Generator, IdEmbedder, VisionFrame
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
@@ -21,17 +21,17 @@ def run_swap(generator : Generator, id_embedder : IdEmbedder, source_vision_fram
def infer() -> None:
generator_path = CONFIG.get('inference', 'generator_path')
id_embedder_path = CONFIG.get('inference', 'id_embedder_path')
source_path = CONFIG.get('inference', 'source_path')
target_path = CONFIG.get('inference', 'target_path')
output_path = CONFIG.get('inference', 'output_path')
generator_path = CONFIG.get('inferencing', 'generator_path')
id_embedder_path = CONFIG.get('inferencing', 'id_embedder_path')
source_path = CONFIG.get('inferencing', 'source_path')
target_path = CONFIG.get('inferencing', 'target_path')
output_path = CONFIG.get('inferencing', 'output_path')
state_dict = torch.load(generator_path, map_location='cpu').get('state_dict').get('generator')
state_dict = torch.load(generator_path, map_location = 'cpu').get('state_dict').get('generator')
generator = AdaptiveEmbeddingIntegrationNetwork(512, 2)
generator.load_state_dict(state_dict)
generator.eval()
id_embedder = torch.jit.load(id_embedder_path, map_location='cpu') # type:ignore[no-untyped-call]
id_embedder = torch.jit.load(id_embedder_path, map_location = 'cpu') # type:ignore[no-untyped-call]
id_embedder.eval()
source_vision_frame = read_image(source_path)
View File
+9 -9
View File
@@ -12,11 +12,11 @@ from pytorch_msssim import ssim
from torch import Tensor
from torch.utils.data import DataLoader
from .data_loader import DataLoaderVGG
from .discriminator import MultiscaleDiscriminator
from .generator import AdaptiveEmbeddingIntegrationNetwork
from .helper import calc_id_embedding, hinge_fake_loss, hinge_real_loss
from .typing import Batch, DiscriminatorLossSet, DiscriminatorOutputs, FaceLandmark203, GeneratorLossSet, LossTensor, SourceEmbedding, SwapAttributes, TargetAttributes, VisionTensor
from data_loader import DataLoaderVGG
from discriminator import MultiscaleDiscriminator
from generator import AdaptiveEmbeddingIntegrationNetwork
from helper import calc_id_embedding, hinge_fake_loss, hinge_real_loss
from types import Batch, DiscriminatorLossSet, DiscriminatorOutputs, FaceLandmark203, GeneratorLossSet, LossTensor, SourceEmbedding, SwapAttributes, TargetAttributes, VisionTensor
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
@@ -237,16 +237,16 @@ def create_trainer() -> Trainer:
def train() -> None:
batch_size = CONFIG.getint('training.loader', 'batch_size')
num_workers = CONFIG.getint('training.loader', 'num_workers')
checkpoint_path = CONFIG.get('training.output', 'checkpoint_path')
dataset_path = CONFIG.get('preparing.dataset', 'dataset_path')
dataset_image_pattern = CONFIG.get('preparing.dataset', 'image_pattern')
dataset_directory_pattern = CONFIG.get('preparing.dataset', 'directory_pattern')
same_person_probability = CONFIG.getfloat('preparing.dataset', 'same_person_probability')
batch_size = CONFIG.getint('training.loader', 'batch_size')
num_workers = CONFIG.getint('training.loader', 'num_workers')
file_path = CONFIG.get('training.output', 'file_path')
dataset = DataLoaderVGG(dataset_path, dataset_image_pattern, dataset_directory_pattern, same_person_probability)
data_loader = DataLoader(dataset, batch_size = batch_size, shuffle = True, num_workers = num_workers, drop_last = True, pin_memory = True, persistent_workers = True)
face_swap_model = FaceSwapperTrain()
trainer = create_trainer()
trainer.fit(face_swap_model, data_loader, ckpt_path = checkpoint_path)
trainer.fit(face_swap_model, data_loader, ckpt_path = file_path)