Rename ArcFace Converter to Embedding Converter, Add EmbeddingDataset, Add learning rate to config

This commit is contained in:
henryruhs
2025-02-12 12:19:14 +01:00
parent 62a69cddd2
commit 1b6e7a6ca5
23 changed files with 70 additions and 66 deletions

Before

Width:  |  Height:  |  Size: 1.3 MiB

After

Width:  |  Height:  |  Size: 1.3 MiB

Before

Width:  |  Height:  |  Size: 5.2 MiB

After

Width:  |  Height:  |  Size: 5.2 MiB

-3
View File
@@ -1,3 +0,0 @@
MIT license
Copyright (c) 2024 Henry Ruhs
+3
View File
@@ -0,0 +1,3 @@
MIT license
Copyright (c) 2025 Henry Ruhs
@@ -1,7 +1,7 @@
ArcFace Converter
=================
Embedding Converter
===================
> Convert face embeddings between various ArcFace models.
> Convert face embeddings between various models.
![License](https://img.shields.io/badge/license-MIT-green)
@@ -9,7 +9,7 @@ ArcFace Converter
Preview
-------
![Preview](https://raw.githubusercontent.com/facefusion/facefusion-labs/master/.github/preview_arcface_converter.png?sanitize=true)
![Preview](https://raw.githubusercontent.com/facefusion/facefusion-labs/next/.github/previews/embedding_converter.png?sanitize=true)
Installation
@@ -23,7 +23,7 @@ pip install -r requirements.txt
Setup
-----
This `config.ini` utilizes the MegaFace dataset to train an ArcFace Converter for SimSwap.
This `config.ini` utilizes the MegaFace dataset to train the Embedding Converter for SimSwap.
```
[preparing.dataset]
@@ -54,6 +54,7 @@ num_workers = 8
```
[training.trainer]
learning_rate = 0.001
max_epochs = 4096
```
@@ -80,7 +81,7 @@ providers = CUDAExecutionProvider
Preparing
---------
Prepare the face embedding pairs.
Prepare the embedding pairs.
```
python prepare.py
@@ -90,7 +91,7 @@ python prepare.py
Training
--------
Train the ArcFace converter model.
Train the Embedding Converter model.
```
python train.py
@@ -18,6 +18,7 @@ batch_size =
num_workers =
[training.trainer]
learning_rate =
max_epochs =
[training.output]
@@ -3,7 +3,7 @@ from os import makedirs
import torch
from .training import ArcFaceConverterTrainer
from .training import EmbeddingConverterTrainer
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
@@ -16,7 +16,7 @@ def export() -> None:
opset_version = CONFIG.getint('exporting', 'opset_version')
makedirs(directory_path, exist_ok = True)
arcface_converter_trainer = ArcFaceConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu')
arcface_converter_trainer.eval()
embedding_converter_trainer = EmbeddingConverterTrainer.load_from_checkpoint(source_path, map_location = 'cpu')
embedding_converter_trainer.eval()
input_tensor = torch.randn(1, 512)
torch.onnx.export(arcface_converter_trainer, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version)
torch.onnx.export(embedding_converter_trainer, input_tensor, target_path, input_names = [ 'input' ], output_names = [ 'output' ], opset_version = opset_version)
@@ -3,9 +3,9 @@ import torch.nn as nn
from torch import Tensor
class ArcFaceConverter(nn.Module):
class EmbeddingConverter(nn.Module):
def __init__(self) -> None:
super(ArcFaceConverter, self).__init__()
super(EmbeddingConverter, self).__init__()
self.fc1 = nn.Linear(512, 1024)
self.fc2 = nn.Linear(1024, 2048)
self.fc3 = nn.Linear(2048, 1024)
@@ -9,7 +9,7 @@ from mxnet.io import ImageRecordIter
from onnxruntime import InferenceSession
from tqdm import tqdm
from .types import Embedding, VisionFrame
from .types import Embedding, EmbeddingDataset, VisionFrame
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
@@ -35,9 +35,9 @@ def forward(inference_session : InferenceSession, crop_vision_frame : VisionFram
return embedding
def process_embeddings(dataset_reader : ImageRecordIter, source_inference_session : InferenceSession, target_inference_session : InferenceSession) -> Embedding:
def create_embedding_dataset(dataset_reader : ImageRecordIter, source_inference_session : InferenceSession, target_inference_session : InferenceSession) -> EmbeddingDataset:
dataset_process_limit = CONFIG.getint('preparing.dataset', 'process_limit')
embeddings = []
embedding_pairs = []
with tqdm(total = dataset_process_limit) as progress:
for batch in dataset_reader:
@@ -45,13 +45,13 @@ def process_embeddings(dataset_reader : ImageRecordIter, source_inference_sessio
crop_vision_frame = prepare_crop_vision_frame(crop_vision_frame)
source_embedding = forward(source_inference_session, crop_vision_frame)
target_embedding = forward(target_inference_session, crop_vision_frame)
embeddings.append([ source_embedding, target_embedding ])
embedding_pairs.append([ source_embedding, target_embedding ])
progress.update()
if progress.n == dataset_process_limit:
return numpy.concatenate(embeddings, axis = 1).T
return numpy.concatenate(embedding_pairs, axis = 1).T
return numpy.concatenate(embeddings, axis = 1).T
return numpy.concatenate(embedding_pairs, axis = 1).T
def prepare() -> None:
@@ -74,6 +74,6 @@ def prepare() -> None:
)
source_inference_session = create_inference_session(model_source_path, execution_providers)
target_inference_session = create_inference_session(model_target_path, execution_providers)
embedding_pairs = process_embeddings(dataset_reader, source_inference_session, target_inference_session)
numpy.save(input_source_path, embedding_pairs[..., 0].T)
numpy.save(input_target_path, embedding_pairs[..., 1].T)
embedding_dataset = create_embedding_dataset(dataset_reader, source_inference_session, target_inference_session)
numpy.save(input_source_path, embedding_dataset[..., 0].T)
numpy.save(input_target_path, embedding_dataset[..., 1].T)
@@ -10,39 +10,39 @@ from pytorch_lightning.tuner.tuning import Tuner
from torch import Tensor
from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split
from .models.arcface_converter import ArcFaceConverter
from .models.embedding_converter import EmbeddingConverter
from .types import Batch, Loader
CONFIG = configparser.ConfigParser()
CONFIG.read('config.ini')
class ArcFaceConverterTrainer(pytorch_lightning.LightningModule):
class EmbeddingConverterTrainer(pytorch_lightning.LightningModule):
def __init__(self) -> None:
super(ArcFaceConverterTrainer, self).__init__()
self.arcface_converter = ArcFaceConverter()
self.loss_fn = torch.nn.MSELoss()
self.lr = 0.001
super(EmbeddingConverterTrainer, self).__init__()
self.embedding_converter = EmbeddingConverter()
self.mse_loss = torch.nn.MSELoss()
def forward(self, source_embedding : Tensor) -> Tensor:
return self.arcface_converter(source_embedding)
return self.embedding_converter(source_embedding)
def training_step(self, batch : Batch, batch_index : int) -> Tensor:
source_embedding, target_embedding = batch
output_embedding = self(source_embedding)
loss = self.loss_fn(output_embedding, target_embedding)
self.log('train_loss', loss, prog_bar = True, logger = True)
return loss
source, target = batch
output = self(source)
loss_training = self.mse_loss(output, target)
self.log('loss_training', loss_training, prog_bar = True)
return loss_training
def validation_step(self, batch : Batch, batch_index : int) -> Tensor:
source_embedding, target_embedding = batch
output_embedding = self(source_embedding)
loss = self.loss_fn(output_embedding, target_embedding)
self.log('val_loss', loss, prog_bar = True, logger = True)
return loss
source, target = batch
output = self(source)
loss_validation = self.mse_loss(output, target)
self.log('loss_validation', loss_validation, prog_bar = True)
return loss_validation
def configure_optimizers(self) -> Any:
optimizer = torch.optim.Adam(self.parameters(), lr = self.lr)
learning_rate = CONFIG.getfloat('training.trainer', 'learning_rate')
optimizer = torch.optim.Adam(self.parameters(), lr = learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
return\
@@ -110,7 +110,7 @@ def create_trainer() -> Trainer:
def train() -> None:
trainer = create_trainer()
training_loader, validation_loader = create_loaders()
arcface_converter = ArcFaceConverterTrainer()
embedding_converter = EmbeddingConverterTrainer()
tuner = Tuner(trainer)
tuner.lr_find(arcface_converter, training_loader, validation_loader)
trainer.fit(arcface_converter, training_loader, validation_loader)
tuner.lr_find(embedding_converter, training_loader, validation_loader)
trainer.fit(embedding_converter, training_loader, validation_loader)
@@ -8,6 +8,7 @@ Batch = Tuple[Tensor, Tensor]
Loader = DataLoader[Tuple[Tensor, ...]]
Embedding = NDArray[Any]
EmbeddingDataset = NDArray[Embedding]
FaceLandmark5 = NDArray[Any]
VisionFrame = NDArray[Any]
+1 -1
View File
@@ -1,3 +1,3 @@
ResearchRAIL-M license
Copyright (c) 2024 Henry Ruhs
Copyright (c) 2025 Henry Ruhs
Submodule face_swapper/LivePortrait updated: 1b22029ec0...e09d1c9f1c
+2 -2
View File
@@ -9,7 +9,7 @@ Face Swapper
Preview
-------
![Preview](https://raw.githubusercontent.com/facefusion/facefusion-labs/next/.github/preview_face_swapper.png?sanitize=true)
![Preview](https://raw.githubusercontent.com/facefusion/facefusion-labs/next/.github/previews/face_swapper.png?sanitize=true)
Installation
@@ -72,8 +72,8 @@ weight_pose = 100
```
[training.trainer]
max_epochs = 50
learning_rate = 0.0004
max_epochs = 50
precision = 16-mixed
automatic_optimization = false
```
+1 -1
View File
@@ -32,8 +32,8 @@ weight_reconstruction =
weight_pose =
[training.trainer]
max_epochs =
learning_rate =
max_epochs =
precision =
automatic_optimization =
+7 -6
View File
@@ -34,12 +34,13 @@ class FaceSwapperLoss:
weight_reconstruction = CONFIG.getfloat('training.losses', 'weight_reconstruction')
weight_pose = CONFIG.getfloat('training.losses', 'weight_pose')
weight_gaze = CONFIG.getfloat('training.losses', 'weight_gaze')
generator_loss_set = {}
generator_loss_set['loss_adversarial'] = self.calc_adversarial_loss(discriminator_outputs)
generator_loss_set['loss_id'] = self.calc_id_loss(source_tensor, swap_tensor)
generator_loss_set['loss_attribute'] = self.calc_attribute_loss(target_attributes, swap_attributes)
generator_loss_set['loss_reconstruction'] = self.calc_reconstruction_loss(swap_tensor, target_tensor, is_same_person)
generator_loss_set =\
{
'loss_adversarial': self.calc_adversarial_loss(discriminator_outputs),
'loss_id': self.calc_id_loss(source_tensor, swap_tensor),
'loss_attribute': self.calc_attribute_loss(target_attributes, swap_attributes),
'loss_reconstruction': self.calc_reconstruction_loss(swap_tensor, target_tensor, is_same_person)
}
if weight_pose > 0:
generator_loss_set['loss_pose'] = self.calc_pose_loss(swap_tensor, target_tensor)
+9 -9
View File
@@ -61,12 +61,12 @@ class FaceSwapperTrain(pytorch_lightning.LightningModule, FaceSwapperLoss):
if self.global_step % CONFIG.getint('training.output', 'preview_frequency') == 0:
self.generate_preview(source_tensor, target_tensor, swap_tensor)
self.log('l_G', generator_losses.get('loss_generator'), prog_bar = True)
self.log('l_D', discriminator_losses.get('loss_discriminator'), prog_bar = True)
self.log('l_ADV', generator_losses.get('loss_adversarial'), prog_bar = True)
self.log('l_ATTR', generator_losses.get('loss_attribute'), prog_bar = True)
self.log('l_ID', generator_losses.get('loss_id'), prog_bar = True)
self.log('l_REC', generator_losses.get('loss_reconstruction'), prog_bar = True)
self.log('loss_generator', generator_losses.get('loss_generator'), prog_bar = True)
self.log('loss_discriminator', discriminator_losses.get('loss_discriminator'), prog_bar = True)
self.log('loss_adversarial', generator_losses.get('loss_adversarial'), prog_bar = True)
self.log('loss_attribute', generator_losses.get('loss_attribute'), prog_bar = True)
self.log('loss_id', generator_losses.get('loss_id'), prog_bar = True)
self.log('loss_reconstruction', generator_losses.get('loss_reconstruction'), prog_bar = True)
return generator_losses.get('loss_generator')
def generate_preview(self, source_tensor : VisionTensor, target_tensor : VisionTensor, swap_tensor : VisionTensor) -> None:
@@ -76,7 +76,7 @@ class FaceSwapperTrain(pytorch_lightning.LightningModule, FaceSwapperLoss):
swap_tensors = swap_tensor[:max_preview]
rows = [ torch.cat([ source_tensor, target_tensor, swap_tensor ], dim = 2) for source_tensor, target_tensor, swap_tensor in zip(source_tensors, target_tensors, swap_tensors) ]
grid = torchvision.utils.make_grid(torch.cat(rows, dim = 1).unsqueeze(0), nrow = 1, normalize = True, scale_each = True)
self.logger.experiment.add_image("Generator Preview", grid, self.global_step)
self.logger.experiment.add_image('preview', grid, self.global_step)
def create_trainer() -> Trainer:
@@ -111,10 +111,10 @@ def train() -> None:
same_person_probability = CONFIG.getfloat('preparing.dataset', 'same_person_probability')
batch_size = CONFIG.getint('training.loader', 'batch_size')
num_workers = CONFIG.getint('training.loader', 'num_workers')
file_path = CONFIG.get('training.output', 'file_path')
output_file_path = CONFIG.get('training.output', 'file_path')
dataset = DataLoaderVGG(dataset_path, dataset_image_pattern, dataset_directory_pattern, same_person_probability)
data_loader = DataLoader(dataset, batch_size = batch_size, shuffle = True, num_workers = num_workers, drop_last = True, pin_memory = True, persistent_workers = True)
face_swap_model = FaceSwapperTrain()
trainer = create_trainer()
trainer.fit(face_swap_model, data_loader, ckpt_path = file_path)
trainer.fit(face_swap_model, data_loader, ckpt_path = output_file_path)