def CopyPasswd(self):
    """Copy the password of the currently selected machine to the clipboard.

    The selected key is read from ``self.list_com`` and looked up in
    ``self.machine_dict``; the entry's ``"passwd"`` value is handed to
    ``pyperclip.copy``.  The lookup and the copy run on a background thread.
    """
    def copy():
        ip = self.list_com.get()
        try:
            passwd = self.machine_dict[ip]["passwd"]
        except KeyError:
            # An exception raised inside the worker thread would only be
            # dumped to stderr, so report the problem and stop instead.
            print("No password available for machine [%s]" % ip)
            return
        pyperclip.copy(passwd)

    thread_update = threading.Thread(target=copy)
    thread_update.start()
test_task(): log = self.log_com.get() diff --git a/GUI/file_sync/filestate_machine0.json b/GUI/file_sync/filestate_machine0.json index 80d4bd2..9656ee6 100644 --- a/GUI/file_sync/filestate_machine0.json +++ b/GUI/file_sync/filestate_machine0.json @@ -1,5 +1,5 @@ { - "GUI.py": 1642351532.4558506, + "GUI.py": 1644423287.9844918, "test.py": 1643529962.5602193, "train.py": 1643397924.974299, "components\\Generator.py": 1642347735.351465, @@ -60,7 +60,7 @@ "face_crop.py": 1643789609.1834445, "face_crop_video.py": 1643815024.5516832, "similarity.py": 1643269705.1073737, - "train_multigpu.py": 1644331842.3490777, + "train_multigpu.py": 1644509438.008675, "components\\arcface_decoder.py": 1643396144.2575414, "components\\Generator_nobias.py": 1643179001.810856, "data_tools\\data_loader_VGGFace2HQ_multigpu.py": 1644330414.9587426, @@ -83,9 +83,9 @@ "torch_utils\\ops\\__init__.py": 1640773190.0, "train_scripts\\trainer_arcface_rec.py": 1643399647.0182135, "train_scripts\\trainer_multigpu_base.py": 1644131205.772292, - "train_scripts\\trainer_multi_gpu.py": 1644331738.7729652, + "train_scripts\\trainer_multi_gpu.py": 1644549528.075511, "train_yamls\\train_arcface_rec.yaml": 1643398807.3434353, - "train_yamls\\train_multigpu.yaml": 1644331809.0680442, + "train_yamls\\train_multigpu.yaml": 1644549590.0652373, "wandb\\run-20220129_032741-340btp9k\\files\\conda-environment.yaml": 1643398065.409959, "wandb\\run-20220129_032741-340btp9k\\files\\config.yaml": 1643398069.2392955, "wandb\\run-20220129_032939-2nmaozxq\\files\\conda-environment.yaml": 1643398182.647548, @@ -104,5 +104,11 @@ "dnnlib\\__init__.py": 1640773190.0, "components\\Generator_ori.py": 1644229508.0031855, "losses\\cos.py": 1644229583.4023254, - "data_tools\\data_loader_VGGFace2HQ_multigpu1.py": 1644297868.397411 + "data_tools\\data_loader_VGGFace2HQ_multigpu1.py": 1644297868.397411, + "speed_test.py": 1644476745.605093, + "components\\DeConv_Invo.py": 1644426607.1588645, + "components\\Generator_reduce_up.py": 
1644477248.9149294, + "components\\Generator_upsample.py": 1644426070.2325442, + "components\\misc\\Involution.py": 1644509321.5267963, + "train_yamls\\train_Invoup.yaml": 1644550037.4785244 } \ No newline at end of file diff --git a/GUI/machines.json b/GUI/machines.json index 30b51d2..68c43a2 100644 --- a/GUI/machines.json +++ b/GUI/machines.json @@ -7,5 +7,14 @@ "path": "/home/ubuntu/CXH/simswap_plus", "ckp_path": "train_logs", "logfilename": "filestate_machine0.json" + }, + { + "ip": "2001:da8:8000:6880:f284:d61c:3c76:f9cb", + "user": "ps", + "port": 22, + "passwd": "glass123456", + "path": "/data1/cxh/simswap_plus", + "ckp_path": "train_logs", + "logfilename": "filestate_machine1.json" } ] \ No newline at end of file diff --git a/README.md b/README.md index fb4ee75..f2ea463 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,20 @@ # Simswap++ ## Dependencies -- python > 3.6 +- python >= 3.7 - yaml (pip install pyyaml) - paramiko (For ssh file transportation) -- pytorch > 1.8 -- tkinter (For GUI) +- pytorch >= 1.9 - pillow - torchvision - opencv - matplotlib - timm +- cupy (for involution) you need to create a new env in anaconda (conda install pytorch==1.10.1 cudatoolkit==10.2.89 cupy==10.1.0 -c pytorch -c conda-forge) ## logger -- tensorboard (pip install tensorboard) -- tensorboardX (pip install tensorboardX) +- wandb (pip install wandb) ***OR*** diff --git a/components/DeConv_Invo.py b/components/DeConv_Invo.py new file mode 100644 index 0000000..03ae788 --- /dev/null +++ b/components/DeConv_Invo.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: DeConv copy.py +# Created Date: Tuesday July 20th 2021 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Thursday, 10th February 2022 1:10:04 am +# Modified By: Chen Xuanhong +# Copyright (c) 2021 Shanghai Jiao Tong University +############################################################# + + +from 
class DeConv(nn.Module):
    """Upsampling block: 1x1 convolution -> bilinear upsampling -> involution.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by the 1x1 convolution and kept
            by the involution layer.
        kernel_size: spatial size of the involution kernel.
        upsampl_scale: scale factor handed to ``nn.UpsamplingBilinear2d``.
        padding: ``"reflect"`` or ``"zero"`` (case-insensitive).  Both values
            currently build the same involution layer; the argument is kept
            for interface compatibility.

    Raises:
        NotImplementedError: if ``padding`` is neither ``"reflect"`` nor
            ``"zero"``.
    """

    def __init__(self, in_channels, out_channels, kernel_size = 3, upsampl_scale = 2, padding="zero"):
        super().__init__()
        # Fail at construction time: an unknown mode would otherwise leave
        # ``self.conv`` undefined and only surface later inside ``forward``.
        if padding.lower() not in ("reflect", "zero"):
            raise NotImplementedError('padding [%s] is not implemented' % padding)

        self.upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampl_scale)
        # The 1x1 convolution changes the channel count *before* upsampling,
        # so the involution always sees ``out_channels`` channels.
        self.conv1x1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)
        self.conv = involution(out_channels, kernel_size, 1)

    def forward(self, input):
        """Project channels, upsample, then apply the involution layer."""
        h = self.conv1x1(input)
        h = self.upsampling(h)
        h = self.conv(h)
        return h
class ApplyStyle(nn.Module):
    """Per-channel affine modulation driven by a latent vector.

    A linear layer maps the latent to ``2 * channels`` values, which are used
    as a per-channel scale offset and a per-channel shift.

    @ref: https://github.com/lernapparat/lernapparat/blob/master/style_gan/pytorch_style_gan.ipynb
    """

    def __init__(self, latent_size, channels):
        super().__init__()
        self.linear = nn.Linear(latent_size, 2 * channels)

    def forward(self, x, latent):
        """Return ``x * (scale + 1) + shift`` with scale/shift from ``latent``."""
        # [batch, 2 * C] -> [batch, 2, C, 1, 1]; index 0 is scale, 1 is shift.
        coeffs = self.linear(latent).view(-1, 2, x.size(1), 1, 1)
        scale = coeffs[:, 0] + 1.
        shift = coeffs[:, 1]
        return x * scale + shift
class Generator(nn.Module):
    """Encoder / modulated-residual bottleneck / involution-upsampling decoder.

    Keyword Args:
        g_conv_dim: size of the latent vector fed to every bottleneck block.
        g_kernel_size: required configuration key; not used by the layers
            built here, which have fixed kernel sizes.
        res_num: number of ``ResnetBlock_Modulation`` blocks in the bottleneck.
    """

    def __init__(
                self,
                **kwargs
                ):
        super().__init__()

        chn         = kwargs["g_conv_dim"]
        k_size      = kwargs["g_kernel_size"]  # required key, currently unused
        res_num     = kwargs["res_num"]

        padding_type = 'reflect'
        activation   = nn.ReLU(True)

        self.first_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(3, 64, kernel_size=7, padding=0, bias=False),
                                nn.BatchNorm2d(64), activation)
        ### downsample
        self.down1 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(128), activation)

        self.down2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(256), activation)

        self.down3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(512), activation)

        self.down4 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(512), activation)

        ### resnet blocks
        BN = []
        for _ in range(res_num):
            BN += [
                ResnetBlock_Modulation(512, latent_size=chn, padding_type=padding_type, activation=activation)]
        self.BottleNeck = nn.Sequential(*BN)

        ### upsample
        self.up4 = nn.Sequential(
            DeConv(512,512,3),
            nn.BatchNorm2d(512), activation
        )

        self.up3 = nn.Sequential(
            DeConv(512,256,3),
            nn.BatchNorm2d(256), activation
        )

        self.up2 = nn.Sequential(
            DeConv(256,128,3),
            nn.BatchNorm2d(128), activation
        )

        self.up1 = nn.Sequential(
            DeConv(128,64,3),
            nn.BatchNorm2d(64), activation
        )

        self.last_layer = nn.Sequential(nn.ReflectionPad2d(3),
                                nn.Conv2d(64, 3, kernel_size=7, padding=0))

    def forward(self, input, id):
        """Run the generator.

        Args:
            input: image batch with 3 channels.
            id: latent vector passed to every bottleneck block
                (presumably the identity embedding -- confirm with caller).

        Returns:
            A 3-channel tensor produced by ``last_layer``.
        """
        x = self.first_layer(input)
        x = self.down1(x)
        x = self.down2(x)
        x = self.down3(x)
        x = self.down4(x)

        # Chain the residual blocks: each block must consume the previous
        # block's output.  Feeding the encoder output to every block would
        # discard all but the last block's result.
        # NOTE(review): checkpoints trained before this chaining was in place
        # may need retraining -- confirm before loading old weights.
        for block in self.BottleNeck:
            x = block(x, id)

        x = self.up4(x)
        x = self.up3(x)
        x = self.up2(x)
        x = self.up1(x)
        x = self.last_layer(x)

        return x
class InstanceNorm(nn.Module):
    """Parameter-free normalisation over the two trailing (spatial) axes.

    Each (sample, channel) slice is shifted to zero mean and scaled by the
    reciprocal square root of its mean squared value plus ``epsilon``.
    """

    def __init__(self, epsilon=1e-8):
        """
            @notice: avoid in-place ops.
            https://discuss.pytorch.org/t/encounter-the-runtimeerror-one-of-the-variables-needed-for-gradient-computation-has-been-modified-by-an-inplace-operation/836/3
        """
        super().__init__()
        self.epsilon = epsilon

    def forward(self, x):
        """Return ``x`` normalised over dimensions 2 and 3."""
        centered = x - x.mean(dim=(2, 3), keepdim=True)
        mean_sq = (centered * centered).mean(dim=(2, 3), keepdim=True)
        inv_std = torch.rsqrt(mean_sq + self.epsilon)
        return centered * inv_std
class Generator(nn.Module):
    """Encoder / AdaIN-residual bottleneck / involution-upsampling decoder.

    Keyword Args:
        g_conv_dim: size of the latent vector fed to every bottleneck block.
        g_kernel_size: required configuration key; not used by the layers
            built here, which have fixed kernel sizes.
        res_num: number of ``ResnetBlock_Adain`` blocks in the bottleneck.
    """

    def __init__(
                self,
                **kwargs
                ):
        super().__init__()

        chn         = kwargs["g_conv_dim"]
        k_size      = kwargs["g_kernel_size"]  # required key, currently unused
        res_num     = kwargs["res_num"]

        padding_type = 'reflect'
        activation   = nn.ReLU(True)

        self.first_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(3, 64, kernel_size=7, padding=0, bias=False),
                                nn.BatchNorm2d(64), activation)
        ### downsample
        self.down1 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False),
                                nn.BatchNorm2d(128), activation)

        self.down2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1, bias=False),
                                nn.BatchNorm2d(256), activation)

        self.down3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False),
                                nn.BatchNorm2d(512), activation)

        self.down4 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, bias=False),
                                nn.BatchNorm2d(512), activation)

        ### resnet blocks
        BN = []
        for _ in range(res_num):
            BN += [
                ResnetBlock_Adain(512, latent_size=chn, padding_type=padding_type, activation=activation)]
        self.BottleNeck = nn.Sequential(*BN)

        ### upsample
        self.up4 = nn.Sequential(
            DeConv(512,512,3),
            nn.BatchNorm2d(512), activation
        )

        self.up3 = nn.Sequential(
            DeConv(512,256,3),
            nn.BatchNorm2d(256), activation
        )

        self.up2 = nn.Sequential(
            DeConv(256,128,3),
            nn.BatchNorm2d(128), activation
        )

        self.up1 = nn.Sequential(
            DeConv(128,64,3),
            nn.BatchNorm2d(64), activation
        )

        self.last_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(64, 3, kernel_size=7, padding=0))

    def forward(self, input, id):
        """Run the generator.

        Args:
            input: image batch with 3 channels.
            id: latent vector passed to every bottleneck block
                (presumably the identity embedding -- confirm with caller).

        Returns:
            A 3-channel tensor produced by ``last_layer``.
        """
        x = self.first_layer(input)
        x = self.down1(x)
        x = self.down2(x)
        x = self.down3(x)
        x = self.down4(x)

        # Chain the residual blocks: each block must consume the previous
        # block's output.  Feeding the encoder output to every block would
        # discard all but the last block's result.
        # NOTE(review): checkpoints trained before this chaining was in place
        # may need retraining -- confirm before loading old weights.
        for block in self.BottleNeck:
            x = block(x, id)

        x = self.up4(x)
        x = self.up3(x)
        x = self.up2(x)
        x = self.up1(x)
        x = self.last_layer(x)

        return x
def Dtype(t):
    """Return the CUDA C scalar type name used to instantiate a kernel for *t*.

    Args:
        t: a CUDA tensor of dtype ``torch.float32`` or ``torch.float64``.

    Returns:
        ``'float'`` for float32 tensors, ``'double'`` for float64 tensors.

    Raises:
        TypeError: for anything else (CPU tensors, half precision, ...).
            Returning ``None`` here would be substituted verbatim into the
            kernel source and fail much later with an opaque compile error.
    """
    if isinstance(t, torch.Tensor) and t.is_cuda:
        name = {torch.float32: 'float', torch.float64: 'double'}.get(t.dtype)
        if name is not None:
            return name
    raise TypeError(
        "involution kernels need a CUDA float32/float64 tensor, got %s"
        % (t.type() if isinstance(t, torch.Tensor) else type(t).__name__))
${bottom_height}) + && (w_in >= 0) && (w_in < ${bottom_width})) { + const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in) + * ${bottom_width} + w_in; + const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h) + * ${top_width} + w; + value += weight_data[offset_weight] * bottom_data[offset]; + } + } + } + top_data[index] = value; + } +} +''' + + +_involution_kernel_backward_grad_input = kernel_loop + ''' +extern "C" +__global__ void involution_backward_grad_input_kernel( + const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) { + CUDA_KERNEL_LOOP(index, ${nthreads}) { + const int n = index / ${channels} / ${bottom_height} / ${bottom_width}; + const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels}; + const int h = (index / ${bottom_width}) % ${bottom_height}; + const int w = index % ${bottom_width}; + const int g = c / (${channels} / ${groups}); + ${Dtype} value = 0; + #pragma unroll + for (int kh = 0; kh < ${kernel_h}; ++kh) { + #pragma unroll + for (int kw = 0; kw < ${kernel_w}; ++kw) { + const int h_out_s = h + ${pad_h} - kh * ${dilation_h}; + const int w_out_s = w + ${pad_w} - kw * ${dilation_w}; + if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) { + const int h_out = h_out_s / ${stride_h}; + const int w_out = w_out_s / ${stride_w}; + if ((h_out >= 0) && (h_out < ${top_height}) + && (w_out >= 0) && (w_out < ${top_width})) { + const int offset = ((n * ${channels} + c) * ${top_height} + h_out) + * ${top_width} + w_out; + const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out) + * ${top_width} + w_out; + value += weight_data[offset_weight] * top_diff[offset]; + } + } + } + } + bottom_diff[index] = value; + } +} +''' + + +_involution_kernel_backward_grad_weight = kernel_loop + ''' +extern "C" +__global__ void involution_backward_grad_weight_kernel( + const 
${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) { + CUDA_KERNEL_LOOP(index, ${nthreads}) { + const int h = (index / ${top_width}) % ${top_height}; + const int w = index % ${top_width}; + const int kh = (index / ${kernel_w} / ${top_height} / ${top_width}) + % ${kernel_h}; + const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w}; + const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; + const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; + if ((h_in >= 0) && (h_in < ${bottom_height}) + && (w_in >= 0) && (w_in < ${bottom_width})) { + const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups}; + const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num}; + ${Dtype} value = 0; + #pragma unroll + for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) { + const int top_offset = ((n * ${channels} + c) * ${top_height} + h) + * ${top_width} + w; + const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in) + * ${bottom_width} + w_in; + value += top_diff[top_offset] * bottom_data[bottom_offset]; + } + buffer_data[index] = value; + } else { + buffer_data[index] = 0; + } + } +} +''' + + +class _involution(Function): + @staticmethod + def forward(ctx, input, weight, stride, padding, dilation): + assert input.dim() == 4 and input.is_cuda + assert weight.dim() == 6 and weight.is_cuda + batch_size, channels, height, width = input.size() + kernel_h, kernel_w = weight.size()[2:4] + output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1) + output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1) + + output = input.new(batch_size, channels, output_h, output_w) + n = output.numel() + + with torch.cuda.device_of(input): + f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), 
nthreads=n, + num=batch_size, channels=channels, groups=weight.size()[1], + bottom_height=height, bottom_width=width, + top_height=output_h, top_width=output_w, + kernel_h=kernel_h, kernel_w=kernel_w, + stride_h=stride[0], stride_w=stride[1], + dilation_h=dilation[0], dilation_w=dilation[1], + pad_h=padding[0], pad_w=padding[1]) + f(block=(CUDA_NUM_THREADS,1,1), + grid=(GET_BLOCKS(n),1,1), + args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()], + stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) + + ctx.save_for_backward(input, weight) + ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda and grad_output.is_contiguous() + input, weight = ctx.saved_tensors + stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation + + batch_size, channels, height, width = input.size() + kernel_h, kernel_w = weight.size()[2:4] + output_h, output_w = grad_output.size()[2:] + + grad_input, grad_weight = None, None + + opt = dict(Dtype=Dtype(grad_output), + num=batch_size, channels=channels, groups=weight.size()[1], + bottom_height=height, bottom_width=width, + top_height=output_h, top_width=output_w, + kernel_h=kernel_h, kernel_w=kernel_w, + stride_h=stride[0], stride_w=stride[1], + dilation_h=dilation[0], dilation_w=dilation[1], + pad_h=padding[0], pad_w=padding[1]) + + with torch.cuda.device_of(input): + if ctx.needs_input_grad[0]: + grad_input = input.new(input.size()) + + n = grad_input.numel() + opt['nthreads'] = n + + f = load_kernel('involution_backward_grad_input_kernel', + _involution_kernel_backward_grad_input, **opt) + f(block=(CUDA_NUM_THREADS,1,1), + grid=(GET_BLOCKS(n),1,1), + args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()], + stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) + + if ctx.needs_input_grad[1]: + grad_weight = weight.new(weight.size()) + + n = grad_weight.numel() + opt['nthreads'] = n + + 
class involution(nn.Module):
    """Involution layer whose per-pixel kernels are generated from the input.

    A small two-layer 1x1-convolution bottleneck (``seblock``) predicts, for
    every output location, one ``kernel_size x kernel_size`` kernel per
    channel group; the kernels are then applied by ``_involution_cuda``.

    Args:
        channels: number of input (and output) channels.  Must be at least
            ``group_channels`` (8) so that at least one group exists.
        kernel_size: spatial size of the generated kernels.
        stride: stride of the involution.  For ``stride > 1`` the kernel
            generator runs on an average-pooled copy of the input so that
            the kernel map matches the strided output resolution.

    Raises:
        ValueError: if ``channels`` is smaller than ``group_channels``.
    """

    def __init__(self,
                 channels,
                 kernel_size,
                 stride):
        super(involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 8
        if channels < self.group_channels:
            # groups would be 0: the kernel generator would emit no channels.
            raise ValueError(
                "involution needs at least %d channels, got %d"
                % (self.group_channels, channels))
        self.groups = self.channels // self.group_channels
        self.seblock = nn.Sequential(
            nn.Conv2d(in_channels = channels, out_channels = channels // reduction_ratio, kernel_size= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = channels // reduction_ratio, out_channels = kernel_size**2 * self.groups, kernel_size= 1)
        )
        if stride > 1:
            self.avgpool = nn.AvgPool2d(stride, stride)

    def forward(self, x):
        """Generate the kernels from ``x`` and apply them to ``x``."""
        # The kernel map needs one kernel per *output* location, so for a
        # strided involution it is predicted from the pooled input.
        kernel_source = x if self.stride == 1 else self.avgpool(x)
        weight = self.seblock(kernel_source)
        b, c, h, w = weight.shape
        weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w)
        out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
        return out
+ script + + + package = __import__(gscript_name, fromlist=True) + gen_class= getattr(package, class_name) + gen = gen_class(**model_config) + model = gen.cuda().eval().requires_grad_(False) + arcface1 = torch.load(arcface_ckpt, map_location=torch.device("cpu")) + arcface = arcface1['model'].module + arcface = arcface.cuda() + arcface.eval().requires_grad_(False) + + + id_img = torch.rand((4,3,112,112)).cuda() + id_latent = torch.rand((4,512)).cuda() + # cv2.imwrite(os.path.join("./swap_results", "id_%s.png"%(id_basename)),id_img_align_crop[0] + + attr = torch.rand((4,3,512,512)).cuda() + + import datetime + start_time = time.time() + for i in range(100): + with torch.no_grad(): + + id_latent = arcface(id_img) + + results = model(attr, id_latent) + elapsed = time.time() - start_time + elapsed = str(datetime.timedelta(seconds=elapsed)) + information="Elapsed [{}]".format(elapsed) + print(information) \ No newline at end of file diff --git a/train_multigpu.py b/train_multigpu.py index d5ba081..48354e2 100644 --- a/train_multigpu.py +++ b/train_multigpu.py @@ -5,7 +5,7 @@ # Created Date: Tuesday April 28th 2020 # Author: Chen Xuanhong # Email: chenxuanhongzju@outlook.com -# Last Modified: Tuesday, 8th February 2022 10:50:37 pm +# Last Modified: Friday, 11th February 2022 12:10:37 am # Modified By: Chen Xuanhong # Copyright (c) 2020 Shanghai Jiao Tong University ############################################################# @@ -31,9 +31,9 @@ def getParameters(): parser = argparse.ArgumentParser() # general settings - parser.add_argument('-v', '--version', type=str, default='multigpu3', + parser.add_argument('-v', '--version', type=str, default='invoup1', help="version name for train, test, finetune") - parser.add_argument('-t', '--tag', type=str, default='multigpu', + parser.add_argument('-t', '--tag', type=str, default='invo_upsample', help="tag for current experiment") parser.add_argument('-p', '--phase', type=str, default="train", @@ -46,9 +46,9 @@ def 
getParameters(): # training parser.add_argument('--experiment_description', type=str, - default="测试多GPU训练") + default="使用involution作为上采样") - parser.add_argument('--train_yaml', type=str, default="train_multigpu.yaml") + parser.add_argument('--train_yaml', type=str, default="train_Invoup.yaml") # system logger parser.add_argument('--logger', type=str, diff --git a/train_scripts/trainer_multi_gpu.py b/train_scripts/trainer_multi_gpu.py index de56e85..1080b3b 100644 --- a/train_scripts/trainer_multi_gpu.py +++ b/train_scripts/trainer_multi_gpu.py @@ -5,7 +5,7 @@ # Created Date: Sunday January 9th 2022 # Author: Chen Xuanhong # Email: chenxuanhongzju@outlook.com -# Last Modified: Tuesday, 8th February 2022 10:48:58 pm +# Last Modified: Friday, 11th February 2022 11:18:47 am # Modified By: Chen Xuanhong # Copyright (c) 2022 Shanghai Jiao Tong University ############################################################# @@ -94,6 +94,7 @@ def init_framework(config, reporter, device, rank): # print and recorde model structure reporter.writeInfo("Discriminator structure:") reporter.writeModel(dis.__str__()) + arcface1 = torch.load(config["arcface_ckpt"], map_location=torch.device("cpu")) arcface = arcface1['model'].module @@ -428,6 +429,9 @@ def train_loop( if rank == 0 and (step + 1) % log_freq == 0: elapsed = time.time() - start_time elapsed = str(datetime.timedelta(seconds=elapsed)) + # print("ready to report losses") + # ID_Total= loss_G_ID + # torch.distributed.all_reduce(ID_Total) epochinformation="[{}], Elapsed [{}], Step [{}/{}], \ G_ID: {:.4f}, G_loss: {:.4f}, Rec_loss: {:.4f}, Fm_loss: {:.4f}, \ diff --git a/train_yamls/train_Invoup.yaml b/train_yamls/train_Invoup.yaml new file mode 100644 index 0000000..2005f02 --- /dev/null +++ b/train_yamls/train_Invoup.yaml @@ -0,0 +1,63 @@ +# Related scripts +train_script_name: multi_gpu + +# models' scripts +model_configs: + g_model: + script: Generator_upsample + class_name: Generator + module_params: + g_conv_dim: 512 + 
g_kernel_size: 3 + res_num: 9 + + d_model: + script: projected_discriminator + class_name: ProjectedDiscriminator + module_params: + diffaug: False + interp224: False + backbone_kwargs: {} + +arcface_ckpt: arcface_ckpt/arcface_checkpoint.tar + +# Training information +batch_size: 28 + +# Dataset +dataloader: VGGFace2HQ_multigpu +dataset_name: vggface2_hq +dataset_params: + random_seed: 1234 + dataloader_workers: 4 + +eval_dataloader: DIV2K_hdf5 +eval_dataset_name: DF2K_H5_Eval +eval_batch_size: 2 + +# Dataset + +# Optimizer +optim_type: Adam +g_optim_config: + lr: 0.0004 + betas: [ 0, 0.99] + eps: !!float 1e-8 + +d_optim_config: + lr: 0.0004 + betas: [ 0, 0.99] + eps: !!float 1e-8 + +id_weight: 20.0 +reconstruct_weight: 10.0 +feature_match_weight: 10.0 + +# Log +log_step: 300 +model_save_step: 10000 +sample_step: 1000 +total_step: 1000000 +checkpoint_names: + generator_name: Generator + discriminator_name: Discriminator \ No newline at end of file diff --git a/train_yamls/train_multigpu.yaml b/train_yamls/train_multigpu.yaml index daf0f97..b164fd7 100644 --- a/train_yamls/train_multigpu.yaml +++ b/train_yamls/train_multigpu.yaml @@ -22,7 +22,7 @@ model_configs: arcface_ckpt: arcface_ckpt/arcface_checkpoint.tar # Training information -batch_size: 24 +batch_size: 28 # Dataset dataloader: VGGFace2HQ_multigpu