commit df2c0c06fa4987863b785a1404c8295a76bcf7e8 Author: Your Name Date: Fri Feb 1 15:30:41 2019 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5cb0195 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +data/ +checkpoint/ +.idea/ +__pycache__/ +.vscode + +demo_faceswap_video.py +predict_128x128.py diff --git a/dataset/face_pair_dataset.py b/dataset/face_pair_dataset.py new file mode 100644 index 0000000..75abecb --- /dev/null +++ b/dataset/face_pair_dataset.py @@ -0,0 +1,204 @@ +""" +Copyright StrangeAI authors @2019 + +assume you have to directly which you want +convert A to B, just put all faces of A person to A, +faces of B person to B + +""" +import torch +from torch.utils.data import Dataset +import glob +import os +from alfred.dl.torch.common import device +import cv2 +from PIL import Image +from torchvision import transforms +import numpy as np +from utils.umeyama import umeyama +import cv2 + +random_transform_args = { + 'rotation_range': 10, + 'zoom_range': 0.05, + 'shift_range': 0.05, + 'random_flip': 0.4, +} + + +def random_transform(image, rotation_range, zoom_range, shift_range, random_flip): + h, w = image.shape[0:2] + rotation = np.random.uniform(-rotation_range, rotation_range) + scale = np.random.uniform(1 - zoom_range, 1 + zoom_range) + tx = np.random.uniform(-shift_range, shift_range) * w + ty = np.random.uniform(-shift_range, shift_range) * h + mat = cv2.getRotationMatrix2D((w // 2, h // 2), rotation, scale) + mat[:, 2] += (tx, ty) + result = cv2.warpAffine(image, mat, (w, h), borderMode=cv2.BORDER_REPLICATE) + if np.random.random() < random_flip: + result = result[:, ::-1] + return result + + +def random_warp_128(image): + assert image.shape == (256, 256, 3), 'resize image to 256 256 first' + range_ = np.linspace(128 - 120, 128 + 120, 9) + mapx = np.broadcast_to(range_, (9, 9)) + mapy = mapx.T + mapx = mapx + np.random.normal(size=(9, 9), scale=5) + mapy = mapy + np.random.normal(size=(9, 9), scale=5) + 
interp_mapx = cv2.resize(mapx, (144, 144))[8:136, 8:136].astype('float32') + interp_mapy = cv2.resize(mapy, (144, 144))[8:136, 8:136].astype('float32') + warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) + src_points = np.stack([mapx.ravel(), mapy.ravel()], axis=-1) + dst_points = np.mgrid[0:129:16, 0:129:16].T.reshape(-1, 2) + mat = umeyama(src_points, dst_points, True)[0:2] + target_image = cv2.warpAffine(image, mat, (128, 128)) + return warped_image, target_image + + +def random_warp_64(image): + assert image.shape == (256, 256, 3) + range_ = np.linspace(128 - 120, 128 + 120, 5) + mapx = np.broadcast_to(range_, (5, 5)) + mapy = mapx.T + mapx = mapx + np.random.normal(size=(5, 5), scale=5) + mapy = mapy + np.random.normal(size=(5, 5), scale=5) + interp_mapx = cv2.resize(mapx, (80, 80))[8:72, 8:72].astype('float32') + interp_mapy = cv2.resize(mapy, (80, 80))[8:72, 8:72].astype('float32') + warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) + src_points = np.stack([mapx.ravel(), mapy.ravel()], axis=-1) + dst_points = np.mgrid[0:65:16, 0:65:16].T.reshape(-1, 2) + mat = umeyama(src_points, dst_points, True)[0:2] + target_image = cv2.warpAffine(image, mat, (64, 64)) + return warped_image, target_image + + +class FacePairDataset(Dataset): + + def __init__(self, a_dir, b_dir, target_size, transform): + super(FacePairDataset, self).__init__ + self.a_dir = a_dir + self.b_dir = b_dir + self.target_size = target_size + + self.transform = transform + # extension can be changed here to png or others + self.a_images_list = glob.glob(os.path.join(a_dir, '*.png')) + self.b_images_list = glob.glob(os.path.join(b_dir, '*.png')) + + def __getitem__(self, index): + # return 2 image pair, A and B + img_a = Image.open(self.a_images_list[index]) + img_b = Image.open(self.b_images_list[index]) + + # align the face first + img_a = img_a.resize((self.target_size, self.target_size), Image.ANTIALIAS) + img_b = 
img_b.resize((self.target_size, self.target_size), Image.ANTIALIAS) + + # transform + if self.transform: + img_a = self.transform(img_a) + img_b = self.transform(img_b) + + # already resized, warp it + img_a = random_transform(np.array(img_a), **random_transform_args) + img_b = random_transform(np.array(img_b), **random_transform_args) + img_a_input, img_a = random_warp(np.array(img_a), 256) + img_b_input, img_b = random_warp(np.array(img_b), 256) + img_a_tensor = torch.Tensor(img_a.transpose(2, 0, 1)/255.).float() + img_a_input_tensor = torch.Tensor(img_a_input.transpose(2, 0, 1)/255.).float() + img_b_tensor = torch.Tensor(img_b.transpose(2, 0, 1)/255.).float() + img_b_input_tensor = torch.Tensor(img_b_input.transpose(2, 0, 1)/255.).float() + return img_a_tensor, img_a_input_tensor, img_b_tensor, img_b_input_tensor + + def __len__(self): + return min(len(self.a_images_list), len(self.b_images_list)) + + + +class FacePairDataset64x64(Dataset): + + def __init__(self, a_dir, b_dir, target_size, transform): + super(FacePairDataset64x64, self).__init__ + self.a_dir = a_dir + self.b_dir = b_dir + self.target_size = target_size + + self.transform = transform + # extension can be changed here to png or others + self.a_images_list = glob.glob(os.path.join(a_dir, '*.png')) + self.b_images_list = glob.glob(os.path.join(b_dir, '*.png')) + + def __getitem__(self, index): + # return 2 image pair, A and B + img_a = Image.open(self.a_images_list[index]) + img_b = Image.open(self.b_images_list[index]) + + # align the face first + img_a = img_a.resize((256, 256), Image.ANTIALIAS) + img_b = img_b.resize((256, 256), Image.ANTIALIAS) + + # transform + if self.transform: + img_a = self.transform(img_a) + img_b = self.transform(img_b) + + # # already resized, warp it + img_a = random_transform(np.array(img_a), **random_transform_args) + img_b = random_transform(np.array(img_b), **random_transform_args) + img_a_input, img_a = random_warp_64(np.array(img_a)) + img_b_input, img_b = 
random_warp_64(np.array(img_b)) + + img_a = np.array(img_a) + img_b = np.array(img_b) + + img_a_tensor = torch.Tensor(img_a.transpose(2, 0, 1)/255.).float() + img_a_input_tensor = torch.Tensor(img_a_input.transpose(2, 0, 1)/255.).float() + img_b_tensor = torch.Tensor(img_b.transpose(2, 0, 1)/255.).float() + img_b_input_tensor = torch.Tensor(img_b_input.transpose(2, 0, 1)/255.).float() + return img_a_tensor, img_a_input_tensor, img_b_tensor, img_b_input_tensor + + def __len__(self): + return min(len(self.a_images_list), len(self.b_images_list)) + + + +class FacePairDataset128x128(Dataset): + + def __init__(self, a_dir, b_dir, target_size, transform): + super(FacePairDataset128x128, self).__init__ + self.a_dir = a_dir + self.b_dir = b_dir + self.target_size = target_size + + self.transform = transform + self.a_images_list = glob.glob(os.path.join(a_dir, '*.png')) + self.b_images_list = glob.glob(os.path.join(b_dir, '*.png')) + + def __getitem__(self, index): + # return 2 image pair, A and B + img_a = Image.open(self.a_images_list[index]) + img_b = Image.open(self.b_images_list[index]) + + # align the face first + img_a = img_a.resize((256, 256), Image.ANTIALIAS) + img_b = img_b.resize((256, 256), Image.ANTIALIAS) + + # transform + if self.transform: + img_a = self.transform(img_a) + img_b = self.transform(img_b) + + img_a = random_transform(np.array(img_a), **random_transform_args) + img_b = random_transform(np.array(img_b), **random_transform_args) + img_a_input, img_a = random_warp_128(np.array(img_a)) + img_b_input, img_b = random_warp_128(np.array(img_b)) + img_a_tensor = torch.Tensor(img_a.transpose(2, 0, 1)/255.).float() + img_a_input_tensor = torch.Tensor(img_a_input.transpose(2, 0, 1)/255.).float() + img_b_tensor = torch.Tensor(img_b.transpose(2, 0, 1)/255.).float() + img_b_input_tensor = torch.Tensor(img_b_input.transpose(2, 0, 1)/255.).float() + return img_a_tensor, img_a_input_tensor, img_b_tensor, img_b_input_tensor + + def __len__(self): + return 
min(len(self.a_images_list), len(self.b_images_list)) \ No newline at end of file diff --git a/dataset/training_data.py b/dataset/training_data.py new file mode 100644 index 0000000..5df24ae --- /dev/null +++ b/dataset/training_data.py @@ -0,0 +1,61 @@ +import numpy +from utils.umeyama import umeyama +import cv2 + +random_transform_args = { + 'rotation_range': 10, + 'zoom_range': 0.05, + 'shift_range': 0.05, + 'random_flip': 0.4, +} + + +def random_transform(image, rotation_range, zoom_range, shift_range, random_flip): + h, w = image.shape[0:2] + rotation = numpy.random.uniform(-rotation_range, rotation_range) + scale = numpy.random.uniform(1 - zoom_range, 1 + zoom_range) + tx = numpy.random.uniform(-shift_range, shift_range) * w + ty = numpy.random.uniform(-shift_range, shift_range) * h + mat = cv2.getRotationMatrix2D((w // 2, h // 2), rotation, scale) + mat[:, 2] += (tx, ty) + result = cv2.warpAffine(image, mat, (w, h), borderMode=cv2.BORDER_REPLICATE) + if numpy.random.random() < random_flip: + result = result[:, ::-1] + return result + + +# get pair of random warped images from aligened face image +def random_warp(image): + assert image.shape == (256, 256, 3) + range_ = numpy.linspace(128 - 80, 128 + 80, 5) + mapx = numpy.broadcast_to(range_, (5, 5)) + mapy = mapx.T + + mapx = mapx + numpy.random.normal(size=(5, 5), scale=5) + mapy = mapy + numpy.random.normal(size=(5, 5), scale=5) + + interp_mapx = cv2.resize(mapx, (80, 80))[8:72, 8:72].astype('float32') + interp_mapy = cv2.resize(mapy, (80, 80))[8:72, 8:72].astype('float32') + + # just crop the image, remove the top left bottom right 8 pixels (in order to get the pure face) + warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) + + src_points = numpy.stack([mapx.ravel(), mapy.ravel()], axis=-1) + dst_points = numpy.mgrid[0:65:16, 0:65:16].T.reshape(-1, 2) + mat = umeyama(src_points, dst_points, True)[0:2] + target_image = cv2.warpAffine(image, mat, (64, 64)) + return warped_image, 
target_image + + +def get_training_data(images, batch_size): + indices = numpy.random.randint(len(images), size=batch_size) + for i, index in enumerate(indices): + image = images[index] + image = random_transform(image, **random_transform_args) + warped_img, target_img = random_warp(image) + if i == 0: + warped_images = numpy.empty((batch_size,) + warped_img.shape, warped_img.dtype) + target_images = numpy.empty((batch_size,) + target_img.shape, warped_img.dtype) + warped_images[i] = warped_img + target_images[i] = target_img + return warped_images, target_images diff --git a/images/1.jpeg b/images/1.jpeg new file mode 100644 index 0000000..345d4c8 Binary files /dev/null and b/images/1.jpeg differ diff --git a/images/2.jpeg b/images/2.jpeg new file mode 100644 index 0000000..1c7cfc0 Binary files /dev/null and b/images/2.jpeg differ diff --git a/images/3.jpeg b/images/3.jpeg new file mode 100644 index 0000000..092b204 Binary files /dev/null and b/images/3.jpeg differ diff --git a/images/4.jpeg b/images/4.jpeg new file mode 100644 index 0000000..34326f6 Binary files /dev/null and b/images/4.jpeg differ diff --git a/images/5.jpeg b/images/5.jpeg new file mode 100644 index 0000000..ff7d2d0 Binary files /dev/null and b/images/5.jpeg differ diff --git a/images/grid_res.py b/images/grid_res.py new file mode 100644 index 0000000..4693b37 --- /dev/null +++ b/images/grid_res.py @@ -0,0 +1,34 @@ +""" +grid a final image from result images + +""" +import cv2 +import numpy as np +import os +import sys +import glob +from PIL import Image + + +d = sys.argv[1] +print('from ', d) + +all_img_files = glob.glob(os.path.join(d, '*.png')) +assert len(all_img_files) % 6 == 0, 'images divided by 6' +all_img_files = sorted(all_img_files) +rows = len(all_img_files) // 6 +print(rows) +print(len(all_img_files)) + + +res_img = Image.new('RGB',(128*6, 128*(len(all_img_files)//6)), (255, 255, 255)) + +for i in range(len(all_img_files)//6): + for j in range(6): + # print('now: ', 
all_img_files[6*i + j]) + img = Image.open(all_img_files[6*i + j]) + res_img.paste(img, (j*128, i*128)) +res_img.save('res_grid.png') +print(np.array(res_img).shape) +cv2.imshow('rr', np.array(res_img)) +cv2.waitKey(0) \ No newline at end of file diff --git a/images/res/1480_0.png b/images/res/1480_0.png new file mode 100644 index 0000000..da50587 Binary files /dev/null and b/images/res/1480_0.png differ diff --git a/images/res/1480_1.png b/images/res/1480_1.png new file mode 100644 index 0000000..1a5c897 Binary files /dev/null and b/images/res/1480_1.png differ diff --git a/images/res/1480_2.png b/images/res/1480_2.png new file mode 100644 index 0000000..1504289 Binary files /dev/null and b/images/res/1480_2.png differ diff --git a/images/res/1480_3.png b/images/res/1480_3.png new file mode 100644 index 0000000..ff8bc04 Binary files /dev/null and b/images/res/1480_3.png differ diff --git a/images/res/1480_4.png b/images/res/1480_4.png new file mode 100644 index 0000000..d067c18 Binary files /dev/null and b/images/res/1480_4.png differ diff --git a/images/res/1480_5.png b/images/res/1480_5.png new file mode 100644 index 0000000..e5f991e Binary files /dev/null and b/images/res/1480_5.png differ diff --git a/images/res/1490_0.png b/images/res/1490_0.png new file mode 100644 index 0000000..b887347 Binary files /dev/null and b/images/res/1490_0.png differ diff --git a/images/res/1490_1.png b/images/res/1490_1.png new file mode 100644 index 0000000..20e523d Binary files /dev/null and b/images/res/1490_1.png differ diff --git a/images/res/1490_2.png b/images/res/1490_2.png new file mode 100644 index 0000000..0292fef Binary files /dev/null and b/images/res/1490_2.png differ diff --git a/images/res/1490_3.png b/images/res/1490_3.png new file mode 100644 index 0000000..993e20d Binary files /dev/null and b/images/res/1490_3.png differ diff --git a/images/res/1490_4.png b/images/res/1490_4.png new file mode 100644 index 0000000..1c36776 Binary files /dev/null and 
b/images/res/1490_4.png differ diff --git a/images/res/1490_5.png b/images/res/1490_5.png new file mode 100644 index 0000000..332b1af Binary files /dev/null and b/images/res/1490_5.png differ diff --git a/images/res/15450_0.png b/images/res/15450_0.png new file mode 100644 index 0000000..10e3bf1 Binary files /dev/null and b/images/res/15450_0.png differ diff --git a/images/res/15450_1.png b/images/res/15450_1.png new file mode 100644 index 0000000..88266b8 Binary files /dev/null and b/images/res/15450_1.png differ diff --git a/images/res/15450_2.png b/images/res/15450_2.png new file mode 100644 index 0000000..40a2ee5 Binary files /dev/null and b/images/res/15450_2.png differ diff --git a/images/res/15450_3.png b/images/res/15450_3.png new file mode 100644 index 0000000..5c31da4 Binary files /dev/null and b/images/res/15450_3.png differ diff --git a/images/res/15450_4.png b/images/res/15450_4.png new file mode 100644 index 0000000..3e49547 Binary files /dev/null and b/images/res/15450_4.png differ diff --git a/images/res/15450_5.png b/images/res/15450_5.png new file mode 100644 index 0000000..efea942 Binary files /dev/null and b/images/res/15450_5.png differ diff --git a/images/res/15780_0.png b/images/res/15780_0.png new file mode 100644 index 0000000..7fd129b Binary files /dev/null and b/images/res/15780_0.png differ diff --git a/images/res/15780_1.png b/images/res/15780_1.png new file mode 100644 index 0000000..c6ecfc0 Binary files /dev/null and b/images/res/15780_1.png differ diff --git a/images/res/15780_2.png b/images/res/15780_2.png new file mode 100644 index 0000000..aa2f0e2 Binary files /dev/null and b/images/res/15780_2.png differ diff --git a/images/res/15780_3.png b/images/res/15780_3.png new file mode 100644 index 0000000..5ee1f89 Binary files /dev/null and b/images/res/15780_3.png differ diff --git a/images/res/15780_4.png b/images/res/15780_4.png new file mode 100644 index 0000000..f671bf3 Binary files /dev/null and b/images/res/15780_4.png differ 
diff --git a/images/res/15780_5.png b/images/res/15780_5.png new file mode 100644 index 0000000..c472418 Binary files /dev/null and b/images/res/15780_5.png differ diff --git a/images/res_grid.png b/images/res_grid.png new file mode 100644 index 0000000..837f5d1 Binary files /dev/null and b/images/res_grid.png differ diff --git a/images/res_right/1480_0.png b/images/res_right/1480_0.png new file mode 100644 index 0000000..4297d41 Binary files /dev/null and b/images/res_right/1480_0.png differ diff --git a/images/res_right/1480_1.png b/images/res_right/1480_1.png new file mode 100644 index 0000000..8348e00 Binary files /dev/null and b/images/res_right/1480_1.png differ diff --git a/images/res_right/1480_2.png b/images/res_right/1480_2.png new file mode 100644 index 0000000..8302d71 Binary files /dev/null and b/images/res_right/1480_2.png differ diff --git a/images/res_right/1480_3.png b/images/res_right/1480_3.png new file mode 100644 index 0000000..7d18847 Binary files /dev/null and b/images/res_right/1480_3.png differ diff --git a/images/res_right/1480_4.png b/images/res_right/1480_4.png new file mode 100644 index 0000000..1e4b7ab Binary files /dev/null and b/images/res_right/1480_4.png differ diff --git a/images/res_right/1480_5.png b/images/res_right/1480_5.png new file mode 100644 index 0000000..e747485 Binary files /dev/null and b/images/res_right/1480_5.png differ diff --git a/images/res_right/1490_0.png b/images/res_right/1490_0.png new file mode 100644 index 0000000..5064870 Binary files /dev/null and b/images/res_right/1490_0.png differ diff --git a/images/res_right/1490_1.png b/images/res_right/1490_1.png new file mode 100644 index 0000000..e9ccc27 Binary files /dev/null and b/images/res_right/1490_1.png differ diff --git a/images/res_right/1490_2.png b/images/res_right/1490_2.png new file mode 100644 index 0000000..f19a9ea Binary files /dev/null and b/images/res_right/1490_2.png differ diff --git a/images/res_right/1490_3.png 
b/images/res_right/1490_3.png new file mode 100644 index 0000000..d31fb84 Binary files /dev/null and b/images/res_right/1490_3.png differ diff --git a/images/res_right/1490_4.png b/images/res_right/1490_4.png new file mode 100644 index 0000000..6ae329b Binary files /dev/null and b/images/res_right/1490_4.png differ diff --git a/images/res_right/1490_5.png b/images/res_right/1490_5.png new file mode 100644 index 0000000..08c3864 Binary files /dev/null and b/images/res_right/1490_5.png differ diff --git a/images/res_right/15450_0.png b/images/res_right/15450_0.png new file mode 100644 index 0000000..4b492d6 Binary files /dev/null and b/images/res_right/15450_0.png differ diff --git a/images/res_right/15450_1.png b/images/res_right/15450_1.png new file mode 100644 index 0000000..699fcc7 Binary files /dev/null and b/images/res_right/15450_1.png differ diff --git a/images/res_right/15450_2.png b/images/res_right/15450_2.png new file mode 100644 index 0000000..89b33ba Binary files /dev/null and b/images/res_right/15450_2.png differ diff --git a/images/res_right/15450_3.png b/images/res_right/15450_3.png new file mode 100644 index 0000000..7972c0f Binary files /dev/null and b/images/res_right/15450_3.png differ diff --git a/images/res_right/15450_4.png b/images/res_right/15450_4.png new file mode 100644 index 0000000..80b76ff Binary files /dev/null and b/images/res_right/15450_4.png differ diff --git a/images/res_right/15450_5.png b/images/res_right/15450_5.png new file mode 100644 index 0000000..fa36180 Binary files /dev/null and b/images/res_right/15450_5.png differ diff --git a/images/res_right/15780_0.png b/images/res_right/15780_0.png new file mode 100644 index 0000000..3dba851 Binary files /dev/null and b/images/res_right/15780_0.png differ diff --git a/images/res_right/15780_1.png b/images/res_right/15780_1.png new file mode 100644 index 0000000..189bbd4 Binary files /dev/null and b/images/res_right/15780_1.png differ diff --git a/images/res_right/15780_2.png 
b/images/res_right/15780_2.png new file mode 100644 index 0000000..97e9de8 Binary files /dev/null and b/images/res_right/15780_2.png differ diff --git a/images/res_right/15780_3.png b/images/res_right/15780_3.png new file mode 100644 index 0000000..73e0a7d Binary files /dev/null and b/images/res_right/15780_3.png differ diff --git a/images/res_right/15780_4.png b/images/res_right/15780_4.png new file mode 100644 index 0000000..ef812ab Binary files /dev/null and b/images/res_right/15780_4.png differ diff --git a/images/res_right/15780_5.png b/images/res_right/15780_5.png new file mode 100644 index 0000000..c74fe41 Binary files /dev/null and b/images/res_right/15780_5.png differ diff --git a/images/resul2t.png b/images/resul2t.png new file mode 100644 index 0000000..45b2426 Binary files /dev/null and b/images/resul2t.png differ diff --git a/images/result.png b/images/result.png new file mode 100644 index 0000000..a2be7d0 Binary files /dev/null and b/images/result.png differ diff --git a/images/rotate.sh b/images/rotate.sh new file mode 100755 index 0000000..feb95fd --- /dev/null +++ b/images/rotate.sh @@ -0,0 +1,11 @@ +d=$1 +echo "roate from "$d +for szFile in $d/*.png +do + dd=${d}_right + echo "save to "$dd + if [ ! -d $dd ];then + mkdir $dd + fi + convert "$szFile" -rotate 90 $dd/"$(basename "$szFile")" ; +done diff --git a/images/run.sh b/images/run.sh new file mode 100755 index 0000000..7a2e298 --- /dev/null +++ b/images/run.sh @@ -0,0 +1,4 @@ +rm -r res_right +./rotate.sh res +python3 grid_res.py res_right + diff --git a/init_dependencies.sh b/init_dependencies.sh new file mode 100644 index 0000000..71e8d7f --- /dev/null +++ b/init_dependencies.sh @@ -0,0 +1,8 @@ +echo 'dlib should build manually.' 
+ +sudo apt-get install ffmpeg x264 libx264-dev +sudo apt-get install xvfb + +sudo pip3 install pyvirtualdisplay +sudo pip3 install moviepy +sudo pip3 install face_recognition diff --git a/models/.gitignore b/models/.gitignore new file mode 100644 index 0000000..26310a2 --- /dev/null +++ b/models/.gitignore @@ -0,0 +1,2 @@ +swapnet_128.py +swapnet_256.py \ No newline at end of file diff --git a/models/padding_same_conv.py b/models/padding_same_conv.py new file mode 100644 index 0000000..f586ff8 --- /dev/null +++ b/models/padding_same_conv.py @@ -0,0 +1,126 @@ +# modify con2d function to use same padding +# code referd to @famssa in 'https://github.com/pytorch/pytorch/issues/3867' +# and tensorflow source code + +import torch.utils.data +from torch.nn import functional as F + +import math +import torch +from torch.nn.parameter import Parameter +from torch.nn.functional import pad +from torch.nn.modules import Module +from torch.nn.modules.utils import _single, _pair, _triple + + +class _ConvNd(Module): + + def __init__(self, in_channels, out_channels, kernel_size, stride, + padding, dilation, transposed, output_padding, groups, bias): + super(_ConvNd, self).__init__() + if in_channels % groups != 0: + raise ValueError('in_channels must be divisible by groups') + if out_channels % groups != 0: + raise ValueError('out_channels must be divisible by groups') + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.transposed = transposed + self.output_padding = output_padding + self.groups = groups + if transposed: + self.weight = Parameter(torch.Tensor( + in_channels, out_channels // groups, *kernel_size)) + else: + self.weight = Parameter(torch.Tensor( + out_channels, in_channels // groups, *kernel_size)) + if bias: + self.bias = Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + + 
def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def __repr__(self): + s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}' + ', stride={stride}') + if self.padding != (0,) * len(self.padding): + s += ', padding={padding}' + if self.dilation != (1,) * len(self.dilation): + s += ', dilation={dilation}' + if self.output_padding != (0,) * len(self.output_padding): + s += ', output_padding={output_padding}' + if self.groups != 1: + s += ', groups={groups}' + if self.bias is None: + s += ', bias=False' + s += ')' + return s.format(name=self.__class__.__name__, **self.__dict__) + + +class Conv2d(_ConvNd): + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + super(Conv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + False, _pair(0), groups, bias) + + def forward(self, input): + return conv2d_same_padding(input, self.weight, self.bias, self.stride, + self.padding, self.dilation, self.groups) + + +class Conv2dPaddingSame(_ConvNd): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + super(Conv2dPaddingSame, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + False, _pair(0), groups, bias) + + def forward(self, input): + return conv2d_same_padding(input, self.weight, self.bias, self.stride, + self.padding, self.dilation, self.groups) + + +# custom con2d, because pytorch don't have "padding='same'" option. 
+def conv2d_same_padding(input, weight, bias=None, stride=1, padding=1, dilation=1, groups=1): + + input_rows = input.size(2) + filter_rows = weight.size(2) + effective_filter_size_rows = (filter_rows - 1) * dilation[0] + 1 + out_rows = (input_rows + stride[0] - 1) // stride[0] + padding_needed = max(0, (out_rows - 1) * stride[0] + effective_filter_size_rows - + input_rows) + padding_rows = max(0, (out_rows - 1) * stride[0] + + (filter_rows - 1) * dilation[0] + 1 - input_rows) + rows_odd = (padding_rows % 2 != 0) + padding_cols = max(0, (out_rows - 1) * stride[0] + + (filter_rows - 1) * dilation[0] + 1 - input_rows) + cols_odd = (padding_rows % 2 != 0) + + if rows_odd or cols_odd: + input = pad(input, [0, int(cols_odd), 0, int(rows_odd)]) + + return F.conv2d(input, weight, bias, stride, + padding=(padding_rows // 2, padding_cols // 2), + dilation=dilation, groups=groups) diff --git a/models/swapnet.py b/models/swapnet.py new file mode 100644 index 0000000..5c63bc2 --- /dev/null +++ b/models/swapnet.py @@ -0,0 +1,105 @@ +""" +Copyright StrangeAI Authors @2019 + +""" +import torch +import torch.utils.data +from torch import nn, optim +from .padding_same_conv import Conv2d +from alfred.dl.torch.common import device + + +def toTensor(img): + img = torch.from_numpy(img.transpose((0, 3, 1, 2))).to(device) + return img + + +def var_to_np(img_var): + return img_var.data.cpu().numpy() + + +class _ConvLayer(nn.Sequential): + def __init__(self, input_features, output_features): + super(_ConvLayer, self).__init__() + self.add_module('conv2', Conv2d(input_features, output_features, + kernel_size=5, stride=2)) + self.add_module('leakyrelu', nn.LeakyReLU(0.1, inplace=True)) + + +class _UpScale(nn.Sequential): + def __init__(self, input_features, output_features): + super(_UpScale, self).__init__() + self.add_module('conv2_', Conv2d(input_features, output_features * 4, + kernel_size=3)) + self.add_module('leakyrelu', nn.LeakyReLU(0.1, inplace=True)) + 
self.add_module('pixelshuffler', _PixelShuffler()) + + +class Flatten(nn.Module): + + def forward(self, input): + output = input.view(input.size(0), -1) + return output + + +class Reshape(nn.Module): + + def forward(self, input): + output = input.view(-1, 1024, 4, 4) # channel * 4 * 4 + + return output + + +class _PixelShuffler(nn.Module): + def forward(self, input): + batch_size, c, h, w = input.size() + rh, rw = (2, 2) + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + out = input.view(batch_size, rh, rw, oc, h, w) + out = out.permute(0, 3, 4, 1, 5, 2).contiguous() + out = out.view(batch_size, oc, oh, ow) # channel first + + return out + + +class SwapNet(nn.Module): + def __init__(self): + super(SwapNet, self).__init__() + + self.encoder = nn.Sequential( + _ConvLayer(3, 128), + _ConvLayer(128, 256), + _ConvLayer(256, 512), + _ConvLayer(512, 1024), + Flatten(), + nn.Linear(1024 * 4 * 4, 1024), + nn.Linear(1024, 1024 * 4 * 4), + Reshape(), + _UpScale(1024, 512), + ) + + self.decoder_A = nn.Sequential( + _UpScale(512, 256), + _UpScale(256, 128), + _UpScale(128, 64), + Conv2d(64, 3, kernel_size=5, padding=1), + nn.Sigmoid(), + ) + + self.decoder_B = nn.Sequential( + _UpScale(512, 256), + _UpScale(256, 128), + _UpScale(128, 64), + Conv2d(64, 3, kernel_size=5, padding=1), + nn.Sigmoid(), + ) + + def forward(self, x, select='A'): + if select == 'A': + out = self.encoder(x) + out = self.decoder_A(out) + else: + out = self.encoder(x) + out = self.decoder_B(out) + return out diff --git a/predict_64x64.py b/predict_64x64.py new file mode 100644 index 0000000..5687ac1 --- /dev/null +++ b/predict_64x64.py @@ -0,0 +1,71 @@ +""" +convert a face to another person + +""" +from models.swapnet import SwapNet +import torch +from alfred.dl.torch.common import device +import cv2 +import numpy as np +from dataset.training_data import random_warp +from utils.umeyama import umeyama + +mean_value = np.array([0.03321508, 0.05035182, 0.02038819]) + + +def process_img(ori_img): + img = 
cv2.resize(ori_img, (256, 256)) + range_ = np.linspace( 128-80, 128+80, 5 ) + mapx = np.broadcast_to( range_, (5,5) ) + mapy = mapx.T + + # warp image like in the training + mapx = mapx + np.random.normal( size=(5,5), scale=5 ) + mapy = mapy + np.random.normal( size=(5,5), scale=5 ) + interp_mapx = cv2.resize(mapx, (80, 80))[8:72, 8:72].astype('float32') + interp_mapy = cv2.resize(mapy, (80, 80))[8:72, 8:72].astype('float32') + warped_image = cv2.remap(img, interp_mapx, interp_mapy, cv2.INTER_LINEAR) + return warped_image + + +def load_img(): + a = 'images/34600_test_A_target.png' + img = cv2.imread(a) / 255. + return img + + +def predict(): + # convert trump to cage + # img_f = 'data/trump/51834796.jpg' + # img_f = 'data/trump/494045244.jpg' + # NOTE: using face extracted (not original image) + img_f = 'data/trump/464669134_face_0.png' + + ori_img = cv2.imread(img_f) + img = cv2.resize(ori_img, (64, 64)) / 255. + img = np.rot90(img) + # img = load_img() + in_img = np.array(img, dtype=np.float).transpose(2, 1, 0) + + # normalize img + in_img = torch.Tensor(in_img).to(device).unsqueeze(0) + model = SwapNet().to(device) + if torch.cuda.is_available(): + checkpoint = torch.load('checkpoint/faceswap_trump_cage_64x64.pth') + else: + checkpoint = torch.load('checkpoint/faceswap_trump_cage_64x64.pth', map_location={'cuda:0': 'cpu'}) + model.load_state_dict(checkpoint['state']) + model.eval() + print('model loaded.') + + out = model.forward(in_img, select='B') + out = np.clip(out.detach().cpu().numpy()[0]*255, 0, 255).astype('uint8').transpose(2, 1, 0) + + cv2.imshow('original image', ori_img) + cv2.imshow('network input image', img) + cv2.imshow('result image', np.rot90(out, axes=(1, 0))) + cv2.waitKey(0) + + +if __name__ == '__main__': + predict() diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..a3a09e1 --- /dev/null +++ b/readme.md @@ -0,0 +1,93 @@ +# High Resolution Face Swap + +**a face swap implementation with much more higher resolution result 
(128x128) **, this is an improved and optimized *face swap application* based on GAN tech. Our implementation makes the following changes compared with the original *deepfakes* implementation: +- *deepfakes* only supports 64x64 input; we make the network **deeper** so it can output 128x128 size; +- we propose a new network called *SwapNet, SwapNet128*; +- we changed the pre-processing step for input data (such as face warping), making it clearer; +- we made the dataset loader more efficient, loading paired face data directly from 2 directories; +- we propose a new **face outline replace** technique to get a much better blended result with the + original image; their differences are shown in the image below. + +we will continually update this repo and make face swap much more intuitive and simple, so anyone can build their own face-changing model. Here are some results for 128x128 higher-resolution face swap: +

+ +

+ + + +We have trained trump-cage and fanbingbing-galgadot conversion models. The models are not fully trained yet, but they show promising results; the face in most situations works perfectly! + +final results of face swap directly from the original big image: + +

+ +

+ +

+ +

+ + + +As you can see above, we can achieve **high resolution** and seamless combination with face transformation. final results of face swap directly from video (to be added soon): + + + +## Dependencies + +our face swap implementation needs *alfred-py*, which can be installed with: + +``` +sudo pip3 install alfred-py +``` + +## Pretrained Model + +We only provide a pretrained model for the 128x128 model, and it is hosted by StrangeAI (http://codes.strangeai.pro). To train from scratch, you can download the trump cage dataset from: https://anonfile.com/p7w3m0d5be/face-swap.zip . +For existing StrangeAI VIP members, you can download all the code and models from http://strangeai.pro . + + +## Train & Predict + +to run, simply use: + +``` +python3 predict.py +# train fanbingbing-galgadot face swap +python3 train_trump_cage_64x64.py +python3 train_fbb_gal_128x128.py +``` + +this will predict on a trump face and convert it into a cage face. + + +## More Info + +if you want to be invited to our computer vision discussion wechat group, you can add me via wechat or find us at: http://strangeai.pro which is **the biggest AI code sharing platform in China**. + + + +## Note About FaceSwap + +We made some failed attempts and experimented with many combinations to produce a good result, here are some notes you need to know to build a face swap tech: + +- Size is everything: we tried a maximum 256x256 input size, but it fails to swap face style between 2 faces; +- Warp preprocess does not really matter, we also tried removing the warp preprocess step and directly using target images for training; it can still successfully train a face swap model, but for dataset augmentation, it is better to warp it and apply some random transforms; +- the loss does not really matter.
Just kick of train, and train about 15000 epochs, and you can get good result; +- For data preparing, better extract faces first using dlib or [alfred](http://github.com/jinfagang/alfred) + + + + +## Faceswap Datasets + +Actually, we gathered a lot of faces datasets. beside the default one, you may also access them via Baidu cloud disk. + + + +## Copyright + +*FaceSwap* is a project opensourced under MIT license, all right reserved by StrangeAI authors. website: http://strangeai.pro \ No newline at end of file diff --git a/result.png b/result.png new file mode 100644 index 0000000..45b2426 Binary files /dev/null and b/result.png differ diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..6f03afb --- /dev/null +++ b/tests.py @@ -0,0 +1,52 @@ +from models.swapnet import SwapNet +from models.swapnet_128 import SwapNet128 +from utils.model_summary import summary +from alfred.dl.torch.common import device +from dataset.face_pair_dataset import random_warp_128 +from dataset.training_data import random_transform, random_transform_args +from PIL import Image +import cv2 +import numpy as np +import torch +from utils.umeyama import umeyama + +# model = SwapNet().to(device) +# summary(model, input_size=(3, 64, 64)) + +# def random_warp(image): +# assert image.shape == (256, 256, 3) +# range_ = np.linspace(128 - 120, 128 + 120, 5) +# mapx = np.broadcast_to(range_, (5, 5)) +# mapy = mapx.T +# mapx = mapx + np.random.normal(size=(5, 5), scale=5) +# mapy = mapy + np.random.normal(size=(5, 5), scale=5) + +# interp_mapx = cv2.resize(mapx, (80, 80))[8:72, 8:72].astype('float32') +# interp_mapy = cv2.resize(mapy, (80, 80))[8:72, 8:72].astype('float32') + +# # just crop the image, remove the top left bottom right 8 pixels (in order to get the pure face) +# warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) + +# src_points = np.stack([mapx.ravel(), mapy.ravel()], axis=-1) +# dst_points = np.mgrid[0:65:16, 0:65:16].T.reshape(-1, 2) +# mat = 
umeyama(src_points, dst_points, True)[0:2] +# target_image = cv2.warpAffine(image, mat, (64, 64)) +# return warped_image, target_image + +# model = SwapNet128().to(device) +# summary(model, input_size=(3, 128, 128)) + +# a = Image.open('data/trump_cage/cage/2455911_face_0.png') +# a = a.resize((256, 256), Image.ANTIALIAS) +# a = random_transform(np.array(a), **random_transform_args) +# warped_img, target_img = random_warp_128(np.array(a)) + +# t = torch.from_numpy(target_img.transpose(2, 0, 1) / 255.).to(device) +# b = t.detach().cpu().numpy().transpose((2, 1, 0))*255 +# print(b.shape) + +# cv2.imshow('rr', np.array(a)) +# cv2.imshow('warped image', np.array(warped_img)) +# cv2.imshow('target image', np.array(target_img)) +# cv2.imshow('bbbbbbbbb', b) +# cv2.waitKey(0) \ No newline at end of file diff --git a/tools/download_dlib_landmarkmodel.sh b/tools/download_dlib_landmarkmodel.sh new file mode 100644 index 0000000..f7f65ec --- /dev/null +++ b/tools/download_dlib_landmarkmodel.sh @@ -0,0 +1,3 @@ +cd ~ +wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 +bzip2 -d shape_predictor_68_face_landmarks.dat.bz2 diff --git a/tools/download_trump_cage_dataset.sh b/tools/download_trump_cage_dataset.sh new file mode 100644 index 0000000..676ffa1 --- /dev/null +++ b/tools/download_trump_cage_dataset.sh @@ -0,0 +1 @@ +wget https://anonfile.com/p7w3m0d5be/face-swap.zip \ No newline at end of file diff --git a/tools/extract_faces.sh b/tools/extract_faces.sh new file mode 100644 index 0000000..8dfefa2 --- /dev/null +++ b/tools/extract_faces.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# this script wille extract all faces from a directory of images +# it using alfred-py and dlib to do this +# the size of faces are does not matter, it will be resized according to faceswap networks + +sudo pip3 install alfred-py diff --git a/train_fbb_gal_128x128.py b/train_fbb_gal_128x128.py new file mode 100644 index 0000000..1e7c794 --- /dev/null +++ b/train_fbb_gal_128x128.py 
@@ -0,0 +1,158 @@ +""" +Copyright StrangeAI Authors @2019 + + +As the network without linear connect layer +the feature are not compressed, so the encoder are weak +it consist to many informations, and decoder can not using the abstract +information to construct a new image + +""" +from __future__ import print_function +import argparse +import os +import cv2 +import numpy as np +import torch +import torch.utils.data +from torch import nn, optim +from torch.autograd import Variable +from torch.nn import functional as F +import torch.backends.cudnn as cudnn +from utils.util import get_image_paths, load_images, stack_images +from dataset.training_data import get_training_data +from alfred.dl.torch.common import device +from shutil import copyfile +try: + from models.swapnet_128 import SwapNet128, toTensor, var_to_np +except Exception: + print('can not import swapnet128, if you need high resolution face swap, ' + 'you can download from http://luoli.ai (you can afford a VIP membership to get all other codes)') +from loguru import logger +from dataset.face_pair_dataset import FacePairDataset128x128 +from torchvision import transforms +from torch.utils.data import DataLoader +from alfred.utils.log import init_logger + +init_logger() + +batch_size = 32 +epochs = 100000 +save_per_epoch = 300 + +a_dir = './data/galgadot_fbb/fanbingbing_faces' +b_dir = './data/galgadot_fbb/galgadot_faces' +# we start to train on bigger size +dataset_name = 'galgadot_fbb' +target_size = 128 +log_img_dir = './checkpoint/results_{}_{}x{}'.format(dataset_name, target_size, target_size) +log_model_dir = './checkpoint/{}_{}x{}'.format(dataset_name, + target_size, target_size) +check_point_save_path = os.path.join( + log_model_dir, 'faceswap_{}_{}x{}.pth'.format(dataset_name, target_size, target_size)) + + +def main(): + os.makedirs(log_img_dir, exist_ok=True) + os.makedirs(log_model_dir, exist_ok=True) + logger.info("loading datasets") + + transform = transforms.Compose([ + # 
transforms.Resize((target_size, target_size)), + transforms.RandomHorizontalFlip(), + # transforms.RandomVerticalFlip(), + # transforms.ToTensor(), + ]) + ds = FacePairDataset128x128(a_dir=a_dir, b_dir=b_dir, + target_size=target_size, transform=transform) + dataloader = DataLoader(ds, batch_size, shuffle=True) + + model = SwapNet128() + model.to(device) + start_epoch = 0 + logger.info('try resume from checkpoint') + try: + if torch.cuda.is_available(): + checkpoint = torch.load(check_point_save_path) + else: + checkpoint = torch.load( + check_point_save_path, map_location={'cuda:0': 'cpu'}) + model.load_state_dict(checkpoint['state']) + start_epoch = checkpoint['epoch'] + logger.info('checkpoint loaded.') + except FileNotFoundError: + print('Can\'t found {}'.format(check_point_save_path)) + + criterion = nn.L1Loss() + optimizer_1 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_a.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + optimizer_2 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_b.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + + logger.info('Start training, from epoch {} '.format(start_epoch)) + try: + for epoch in range(start_epoch, epochs): + iter = 0 + for data in dataloader: + iter += 1 + img_a_target, img_a_input, img_b_target, img_b_input = data + img_a_target = img_a_target.to(device) + img_a_input = img_a_input.to(device) + img_b_target = img_b_target.to(device) + img_b_input = img_b_input.to(device) + # print(img_a.size()) + # print(img_b.size()) + + optimizer_1.zero_grad() + optimizer_2.zero_grad() + predict_a = model(img_a_input, to='a') + predict_b = model(img_b_input, to='b') + loss1 = criterion(predict_a, img_a_target) + loss2 = criterion(predict_b, img_b_target) + loss1.backward() + loss2.backward() + optimizer_1.step() + optimizer_2.step() + logger.info('Epoch: {}, iter: {}, lossA: {}, lossB: {}'.format( + epoch, iter, loss1.item(), loss2.item())) + if epoch % save_per_epoch == 
0 and epoch != 0: + logger.info('Saving models...') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname( + check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch)), + check_point_save_path) + if epoch % 10 == 0 and epoch != 0 and iter == 1: + img_a_original = np.array(img_a_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + img_b_original = np.array(img_b_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + a_predict_a = np.array(predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_b = np.array(predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + a_predict_b = model(img_a_input, to='b') + b_predict_a = model(img_b_input, to='a') + a_predict_b = np.array(a_predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_a = np.array(b_predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + cv2.imwrite(os.path.join(log_img_dir, '{}_0.png'.format(epoch)), cv2.cvtColor(img_a_original, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_3.png'.format(epoch)), cv2.cvtColor(img_b_original, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_1.png'.format(epoch)), cv2.cvtColor(a_predict_a, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_4.png'.format(epoch)), cv2.cvtColor(b_predict_b, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_2.png'.format(epoch)), cv2.cvtColor(a_predict_b, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_5.png'.format(epoch)), cv2.cvtColor(b_predict_a, cv2.COLOR_BGR2RGB)) + logger.info('Record a result') + except KeyboardInterrupt: + logger.info('try saving models...') + state = { + 'state': model.state_dict(), + 'epoch': 
epoch + } + torch.save(state, os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch)), + check_point_save_path) + + +if __name__ == "__main__": + main() diff --git a/train_fbb_gal_64x64.py b/train_fbb_gal_64x64.py new file mode 100644 index 0000000..a49e246 --- /dev/null +++ b/train_fbb_gal_64x64.py @@ -0,0 +1,157 @@ +""" +Copyright StrangeAI Authors @2019 + +original forked from deepfakes repo +edit and promoted by StrangeAI authors + +""" + +from __future__ import print_function +import argparse +import os + +import cv2 +import numpy as np +import torch + +import torch.utils.data +from torch import nn, optim +from torch.autograd import Variable +from torch.nn import functional as F +import torch.backends.cudnn as cudnn +from torch.utils.data import DataLoader +from models.swapnet import SwapNet, toTensor, var_to_np +from utils.util import get_image_paths, load_images, stack_images +from dataset.training_data import get_training_data +from alfred.dl.torch.common import device +from shutil import copyfile +from loguru import logger +from dataset.face_pair_dataset import FacePairDataset, FacePairDataset64x64 +from torchvision import transforms +import sys + +logger.remove() # Remove the pre-configured handler +logger.start(sys.stderr, format="{level} {time:MM-DD HH:mm:ss} {file}:{line} - {message}") + +batch_size = 64 +epochs = 100000 +save_per_epoch = 300 + +a_dir = './data/galgadot_fbb/fanbingbing_faces' +b_dir = './data/galgadot_fbb/galgadot_faces' +# we start to train on bigger size +target_size = 64 +dataset_name = 'galgadot_fbb' +log_img_dir = './checkpoint/results_{}_{}x{}'.format(dataset_name, target_size, target_size) +log_model_dir = './checkpoint/{}_{}x{}'.format(dataset_name, + target_size, target_size) +check_point_save_path = os.path.join( + log_model_dir, 
'faceswap_{}_{}x{}.pth'.format(dataset_name, target_size, target_size)) + + +def main(): + os.makedirs(log_img_dir, exist_ok=True) + os.makedirs(log_model_dir, exist_ok=True) + + transform = transforms.Compose([ + # transforms.Resize((target_size, target_size)), + transforms.RandomHorizontalFlip(), + # transforms.RandomVerticalFlip(), + # transforms.ToTensor(), + ]) + ds = FacePairDataset64x64(a_dir=a_dir, b_dir=b_dir, + target_size=target_size, transform=transform) + dataloader = DataLoader(ds, batch_size, shuffle=True) + + model = SwapNet() + model.to(device) + start_epoch = 0 + logger.info('try resume from checkpoint') + if os.path.isdir('checkpoint'): + try: + if torch.cuda.is_available(): + checkpoint = torch.load(check_point_save_path) + else: + checkpoint = torch.load( + check_point_save_path, map_location={'cuda:0': 'cpu'}) + model.load_state_dict(checkpoint['state']) + start_epoch = checkpoint['epoch'] + logger.info('checkpoint loaded.') + except FileNotFoundError: + print('Can\'t found faceswap_trump_cage.pth') + + criterion = nn.L1Loss() + optimizer_1 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_A.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + optimizer_2 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_B.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + + logger.info('Start training, from epoch {} '.format(start_epoch)) + try: + for epoch in range(start_epoch, epochs): + iter = 0 + for data in dataloader: + iter += 1 + img_a_target, img_a_input, img_b_target, img_b_input = data + img_a_target = img_a_target.to(device) + img_a_input = img_a_input.to(device) + img_b_target = img_b_target.to(device) + img_b_input = img_b_input.to(device) + # print(img_a.size()) + # print(img_b.size()) + + optimizer_1.zero_grad() + optimizer_2.zero_grad() + predict_a = model(img_a_input, select='A') + predict_b = model(img_b_input, select='B') + loss1 = criterion(predict_a, img_a_target) + loss2 = 
criterion(predict_b, img_b_target) + loss1.backward() + loss2.backward() + optimizer_1.step() + optimizer_2.step() + logger.info('Epoch: {}, iter: {}, lossA: {}, lossB: {}'.format( + epoch, iter, loss1.item(), loss2.item())) + if epoch % save_per_epoch == 0 and epoch != 0: + logger.info('Saving models...') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname( + check_point_save_path), 'faceswap_trump_cage_128x128_{}.pth'.format(epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_trump_cage_128x128_{}.pth'.format(epoch)), + check_point_save_path) + if epoch % 10 == 0 and epoch != 0 and iter == 1: + img_a_original = np.array(img_a_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + img_b_original = np.array(img_b_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + a_predict_a = np.array(predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_b = np.array(predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + a_predict_b = model(img_a_input, select='B') + b_predict_a = model(img_b_input, select='A') + a_predict_b = np.array(a_predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_a = np.array(b_predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + cv2.imwrite(os.path.join(log_img_dir, '{}_0.png'.format(epoch)), cv2.cvtColor(img_a_original, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_3.png'.format(epoch)), cv2.cvtColor(img_b_original, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_1.png'.format(epoch)), cv2.cvtColor(a_predict_a, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_4.png'.format(epoch)), cv2.cvtColor(b_predict_b, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_2.png'.format(epoch)), cv2.cvtColor(a_predict_b, cv2.COLOR_BGR2RGB)) + 
cv2.imwrite(os.path.join(log_img_dir, '{}_5.png'.format(epoch)), cv2.cvtColor(b_predict_a, cv2.COLOR_BGR2RGB)) + logger.info('Record a result') + except KeyboardInterrupt: + logger.warning('try saving models...do not interrupt') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname( + check_point_save_path), 'faceswap_trump_cage_256x256_{}.pth'.format(epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_trump_cage_256x256_{}.pth'.format(epoch)), + check_point_save_path) + + + +if __name__ == "__main__": + main() diff --git a/train_trump_cage_128x128.py b/train_trump_cage_128x128.py new file mode 100644 index 0000000..84555e5 --- /dev/null +++ b/train_trump_cage_128x128.py @@ -0,0 +1,158 @@ +""" +Copyright StrangeAI Authors @2019 + + +As the network without linear connect layer +the feature are not compressed, so the encoder are weak +it consist to many informations, and decoder can not using the abstract +information to construct a new image + +""" +from __future__ import print_function +import argparse +import os +import cv2 +import numpy as np +import torch +import torch.utils.data +from torch import nn, optim +from torch.autograd import Variable +from torch.nn import functional as F +import torch.backends.cudnn as cudnn +from utils.util import get_image_paths, load_images, stack_images +from dataset.training_data import get_training_data +from alfred.dl.torch.common import device +from shutil import copyfile +try: + from models.swapnet_128 import SwapNet128, toTensor, var_to_np +except Exception: + print('can not import swapnet128, if you need high resolution face swap, ' + 'you can download from http://luoli.ai (you can afford a VIP membership to get all other codes)') +from loguru import logger +from dataset.face_pair_dataset import FacePairDataset128x128 +from torchvision import transforms +from torch.utils.data import DataLoader +from alfred.utils.log import init_logger 
+ +init_logger() + +batch_size = 32 +epochs = 100000 +save_per_epoch = 300 + +a_dir = './data/trump_cage/trump' +b_dir = './data/trump_cage/cage' +dataset_name = 'trump_cage' +# we start to train on bigger size +target_size = 128 +log_img_dir = './checkpoint/results_{}_{}x{}'.format(dataset_name, target_size, target_size) +log_model_dir = './checkpoint/{}_{}x{}'.format(dataset_name, + target_size, target_size) +check_point_save_path = os.path.join( + log_model_dir, 'faceswap_{}_{}x{}.pth'.format(dataset_name, target_size, target_size)) + + +def main(): + os.makedirs(log_img_dir, exist_ok=True) + os.makedirs(log_model_dir, exist_ok=True) + logger.info("loading datasets") + + transform = transforms.Compose([ + # transforms.Resize((target_size, target_size)), + transforms.RandomHorizontalFlip(), + # transforms.RandomVerticalFlip(), + # transforms.ToTensor(), + ]) + ds = FacePairDataset128x128(a_dir=a_dir, b_dir=b_dir, + target_size=target_size, transform=transform) + dataloader = DataLoader(ds, batch_size, shuffle=True) + + model = SwapNet128() + model.to(device) + start_epoch = 0 + logger.info('try resume from checkpoint') + try: + if torch.cuda.is_available(): + checkpoint = torch.load(check_point_save_path) + else: + checkpoint = torch.load( + check_point_save_path, map_location={'cuda:0': 'cpu'}) + model.load_state_dict(checkpoint['state']) + start_epoch = checkpoint['epoch'] + logger.info('checkpoint loaded.') + except FileNotFoundError: + print('Can\'t found {}'.format(check_point_save_path)) + + criterion = nn.L1Loss() + optimizer_1 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_a.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + optimizer_2 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_b.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + + logger.info('Start training, from epoch {} '.format(start_epoch)) + try: + for epoch in range(start_epoch, epochs): + iter = 0 + for data in dataloader: + iter += 
1 + img_a_target, img_a_input, img_b_target, img_b_input = data + img_a_target = img_a_target.to(device) + img_a_input = img_a_input.to(device) + img_b_target = img_b_target.to(device) + img_b_input = img_b_input.to(device) + # print(img_a.size()) + # print(img_b.size()) + + optimizer_1.zero_grad() + optimizer_2.zero_grad() + predict_a = model(img_a_input, to='a') + predict_b = model(img_b_input, to='b') + loss1 = criterion(predict_a, img_a_target) + loss2 = criterion(predict_b, img_b_target) + loss1.backward() + loss2.backward() + optimizer_1.step() + optimizer_2.step() + logger.info('Epoch: {}, iter: {}, lossA: {}, lossB: {}'.format( + epoch, iter, loss1.item(), loss2.item())) + if epoch % save_per_epoch == 0 and epoch != 0 and iter == 1: + logger.info('Saving models...') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname( + check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch)), + check_point_save_path) + if epoch % 10 == 0 and epoch != 0 and iter == 1: + img_a_original = np.array(img_a_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + img_b_original = np.array(img_b_target.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + a_predict_a = np.array(predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_b = np.array(predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + a_predict_b = model(img_a_input, to='b') + b_predict_a = model(img_b_input, to='a') + a_predict_b = np.array(a_predict_b.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + b_predict_a = np.array(b_predict_a.detach().cpu().numpy()[0].transpose(2, 1, 0)*255, dtype=np.uint8) + + cv2.imwrite(os.path.join(log_img_dir, '{}_0.png'.format(epoch)), cv2.cvtColor(img_a_original, 
cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_3.png'.format(epoch)), cv2.cvtColor(img_b_original, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_1.png'.format(epoch)), cv2.cvtColor(a_predict_a, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_4.png'.format(epoch)), cv2.cvtColor(b_predict_b, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_2.png'.format(epoch)), cv2.cvtColor(a_predict_b, cv2.COLOR_BGR2RGB)) + cv2.imwrite(os.path.join(log_img_dir, '{}_5.png'.format(epoch)), cv2.cvtColor(b_predict_a, cv2.COLOR_BGR2RGB)) + logger.info('Record a result') + except KeyboardInterrupt: + logger.info('try saving models...') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_128x128_{}.pth'.format(dataset_name, epoch)), + check_point_save_path) + + +if __name__ == "__main__": + main() diff --git a/train_trump_cage_64x64.py b/train_trump_cage_64x64.py new file mode 100644 index 0000000..3d7ab8e --- /dev/null +++ b/train_trump_cage_64x64.py @@ -0,0 +1,144 @@ +""" +Copyright StrangeAI Authors @2019 + + +original forked from deepfakes repo + +edit and promoted by StrangeAI authors + +""" + +from __future__ import print_function +import argparse +import os + +import cv2 +import numpy as np +import torch + +import torch.utils.data +from torch import nn, optim +from torch.autograd import Variable +from torch.nn import functional as F +import torch.backends.cudnn as cudnn + +from models.swapnet import SwapNet, toTensor, var_to_np +from utils.util import get_image_paths, load_images, stack_images +from dataset.training_data import get_training_data +from alfred.dl.torch.common import device +from shutil import copyfile +from loguru import logger + +batch_size = 64 +epochs = 100000 +save_per_epoch = 300 
+ +a_dir = './data/trump_cage/trump' +b_dir = './data/trump_cage/cage' +# we start to train on bigger size +target_size = 64 +dataset_name = 'trump_cage' +log_img_dir = './checkpoint/results_{}_{}x{}'.format(dataset_name, target_size, target_size) +log_model_dir = './checkpoint/{}_{}x{}'.format(dataset_name, + target_size, target_size) +check_point_save_path = os.path.join( + log_model_dir, 'faceswap_{}_{}x{}.pth'.format(dataset_name, target_size, target_size)) + + +def main(): + os.makedirs(log_img_dir, exist_ok=True) + os.makedirs(log_model_dir, exist_ok=True) + + logger.info("loading datasets") + images_A = get_image_paths(a_dir) + images_B = get_image_paths(b_dir) + images_A = load_images(images_A) / 255.0 + images_B = load_images(images_B) / 255.0 + + print('mean value to remember: ', images_B.mean( + axis=(0, 1, 2)) - images_A.mean(axis=(0, 1, 2))) + images_A += images_B.mean(axis=(0, 1, 2)) - images_A.mean(axis=(0, 1, 2)) + + model = SwapNet() + model.to(device) + start_epoch = 0 + logger.info('try resume from checkpoint') + if os.path.isdir('checkpoint'): + try: + if torch.cuda.is_available(): + checkpoint = torch.load('./checkpoint/faceswap_trump_cage_64x64.pth') + else: + checkpoint = torch.load( + './checkpoint/faceswap_trump_cage_64x64.pth', map_location={'cuda:0': 'cpu'}) + model.load_state_dict(checkpoint['state']) + start_epoch = checkpoint['epoch'] + logger.info('checkpoint loaded.') + except FileNotFoundError: + print('Can\'t found faceswap_trump_cage.pth') + + criterion = nn.L1Loss() + optimizer_1 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_A.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + optimizer_2 = optim.Adam([{'params': model.encoder.parameters()}, + {'params': model.decoder_B.parameters()}], lr=5e-5, betas=(0.5, 0.999)) + + logger.info('Start training, from epoch {} '.format(start_epoch)) + + for epoch in range(start_epoch, epochs): + warped_A, target_A = get_training_data(images_A, batch_size) + # 
print(warped_A.shape) + # t_a = np.array(warped_A[0] * 255, dtype=np.uint8) + # print(t_a) + # print(t_a.shape) + # cv2.imshow('rr', t_a) + # cv2.waitKey(0) + # warped a and target a are not rotated, where did rotate? + + warped_B, target_B = get_training_data(images_B, batch_size) + warped_A, target_A = toTensor(warped_A), toTensor(target_A) + warped_B, target_B = toTensor(warped_B), toTensor(target_B) + # warp_a = np.array(warped_A[0].detach().cpu().numpy().transpose(2, 1, 0)*255, dtype=np.uint8) + # cv2.imshow('rr', warp_a) + # cv2.waitKey(0) + warped_A, target_A, warped_B, target_B = Variable(warped_A.float()), Variable(target_A.float()), \ + Variable(warped_B.float()), Variable(target_B.float()) + optimizer_1.zero_grad() + optimizer_2.zero_grad() + warped_A_out = model(warped_A, 'A') + warped_B_out = model(warped_B, 'B') + loss1 = criterion(warped_A_out, target_A) + loss2 = criterion(warped_B_out, target_B) + loss1.backward() + loss2.backward() + optimizer_1.step() + optimizer_2.step() + logger.info('epoch: {}, lossA: {}, lossB: {}'.format(epoch, loss1.item(), loss2.item())) + if epoch % save_per_epoch == 0 and iter == 0: + logger.info('Saving models...') + state = { + 'state': model.state_dict(), + 'epoch': epoch + } + torch.save(state, os.path.join(os.path.dirname( + check_point_save_path), 'faceswap_{}_64x64_{}.pth'.format(dataset_name, epoch))) + copyfile(os.path.join(os.path.dirname(check_point_save_path), 'faceswap_{}_64x64_{}.pth'.format(dataset_name, epoch)), + check_point_save_path) + if epoch % 100 == 0: + test_A_ = warped_A[0:2] + a_predict_a = var_to_np(model(test_A_, 'A'))[0]*255 + # warped a out + # print(test_A_[0].detach().cpu().numpy().shape) + a_predict_b = var_to_np(model(test_A_, 'B'))[0]*255 + + warp_a = test_A_[0].detach().cpu().numpy()*255 + target_a = target_A[0].detach().cpu().numpy()*255 + + cv2.imwrite(os.path.join(log_img_dir, "{}_res_a_to_a.png".format(epoch)), np.array(a_predict_a.transpose(2, 1, 0)).astype('uint8')) + 
cv2.imwrite(os.path.join(log_img_dir, "{}_res_a_to_b.png".format(epoch)), np.array(a_predict_b.transpose(2, 1, 0)).astype('uint8')) + cv2.imwrite(os.path.join(log_img_dir, "{}_test_A_warped.png".format(epoch)), np.array(warp_a.transpose(2, 1, 0)).astype('uint8')) + cv2.imwrite(os.path.join(log_img_dir, "{}_test_A_target.png".format(epoch)), np.array(target_a.transpose(2, 1, 0)).astype('uint8')) + logger.info('Record a result') + + +if __name__ == "__main__": + main() diff --git a/utils/face_extractor.py b/utils/face_extractor.py new file mode 100644 index 0000000..4913262 --- /dev/null +++ b/utils/face_extractor.py @@ -0,0 +1,101 @@ +""" +This file using for extracting faces of all images + +""" +import glob +try: + import dlib +except ImportError: + print('You have not installed dlib, install from https://github.com/davisking/dlib') + print('see you later.') + exit(0) +import os +import cv2 +import numpy as np +from loguru import logger + + +class FaceExtractor(object): + + def __init__(self): + self.detector = dlib.get_frontal_face_detector() + # self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") + + self.predictor_path = os.path.expanduser('~/shape_predictor_68_face_landmarks.dat') + + def get_faces_list(self, img, landmark=False): + """ + get faces and locations + """ + assert isinstance(img, np.ndarray), 'img should be numpy array (cv2 frame)' + if landmark: + if os.path.exists(self.predictor_path): + predictor = dlib.shape_predictor(self.predictor_path) + else: + logger.error('can not call this method, you should download ' + 'dlib landmark model: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2') + exit(0) + dets = self.detector(img, 1) + all_faces = [] + locations = [] + landmarks = [] + for i, d in enumerate(dets): + # get the face crop + x = int(d.left()) + y = int(d.top()) + w = int(d.width()) + h = int(d.height()) + + face_patch = np.array(img)[y: y + h, x: x + w, 0:3] + + if landmark: + shape = predictor(img, 
d) + landmarks.append(shape) + locations.append([x, y, w, h]) + all_faces.append(face_patch) + if landmark: + return all_faces, locations, landmarks + else: + return all_faces, locations + + def get_faces(self, img_d): + """ + get all faces from img_d + :param img_d: + :return: + """ + + all_images = [] + for e in ['png', 'jpg', 'jpeg']: + all_images.extend(glob.glob(os.path.join(img_d, '*.{}'.format(e)))) + print('Found all {} images under {}'.format(len(all_images), img_d)) + + s_d = os.path.dirname(img_d) + "_faces" + if not os.path.exists(s_d): + os.makedirs(s_d) + for img_f in all_images: + img = cv2.imread(img_f, cv2.COLOR_BGR2RGB) + + dets = self.detector(img, 1) + print('=> get {} faces in {}'.format(len(dets), img_f)) + print('=> saving faces...') + for i, d in enumerate(dets): + save_face_f = os.path.join(s_d, os.path.basename(img_f).split('.')[0] + + '_face_{}.png'.format(i)) + + # get the face crop + x = int(d.left()) + y = int(d.top()) + w = int(d.width()) + h = int(d.height()) + + face_patch = np.array(img)[y: y + h, x: x + w, 0:3] + # print(face_patch.shape) + img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2) + + # cv2.imshow('tt', img) + # cv2.waitKey(0) + cv2.imwrite(save_face_f, face_patch) + print('Done!') + # cv2.waitKey(0) + diff --git a/utils/model_summary.py b/utils/model_summary.py new file mode 100644 index 0000000..ff20cff --- /dev/null +++ b/utils/model_summary.py @@ -0,0 +1,129 @@ +# ----------------------- +# +# Copyright Jin Fagang @2018 +# +# 1/25/19 +# torch_summary +# ----------------------- +""" +codes token from +https://github.com/sksq96/pytorch-summary + +I edit something here, credits belongs to author +""" +import torch +import torch.nn as nn +from torch.autograd import Variable + +from collections import OrderedDict +import numpy as np + + +def summary(model, input_size, batch_size=-1, device="cuda"): + def register_hook(module): + def hook(module, input, output): + class_name = 
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print a Keras-style per-layer summary of a PyTorch model.

    Runs one forward pass on random data of shape ``(2, *input_size)`` and
    records every non-container module's output shape and parameter count
    via forward hooks, then prints the table plus memory estimates.

    :param model: the nn.Module to inspect (already moved to ``device``)
    :param input_size: input shape WITHOUT the batch dimension, channel
                       first, e.g. ``(3, 64, 64)``
    :param batch_size: value displayed in the shape column (display only)
    :param device: 'cuda' or 'cpu'
    """
    def register_hook(module):
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            m_key = "%s-%i" % (class_name, module_idx + 1)
            summary[m_key] = OrderedDict()
            summary[m_key]["input_shape"] = list(input[0].size())
            summary[m_key]["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                summary[m_key]["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                summary[m_key]["output_shape"] = list(output.size())
                summary[m_key]["output_shape"][0] = batch_size

            # BUG FIX: keep parameter counts as plain Python ints (.item()).
            # The original accumulated torch tensors into an int accumulator
            # and later called total_params.numpy(), which raised
            # AttributeError whenever no layer contributed any parameters
            # (total_params stayed the plain int 0).
            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size()))).item()
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size()))).item()
            summary[m_key]["nb_params"] = params

        # hook only real layers: skip containers and the root model itself
        if (
            not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)
        ):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # batch of 2 so BatchNorm layers do not fail during the probe pass
    if isinstance(input_size, tuple) and input_size[0] <= 3:
        x = torch.rand(2, *input_size).type(dtype)
    else:
        print('Wrong! you should send input size specific without batch size, etc: (3, 64, 64), channel first.')
        exit(0)

    # per-layer records and hook handles, closed over by register_hook
    summary = OrderedDict()
    hooks = []

    model.apply(register_hook)

    try:
        print('fake data input: ', x.size())
        model(x)
    except Exception as e:
        print('summary failed. error: {}'.format(e))
        print('make sure your called model.to(device) ')
        exit(0)

    # hooks are only needed for the probe pass: always detach them again
    for h in hooks:
        h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer in summary:
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20} {:>25} {:>15}".format(
            layer,
            str(summary[layer]["output_shape"]),
            "{0:,}".format(summary[layer]["nb_params"]),
        )
        total_params += summary[layer]["nb_params"]
        total_output += np.prod(summary[layer]["output_shape"])
        if summary[layer].get("trainable"):
            trainable_params += summary[layer]["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
def umeyama(src, dst, estimate_scale):
    """Estimate N-D similarity transformation with or without scaling.

    Ported from scikit-image's ``transform._geometric``.

    Parameters
    ----------
    src : (M, N) array
        Source coordinates.
    dst : (M, N) array
        Destination coordinates.
    estimate_scale : bool
        Whether to estimate scaling factor.

    Returns
    -------
    T : (N + 1, N + 1)
        The homogeneous similarity transformation matrix. The matrix contains
        NaN values only if the problem is not well-conditioned.

    References
    ----------
    .. [1] "Least-squares estimation of transformation parameters between two
        point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
    """
    num = src.shape[0]
    dim = src.shape[1]

    # Demean both point sets.
    src_mean = src.mean(axis=0)
    dst_mean = dst.mean(axis=0)
    src_demean = src - src_mean
    dst_demean = dst - dst_mean

    # Eq. (38): cross-covariance between the demeaned sets.
    A = np.dot(dst_demean.T, src_demean) / num

    # Eq. (39): reflection guard.
    d = np.ones((dim,), dtype=np.double)
    if np.linalg.det(A) < 0:
        d[dim - 1] = -1

    T = np.eye(dim + 1, dtype=np.double)

    # NOTE: numpy's svd returns V already transposed (V here is V^T).
    U, S, V = np.linalg.svd(A)

    # Eq. (40) and (43).
    rank = np.linalg.matrix_rank(A)
    if rank == 0:
        return np.nan * T
    elif rank == dim - 1:
        if np.linalg.det(U) * np.linalg.det(V) > 0:
            T[:dim, :dim] = np.dot(U, V)
        else:
            s = d[dim - 1]
            d[dim - 1] = -1
            T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
            d[dim - 1] = s
    else:
        # BUG FIX: since V is already V^T, the rotation is U @ diag(d) @ V
        # (exactly as in scikit-image and the branches above); the original's
        # extra ``.T`` here applied the wrong rotation in the common
        # full-rank case.
        T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))

    if estimate_scale:
        # Eq. (41) and (42).
        scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
    else:
        scale = 1.0

    T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
    T[:dim, :dim] *= scale

    return T


def get_image_paths(directory):
    """Paths of all .png files directly inside ``directory`` (no recursion)."""
    return [entry.path for entry in os.scandir(directory) if entry.name.endswith(".png")]


def load_images(image_paths, convert=None):
    """Load every image resized to 256x256 into one (N, H, W, C) array.

    :param image_paths: iterable of image file paths
    :param convert: optional callable applied to each loaded image

    NOTE(review): an empty ``image_paths`` raises UnboundLocalError because
    the output buffer is only allocated on the first image — callers are
    expected to pass at least one path; confirm before relying on it.
    """
    iter_all_images = (cv2.resize(cv2.imread(fn), (256, 256)) for fn in image_paths)
    if convert:
        iter_all_images = (convert(img) for img in iter_all_images)
    for i, image in enumerate(iter_all_images):
        if i == 0:
            # first image fixes dtype and per-image shape for the whole batch
            all_images = np.empty((len(image_paths),) + image.shape, dtype=image.dtype)
        all_images[i] = image
    return all_images


def get_transpose_axes(n):
    """Split the first n-1 axes into alternating 'y' and 'x' groups.

    Returns (y_axes, x_axes, [n - 1]); the last axis (channels) stays last.
    Used by stack_images() to tile a grid of images into one image.
    """
    if n % 2 == 0:
        y_axes = list(range(1, n - 1, 2))
        x_axes = list(range(0, n - 1, 2))
    else:
        y_axes = list(range(0, n - 1, 2))
        x_axes = list(range(1, n - 1, 2))
    return y_axes, x_axes, [n - 1]


def stack_images(images):
    """Tile a batch/grid of images into a single image by merging the
    alternating axes chosen by get_transpose_axes()."""
    images_shape = np.array(images.shape)
    new_axes = get_transpose_axes(len(images_shape))
    new_shape = [np.prod(images_shape[x]) for x in new_axes]
    return np.transpose(
        images,
        axes=np.concatenate(new_axes)
    ).reshape(new_shape)