Files
disrupting-deepfakes/stargan/data_loader.py
Nataniel Ruiz Gutierrez 21970b730a All
2019-12-21 16:37:10 -05:00

102 lines
3.6 KiB
Python

from torch.utils import data
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from PIL import Image
import torch
import os
import random
import noise
import cv2
class CelebA(data.Dataset):
    """Dataset class for the CelebA dataset.

    The attribute file is parsed once at construction time. Its entries are
    shuffled with a fixed seed and split into a test subset (the first 1999
    shuffled entries) and a training subset (all remaining entries).
    """

    def __init__(self, image_dir, attr_path, selected_attrs, transform, mode):
        """Initialize and preprocess the CelebA dataset.

        Args:
            image_dir: Directory that contains the image files.
            attr_path: Path to the attribute annotation text file.
            selected_attrs: Attribute names used to build each label, in order.
            transform: Callable applied to every loaded PIL image.
            mode: 'train' selects the training subset; any other value
                selects the test subset.
        """
        self.image_dir = image_dir
        self.attr_path = attr_path
        self.selected_attrs = selected_attrs
        self.transform = transform
        self.mode = mode
        self.train_dataset = []
        self.test_dataset = []
        self.attr2idx = {}
        self.idx2attr = {}
        self.preprocess()

        if mode == 'train':
            self.num_images = len(self.train_dataset)
        else:
            self.num_images = len(self.test_dataset)

    def preprocess(self):
        """Preprocess the CelebA attribute file.

        The second line of the file holds the attribute names; every line
        after it holds a filename followed by one value per attribute, where
        the string '1' marks the attribute as present.

        Raises:
            KeyError: If a name in ``selected_attrs`` is not in the file.
        """
        # Context manager so the file handle is released right after reading.
        with open(self.attr_path, 'r') as attr_file:
            lines = [line.rstrip() for line in attr_file]

        all_attr_names = lines[1].split()
        for i, attr_name in enumerate(all_attr_names):
            self.attr2idx[attr_name] = i
            self.idx2attr[i] = attr_name

        # Skip the two header lines; drop blank lines (e.g. a trailing empty
        # line) which would otherwise fail when the filename is extracted.
        lines = [line for line in lines[2:] if line]

        # Fixed seed keeps the train/test split identical across runs.
        # NOTE: this reseeds the module-level `random` generator.
        random.seed(1234)
        random.shuffle(lines)

        for i, line in enumerate(lines):
            split = line.split()
            filename = split[0]
            values = split[1:]

            label = [values[self.attr2idx[attr_name]] == '1'
                     for attr_name in self.selected_attrs]

            # The first 1999 shuffled entries form the test subset.
            if (i + 1) < 2000:
                self.test_dataset.append([filename, label])
            else:
                self.train_dataset.append([filename, label])

        print('Finished preprocessing the CelebA dataset...')

    def __getitem__(self, index):
        """Return one image and its corresponding attribute label.

        Returns:
            A tuple of the transformed image and a ``torch.FloatTensor``
            built from the boolean attribute label.
        """
        dataset = self.train_dataset if self.mode == 'train' else self.test_dataset
        filename, label = dataset[index]
        image = Image.open(os.path.join(self.image_dir, filename))
        # Disabled noise-injection hook, kept for reference:
        # image = noise.noisy('s&p', image)
        return self.transform(image), torch.FloatTensor(label)

    def __len__(self):
        """Return the number of images."""
        return self.num_images
def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=128,
               batch_size=16, dataset='CelebA', mode='train', num_workers=1):
    """Build and return a data loader.

    Args:
        image_dir: Directory containing the images.
        attr_path: Path to the attribute file (used for 'CelebA' only).
        selected_attrs: Attribute names for the labels (used for 'CelebA' only).
        crop_size: Side length passed to the center crop.
        image_size: Target size passed to the resize step.
        batch_size: Number of samples per batch.
        dataset: 'CelebA' or 'RaFD'.
        mode: 'train' enables random horizontal flipping and shuffling.
        num_workers: Number of worker processes used by the loader.

    Returns:
        A ``torch.utils.data.DataLoader`` over the selected dataset.

    Raises:
        ValueError: If ``dataset`` is a string other than 'CelebA' or 'RaFD'.
    """
    # Fail fast on a misspelled dataset name; previously the raw string was
    # handed to DataLoader as if it were a dataset. Non-string values are
    # still passed through unchanged.
    if isinstance(dataset, str) and dataset not in ('CelebA', 'RaFD'):
        raise ValueError(
            "Unknown dataset {!r}; expected 'CelebA' or 'RaFD'.".format(dataset))

    transform = []
    if mode == 'train':
        # Flip augmentation is applied only while training.
        transform.append(T.RandomHorizontalFlip())
    transform.append(T.CenterCrop(crop_size))
    transform.append(T.Resize(image_size))
    transform.append(T.ToTensor())
    # Maps each channel from [0, 1] to [-1, 1].
    transform.append(T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
    transform = T.Compose(transform)

    if dataset == 'CelebA':
        dataset = CelebA(image_dir, attr_path, selected_attrs, transform, mode)
    elif dataset == 'RaFD':
        dataset = ImageFolder(image_dir, transform)

    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=(mode == 'train'),
                                  num_workers=num_workers)
    return data_loader