from torch.utils import data from torchvision import transforms as T from torchvision.datasets import ImageFolder from PIL import Image import torch import os import random import noise import cv2 class CelebA(data.Dataset): """Dataset class for the CelebA dataset.""" def __init__(self, image_dir, attr_path, selected_attrs, transform, mode): """Initialize and preprocess the CelebA dataset.""" self.image_dir = image_dir self.attr_path = attr_path self.selected_attrs = selected_attrs self.transform = transform self.mode = mode self.train_dataset = [] self.test_dataset = [] self.attr2idx = {} self.idx2attr = {} self.preprocess() if mode == 'train': self.num_images = len(self.train_dataset) else: self.num_images = len(self.test_dataset) def preprocess(self): """Preprocess the CelebA attribute file.""" lines = [line.rstrip() for line in open(self.attr_path, 'r')] all_attr_names = lines[1].split() for i, attr_name in enumerate(all_attr_names): self.attr2idx[attr_name] = i self.idx2attr[i] = attr_name lines = lines[2:] random.seed(1234) random.shuffle(lines) for i, line in enumerate(lines): split = line.split() filename = split[0] values = split[1:] label = [] for attr_name in self.selected_attrs: idx = self.attr2idx[attr_name] label.append(values[idx] == '1') if (i+1) < 2000: self.test_dataset.append([filename, label]) else: self.train_dataset.append([filename, label]) print('Finished preprocessing the CelebA dataset...') def __getitem__(self, index): """Return one image and its corresponding attribute label.""" dataset = self.train_dataset if self.mode == 'train' else self.test_dataset filename, label = dataset[index] image = Image.open(os.path.join(self.image_dir, filename)) # image = noise.noisy('s&p', image) return self.transform(image), torch.FloatTensor(label) # def __getitem__(self, index): # """Return one image and its corresponding attribute label.""" # dataset = self.train_dataset if self.mode == 'train' else self.test_dataset # filename, label = dataset[index] # image = Image.open(os.path.join(self.image_dir, filename)) # return self.transform(image), torch.FloatTensor(label) def __len__(self): """Return the number of images.""" return self.num_images def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=128, batch_size=16, dataset='CelebA', mode='train', num_workers=1): """Build and return a data loader.""" transform = [] if mode == 'train': transform.append(T.RandomHorizontalFlip()) transform.append(T.CenterCrop(crop_size)) transform.append(T.Resize(image_size)) transform.append(T.ToTensor()) transform.append(T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))) transform = T.Compose(transform) if dataset == 'CelebA': dataset = CelebA(image_dir, attr_path, selected_attrs, transform, mode) elif dataset == 'RaFD': dataset = ImageFolder(image_dir, transform) data_loader = data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=(mode=='train'), num_workers=num_workers) return data_loader