Files
disrupting-deepfakes/stargan/attacks.py
Nataniel Ruiz Gutierrez d05a264d06 First commit.
2020-03-09 17:37:40 -04:00

317 lines
10 KiB
Python

import copy
from collections.abc import Iterable  # `collections.Iterable` was removed in Python 3.10

import numpy as np
import torch
import torch.nn as nn
from scipy.stats import truncnorm

import defenses.smoothing as smoothing
class LinfPGDAttack(object):
    """L-infinity-bounded FGSM / I-FGSM / PGD attacks against an image-translation model.

    The attacked ``model`` is expected to return ``(output, feats)`` when called as
    ``model(X, c_trg)`` (and similarly for ``model.forward_blur(X, c_trg, preproc)``).
    Images are assumed to live in ``[-1, 1]``.
    """

    def __init__(self, model=None, device=None, epsilon=0.05, k=10, a=0.01, feat=None):
        """
        FGSM, I-FGSM and PGD attacks.

        model: generator under attack; called as model(X, c_trg) -> (output, feats)
        device: torch device for the loss and the random-start noise
        epsilon: magnitude of attack (L-inf radius)
        k: iterations
        a: step size
        feat: key/index into `feats` for a feature-level attack; None attacks the output
        """
        self.model = model
        self.epsilon = epsilon
        self.k = k
        self.a = a
        self.loss_fn = nn.MSELoss().to(device)
        self.device = device
        # Feature-level attack? Which layer?
        self.feat = feat
        # PGD (random start in the eps-ball) or I-FGSM (start at the clean image)?
        self.rand = True

    def _init_start(self, X_nat):
        # Starting point for the attack: uniform random point in the eps-ball
        # around X_nat for PGD, or the clean image itself for (I-)FGSM.
        if self.rand:
            noise = np.random.uniform(-self.epsilon, self.epsilon, X_nat.shape).astype('float32')
            return X_nat.clone().detach_() + torch.tensor(noise).to(self.device)
        return X_nat.clone().detach_()

    def _step(self, X, X_nat, grad):
        # One signed-gradient ascent step, projected back onto the eps-ball
        # around X_nat and clamped to the valid image range [-1, 1].
        X_adv = X + self.a * grad.sign()
        eta = torch.clamp(X_adv - X_nat, min=-self.epsilon, max=self.epsilon)
        return torch.clamp(X_nat + eta, min=-1, max=1).detach_()

    def perturb(self, X_nat, y, c_trg):
        """
        Vanilla attack: push the model output (or a chosen feature map when
        `self.feat` is set) away from / towards `y`.

        Returns (adversarial image, applied perturbation).
        """
        X = self._init_start(X_nat)
        for _ in range(self.k):
            X.requires_grad = True
            output, feats = self.model(X, c_trg)
            if self.feat:
                output = feats[self.feat]
            self.model.zero_grad()
            # Minus in the loss means "towards" and plus means "away from"
            loss = self.loss_fn(output, y)
            loss.backward()
            X = self._step(X, X_nat, X.grad)
        self.model.zero_grad()
        return X, X - X_nat

    def perturb_blur(self, X_nat, y, c_trg):
        """
        White-box attack against a Gaussian-blur pre-processing defense.

        Returns (adversarial image, applied perturbation, blurred clean image).
        """
        X = self._init_start(X_nat)
        X_orig = X_nat.clone().detach_()
        # Kernel size and sigma of the Gaussian blur defense being attacked through.
        ks = 11
        sig = 1.5
        preproc = smoothing.GaussianSmoothing2D(sigma=sig, channels=3, kernel_size=ks).to(self.device)
        blurred_image = smoothing.GaussianSmoothing2D(sigma=sig, channels=3, kernel_size=ks).to(self.device)(X_orig)
        for _ in range(self.k):
            X.requires_grad = True
            # Differentiate through the blur so the attack anticipates the defense.
            output, feats = self.model.forward_blur(X, c_trg, preproc)
            self.model.zero_grad()
            loss = self.loss_fn(output, y)
            loss.backward()
            X = self._step(X, X_nat, X.grad)
        self.model.zero_grad()
        return X, X - X_nat, blurred_image

    def perturb_blur_iter_full(self, X_nat, y, c_trg):
        """
        Spread-spectrum attack against blur defenses (gray-box scenario):
        cycles through Gaussian blurs of increasing sigma, then average
        blurs of increasing kernel size, one configuration per iteration.
        """
        X = self._init_start(X_nat)
        ks_gauss = 11   # Gaussian blur kernel size
        ks_avg = 3      # average smoothing kernel size
        sig = 1         # sigma for Gaussian blur
        blur_type = 1   # 1 = Gaussian, 2 = average
        for _ in range(self.k):
            # Declare the smoothing layer for this iteration's blur configuration.
            if blur_type == 1:
                preproc = smoothing.GaussianSmoothing2D(sigma=sig, channels=3, kernel_size=ks_gauss).to(self.device)
            elif blur_type == 2:
                preproc = smoothing.AverageSmoothing2D(channels=3, kernel_size=ks_avg).to(self.device)
            X.requires_grad = True
            output, feats = self.model.forward_blur(X, c_trg, preproc)
            if self.feat:
                output = feats[self.feat]
            self.model.zero_grad()
            loss = self.loss_fn(output, y)
            loss.backward()
            X = self._step(X, X_nat, X.grad)
            # Advance the blur schedule. NOTE: deliberately `if`, not `elif` —
            # the iteration that switches 1 -> 2 also bumps ks_avg immediately,
            # matching the original schedule.
            if blur_type == 1:
                sig += 0.5
                if sig >= 3.2:
                    blur_type = 2
                    sig = 1
            if blur_type == 2:
                ks_avg += 2
                if ks_avg >= 11:
                    blur_type = 1
                    ks_avg = 3
        self.model.zero_grad()
        return X, X - X_nat

    def perturb_blur_eot(self, X_nat, y, c_trg):
        """
        Expectation-over-Transformation (EoT) adaptation to the blur defense:
        each PGD step accumulates the loss over 9 blur configurations before
        taking a single signed-gradient step.
        """
        X = self._init_start(X_nat)
        ks_gauss = 11   # Gaussian blur kernel size
        ks_avg = 3      # average smoothing kernel size
        sig = 1         # sigma for Gaussian blur
        blur_type = 1   # 1 = Gaussian, 2 = average
        for _ in range(self.k):
            full_loss = 0.0
            X.requires_grad = True
            self.model.zero_grad()
            for _ in range(9):  # 9 types of blur
                if blur_type == 1:
                    preproc = smoothing.GaussianSmoothing2D(sigma=sig, channels=3, kernel_size=ks_gauss).to(self.device)
                elif blur_type == 2:
                    preproc = smoothing.AverageSmoothing2D(channels=3, kernel_size=ks_avg).to(self.device)
                output, feats = self.model.forward_blur(X, c_trg, preproc)
                full_loss += self.loss_fn(output, y)
                # Same `if`-not-`elif` schedule as perturb_blur_iter_full.
                if blur_type == 1:
                    sig += 0.5
                    if sig >= 3.2:
                        blur_type = 2
                        sig = 1
                if blur_type == 2:
                    ks_avg += 2
                    if ks_avg >= 11:
                        blur_type = 1
                        ks_avg = 3
            full_loss.backward()
            X = self._step(X, X_nat, X.grad)
        self.model.zero_grad()
        return X, X - X_nat

    def perturb_iter_class(self, X_nat, y, c_trg):
        """
        Iterative class-conditional attack: cycles through the target
        class vectors in `c_trg`, one per PGD iteration.
        """
        X = self._init_start(X_nat)
        j = 0
        n_classes = len(c_trg)
        for _ in range(self.k):
            X.requires_grad = True
            output, feats = self.model(X, c_trg[j])
            self.model.zero_grad()
            loss = self.loss_fn(output, y)
            loss.backward()
            X = self._step(X, X_nat, X.grad)
            j = (j + 1) % n_classes
        self.model.zero_grad()
        # Fix: return the actually-applied perturbation, consistent with the
        # other attacks (the old `return X, eta` was undefined for k == 0 and
        # ignored the final [-1, 1] image clamp).
        return X, X - X_nat

    def perturb_joint_class(self, X_nat, y, c_trg):
        """
        Joint class-conditional attack: each PGD step sums the loss over
        every target class vector in `c_trg`.
        """
        X = self._init_start(X_nat)
        for _ in range(self.k):
            full_loss = 0.0
            X.requires_grad = True
            self.model.zero_grad()
            for c in c_trg:
                output, feats = self.model(X, c)
                full_loss += self.loss_fn(output, y)
            full_loss.backward()
            X = self._step(X, X_nat, X.grad)
        self.model.zero_grad()
        # Fix: same return-value correction as perturb_iter_class.
        return X, X - X_nat
def clip_tensor(X, Y, Z):
    """Element-wise clip of tensor X to the bounds [Y, Z].

    Bounds are tensors of the same shape as X. The computation is done on the
    CPU via numpy, and the result is always returned as a float32 CPU tensor.
    """
    lower = Y.data.cpu().numpy()
    upper = Z.data.cpu().numpy()
    clipped = np.clip(X.data.cpu().numpy(), lower, upper)
    return torch.FloatTensor(clipped)
def perturb_batch(X, y, c_trg, model, adversary):
    """Perturb one batch for adversarial training.

    Attacks a frozen, eval-mode deep copy of `model` so that gradient
    computation inside the attack cannot touch the live training weights.
    Note: `adversary.model` is left pointing at the frozen copy afterwards.
    """
    frozen = copy.deepcopy(model)
    for param in frozen.parameters():
        param.requires_grad = False
    frozen.eval()
    adversary.model = frozen
    X_adv, _ = adversary.perturb(X, y, c_trg)
    return X_adv