CW L2 and Linf attack
Zhitao Gong committed Jan 18, 2018
1 parent f003012 commit ae4bfb8
Showing 5 changed files with 1,284 additions and 0 deletions.
1 change: 1 addition & 0 deletions attacks/__init__.py
@@ -1,3 +1,4 @@
from .fast_gradient import *
from .saliency_map import *
from .deepfool import *
from .cw import *
116 changes: 116 additions & 0 deletions attacks/cw.py
@@ -0,0 +1,116 @@
import tensorflow as tf


__all__ = ['cw']


def cw(model, x, y=None, eps=1.0, ord_=2, T=2,
       optimizer=tf.train.AdamOptimizer(learning_rate=0.1), alpha=0.9,
       min_prob=0, clip=(0.0, 1.0)):
"""CarliniWagner (CW) attack.
Only CW-L2 and CW-Linf are implemented since I do not see the point of
embedding CW-L2 in CW-L1. See https://arxiv.org/abs/1608.04644 for
details.
The idea of CW attack is to minimize a loss that comprises two parts: a)
the p-norm distance between the original image and the adversarial image,
and b) a term that encourages the incorrect classification of the
adversarial images.
Please note that CW is a optimization process, so it is tricky. There are
lots of hyper-parameters to tune in order to get the best result. The
binary search process for the best eps values is omitted here. You could
do grid search to find the best parameter configuration, if you like.
    :param model: The model wrapper.
    :param x: The input clean sample, usually a placeholder. NOTE that the
        shape of x MUST be static, i.e., fixed when constructing the
        graph. This is because there are some variables that depend upon
        this shape.
    :param y: The target label. Set to the least-likely label when None.
    :param eps: The scaling factor for the second penalty term.
    :param ord_: The p-norm, 2 or inf. Actually I only test whether it is 2
        or not 2.
    :param T: The temperature for the sigmoid function. In the original
        paper, the authors used (tanh(x)+1)/2 = sigmoid(2x), i.e., T=2. In
        our experiments, we found that this parameter also affects the
        quality of the generated adversarial samples.
    :param optimizer: The optimizer used to minimize the CW loss. Defaults
        to tf.train.AdamOptimizer with learning rate 0.1. Note the learning
        rate is much larger than a normal learning rate.
    :param alpha: Used only in CW-Linf. The decreasing factor for the upper
        bound of the noise.
    :param min_prob: The minimum confidence of adversarial examples.
        Generally a larger min_prob will result in more noise.
    :param clip: A tuple (clip_min, clip_max), which denotes the range of
        values in x.
    :return: A tuple (train_op, xadv, noise). Run train_op for some epochs
        to generate the adversarial image, then run xadv to get the final
        adversarial image.
    """
    xshape = x.get_shape().as_list()
    noise = tf.get_variable('noise', xshape, tf.float32,
                            initializer=tf.initializers.zeros)
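    # `noise` is the only variable the optimizer updates (var_list=[noise]
    # below); it persists across session runs, so it typically has to be
    # re-initialized before attacking a new batch.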

    # scale input to (0, 1)
    x_scaled = (x - clip[0]) / (clip[1] - clip[0])

    # change to sigmoid-space, clip to avoid overflow.
    z = tf.clip_by_value(x_scaled, 1e-8, 1-1e-8)
    xinv = tf.log(z / (1 - z)) / T

    # add noise in sigmoid-space and map back to input domain
    xadv = tf.sigmoid(T * (xinv + noise))
    xadv = xadv * (clip[1] - clip[0]) + clip[0]
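    # Optimizing `noise` in this unconstrained sigmoid space is the paper's
    # change-of-variables trick (tanh in the paper, sigmoid here): xadv is
    # always mapped back into [clip_min, clip_max], so no explicit box
    # clipping of the adversarial image is needed.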

    ybar, logits = model(xadv, logits=True)
    ydim = ybar.get_shape().as_list()[1]

    if y is not None:
        y = tf.cond(tf.equal(tf.rank(y), 0),
                    lambda: tf.fill([xshape[0]], y),
                    lambda: tf.identity(y))
    else:
        # we set the target to the least-likely label
        y = tf.argmin(ybar, axis=1, output_type=tf.int32)

    mask = tf.one_hot(y, ydim, on_value=0.0, off_value=float('inf'))
    yt = tf.reduce_max(logits - mask, axis=1)
    yo = tf.reduce_max(logits, axis=1)

    # encourage the target-class logit (yt) to become the largest logit
    # (yo), i.e., classification into the target category y; min_prob adds
    # a confidence margin
    loss0 = tf.nn.relu(yo - yt + min_prob)

    axis = list(range(1, len(xshape)))
    ord_ = float(ord_)

    # make sure the adversarial images are visually close
    if 2 == ord_:
        # CW-L2. The original paper uses the reduce_sum version. The two
        # implementations do not differ much.

        # loss1 = tf.reduce_sum(tf.square(xadv-x), axis=axis)
        loss1 = tf.reduce_mean(tf.square(xadv-x))
    else:
        # CW-Linf
        tau0 = tf.fill([xshape[0]] + [1]*len(axis), clip[1])
        tau = tf.get_variable('cw8-noise-upperbound', dtype=tf.float32,
                              initializer=tau0, trainable=False)
        diff = xadv - x - tau

        # if all per-pixel perturbations are below the upper bound tau,
        # shrink tau by the factor alpha (tau*0.9 by default) so that the
        # L-inf bound keeps tightening and the attack does not get stuck.
        shrink = tf.to_float(tf.reduce_all(diff < 0, axis=axis,
                                           keep_dims=True))
        tau = tf.assign(tau, tau * (alpha*shrink + 1.0 - shrink))

        # penalize every pixel that exceeds the current bound tau
        loss1 = tf.reduce_sum(tf.nn.relu(diff), axis=axis)

    loss = eps*loss0 + loss1
    train_op = optimizer.minimize(loss, var_list=[noise])

    # Update tau after each iteration. Refer to the CW-Linf section in the
    # original paper.
    if 2 != ord_:
        train_op = tf.group(train_op, tau)

    return train_op, xadv, noise
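
For reference, a minimal usage sketch (not part of this commit). The toy classifier, the MNIST-like input shape, the random batch, and the number of optimization steps below are illustrative assumptions only; substitute your own model wrapper and data.

# Minimal usage sketch; everything here except the call to cw() is a
# placeholder assumption, not part of the repository.
import numpy as np
import tensorflow as tf

from attacks import cw

batch_size = 32

def model(x, logits=False):
    # stand-in classifier; replace with the real model wrapper
    with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
        h = tf.layers.flatten(x)
        h = tf.layers.dense(h, 128, activation=tf.nn.relu)
        logits_ = tf.layers.dense(h, 10)
    ybar = tf.nn.softmax(logits_)
    return (ybar, logits_) if logits else ybar

# the shape of x must be static, as noted in the docstring
x = tf.placeholder(tf.float32, [batch_size, 28, 28, 1])

with tf.variable_scope('cw_attack'):
    # cw() creates its own `noise` (and, for CW-Linf, `tau`) variables, so
    # a dedicated scope keeps them from clashing with model variables
    train_op, xadv, noise = cw(model, x, eps=1.0, ord_=2)

x_batch = np.random.rand(batch_size, 28, 28, 1).astype(np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):  # number of CW optimization steps is illustrative
        sess.run(train_op, feed_dict={x: x_batch})
    x_adv = sess.run(xadv, feed_dict={x: x_batch})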