我正在尝试针对 MNIST 编写一个简单的 FGSM 攻击。我试过 foolbox 库,它可以正常工作,但其 FGSM 实现很慢(可能是因为它会搜索能够改变预测标签的最小 eps/扰动)。于是我开始编写自己的代码,但我的代码得到的扰动始终为零,也就是说,绘制出的 x_adversarial 与 x_input 完全相同。我检查后发现,梯度计算的结果是全零矩阵。计算出的损失值非常小,但我认为该损失函数仍应存在一定的梯度。有人能帮帮我吗?我已经花了一个星期,仍然没有任何进展。先谢谢了!
import tensorflow as tf
import numpy as np
import foolbox
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
# Importing the required Keras modules containing model and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
# Load the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
num_digits_to_classify = 10
input_shape = (28, 28, 1)
# Add a trailing single-channel axis, convert to float32 so the division
# produces fractional values, and scale every pixel by 1/255.
x_train = x_train.reshape((-1,) + input_shape).astype('float32') / 255
x_test = x_test.reshape((-1,) + input_shape).astype('float32') / 255
print('x_train shape:', x_train.shape)
print('Number of images in x_train', len(x_train))
print('Number of images in x_test', len(x_test))
def create_model_deep():
    """Build and compile the convolutional digit classifier.

    The network stacks two Conv2D/MaxPooling2D/Dropout stages, flattens the
    result, and finishes with a 128-unit ReLU layer and a softmax layer with
    ``num_digits_to_classify`` outputs. It reads the module-level
    ``input_shape`` and ``num_digits_to_classify`` values.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    layers = [
        Conv2D(32, kernel_size=(5, 5), activation='relu',
               input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Conv2D(64, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        # Flattening the 2D arrays for fully connected layers
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(num_digits_to_classify, activation='softmax'),
    ]
    net = Sequential(layers)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
# Build the classifier and print its layer-by-layer summary.
model = create_model_deep()
model.summary()
# Train for 10 epochs on the training split, then score the held-out test split.
model.fit(x_train, y_train, epochs=10)
model.evaluate(x_test, y_test)
########################## foolbox FGSM attack ###############################################
# The model was built with tensorflow.keras, so the learning phase has to be
# set on that backend (already imported as K). The standalone `keras` package
# keeps its own backend state, and importing it here would also add an
# unnecessary dependency. Phase 0 selects inference behaviour.
K.set_learning_phase(0)
from foolbox.criteria import Misclassification

fmodel = foolbox.models.TensorFlowModel.from_keras(model, bounds=(0, 1))
attack = foolbox.attacks.FGSM(fmodel, criterion=Misclassification())

# Count the test images for which the attack returns an example that the
# Keras model itself labels differently from the ground truth.
fgsm_error = 0.0
for i in range(x_test.shape[0]):
    if i % 1000 == 0:
        print(i)  # progress indicator
    adversarial = attack(x_test[i], y_test[i])
    if adversarial is None:
        # Presumably the attack found no adversarial example for this image;
        # it is then counted as "not fooled".
        continue
    adversarial = adversarial.reshape(1, 28, 28, 1)
    model_predictions = model.predict(adversarial)
    label = np.argmax(model_predictions)
    if label != y_test[i]:
        fgsm_error = fgsm_error + 1.0
# Turn the count into the fraction of the test set that was fooled.
fgsm_error = fgsm_error / x_test.shape[0]
########################## My own FGSM attack ###############################################
sess = K.get_session()
eps = 0.3

# Placeholders for the starting image and the starting noise. They keep their
# own names for the rest of the script: rebinding them to the computed
# tensors would make the feed_dict below feed (and thereby override) the
# outputs, so sess.run would just hand back the unperturbed input.
x_adv_in = tf.placeholder(tf.float32, shape=(1, 28, 28, 1), name="adv_example")
x_noise_in = tf.placeholder(tf.float32, shape=(1, 28, 28, 1), name="adv_noise")

x_input = x_test[0].reshape(1, 28, 28, 1)
y_input = y_test[0]


def loss_fn(y_true, y_pred):
    """Return the sparse categorical cross-entropy of y_pred against y_true."""
    return K.sparse_categorical_crossentropy(y_true, y_pred)


# Gradient of the loss with respect to the model's input image.
grad = K.gradients(loss_fn(y_input, model.output), model.input)
# FGSM step direction: the element-wise sign of that gradient.
delta = K.sign(grad[0])

# Output tensors, deliberately distinct from the placeholders above.
x_noise_out = x_noise_in + delta
x_adv_out = K.clip(x_adv_in + eps * delta, 0.0, 1.0)

x_adv, x_noise, grad = sess.run(
    [x_adv_out, x_noise_out, grad],
    feed_dict={
        model.input: x_input,
        x_adv_in: x_input,
        x_noise_in: np.zeros_like(x_input),
    },
)
pred = model.predict(x_adv)
以下代码现在似乎可以正常工作,请参见下面我的说明。
# Attack settings: step size and the index of the test sample to perturb.
eps = 0.3
i = 100
sess = K.get_session()
# Select the sample as a batch of one image, together with its label.
x_input = x_test[i].reshape((1, 28, 28, 1))
y_input = y_test[i]
# The adversarial example starts out as the clean input.
x_adv = x_input
# Added noise
x_noise = np.zeros_like(x_input)
def loss_fn(y_true, y_pred):
    """Return the categorical cross-entropy of y_pred against y_true.

    y_true is expanded to a 10-way one-hot target before the loss is taken.
    """
    return K.categorical_crossentropy(K.one_hot(y_true, 10), y_pred)
def loss_fn_sparse(y_true, y_pred):
    """Return the sparse categorical cross-entropy of y_pred against y_true."""
    return K.sparse_categorical_crossentropy(y_true, y_pred)
# Wrap the sample in a float32 tensor and run the model on that tensor, so the
# gradient below is taken with respect to `image`.
image = K.cast(x_input, dtype='float32')
y_pred = model(image)
loss = loss_fn_sparse(y_input, y_pred)
grad = K.gradients(loss, image)
# FGSM step direction: the element-wise sign of the gradient.
delta = K.sign(grad[0])
x_noise = x_noise + delta
# Take one step of size eps and clamp the result to [0, 1].
x_adv = K.clip(x_adv + eps * delta, 0.0, 1.0)
x_adv, x_noise = sess.run(
    [x_adv, x_noise],
    feed_dict={model.input: x_input},
)
pred = model.predict(x_adv)