我目前正在尝试:给定卷积神经网络学到的卷积核/滤波器,可视化能使该滤波器激活最大化的输入图像。
Keras 有一篇博文做了类似的事情,但在使用博文所提供数据集以外的任何数据时,其结果充其量只能说是存疑的,所以我想不如直接用 TensorFlow 来尝试。(我稍后会编辑帖子,补上这些结果的图片;目前这台电脑上没有这些图片。)
使用MNIST数据集以及Tensorflow教程和Keras博客文章作为参考,我在尝试创建所述可视化时生成了以下代码。我不确定我的方法是否正确,尤其是如何/何时将结果标准化以使其可视化。
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import copy
from scipy.misc import imsave
#~~~~~~~~~~~~~~~~~~~~~~~~~ CNN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Most of the CNN section directly from the tutorial
# Side length, in pixels, of the square input images (and of each tile in
# the stitched output grid).
img_width = img_height = 28

# The stitched output image shows an n x n grid of maximised inputs.
n = 3

# Post-processing switches for the maximised images:
#   remove_negatives - zero out non-positive pixels before normalising.
#   normalize        - True: standardise to zero mean / unit variance;
#                      False: rescale linearly to the 0..255 range.
remove_negatives = False
normalize = True

# 'layer' visualises the first convolutional layer; any other value selects
# the second convolutional layer.
use = 'layer'

# MNIST data with one-hot encoded labels, stored under ./MNIST_data.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
def weight_variable(shape):
    """Return a tf.Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with kernel `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Inputs: flattened 28x28 images and their one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# First convolutional layer: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 5x5 kernels, 32 input channels -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer on the flattened 7x7x64 pooled output.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout; keep_prob is fed at run time (0.5 for training, 1.0 for evaluation).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer. The raw logits are kept separately so the loss can be
# computed from them directly.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

# Cross-entropy computed from the logits rather than as
# -sum(y_ * log(softmax(...))): the hand-written form evaluates log(0) and
# yields NaN as soon as a predicted probability underflows to zero.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Train the CNN, then pull the learned convolution kernels and biases out of
# the session as NumPy arrays for the visualisation stage.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(5000):
        batch = mnist.train.next_batch(50)
        images, labels = batch[0], batch[1]
        # Report accuracy on the current batch every 100 steps (dropout off).
        if i % 100 == 0:
            train_accuracy = sess.run(
                accuracy, feed_dict={x: images, y_: labels, keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, train_accuracy))
        sess.run(train_step, feed_dict={x: images, y_: labels, keep_prob: 0.5})
    # Fetch all four trained tensors in a single call.
    layer, bias, layer2, bias2 = sess.run([W_conv1, b_conv1, W_conv2, b_conv2])
#~~~~~~~~~~~~~~~ Begin Visualization Code ~~~~~~~~~~~~~~~~
# For every filter of the selected convolutional layer, run gradient ascent
# on a random input image so that the mean response of that filter grows,
# then normalise the resulting image and collect it in `kept_filters`.
kept_filters = []

# Toggle between layer 1 and layer 2 based on `use` defined at the beginning.
# `k` is the number of input channels of the selected layer's kernels.
if use == 'layer':
    layer_use, bias_use, k = layer, bias, 1
else:
    layer_use, bias_use = layer2, bias2
    k = np.shape(layer2)[2]

# Loop through the kernels of the *selected* layer (its last axis), not
# always those of layer 1.
for feat_map in range(np.shape(layer_use)[3]):
    # A fresh graph per filter keeps the default graph from growing on every
    # iteration and means the initializer below only touches `noise_mat`.
    with tf.Graph().as_default():
        # Randomised white-noise input image that will be maximised.
        noise_mat = weight_variable([1, 28, 28, k])
        # Load exactly this filter's kernel as a constant. The slice is
        # feat_map:feat_map+1; a slice of feat_map-1:feat_map is empty for
        # filter 0 and selects the previous filter for all others.
        single_layer = tf.constant(
            layer_use[:, :, 0:k, feat_map:feat_map + 1], dtype=tf.float32)
        # The bias belongs on the convolution output, not on the kernel weights.
        filter_bias = tf.constant(bias_use[feat_map], dtype=tf.float32)
        conv = conv2d(noise_mat, single_layer) + filter_bias
        # Maximise the mean filter response by minimising its negative.
        loss = -tf.reduce_mean(conv)
        train_step = tf.train.GradientDescentOptimizer(.5).minimize(
            loss, var_list=[noise_mat])

        # The training/maximising.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Diagnostics only: initial state and a trace of the image sum.
            noise_mat_begin = sess.run(noise_mat)[0, :, :, 0]
            updatelist = [np.sum(noise_mat_begin)]
            conv_saved = sess.run(conv)
            for step in range(5000):
                sess.run(train_step)
                if step % 200 == 0:
                    updatelist.append(np.sum(sess.run(noise_mat)[0, :, :, 0]))
            # Only the first input channel is visualised.
            noise_mat_end = sess.run(noise_mat)[0, :, :, 0]

    noise_mat_normed = noise_mat_end.copy()
    if remove_negatives:
        noise_mat_normed[noise_mat_normed <= 0] = 0
    if normalize:
        # Standardise to zero mean / unit variance (skip the division for a
        # constant image to avoid dividing by zero).
        mean = np.mean(noise_mat_normed)
        std = np.std(noise_mat_normed)
        noise_mat_normed = noise_mat_normed - mean
        if std > 0:
            noise_mat_normed = noise_mat_normed / std
    else:
        # Rescale linearly to 0..255 (a constant image maps to all zeros).
        oldmax = np.max(noise_mat_normed)
        oldmin = np.min(noise_mat_normed)
        span = oldmax - oldmin
        if span > 0:
            noise_mat_normed = (noise_mat_normed - oldmin) * 255 / span
        else:
            noise_mat_normed = np.zeros_like(noise_mat_normed)

    # Negative sums generally imply a lack of convergence with this loss, so
    # drop those images. The test uses the raw optimised image: after
    # standardisation the sum is always ~0 and after min-max scaling it is
    # never negative, so testing the normalised image is meaningless.
    if np.sum(noise_mat_end) > 0:
        kept_filters.append(noise_mat_normed)
# Visualise results in a grid format, similar to the blog post: up to n*n
# kept images are tiled row by row with `margin` blank pixels between tiles,
# and the result is written to TF_vis_<n>x<n>.png.
kept_filters = kept_filters[:n * n]
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
# Array axes are (rows, columns) = (height, width).
stitched_filters = np.zeros((height, width))
# Iterate over the images that actually exist; if fewer than n*n were kept
# the remaining tiles stay blank instead of raising IndexError.
for idx, img in enumerate(kept_filters):
    i, j = divmod(idx, n)
    top = (img_height + margin) * i
    left = (img_width + margin) * j
    stitched_filters[top: top + img_height, left: left + img_width] = img
imsave('TF_vis_%dx%d.png' % (n, n), stitched_filters)
这会产生类似的结果(来自卷积层1):
我不确定这是否完全正确,特别是因为第2层看起来并没有太大的不同。我的结果和/或方法是否合理?有没有其他人使用MNIST数据集完成此操作?另外,验证准确度> 95%。
编辑:我原先一定是哪里做错了;我重新运行/改写了博客文章中的代码,现在我自己的 TensorFlow 代码的结果与博客文章方法的输出大致相同,所以这是好事。然而,主要问题仍然存在:
为什么我没有获得更明显或不同的输出?我知道它们不会像过滤器本身那样具体,但这些图像似乎并没有像博客文章那样描绘任何东西。原始数据集中是否有足够的变化?
我难道不应该至少得到一些不只是"徒有其表的边框图案"的东西,比如对角线或曲线吗?
第二层看起来不应该是第一层的更复杂的迭代吗?