Question

我目前正在尝试：给定卷积神经网络学到的卷积核/滤波器，生成能使其激活最大化的输入图像，并将其可视化。

Keras有一篇博文（见此处）做了类似的事情，但只要换用博文所提供数据集之外的任何数据，结果充其量也只能说是存疑的，所以我想不如直接用Tensorflow来尝试。（我稍后会编辑帖子补上相应的图片，这台电脑上没有那些图片。）

使用MNIST数据集，并以Tensorflow教程和Keras博客文章作为参考，我编写了以下代码来尝试生成上述可视化。我不确定我的方法是否正确，尤其是应该如何/何时对结果进行归一化以便将其可视化。

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import copy
from scipy.misc import imsave


#~~~~~~~~~~~~~~~~~~~~~~~~~ CNN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Most of the CNN section directly from the tutorial

# --- Visualization settings (read by the visualization code further down) ---
use = 'layer'             # 'layer' selects conv layer 1; any other value selects conv layer 2
normalize = True          # True: standardise to zero mean / unit std; False: rescale to 0..255
remove_negatives = False  # True: clamp non-positive pixels to 0 before normalising
n = 3                     # the stitched output grid holds n x n filter images

# --- Image geometry (pixels) ---
img_width = img_height = 28

# --- Data: MNIST with one-hot encoded labels ---
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


def weight_variable(shape):
    """Return a new tf.Variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    standard deviation 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the kernel `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a [1, 2, 2, 1] window, the same strides and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

# Inputs: flattened 784-pixel images and one-hot labels over 10 classes.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# First convolutional layer: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Restore the 28x28 single-channel image layout expected by conv2d.
x_image = tf.reshape(x, [-1, 28, 28, 1])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 5x5 kernels, 32 input channels -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer on the flattened 7x7x64 output of the second pooling.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout; keep_prob is fed at run time (0.5 for training, 1.0 for evaluation).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer producing one score per class.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

# Compute the cross-entropy from the raw logits. Taking tf.log() of the
# softmax output yields NaN as soon as a predicted probability underflows
# to 0; the fused op is the numerically stable equivalent.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(5000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            # Report accuracy on the current batch with dropout disabled.
            train_accuracy = accuracy.eval(
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    # Pull the trained convolution kernels and biases out as NumPy arrays so
    # the visualization code can use them after this session is closed.
    layer, bias, layer2, bias2 = sess.run([W_conv1, b_conv1, W_conv2, b_conv2])





#~~~~~~~~~~~~~~~ Begin Visualization Code ~~~~~~~~~~~~~~~~
# For every filter of the selected convolutional layer, start from a random
# input, run gradient descent on the negated mean filter response (i.e.
# gradient ascent on the response), normalise the resulting image and collect
# it in kept_filters.
kept_filters = []

# Toggle between layer 1 and layer 2 based on `use` defined at the beginning.
if use == 'layer':
    layer_use, bias_use = layer, bias
else:
    layer_use, bias_use = layer2, bias2
# Number of input channels of the selected kernels (axis 2 of the kernel array).
k = layer_use.shape[2]

# Loop through the kernels/feature maps of the *selected* layer and maximize
# each one's input image. (Taking the count from `layer` would skip half of
# layer 2's filters.)
for feat_map in range(layer_use.shape[3]):
    # Build every optimisation problem in its own graph. Otherwise each
    # iteration would add another variable and optimizer to the default graph
    # and global_variables_initializer() would re-initialise all of them,
    # including the CNN's own variables.
    with tf.Graph().as_default():
        # Randomized white-noise input image that will be max'ed.
        noise_mat = weight_variable([1, 28, 28, k])
        # Load the kernel as a constant. The slice feat_map:feat_map + 1 keeps
        # the output-channel axis; the previous feat_map-1:feat_map slice was
        # empty for the first filter and off by one for all the others.
        single_layer = tf.constant(
            layer_use[:, :, :, feat_map:feat_map + 1], dtype=tf.float32)
        # The bias belongs on the convolution output, not on every kernel
        # weight.
        conv = conv2d(noise_mat, single_layer) + bias_use[feat_map]
        # Use the mean of the response map as the quantity to maximize.
        # NOTE(review): without a non-linearity this objective is linear in
        # noise_mat, so every step adds the same gradient to the image;
        # consider maximizing the ReLU activation and normalising the gradient.
        loss = -tf.reduce_mean(conv)
        train_step = tf.train.GradientDescentOptimizer(.5).minimize(
            loss, var_list=[noise_mat])

        # The training/maximizing.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Diagnostics: pixel sum of channel 0, sampled every 200 steps.
            updatelist = [np.sum(sess.run(noise_mat)[0, :, :, 0])]
            noise_mat_begin = sess.run(noise_mat[0, :, :, 0])
            conv_saved = sess.run(conv)
            for step in range(5000):
                train_step.run()
                if step % 200 == 0:
                    updatelist.append(np.sum(sess.run(noise_mat)[0, :, :, 0]))
            # NOTE(review): only channel 0 of the optimized input is kept.
            # For layer 2 the optimized tensor is the k-channel input of that
            # layer rather than a pixel image - confirm this is intended.
            noise_mat_end = sess.run(noise_mat)[0, :, :, 0]

    # Work on a copy so the raw optimized image stays available.
    noise_mat_normed = noise_mat_end.copy()

    if remove_negatives:
        noise_mat_normed[noise_mat_normed <= 0] = 0
    if normalize:
        # Standardise to zero mean and unit standard deviation.
        noise_mat_normed = ((noise_mat_normed - np.mean(noise_mat_normed))
                            / np.std(noise_mat_normed))
    else:
        # Linearly rescale to the range 0..255.
        oldmax = np.max(noise_mat_normed)
        oldmin = np.min(noise_mat_normed)
        noise_mat_normed = (noise_mat_normed - oldmin) * 255 / (oldmax - oldmin)

    # Negative sums generally imply a lack of convergence, so remove them.
    # NOTE(review): this test runs on the normalised image. After
    # standardisation the sum is ~0 up to rounding, and after 0..255 rescaling
    # it is never negative, so it probably should look at noise_mat_end.
    if np.sum(noise_mat_normed) > 0:
        kept_filters.append(noise_mat_normed)


# Visualize results in a grid format, similar to the blog post: up to n x n
# filter images are pasted onto one canvas, separated by `margin` blank pixels,
# and written to a PNG file.
kept_filters = kept_filters[:n * n]
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
# Array axes are (rows, columns), i.e. (height, width).
stitched_filters = np.zeros((height, width))

# Iterate over the filters that are actually available. Indexing all n * n
# cells raised IndexError whenever fewer filters had been kept; unused cells
# now simply stay zero.
for idx, img in enumerate(kept_filters):
    i, j = divmod(idx, n)  # i: grid row, j: grid column
    top = (img_height + margin) * i
    left = (img_width + margin) * j
    stitched_filters[top:top + img_height, left:left + img_width] = img

imsave('TF_vis_%dx%d.png' % (n, n), stitched_filters)

这会产生类似的结果（来自卷积层1）：

我不确定这是否完全正确，特别是因为第2层的结果看起来并没有太大的不同。我的结果和/或方法是否合理？有没有其他人用MNIST数据集做过同样的事情？另外，模型的验证准确率超过95％。

编辑：我原先一定是哪里做错了；我重新运行并改写了博客文章中的代码，现在我自己的Tensorflow代码的结果看起来与博客文章方法的输出大致相同，所以这很好。然而，主要问题仍然存在：

为什么我没有获得更明显或不同的输出？我知道它们不会像过滤器本身那样具体，但这些图像似乎并没有像博客文章那样描绘任何东西。原始数据集中是否有足够的变化？

我难道不应该至少得到一些不只是“花哨一点的边框图像”的东西吗，比如对角线或曲线？

第二层的结果难道不应该看起来像是第一层的更复杂版本吗？

CNN's - 使用Tensorflow可视化最大化的滤波器激活（使用MNIST）

0 个答案: