Trying to add a CNN to an MLP Siamese network

Asked: 2017-08-03 22:38:22

Tags: tensorflow neural-network conv-neural-network

When I try to add a CNN to a ready-made Siamese implementation I got from GitHub, I run into an incompatible shapes error. Here is the link:

https://github.com/ywpkwon/siamese_tf_mnist

Here is the code that runs the session:

""" Siamese implementation using TensorFlow with the MNIST example.
This Siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.

Youngwook Paul Kwon (young at berkeley.edu)
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os

#import helpers
import inference
import visualize

# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()

# setup siamese network
siamese = inference.siamese();
train_step =  tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
tf.initialize_all_variables().run()

# start training
if new:  # 'new' is a flag set earlier in the original run.py (True means train from scratch)
    for step in range(1000):
        batch_x1, batch_y1 = mnist.train.next_batch(128)
        batch_x2, batch_y2 = mnist.train.next_batch(128)
        batch_y = (batch_y1 == batch_y2).astype('float')

        _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
                        siamese.x1: batch_x1,
                        siamese.x2: batch_x2,
                        siamese.y_: batch_y})


        if step % 10 == 0:
            print ('step %d: loss' % (step))
            print (loss_v)

Here is the code that creates the Siamese model:

import tensorflow as tf
class siamese:

    # Create model
    def __init__(self):
        self.x1 = tf.placeholder(tf.float32, [None, 784])
        self.x2 = tf.placeholder(tf.float32, [None, 784])

        with tf.variable_scope("siamese") as scope:
            self.o1 = self.network(self.x1)
            scope.reuse_variables()
            self.o2 = self.network(self.x2)

        # Create loss
        self.y_ = tf.placeholder(tf.float32, [None])
        self.loss = self.loss_with_step()

    def network(self, x):
        weights = []
        fc1 = self.fc_layer(x, 1024, "fc1", [5, 5, 1, 32])
        return fc1


    def fc_layer(self, bottom, n_weight, name, kernel_shape):  # kernel_shape e.g. [5, 5, 1, 32]
        assert len(bottom.get_shape()) == 2
        #n_prev_weight = bottom.get_shape()[1]
        initer = tf.truncated_normal_initializer(stddev=0.01)
        weights_for_convolution = tf.get_variable(name+"weights_for_convolution", kernel_shape,
            initializer=tf.random_normal_initializer())
        bias_shape = kernel_shape[-1]
        biases_for_convolution = tf.get_variable(name+"biases_for_convolution", [bias_shape],
            initializer=tf.constant_initializer(0.1))
        biases_for_connected_layer = tf.get_variable(name+"biases_for_connected_layer", [1024],
            initializer=tf.constant_initializer(0.1))

        weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [7*7*64, 1024],
            initializer=tf.random_normal_initializer())
        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[1024, 2], initializer=initer)
        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[2], dtype=tf.float32))
        #weights_for_readout_layer = tf.get_variable("weights_for_readout_layer", [1024,2],
        #    initializer=tf.random_normal_initializer())

        #biases_for_readout_layer = tf.get_variable("biases_for_readout_layer", [2],
        #    initializer=tf.constant_initializer(0.1))
        bottom1 = tf.reshape(bottom, [-1, 28, 28, 1])
        c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(c2, biases_for_convolution)

        relu = tf.nn.relu(conv)
        out = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1], padding='SAME')

        #print(tf.shape(out))
        h_out_flat = tf.reshape(out, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)

        #compute model output
        final_output = tf.matmul(h_fc1, W) + b

        #fc = tf.nn.bias_add(tf.matmul(bottom, W), b)

        return final_output



    def loss_with_spring(self):
        margin = 5.0
        labels_t = self.y_
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")          # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        print(tf.shape(eucd2))
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2+1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        # yi*||CNN(p1i)-CNN(p2i)||^2 + (1-yi)*max(0, C-||CNN(p1i)-CNN(p2i)||^2)
        pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.subtract(0.0,eucd2), name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C,eucd2)), name="Nyi_x_C-eucd_xx_2")
        neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss

    def loss_with_step(self):
        margin = 5.0
        labels_t = self.y_  # 128
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")          # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2+1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
        neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss

Since the batch size is 128, labels_t has 128 elements. The problem is that eucd in the loss_with_step function (and likewise eucd2 in loss_with_spring) comes out with size 256 instead of 128, and I really don't know why!
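
To narrow this down, here is a minimal shape-check sketch (same ops and variable names as in fc_layer above; the comments show the static shapes TensorFlow reports while building the graph):

# Shape-check sketch: print the static shape after each step of fc_layer.
bottom1 = tf.reshape(bottom, [-1, 28, 28, 1])
print(bottom1.get_shape())     # (?, 28, 28, 1)
c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
conv = tf.nn.bias_add(c2, biases_for_convolution)
print(conv.get_shape())        # (?, 28, 28, 32)
out = tf.nn.max_pool(tf.nn.relu(conv), ksize=[1, 2, 2, 1],
                     strides=[1, 2, 2, 1], padding='SAME')
print(out.get_shape())         # (?, 14, 14, 32)
h_out_flat = tf.reshape(out, [-1, 7*7*64])
print(h_out_flat.get_shape())  # (?, 3136)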

Here is the error I get:

Traceback (most recent call last):
  File "run1.py", line 56, in <module>
    siamese.y_: batch_y})
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 789, in run
    run_metadata_ptr)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 997, in _run
    feed_dict_string, options, run_metadata)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1132, in _do_run
    target_list, options, run_metadata)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128] vs. [256]
         [[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]

Caused by op u'y_x_eucd', defined at:
  File "run1.py", line 28, in <module>
    siamese = inference1.siamese();
  File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master/inference1.py", line 18, in __init__
    self.loss = self.loss_with_step()
  File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master/inference1.py", line 110, in loss_with_step
    pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 286, in multiply
    return gen_math_ops._mul(x, y, name)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [128] vs. [256]
         [[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]

Can anyone help?

1 Answer:

Answer 0 (score: 0):

It looks like you are reshaping incorrectly after the convolution. For a 28x28x1 input passed through the convolution layer (stride 1) followed by max-pooling (stride 2), the output is 14x14x32. Flattening that with [-1, 7*7*64] regroups the 128*14*14*32 values of a batch into rows of 3136, which gives 256 rows instead of 128 and causes the [128] vs. [256] mismatch. So you need to change the flatten layer to:

  h_out_flat = tf.reshape(out, [-1, 14*14*32])

and adjust weights_for_connected_layer accordingly (i.e. give it shape [14*14*32, 1024]).
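
For illustration, a minimal sketch of the corrected flatten step and the matching fully connected weights (variable names taken from the question's fc_layer; everything else in the layer stays the same):

# Fully connected weights must match the flattened conv output (14*14*32 = 6272 per image).
weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [14*14*32, 1024],
    initializer=tf.random_normal_initializer())
biases_for_connected_layer = tf.get_variable(name+"biases_for_connected_layer", [1024],
    initializer=tf.constant_initializer(0.1))

# ... conv -> bias_add -> relu -> max_pool as before, giving `out` with shape (?, 14, 14, 32) ...

h_out_flat = tf.reshape(out, [-1, 14*14*32])   # keeps 128 rows for a 128-image batch
h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)

With this change the batch dimension of the embeddings stays at 128, so eucd and y_ both have shape [128] and the y_x_eucd multiplication no longer fails.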