After training my CNN I'm trying to run a single image through the network and plot the feature maps after the first convolutional layer. Here is the code for my CNN layer class:
# imports used by these snippets
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d, sigmoid
from theano.tensor.signal import pool

class ConvPoolLayer(object):

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        self.conv_out = conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            input_shape=self.image_shape)
        # compiled here so the feature maps can be inspected after training
        self.feature_maps = theano.function([self.inpt], self.conv_out)
        pooled_out = pool.pool_2d(
            input=self.conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers
I created a Theano function feature_maps that outputs the result of the convolution (is there a cleaner way to do this? I sketch one idea after the next snippet). Then, in my main code (after training), I take an image and pass it to the feature_maps function of the first layer object:
image_4d = image_2d.reshape(1, 1, 28, 28)
activations = conv_net[0].layers[0].feature_maps(image_4d)
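For what it's worth, the cleaner pattern I had in mind (just a sketch; it assumes conv_net[0] is the trained network object and that it stores its symbolic input as self.x, the same variable the SGD code below uses) would be to compile the probe once against the network's own input variable, instead of stashing a theano.function inside set_inpt:

net = conv_net[0]
# Compile a probe from the network's symbolic input to the first
# layer's convolution output; the graph above conv_out is reused as-is.
get_feature_maps = theano.function([net.x], net.layers[0].conv_out)

This still bakes the training batch size into the graph via the reshape to image_shape, though, so it runs into the same problem described below.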
I trained this with a mini-batch size of 10, so when I pass in a single image to get the feature maps I get the following error:
ValueError: The hardcoded shape for the batch size (10) isn't the run time shape (1).
Apply node that caused the error: ConvOp{('imshp', (1, 28, 28)),('kshp', (5, 5)),('nkern', 20),('bsize', 10),('dx', 1),('dy', 1),('out_mode', 'valid'),('unroll_batch', 5),('unroll_kern', 2),('unroll_patch', False),('imshp_logical', (1, 28, 28)),('kshp_logical', (5, 5)),('kshp_logical_top_aligned', True)}(<TensorType(float32, (False, True, False, False))>, <TensorType(float32, 4D)>)
Toposort index: 0
Inputs types: [TensorType(float32, (False, True, False, False)), TensorType(float32, 4D)]
Inputs shapes: [(1L, 1L, 28L, 28L), (20L, 1L, 5L, 5L)]
Inputs strides: [(3136L, 3136L, 112L, 4L), (100L, 100L, 20L, 4L)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [['output']]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "C:\Users\Simon\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-3-8e78399125dc>", line 1, in <module>
runfile('F:/python-machine-learning/CNN/mnist.py', wdir='F:/python-machine-learning/CNN')
File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "F:/python-machine-learning/CNN/mnist.py", line 53, in <module>
conv_net = basic_conv(n=1, epochs=1)
File "F:/python-machine-learning/CNN/mnist.py", line 47, in basic_conv
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
File "network.py", line 95, in __init__
init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
File "network.py", line 273, in set_inpt
input_shape=self.image_shape)
So it seems to be complaining that I trained with a mini-batch size of 10 but am trying to pass in a 4D array containing only one item. What is the proper way to solve this?
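The only workaround I've come up with so far (a sketch, assuming the batch size stays hardcoded at 10) is to tile the single image up to a full mini-batch and keep the first slice of the result:

# Pad the single image up to the trained mini-batch size by tiling it,
# run the full batch through the compiled function, keep the first slice.
batch_4d = np.tile(image_4d, (10, 1, 1, 1)).astype(theano.config.floatX)
activations = conv_net[0].layers[0].feature_maps(batch_4d)[0]
# activations has shape (20, 24, 24): one 24x24 map per 5x5 kernel; each
# map can then be drawn with e.g. plt.imshow(activations[k], cmap='gray')

Alternatively, if I read the docs correctly, conv2d accepts None for unknown dimensions in input_shape (e.g. input_shape=(None, 1, 28, 28)), so the batch size wouldn't be compiled into the graph, but then the reshape to self.image_shape would need the same treatment. Is either of these the right approach?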
Also, the strangest part is that I only get this error when training on the CPU. When I train the exact same network on the GPU, there is no such error and I get the feature maps without a problem. I'm not sure why this difference exists...
Edit:
After the ConvPoolLayer, the network ends with fully connected and softmax layers.
Fully connected:
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_in), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
Softmax:
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)  # Predicted class
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
Finally, here is the code for stochastic gradient descent:
def SGD(self, training_data, epochs, mini_batch_size, alpha,
        validation_data, test_data, lmbda=0.0):
    """Train the network using mini-batch stochastic gradient descent."""
    training_x, training_y = training_data
    validation_x, validation_y = validation_data
    test_x, test_y = test_data
    # compute number of minibatches for training, validation and testing
    num_training_batches = size(training_data)/mini_batch_size
    num_validation_batches = size(validation_data)/mini_batch_size
    num_test_batches = size(test_data)/mini_batch_size
    # define (regularized) cost function, symbolic gradients, and updates
    l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
    cost = self.layers[-1].cost(self)+\
           0.5*lmbda*l2_norm_squared/num_training_batches
    grads = T.grad(cost, self.params)
    updates = [(param, param - alpha*grad)
               for param, grad in zip(self.params, grads)]
    # define functions to train a mini-batch, and to compute the
    # accuracy in validation and test mini-batches.
    i = T.lscalar()  # mini-batch index
    train_mb = theano.function(
        [i], cost, updates=updates,
        givens={
            self.x:
            training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    validate_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    test_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    self.test_mb_predictions = theano.function(
        [i], self.layers[-1].y_out,
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    # Do the actual training
    best_validation_accuracy = 0.0
    start_time = time.time()
    for epoch in xrange(epochs):
        for minibatch_index in xrange(num_training_batches):
            iteration = num_training_batches * epoch + minibatch_index
            cost_ij = train_mb(minibatch_index)
            if (iteration+1) % num_training_batches == 0:
                validation_accuracy = np.mean(
                    [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                if validation_accuracy >= best_validation_accuracy:
                    best_validation_accuracy = validation_accuracy
                    best_iteration = iteration
                    if test_data:
                        test_accuracy = np.mean(
                            [test_mb_accuracy(j) for j in xrange(num_test_batches)])