在python中具有反向传播和稀疏性的卷积神经网络

时间:2016-03-24 23:18:21

标签: python neural-network sparse-matrix theano backpropagation

我正在尝试修改github上network3.py的神经网络和深度学习所提供的代码。该代码基本上构造了卷积神经网络并训练MNIST数据集。

我要做的是将反向传播和稀疏性的概念添加到此代码中。我添加的代码部分在#的两行之间列出。我得到Typeerror: make node requires 4D tensor of kernels

据我所知,大小应为4D(1,1,28,28),但我不知道在何处以及如何进行此修改。

class ConvPoolLayer(object):
def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn=activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        #print self.w.eval()
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        #print filter_shape[0]
        #print self.b.eval()
        self.params = [self.w, self.b]

    def sigmoid(self, x):

        return (1 / (1 + T.exp(-x)))               


    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
############################################################
        learning_rate = 0.0001
        learning_rate_s = 0.0001
        gamma = 1
        alpha = 1 - learning_rate

        v1 = T.dot(self.w, self.inpt) + self.b
        y1 = self.sigmoid(v1)
        diff1 = self.inpt - T.dot(T.transpose(self.w), y1)               
        d1 = T.dot(self.w, diff1)
        d1 = T.dot(d1, (1.0 - T.dot(v1,v1)))             
        delta_w1_bp = learning_rate * T.dot(d1 , T.transpose(self.inpt))        
        delta_b1_bp = T.sum(learning_rate * d1, axis=1)           
        delta_w1_s = learning_rate_s * T.dot(self.sigmoid(y1),T.transpose(self.inpt)) 
        delta_b1_s = T.sum(learning_rate_s * self.sigmoid(y1), axis=1)                
        total_w1 = gamma * delta_w1_bp + (1 - gamma) * delta_w1_s        
        total_b1 = gamma * delta_b1_bp + (1 - gamma) * delta_b1_s        
        self.w = (alpha * self.w) + total_w1        
        self.b = (alpha * self.b) + total_b1

##################################################################

        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output # no dropout in the convolution layers

有谁知道如何解决这个问题?

我运行以调用上述脚本的主要代码是

import network3
from network3 import Network
from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10
net = Network([
    ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
                  filter_shape=(20, 1, 5, 5), 
                  poolsize=(2, 2)),
    FullyConnectedLayer(n_in=20*12*12, n_out=100),
    SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1, 
        validation_data, test_data)   

0 个答案:

没有答案