After training my CNN I'm trying to run a single image through the network and plot the feature maps after the first convolutional layer. Here is the code for my CNN layer class:
# imports used by these snippets
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d, sigmoid
from theano.tensor.signal import pool

class ConvPoolLayer(object):

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        self.conv_out = conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            input_shape=self.image_shape)
        # compiled here so the feature maps can be inspected after training
        self.feature_maps = theano.function([self.inpt], self.conv_out)
        pooled_out = pool.pool_2d(
            input=self.conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers
I created a Theano function feature_maps that outputs the result of the convolution (is there a cleaner way to do this? I sketch one idea after the next snippet). Then, in my main code (after training), I take an image and pass it to the feature_maps function of the first layer object:
image_4d = image_2d.reshape(1, 1, 28, 28)
activations = conv_net[0].layers[0].feature_maps(image_4d)
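For what it's worth, the cleaner pattern I had in mind (just a sketch; it assumes conv_net[0] is the trained network object and that it stores its symbolic input as self.x, the same variable the SGD code below uses) would be to compile the probe once against the network's own input variable, instead of stashing a theano.function inside set_inpt:

net = conv_net[0]
# Compile a probe from the network's symbolic input to the first
# layer's convolution output; the graph above conv_out is reused as-is.
get_feature_maps = theano.function([net.x], net.layers[0].conv_out)

This still bakes the training batch size into the graph via the reshape to image_shape, though, so it runs into the same problem described below.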
I trained this with a mini-batch size of 10, so when I pass in a single image to get the feature maps I get the following error:
ValueError: The hardcoded shape for the batch size (10) isn't the run time shape (1).
Apply node that caused the error: ConvOp{('imshp', (1, 28, 28)),('kshp', (5, 5)),('nkern', 20),('bsize', 10),('dx', 1),('dy', 1),('out_mode', 'valid'),('unroll_batch', 5),('unroll_kern', 2),('unroll_patch', False),('imshp_logical', (1, 28, 28)),('kshp_logical', (5, 5)),('kshp_logical_top_aligned', True)}(<TensorType(float32, (False, True, False, False))>, <TensorType(float32, 4D)>)
Toposort index: 0
Inputs types: [TensorType(float32, (False, True, False, False)), TensorType(float32, 4D)]
Inputs shapes: [(1L, 1L, 28L, 28L), (20L, 1L, 5L, 5L)]
Inputs strides: [(3136L, 3136L, 112L, 4L), (100L, 100L, 20L, 4L)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [['output']]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "C:\Users\Simon\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-3-8e78399125dc>", line 1, in <module>
runfile('F:/python-machine-learning/CNN/mnist.py', wdir='F:/python-machine-learning/CNN')
File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "F:/python-machine-learning/CNN/mnist.py", line 53, in <module>
conv_net = basic_conv(n=1, epochs=1)
File "F:/python-machine-learning/CNN/mnist.py", line 47, in basic_conv
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
File "network.py", line 95, in __init__
init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
File "network.py", line 273, in set_inpt
input_shape=self.image_shape)
So it seems to be complaining that I trained with a mini-batch size of 10 but am trying to pass in a 4D array containing only one item. What is the proper way to solve this?
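The only workaround I've come up with so far (a sketch, assuming the batch size stays hardcoded at 10) is to tile the single image up to a full mini-batch and keep the first slice of the result:

# Pad the single image up to the trained mini-batch size by tiling it,
# run the full batch through the compiled function, keep the first slice.
batch_4d = np.tile(image_4d, (10, 1, 1, 1)).astype(theano.config.floatX)
activations = conv_net[0].layers[0].feature_maps(batch_4d)[0]
# activations has shape (20, 24, 24): one 24x24 map per 5x5 kernel; each
# map can then be drawn with e.g. plt.imshow(activations[k], cmap='gray')

Alternatively, if I read the docs correctly, conv2d accepts None for unknown dimensions in input_shape (e.g. input_shape=(None, 1, 28, 28)), so the batch size wouldn't be compiled into the graph, but then the reshape to self.image_shape would need the same treatment. Is either of these the right approach?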
Also, the strangest part is that I only get this error when training on the CPU. When I train the exact same network on the GPU, there is no such error and I get the feature maps without a problem. I'm not sure why this difference exists...
Edit:
After the ConvPoolLayer, the network ends with fully connected and softmax layers.
Fully connected:
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_in), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
Softmax:
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)  # Predicted class
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
Finally, here is the code for stochastic gradient descent:
def SGD(self, training_data, epochs, mini_batch_size, alpha,
        validation_data, test_data, lmbda=0.0):
    """Train the network using mini-batch stochastic gradient descent."""
    training_x, training_y = training_data
    validation_x, validation_y = validation_data
    test_x, test_y = test_data
    # compute number of minibatches for training, validation and testing
    num_training_batches = size(training_data)/mini_batch_size
    num_validation_batches = size(validation_data)/mini_batch_size
    num_test_batches = size(test_data)/mini_batch_size
    # define (regularized) cost function, symbolic gradients, and updates
    l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
    cost = self.layers[-1].cost(self)+\
           0.5*lmbda*l2_norm_squared/num_training_batches
    grads = T.grad(cost, self.params)
    updates = [(param, param - alpha*grad)
               for param, grad in zip(self.params, grads)]
    # define functions to train a mini-batch, and to compute the
    # accuracy in validation and test mini-batches.
    i = T.lscalar()  # mini-batch index
    train_mb = theano.function(
        [i], cost, updates=updates,
        givens={
            self.x:
            training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    validate_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    test_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    self.test_mb_predictions = theano.function(
        [i], self.layers[-1].y_out,
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })
    # Do the actual training
    best_validation_accuracy = 0.0
    start_time = time.time()
    for epoch in xrange(epochs):
        for minibatch_index in xrange(num_training_batches):
            iteration = num_training_batches * epoch + minibatch_index
            cost_ij = train_mb(minibatch_index)
            if (iteration+1) % num_training_batches == 0:
                validation_accuracy = np.mean(
                    [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                if validation_accuracy >= best_validation_accuracy:
                    best_validation_accuracy = validation_accuracy
                    best_iteration = iteration
                    if test_data:
                        test_accuracy = np.mean(
                            [test_mb_accuracy(j) for j in xrange(num_test_batches)])