我正在使用一些小数据(6列火车,1个val和3个测试实例)运行在[1]发布的cnn代码。
[1] https://github.com/vsl9/Sentiment-Analysis-with-Convolutional-Networks
以下是经过轻微修改的代码:
print "loading data..."
x = cPickle.load(open("imdb-train-val-test.pickle", "rb"))
revs, W, word_idx_map, vocab = x[0], x[1], x[2], x[3]
# dimensions are as follows:
# W.shape = (145, 300)
# word_idx_map.len = 144
# vocab.len = 144
print "data loaded!"
max_l = np.max(pd.DataFrame(revs)['num_words'])
datasets = make_idx_data(revs, word_idx_map, max_l=max_l, kernel_size=5)
将列车数据放入单独的NumPy阵列
# Train data preparation
N = datasets[0].shape[0]
conv_input_width = W.shape[1]
conv_input_height = int(datasets[0].shape[1]-1)
# For each word write a word index (not vector) to X tensor
train_X = np.zeros((N, conv_input_height), dtype=np.int)
train_Y = np.zeros((N, 2), dtype=np.int)
for i in xrange(N):
for j in xrange(conv_input_height):
train_X[i, j] = datasets[0][i, j]
train_Y[i, datasets[0][i, -1]] = 1
print 'train_X.shape = {}'.format(train_X.shape)
print 'train_Y.shape = {}'.format(train_Y.shape)
将验证数据放在单独的NumPy数组中
# Validation data preparation
Nv = datasets[1].shape[0]
# For each word write a word index (not vector) to X tensor
val_X = np.zeros((Nv, conv_input_height), dtype=np.int)
val_Y = np.zeros((Nv, 2), dtype=np.int)
for i in xrange(Nv):
for j in xrange(conv_input_height):
val_X[i, j] = datasets[1][i, j]
val_Y[i, datasets[1][i, -1]] = 1
print 'val_X.shape = {}'.format(val_X.shape)
print 'val_Y.shape = {}'.format(val_Y.shape)
使用Keras定义和编译CNN模型
# Number of feature maps (outputs of convolutional layer)
N_fm = 300
# kernel size of convolutional layer
kernel_size = 5
model = Sequential()
# Embedding layer (lookup table of trainable word vectors)
model.add(Embedding(input_dim=W.shape[0],
output_dim=W.shape[1],
input_length=conv_input_height,
weights=[W],
W_constraint=unitnorm()))
# Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
model.add(Reshape((1, conv_input_height, conv_input_width)))
# first convolutional layer
model.add(Convolution2D(N_fm,
kernel_size,
conv_input_width,
border_mode='valid',
W_regularizer=l2(0.0001)))
# ReLU activation
model.add(Activation('relu'))
# aggregate data in every feature map to scalar using MAX operation
model.add(MaxPooling2D(pool_size=(conv_input_height-kernel_size+1, 1)))
model.add(Flatten())
model.add(Dropout(0.01))
# Inner Product layer (as in regular neural network, but without non-linear activation function)
model.add(Dense(2))
# SoftMax activation; actually, Dense+SoftMax works as Multinomial Logistic Regression
model.add(Activation('softmax'))
# Custom optimizers could be used, though right now standard adadelta is employed
opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
model.compile(loss='categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
训练N_epoch时代的模型
epoch = 0
val_acc = []
val_auc = []
N_epoch = 3
for i in xrange(N_epoch):
model.fit(train_X, train_Y, batch_size=1, nb_epoch=1, verbose=1)
output = model.predict_proba(val_X, batch_size=1, verbose=1)
# find validation accuracy using the best threshold value t
# ...
epoch += 1
print '{} epochs passed'.format(epoch)
print 'Accuracy on validation dataset:'
print val_acc
print 'AUC on validation dataset:'
print val_auc
我收到以下错误,我无法弄清楚问题是什么。请考虑我检查输入数据的尺寸和矩阵是否正确,我检查了几次!
$ KERAS_BACKEND=theano python train_cnn_imdb.py
Using Theano backend.
loading data...
(145, 300)
data loaded!
train_X.shape = (6, 41)
train_Y.shape = (6, 2)
val_X.shape = (1, 41)
val_Y.shape = (1, 2)
Epoch 1/1
Traceback (most recent call last):
File "train_cnn_imdb.py", line 155, in <module>
model.fit(train_X, train_Y, batch_size=1, nb_epoch=1, verbose=1)
File "/keras/models.py", line 640, in fit
sample_weight=sample_weight)
File "/keras/engine/training.py", line 1127, in fit
callback_metrics=callback_metrics)
File "/keras/engine/training.py", line 845, in _fit_loop
outs = f(ins_batch)
File "/keras/backend/theano_backend.py", line 811, in __call__
return self.function(*inputs)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 886, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 325, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 873, in __call__
self.fn() if output_subset is None else\
RuntimeError: BaseCorrMM: Failed to allocate output of 1 x 300 x -3 x -258
Apply node that caused the error: CorrMM{valid, (1, 1), (1, 1)}(InplaceDimShuffle{0,2,x,1}.0, Subtensor{::, ::, ::int64, ::int64}.0)
Toposort index: 43
Inputs types: [TensorType(float32, (False, False, True, False)), TensorType(float32, 4D)]
Inputs shapes: [(1, 300, 1, 41), (300, 300, 5, 300)]
Inputs strides: [(1200, 4, 49200, 1200), (4, 1200, -108000000, -360000)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[InplaceDimShuffle{0,2,3,1}(CorrMM{valid, (1, 1), (1, 1)}.0)]]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "train_cnn_imdb.py", line 124, in <module>
W_regularizer=l2(0.0001)))
File "/keras/models.py", line 325, in add
output_tensor = layer(self.outputs[0])
File "/keras/engine/topology.py", line 514, in __call__
self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
File "/keras/engine/topology.py", line 572, in add_inbound_node
Node.create_node(self, inbound_layers, node_indices, tensor_indices)
File "/keras/engine/topology.py", line 149, in create_node
output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
File "/keras/layers/convolutional.py", line 464, in call
filter_shape=self.W_shape)
File "/keras/backend/theano_backend.py", line 1342, in conv2d
filter_shape=filter_shape)
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.