我正在尝试在 TensorFlow 中实现 LSTM,但在输入形状上遇到了问题。
我有一个语料库,先对其进行分词(tokenize)并去除标点符号,然后按如下方式构造序列:
# Window size in tokens: 50 + 1. The later split uses the last column as the
# label, so this is presumably 50 inputs plus 1 target -- confirm.
length = 50 + 1

# Slide a `length`-token window over `tokens` and keep each window as one
# space-joined line. `end` is the exclusive right edge of the window, so the
# first window is tokens[0:length].
# NOTE: range() stops at len(tokens) - 1, so the window that ends on the very
# last token is not produced.
sequences = [
    ' '.join(tokens[end - length:end])
    for end in range(length, len(tokens))
]
print('Total Sequences: %d' % len(sequences))
然后我将这些序列划分为训练集和测试集,如下所示:
# Turn the collected sequences into an array and show its size and contents.
sequences = array(sequences)
print(len(sequences))
print(sequences)

# Fixed-position split: rows 0..9999 are used for training, rows 10000..11999
# for testing. (The original side note "17314" is unexplained -- presumably
# the total number of rows; confirm.)
sequences_train = sequences[:10000]
sequences_test = sequences[10000:12000]

# Per row: every column except the last is the input, the last is the label.
# NOTE(review): this 2-D indexing assumes `sequences` holds integer-encoded
# rows rather than the space-joined strings built earlier -- confirm that an
# encoding step runs in between.
X_train = sequences_train[:, :-1]
y_train = sequences_train[:, -1]
X_test = sequences_test[:, :-1]
y_test = sequences_test[:, -1]

# Categorical encoding of the training labels over `vocab_size` classes
# (presumably Keras' to_categorical, i.e. one-hot rows -- confirm).
# NOTE(review): the training code visible further down feeds `y_train`, not
# `y`.
y = to_categorical(y_train, num_classes=vocab_size)
然后我创建了神经网络:
# Network dimensions.
num_input = 50      # last dimension of the X placeholder
timesteps = 48      # number of time steps the input is unstacked into
num_hidden = 128    # hidden layer num of features
num_classes = 48    # width of the Y placeholder and of the output layer
num_features = X_train.shape[1]
#num_classes = 1

# tf Graph input: X is (batch, timesteps, num_input), Y is (batch, num_classes).
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# Output layer parameters (num_hidden -> num_classes), drawn from a normal
# distribution.
weights = {'out': tf.Variable(tf.random_normal(shape=[num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal(shape=[num_classes]))}
def RNN(x, weights, biases):
    """Run a single LSTM cell over `x` and project its final output.

    Args:
        x: Input tensor laid out as (batch_size, timesteps, n_input).
        weights: Mapping whose 'out' entry is the output projection matrix.
        biases: Mapping whose 'out' entry is the output bias.

    Returns:
        The un-normalised scores: the LSTM output of the last time step
        multiplied by weights['out'], plus biases['out'].
    """
    # static_rnn wants a list holding one (batch_size, n_input) tensor per
    # time step, so split the input along axis 1.
    steps = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Linear activation on the output of the last time step only.
    final_output = step_outputs[-1]
    return tf.matmul(final_output, weights['out']) + biases['out']
# Forward pass: raw scores and their softmax probabilities.
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer: mean softmax cross-entropy, minimised with plain
# gradient descent.
# NOTE(review): `learning_rate` is not assigned in the code shown here; it
# must already be defined when this line runs.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)
loss_op = tf.reduce_mean(cross_entropy)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model: a row counts as correct when the index of its largest
# predicted probability equals the index of its largest label entry.
# (No dropout is used in this graph.)
predicted_class = tf.argmax(prediction, 1)
true_class = tf.argmax(Y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# network params
batch_size = 48
max_epochs = 30
display_step = 1    # validation runs on epochs divisible by this value
num_epochs = 0      # incremented each time validation runs
epoch = 0

# batch sampling: seed NumPy's generator, then index every training row
np.random.seed(0)
train_indices = np.arange(len(X_train))

# lists (each name gets its own, independent, empty list)
train_cost = []
val_cost = []
pred_undrsamp = []
y_out_train = []
train_batch_x = []
y_batch_idx = []
y_out_training = []
# restricting memory usage, TensorFlow is greedy and will use all memory otherwise
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
session_config = tf.ConfigProto(gpu_options=gpu_opts)

with tf.Session(config=session_config) as sess:
    try:
        sess.run(tf.global_variables_initializer())
        print("Starting...\n")
        print(str("---------------")*7)

        # Number of whole batches per epoch; leftover rows are not visited.
        total_batch = X_train.shape[0] // batch_size

        # What to fetch for a training step and for a validation pass.
        fetches_train = [train_op, loss_op, prediction]
        fetches_test = [loss_op, prediction]

        for epoch in range(max_epochs):
            # Visit the training rows in a new random order every epoch.
            np.random.shuffle(train_indices)

            # loop over all batches
            for step in range(total_batch):
                start = batch_size * step
                batch_idx = train_indices[start:start + batch_size]

                # The selected rows are fed as ONE sample of shape
                # (1, timesteps, num_input) with a single (1, 48) label row.
                batch_xs = X_train[batch_idx].reshape((1, timesteps, num_input))
                batch_ys = y_train[batch_idx].reshape((1, 48))
                #print(X.shape, batch_xs.shape, Y.shape, batch_ys.shape)

                # running the train_op
                _, batch_loss, batch_pred = sess.run(
                    fetches=fetches_train,
                    feed_dict={X: batch_xs, Y: batch_ys},
                )
                train_cost.append(batch_loss)
                y_out_train = list(batch_pred)

                # storing for evaluation
                y_batch_idx.extend(batch_ys)
                y_out_training.extend(y_out_train)

            # compute validation loss and accuracy
            if epoch % display_step == 0:
                num_epochs += 1

                ### VALIDATING ###
                # NOTE(review): training batches are reshaped to
                # (1, timesteps, num_input) before being fed, but X_test and
                # y_test are fed here exactly as they were split (the
                # matching reshape below is disabled). Confirm that their
                # shapes fit the X / Y placeholders.
                #X_test = X_test.reshape((1, timesteps, num_input))
                #y_test = y_test.reshape((1, 48))
                print(X.shape, X_test.shape, Y.shape, y_test.shape)

                # running the validation
                epoch_val_loss, val_pred = sess.run(
                    fetches_test,
                    feed_dict={X: X_test, Y: y_test},
                )

                # storing for evaluation
                val_cost.append(epoch_val_loss)
                pred_undrsamp = list(val_pred)  # y_out validation

                print(
                    "Epoch %i, Train cost: %0.3f,\t Val cost: %0.3f, Val ROC curve (area = %0.3f)\t"
                    % (
                        epoch + 1,
                        train_cost[-1],
                        val_cost[-1],
                        roc_auc_score(y_test, pred_undrsamp),
                    )
                )

        # Final summary, using the predictions of the last validation pass.
        print(str("---------------")*7, "\n\n", "Optimization Finished!")
        print(
            "\n",
            "Total epochs: %i\t Final validation ROC_AUC: %0.3f\t"
            % (epoch + 1, roc_auc_score(y_test, pred_undrsamp)),
            "\n",
        )
        print(get_plot_ROC(y_test, pred_undrsamp, 'Feedforward NN using RandomUnderSampling'))
    except KeyboardInterrupt:
        # Let the user stop a long run with Ctrl-C without a traceback.
        print('KeyboardInterrupt')

print('Done')
我有两个问题:如果在训练期间不进行测试,train_cost 很大,但程序可以运行;而如果我对网络进行测试(也就是不把下面这段代码注释掉):
# Validation step quoted from the training loop: fetch the loss and the
# softmax predictions for the held-out data.
fetches_test = [loss_op, prediction]
# Disabled reshape of the test data to the layout used for training batches.
#X_test = X_test.reshape((1, timesteps, num_input))
#y_test = y_test.reshape((1, 48))
# X_test / y_test are therefore fed exactly as they were split.
feed_dict_valid = {X: X_test, Y: y_test}
res_t = sess.run(fetches_test, feed_dict=feed_dict_valid)
# storing for evaluation
val_cost += [res_t[0]]
pred_undrsamp = list(res_t[1]) # y_out validation
我收到此错误:
InvalidArgumentError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1326 try:
-> 1327 return fn(*args)
1328 except errors.OpError as e:
~\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1305 feed_dict, fetch_list, target_list,
-> 1306 status, run_metadata)
1307
~\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
~\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
InvalidArgumentError: You must feed a value for placeholder tensor
'Placeholder' with dtype float and shape [?,48,50]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[?,48,50],
_device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-24-018940a98f31> in <module>()
50
51 # running the train_op
---> 52 res = sess.run(fetches=fetches_train,
feed_dict=feed_dict_train)
53
54 # storing cross entropy, predictions (second fetch
argument, so index=1)
答案 0 :(得分:0)
您在训练之前对训练数据做了 reshape(重塑形状)。
难道不应该对 x_test 和 y_test 做同样的处理吗(这部分代码被您注释掉了)?
请将这段代码移到循环之外,并取消注释。