Question

我正在尝试在Tensorflow中实现LSTM，但我对输入形状有问题。

我有一个语料库，我对其进行分词（tokenize）并删除标点符号，然后用下面的方式生成序列：

# Window size: 50 tokens plus 1 more; the last token of every window is
# split off later as the prediction target.
length = 50 + 1
# Slide the window forward one token at a time and keep each window as a
# single space-separated line.
sequences = [
    ' '.join(tokens[stop - length:stop])
    for stop in range(length, len(tokens))
]
print('Total Sequences: %d' % len(sequences))

然后我将这些序列划分为训练集和测试集，如下所示：

# Turn the list of sequences into an array (presumably numpy.array) so it
# can be sliced by rows and columns.
# NOTE(review): the column slicing below needs a 2-D array (one row per
# sequence, one column per token). The sequences built above are joined
# strings, so an integer-encoding step is presumably missing from this
# excerpt -- confirm.
sequences = array(sequences)
print(len(sequences))
print(sequences)
# The first 10000 sequences are used for training, the next 2000 for testing.
sequences_train = sequences[:10000] #17314
sequences_test = sequences[10000:12000]
# Every column except the last is the input; the last column is the target.
X_train, y_train = sequences_train[:,:-1], sequences_train[:,-1]
X_test, y_test = sequences_test[:,:-1], sequences_test[:,-1]
# Encode the training targets with `vocab_size` classes (to_categorical is
# presumably the Keras one-hot helper -- confirm).
# NOTE(review): `y` is not referenced again in the code shown; the training
# loop feeds `y_train` instead -- confirm which one is intended.
y = to_categorical(y_train, num_classes=vocab_size)

然后我创建了神经网络：

# Size of the feature vector fed to the LSTM at each timestep.
num_input = 50
# Number of timesteps each input sample is unstacked into.
timesteps = 48 
num_hidden = 128 # hidden layer num of features
# Width of the output layer (number of logits per sample).
# NOTE(review): the targets above are encoded with `vocab_size` classes --
# confirm that 48 is the intended class count.
num_classes = 48 
# Number of columns in X_train; not used by the code shown below.
num_features = X_train.shape[1]
#num_classes = 1


# tf Graph input
# X accepts batches shaped (batch, timesteps, num_input);
# Y accepts batches shaped (batch, num_classes).
X = tf.placeholder(tf.float32, [None, timesteps, num_input])
Y = tf.placeholder(tf.float32, [None, num_classes])

# Define weights
# Output projection applied to the LSTM output: (num_hidden, num_classes),
# initialised from a normal distribution.
weights = {
'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
# One bias per output class, initialised from a normal distribution.
biases = {
'out': tf.Variable(tf.random_normal([num_classes]))
}

def RNN(x, weights, biases):
    """Build a single LSTM layer and project its final output to logits.

    Args:
        x: Input tensor laid out as (batch_size, timesteps, n_input).
        weights: Dict whose 'out' entry is the output projection matrix.
        biases: Dict whose 'out' entry is the output bias vector.

    Returns:
        The un-normalised logits computed from the last timestep's output.
    """
    # static_rnn consumes a list holding one (batch_size, n_input) tensor per
    # timestep, so split the input along the time axis first.
    step_inputs = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    step_outputs, _final_state = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear read-out applied to the output of the final timestep only.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
# Build the logits for the input placeholder and turn them into class
# probabilities.
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
# The loss is the mean softmax cross-entropy between the logits and labels Y.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
# NOTE(review): `learning_rate` is not defined anywhere in the code shown --
# it must be set before this line runs.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model: a prediction counts as correct when its arg-max matches the
# arg-max of the corresponding label row; accuracy is the mean of those hits.
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
# NOTE(review): the session below calls tf.global_variables_initializer()
# directly and never runs `init`.
init = tf.global_variables_initializer()

# Loop configuration and counters
batch_size = 48      # rows drawn from X_train per training step
max_epochs = 30      # number of passes over the training set
display_step = 1     # validate every `display_step` epochs
num_epochs = 0
epoch = 0

# Index array that is shuffled each epoch to draw mini-batches; the seed is
# fixed so the shuffling order is repeatable
np.random.seed(0)
train_indices = np.arange(len(X_train))

# Accumulators filled in by the training / validation loop
train_cost = []
val_cost = []
pred_undrsamp = []
y_out_train = []
train_batch_x = []
y_batch_idx = []
y_out_training = []

# Cap this process at 20% of GPU memory; TensorFlow is greedy and will use
# all memory otherwise
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

# Train the network for `max_epochs` epochs, validating on the test split
# every `display_step` epochs. A KeyboardInterrupt (Ctrl-C) stops the run
# cleanly instead of raising a traceback.
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as sess:
    try:
        sess.run(tf.global_variables_initializer())
        print("Starting...\n")
        print(str("---------------")*7)

        # total batches (a trailing partial batch, if any, is dropped)
        total_batch = int(X_train.shape[0]/batch_size)
        for epoch in range(max_epochs):

            # shuffling
            np.random.shuffle(train_indices)

            # loop over all batches
            for i in range(total_batch):

                batch_idx = train_indices[batch_size*i:batch_size*(i+1)]
                batch_xs = X_train[batch_idx]
                batch_ys = y_train[batch_idx]
                # NOTE(review): these reshapes fold the whole mini-batch
                # (batch_size rows) into ONE sample of shape
                # (1, timesteps, num_input) with ONE label row of width 48,
                # rather than feeding batch_size separate samples -- confirm
                # this is intended.
                batch_xs = batch_xs.reshape((1, timesteps, num_input))
                batch_ys = batch_ys.reshape((1, 48))

                feed_dict_train = {X: batch_xs, Y: batch_ys}
                fetches_train = [train_op, loss_op, prediction]
                #print(X.shape, batch_xs.shape, Y.shape, batch_ys.shape)

                # running the train_op
                res = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)

                # res[1] is the loss, res[2] the softmax predictions
                train_cost += [res[1]]
                y_out_train = list(res[2])

                # storing for evaluation
                y_batch_idx += list(batch_ys)
                y_out_training += list(y_out_train)

            # compute validation loss and accuracy
            if epoch % display_step == 0:
                # count the epochs on which validation ran
                num_epochs += 1

                ### VALIDATING ###
                # deciding which parts to fetch
                fetches_test = [loss_op, prediction]

                #X_test = X_test.reshape((1, timesteps, num_input))
                #y_test = y_test.reshape((1, 48))

                # what to feed our accuracy op
                # NOTE(review): X_test / y_test are fed without the reshape
                # applied to the training batches, so their shapes must
                # already match the X / Y placeholders.
                feed_dict_valid = {X: X_test, Y: y_test}
                print(X.shape, X_test.shape, Y.shape, y_test.shape)

                # running the validation
                res_t = sess.run(fetches_test, feed_dict=feed_dict_valid)

                # storing for evaluation
                val_cost += [res_t[0]]
                pred_undrsamp = list(res_t[1]) # y_out validation


                print("Epoch %i, Train cost: %0.3f,\t Val cost: %0.3f, Val ROC curve (area = %0.3f)\t" % ((epoch+1), train_cost[-1],val_cost[-1], roc_auc_score(y_test, pred_undrsamp)))

        print(str("---------------")*7, "\n\n","Optimization Finished!")
        print("\n","Total epochs: %i\t Final validation ROC_AUC: %0.3f\t"% \
              ((epoch+1),roc_auc_score(y_test, pred_undrsamp)),"\n")
        print(get_plot_ROC(y_test, pred_undrsamp, 'Feedforward NN using RandomUnderSampling'))



    except KeyboardInterrupt:
        print('KeyboardInterrupt')

print('Done')

我有两个问题：如果我在训练期间不进行测试，train_cost 会很大，但程序可以运行；而如果我对网络进行测试（即不注释掉下面这段代码）：

# Ops to evaluate during validation: the loss and the softmax predictions.
fetches_test = [loss_op, prediction]

#X_test = X_test.reshape((1, timesteps, num_input))
#y_test = y_test.reshape((1, 48))


# Feed the test inputs and labels to the X and Y placeholders.
feed_dict_valid = {X: X_test, Y: y_test}

# Run the validation fetches in the session.
res_t = sess.run(fetches_test, feed_dict=feed_dict_valid)

# storing for evaluation 
val_cost += [res_t[0]]
pred_undrsamp = list(res_t[1]) # y_out validation

我收到此错误：

InvalidArgumentError                      Traceback (most recent call last)
~\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:

~\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 

~\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:

 ~\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
    465           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466           pywrap_tensorflow.TF_GetCode(status))
    467   finally:

InvalidArgumentError: You must feed a value for placeholder tensor 
'Placeholder' with dtype float and shape [?,48,50]
     [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[?,48,50], 
_device="/job:localhost/replica:0/task:0/cpu:0"]()]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-24-018940a98f31> in <module>()
     50 
     51                 # running the train_op
 ---> 52                 res = sess.run(fetches=fetches_train, 
feed_dict=feed_dict_train)
     53 
     54                 # storing cross entropy, predictions (second fetch 
argument, so index=1)

Answer 1

您在训练之前对训练数据做了 reshape（重塑形状）。

难道你不应该对 X_test 和 y_test 做同样的处理吗？（你已经把这段代码注释掉了。）

将此代码移出循环并取消注释。

错误信息的意思是：您必须为占位符张量 'Placeholder'（dtype 为 float，形状为 [?,48,50]）提供（feed）一个值。

1 个答案: