Question

我正在尝试使用sklearn中可用的20个新闻组数据集来训练LSTM进行增量学习（分类）。我使用了sklearn的TfidfVectorizer来预处理数据。然后我将生成的稀疏矩阵转换为numpy数组，然后再馈入。之后，对下面的行进行编码：

outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)

给出了一个错误，说“ inputs_”应具有3个维度。所以我用：

inputs_ = tf.expand_dims(inputs_, 0)

扩大尺寸。但是当我这样做时，我得到了错误：

ValueError：必须通过形状访问输入大小（输入深度）推断，但看到值None。

“ input_”的形状为：（1，134410）

我已经看过this帖子，但没有帮助。

我似乎无法理解如何解决此问题。任何帮助深表感谢。预先谢谢你！

下面显示的是我的完整代码：

import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer


def pre_process():

    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))

    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)

    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)

    return features, labels

def get_batches(x, y, batch_size=1):

    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]


def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()



def train_test():

    features, labels = pre_process()

    #Defining Hyperparameters

    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003

    print(lstm_size)
    print(batch_size)
    print(epochs)

    #--------------placeholders-------------------------------------

    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():

        tf.set_random_seed(1)

        inputs_ = tf.placeholder(tf.float32, [None,None], name = "inputs")
        # labels_ = tf.placeholder(dtype= tf.int32)
        labels_ = tf.placeholder(tf.int32, [None,None], name = "labels")

        #getting dynamic batch size according to the input tensor size

        # dynamic_batch_size = tf.shape(inputs_)[0]

        #output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.

        keep_prob = tf.placeholder(tf.float32, name = "keep_prob")

        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        #Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        inputs_ = tf.expand_dims(inputs_, 0)

        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_,  initial_state=initial_state)

        #hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)

        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))

        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)


        saver = tf.train.Saver()

    # ----------------------------online training-----------------------------------------

    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])

        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):

            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}


            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)

            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ",y)

            pred = np.array(predictions)
            print(pred)
            print(y)

            if not ((pred==y).all()):
                wrongPred += 1

            if ii % 27 == 0:
                plot_error(errorplot,ii,wrongPred)

            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)

            print("Train loss: {:.3f}".format(loss))
            iteration += 1

        saver.save(sess, "checkpoints/sentiment.ckpt")
        errorRate = wrongPred/len(labels)
        print("ERROR RATE: ", errorRate )

if __name__ == '__main__':
    train_test()

Answer 1

ValueError：输入大小（输入深度）必须可以通过形状推断来访问，但是看到的值是None。

出现此错误是因为您没有指定大小或输入数量。我让脚本像这样工作：

inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")


inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim,  initial_state=initial_state)

必须通过形状推断可访问输入大小（输入深度），但是在尝试将tf.expand_dims轴设置为0时，看到的值无错误。

1 个答案: