如何在 TensorFlow 中使用 Python 3 通过 LSTM 模型预测情感?

时间:2017-08-07 14:19:59

标签: python-3.x testing tensorflow lstm rnn

# Load the vocabulary: np.load returns a NumPy array, which is converted to a
# plain Python list, and every entry is then decoded from UTF-8 bytes to str.
wordsList = [
    rawWord.decode('UTF-8')
    for rawWord in np.load('training_data/wordsList.npy').tolist()
]
# Load the array stored in wordVectors.npy (passed to the embedding lookup later on).
wordVectors = np.load('training_data/wordVectors.npy')

在变量 positiveFiles 和 negativeFiles 中加载了一些文件:

# Build the (numFiles x maxSeqLength) matrix of word ids, one row per review file.
#
# The original code wrapped this section in `with tf.device('/gpu:0'):`. No
# TensorFlow ops are created here (only NumPy and plain Python), so the device
# scope is not needed and has been dropped.

# Id stored for words that are not in the vocabulary ("vector for unknown words").
unknownWordId = 399999

# Map every word to the index of its FIRST occurrence in wordsList. This gives
# the same result as wordsList.index(word) but with a constant-time lookup
# instead of a linear scan over the whole vocabulary for every single token.
wordToId = {}
for position, vocabWord in enumerate(wordsList):
    wordToId.setdefault(vocabWord, position)

ids = np.zeros((numFiles, maxSeqLength), dtype='int32')
fileCounter = 0
# Positive files fill the first rows and negative files the rows after them;
# the batch helpers rely on this ordering when they pick row ranges.
for fileGroup in (positiveFiles, negativeFiles):
    for path in fileGroup:
        with open(path, "r") as f:
            # Only the first line of each file is used.
            line = f.readline()
        tokens = cleanSentences(line).split()
        # Rows stay zero-padded when a review has fewer than maxSeqLength
        # tokens; longer reviews are truncated.
        for indexCounter, word in enumerate(tokens[:maxSeqLength]):
            ids[fileCounter][indexCounter] = wordToId.get(word, unknownWordId)
        fileCounter = fileCounter + 1

# np.save appends the '.npy' extension, so this writes 'idsMatrix.npy'.
np.save('idsMatrix', ids)

batchSize = 24

训练和测试方法

def getTrainBatch():
    """Assemble one random training batch from the global ``ids`` matrix.

    Even batch positions draw a row number from 1..11499 and get the label
    ``[1, 0]``; odd positions draw from 13499..24999 and get ``[0, 1]``
    (``randint`` includes both bounds). The drawn number is 1-based, so the
    row actually copied is ``ids[num - 1]``.

    Returns:
        A tuple ``(arr, labels)``: ``arr`` is a float array of shape
        ``[batchSize, maxSeqLength]`` and ``labels`` is a list of
        ``batchSize`` one-hot pairs.
    """
    # NOTE(review): presumably [1, 0] marks a positive review and [0, 1] a
    # negative one, given that positive files are written to `ids` first -
    # confirm against the data preparation step.
    batch = np.zeros([batchSize, maxSeqLength])
    oneHotLabels = []
    for position in range(batchSize):
        if position % 2 != 0:
            num = randint(13499, 24999)
            oneHot = [0, 1]
        else:
            num = randint(1, 11499)
            oneHot = [1, 0]
        oneHotLabels.append(oneHot)
        # The one-row slice has shape (1, maxSeqLength); NumPy broadcasts it
        # into the batch row.
        batch[position] = ids[num - 1:num]
    return batch, oneHotLabels

def getTestBatch():
    """Assemble one random held-out batch from the global ``ids`` matrix.

    Every batch position draws a row number from 11499..13499 (``randint``
    includes both bounds). Numbers up to 12499 get the label ``[1, 0]``, the
    rest get ``[0, 1]``. The drawn number is 1-based, so the row actually
    copied is ``ids[num - 1]``.

    Returns:
        A tuple ``(arr, labels)``: ``arr`` is a float array of shape
        ``[batchSize, maxSeqLength]`` and ``labels`` is a list of
        ``batchSize`` one-hot pairs.
    """
    # NOTE(review): the label threshold is applied to `num` while the row read
    # is `num - 1`; whether the boundary row gets the right label depends on
    # how many positive files there are - confirm against the data set.
    batch = np.zeros([batchSize, maxSeqLength])
    oneHotLabels = []
    for position in range(batchSize):
        num = randint(11499, 13499)
        oneHotLabels.append([1, 0] if num <= 12499 else [0, 1])
        # The one-row slice has shape (1, maxSeqLength); NumPy broadcasts it
        # into the batch row.
        batch[position] = ids[num - 1:num]
    return batch, oneHotLabels

# Build the input side of the graph: placeholders, embedding lookup and the LSTM.
with tf.device('/gpu:0'):
    # Hyper-parameters. `iterations` is the number of training steps run later.
    batchSize = 24
    lstmUnits = 64
    numClasses = 2
    iterations = 100000

    # NOTE(review): the default graph is reset *inside* the tf.device scope.
    # The scope may have been attached to the previous default graph, in which
    # case the ops created below are not pinned to /gpu:0 - confirm placement.
    tf.reset_default_graph()

    # Both placeholders have a fixed first dimension, so every feed (training,
    # evaluation, prediction) must contain exactly batchSize rows.
    labels = tf.placeholder(tf.float32, [batchSize, numClasses])
    input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])

    # NOTE(review): this Variable is rebound on the very next line and never
    # used, but it is still created in the graph. Removing it would presumably
    # shift the auto-generated names of later variables, so check any existing
    # checkpoints before deleting it.
    data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]), dtype=tf.float32)
    # Replace every word id with its row of wordVectors.
    data = tf.nn.embedding_lookup(wordVectors, input_data)

    lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
    # The keep probability is a constant, so dropout is applied on every run of
    # the graph, including evaluation and prediction.
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
    # `value` holds the LSTM outputs for every time step; the final state is discarded.
    value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

# Output layer: project the last LSTM output onto numClasses logits.
with tf.device('/gpu:0'):
    weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
    bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
    # Swap the first two axes (presumably batch-major -> time-major) so that
    # the last time step can be selected along axis 0.
    value = tf.transpose(value, [1, 0, 2])
    # Pick the entry at the final index of axis 0, i.e. the last time step.
    last = tf.gather(value, int(value.get_shape()[0]) - 1)
    # Unnormalised logits; softmax is applied inside the loss below.
    prediction = (tf.matmul(last, weight) + bias)

# Fraction of rows whose highest-scoring class matches the one-hot label.
correctPred = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1))
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))

# Mean softmax cross-entropy over the batch, minimised with Adam (default settings).
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
optimizer = tf.train.AdamOptimizer().minimize(loss)

# Open a session, create a Saver for the graph's variables and initialise them.
sess = tf.InteractiveSession()
saver = tf.train.Saver()
initOp = tf.global_variables_initializer()
sess.run(initOp)

# Run `iterations` optimisation steps, each on a freshly sampled training batch.
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        trainFeed = {input_data: nextBatch, labels: nextBatchLabels}
        sess.run(optimizer, trainFeed)

# Evaluate on a handful of random test batches.
# The original loop ran the accuracy op but threw the result away, so the
# evaluation produced no visible output; the value is now captured and printed.
iterations = 10
for i in range(iterations):
    nextBatch, nextBatchLabels = getTestBatch()
    batchAccuracy = sess.run(accuracy, {input_data: nextBatch, labels: nextBatchLabels})
    print("Accuracy for this batch:", batchAccuracy * 100)

这里我想针对给定的句子预测输出 1 或 0。从检查点加载模型之后,我该如何判断一个句子是正面的(1)还是负面的(0)?

# Rebuild the saved graph from its meta file; this returns a Saver for it.
new_saver = tf.train.import_meta_graph('models/pretrained....')
# Look up the most recent checkpoint in the models directory and load its
# variable values into the current session.
latestCheckpoint = tf.train.latest_checkpoint('models/./')
new_saver.restore(sess, latestCheckpoint)

请帮忙。

1 个答案:

答案 0 :(得分:0)

为输入和输出张量命名,然后从图中按名称取回这些张量来进行预测;下面给出了必要的修改以及用于预测的额外代码。

...
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength], name='inputs')
...
prediction = (tf.matmul(last, weight) + bias)
# you may use softmax if you want probabilities for prediction, but not for calculating the loss
# prediction = tf.nn.softmax(prediction)
prediction = tf.identity(prediction, name='prediction')
...
# Train, then write the model to disk once training has finished.
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        # The original line was missing the closing parenthesis of sess.run(...),
        # which made the whole snippet a syntax error.
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})
    # Saves under the prefix 'model' in the current directory; the restore step
    # needs the resulting 'model.meta' file and the 'model' prefix.
    saver.save(sess, 'model')

恢复代码:这里使用 model.meta 和 model 的相对或绝对路径

# Restore the trained model and run a prediction.
#
# The graph import and the variable restore must happen inside the session that
# is later used for sess.run. The original code called restore() before the
# `with tf.Session()` block, so the session used for prediction never received
# the trained variable values.
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('/path/to/model.meta')
    new_saver.restore(sess, '/path/to/model')
    g = tf.get_default_graph()
    # Fetch the tensors by the names assigned when the graph was built.
    inputs = g.get_tensor_by_name('inputs:0')
    prediction = g.get_tensor_by_name('prediction:0')
    # `your_inputs` holds word ids and must match the placeholder's fixed shape
    # [batchSize, maxSeqLength]. Each output row contains one score per class;
    # the index of the larger score is the predicted class.
    prediction_ = sess.run(prediction, {inputs: your_inputs})