Question

我的神经网络代码最终会在大多数情况下输出带NaN的向量。代码如下：

from __future__ import division, print_function

from six.moves import xrange

import time

import os

from glob import glob

from zipfile import ZipFile, ZIP_DEFLATED

import numpy as np

import tensorflow as tf

# ---------------------------------------------------------------------------
# Settings the user is expected to provide
# ---------------------------------------------------------------------------

# Number of units in each RNN; word and document embeddings share this size.
num_units = 100

# How many data elements go into one batch.
batch_size = 1

# Directory containing the .npz files that hold the numpy arrays.
npz_files_folder = "npz_files"

# Base name of the file the model is saved to.
model_file = "rnn_trial"

# How many labels are sampled from the noise distribution for NCE.
num_sampled = 50

# Dropout probability used by the network.
dropout = 0.2

# Learning rate handed to the optimizer.
lr = 0.1

# Number of passes over the document list.
epochs = 10

# One .npz file per document, kept in sorted order so that a document's
# position in this list is stable from run to run.
doc_files = glob(os.path.join(npz_files_folder, "*.npz"))
doc_files.sort()

# Both counts are simply the number of document files that were found.
num_docs = len(doc_files)
num_classes = num_docs


# Input: a batch of sentences, where each sentence is a sequence of word
# embeddings. float32 placeholder of shape [batch_size, None, num_units];
# the None axis is the variable sequence length.
sentences = tf.placeholder(tf.float32, [batch_size, None, num_units],
    name='sentences')


# Input: a batch of documents, where each document is a sequence of sentence
# embeddings. Same layout as `sentences`.
documents = tf.placeholder(tf.float32, [batch_size, None, num_units])

# Target value for each document in the batch (one float per document).
labels = tf.placeholder(tf.float32, [batch_size])

# First layer: an LSTM over a sentence, with dropout on its outputs.
# NOTE(review): output_keep_prob is a constant here, so dropout appears to be
# applied on every evaluation of `outputs_lstm`, not only during training --
# confirm this is intended.
sent_lstm = tf.contrib.rnn.BasicLSTMCell(num_units)
sent_lstm = tf.contrib.rnn.DropoutWrapper(sent_lstm,
    output_keep_prob=1.0-dropout)

# All-zero initial state for the first-layer LSTM.
initial_state_sent_lstm = sent_lstm.zero_state(batch_size, tf.float32)

# Per-step outputs and final state of the first layer.
outputs_lstm, states_lstm = tf.nn.dynamic_rnn(sent_lstm,
    inputs=sentences, initial_state=initial_state_sent_lstm)

# Second layer, forward direction: a GRU with num_units//2 units.
doc_gru_fw = tf.contrib.rnn.GRUCell(num_units//2)
initial_state_doc_gru_fw = doc_gru_fw.zero_state(batch_size, tf.float32)

# Second layer, backward direction: a GRU with the remaining units, so the
# two directions together have num_units outputs.
doc_gru_bw = tf.contrib.rnn.GRUCell(num_units-num_units//2)
initial_state_doc_gru_bw = doc_gru_bw.zero_state(batch_size, tf.float32)

# Per-step outputs and final states of the bidirectional second layer,
# started from the explicit zero states defined above.
outputs_gru, states_gru = tf.nn.bidirectional_dynamic_rnn(cell_fw=doc_gru_fw,
    cell_bw=doc_gru_bw, initial_state_fw=initial_state_doc_gru_fw,
    initial_state_bw=initial_state_doc_gru_bw,
    inputs=documents)
# Disabled alternative: the same call without explicit initial states.
# outputs_gru, states_gru = tf.nn.bidirectional_dynamic_rnn(cell_fw=doc_gru_fw,
#     cell_bw=doc_gru_bw,
#     inputs=documents, dtype=tf.float32)


# Final document embedding: forward and backward outputs concatenated along
# the feature axis, then averaged over the sequence axis (axis 1).
# NOTE(review): if `documents` is fed with a zero-length sequence axis, this
# mean is taken over no elements and is expected to be NaN -- a likely source
# of NaN outputs; confirm against the data being fed.
final_output = tf.reduce_mean(tf.concat(outputs_gru, 2), axis=1)

# Weights and bias of a single-output linear layer on the document embedding.
sigmoid_W = tf.Variable(
    tf.truncated_normal([num_units, 1],
    stddev=1.0/np.sqrt(num_units)))
sigmoid_b = tf.Variable(tf.zeros([1], dtype=tf.float32))

logits = tf.matmul(final_output, sigmoid_W) + sigmoid_b

# Prediction: the sigmoid of the logit, scaled by (num_docs - 1).
y_ = (num_docs - 1) * tf.sigmoid(tf.reshape(logits, [-1]))

# Sum of squared differences between predictions and labels.
loss = tf.reduce_sum(tf.square(y_ - labels))

# Training step: Adam with learning rate `lr`, minimizing `loss`.
# NOTE(review): `loss` is built only from the `documents` and `labels`
# placeholders, so this step does not appear to update the first-layer LSTM
# weights -- confirm this is intended.
train = tf.train.AdamOptimizer(lr).minimize(loss)

# Create the session and initialize every variable in the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())


def _embed_document(npz_path):
    """Turn one document file into the input expected by `documents`.

    Every array stored in the .npz file is treated as one sentence: it is
    reshaped to (1, n_words, num_units), run through the first-layer LSTM,
    and the output at the last time step is kept as the sentence embedding.
    Sentences are processed in sorted key order; empty ones are ignored.

    Uses the module-level `sess`, `outputs_lstm`, `sentences` and
    `num_units`.

    Args:
        npz_path: path of the .npz file for one document.

    Returns:
        An array of shape (1, n_sentences, num_units), or None when the file
        cannot be loaded (np.load raises ValueError) or when it contains no
        non-empty sentence.
    """
    try:
        npz_file = np.load(npz_path, allow_pickle=False)
    except ValueError:
        return None

    sent_vectors = []
    # The context manager closes the archive; the original never closed it.
    with npz_file:
        for sent_name in sorted(npz_file.files):
            sent_input = np.reshape(npz_file[sent_name], (1, -1, num_units))

            # A sentence without words cannot be run through the LSTM.
            if 0 in sent_input.shape:
                continue

            lstm_out = sess.run(outputs_lstm,
                                feed_dict={sentences: sent_input})

            # Output at the last time step of the single batch element.
            sent_vectors.append(lstm_out[0, -1])

    # A document with no usable sentence would be fed as a zero-length
    # sequence. The mean over that empty axis in `final_output` is NaN, and
    # a training step on it can turn the trained weights into NaN, after
    # which every output is NaN.
    if not sent_vectors:
        return None

    return np.asarray(sent_vectors)[np.newaxis, ...]


t = time.time()

for n in xrange(epochs):
    # Becomes True if any document vector in this epoch contains NaN.
    result = False

    for j, doc in enumerate(doc_files):
        doc_input = _embed_document(doc)
        if doc_input is None:
            continue

        # The label is the document's position in `doc_files`.
        train_label = np.array([j])

        # One training step; also fetch the document embedding.
        _, doc_vector = sess.run([train, final_output], feed_dict={
            documents: doc_input, labels: train_label})

        if np.isnan(doc_vector).any():
            result = True

    print(result)
    print("Finished with epoch ", n)
    print()


doc_vecs_file_name = model_file + "_doc_vecs.zip"

with ZipFile(doc_vecs_file_name, 'w', ZIP_DEFLATED, True) as myzip:

    for doc in doc_files:
        doc_input = _embed_document(doc)
        if doc_input is None:
            print("Skipping document without usable input: ", doc)
            continue

        # Inference only: compute the document vector without training.
        doc_vec = sess.run(final_output, feed_dict={documents: doc_input})

        # Stage the vector in a temporary CSV named after the document, add
        # it to the archive, and always remove the temporary file.
        temp_file = os.path.splitext(os.path.basename(doc))[0] + ".csv"
        try:
            np.savetxt(temp_file, doc_vec, delimiter=',')
            myzip.write(temp_file)
        finally:
            if os.path.exists(temp_file):
                os.remove(temp_file)


# Save the trained variables under `model_file`.
saver = tf.train.Saver()
saver.save(sess, model_file)

print("Time taken = ", (time.time() - t))

如果需要，我可以上传一个示例数据集，您可以用它亲自运行代码。使用该示例数据集时，训练有时可以顺利完成而不出现任何NaN；但大多数情况下，NaN会在训练过程中出现。

我使用的是Anaconda发行版中的TensorFlow 1.1.0和Python 2.7.13。

神经网络输出所有分量均为NaN的向量

0 个答案: