NaN in the filter matrix while training a CNN in TensorFlow

Time: 2017-01-20 09:13:34

Tags: python tensorflow convolution

I am trying to implement CNN text classification in TensorFlow using this link. However, instead of learning the embedding weights, I provide predefined embeddings. My input dimensions are 33 × 160.
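Since the embeddings are precomputed, each example is fed directly as a dense 33 × 160 matrix rather than looked up from a trainable table. A minimal sketch of what one feed batch might look like (the random data is a placeholder for the real embeddings, and the batch size of 8 is arbitrary):

import numpy as np

# A batch of 8 documents, each a 33 x 160 matrix of precomputed
# embedding vectors (33 tokens, 160 embedding dimensions).
x_text_train = np.random.rand(8, 33, 160).astype(np.float32)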

Text CNN

import tensorflow as tf
import re


class TextCNN(object):
    def __init__(self, filter_sizes, num_filters, vec_shape, l2_reg_lambda=0.0, num_classes=2):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, vec_shape[0], vec_shape[1]], name="X_train")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="Y_train")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        l2_loss = tf.constant(0.0)
        self.input_x_expanded = tf.expand_dims(self.input_x, -1)
        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, vec_shape[1], 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                self.weight = W
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.input_x_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                # TODO replace relu with tanh
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, vec_shape[0] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
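The training snippet below references train_op, global_step and grads_and_vars without showing where they are defined. For context, a minimal sketch of the surrounding setup, assuming the standard Adam-based training procedure from the linked tutorial (the filter sizes, filter count and learning rate are placeholder values, not taken from the question):

import tensorflow as tf

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        cnn = TextCNN(filter_sizes=[3, 4, 5], num_filters=128, vec_shape=(33, 160))

        # The names referenced (but not defined) in the train_step snippet below.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        sess.run(tf.global_variables_initializer())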

Code explanation

vec_shape = the dimensions of the input, i.e. 33 × 160.

Training:

import datetime

def train_step(x_text_train, y_batch):
    feed_dict = {
        cnn.input_x: x_text_train,
        cnn.input_y: y_batch,
        cnn.dropout_keep_prob: CNN.FLAGS.dropout_keep_prob
    }
    _, step, loss, accuracy, weight, scores = sess.run(
        [train_op, global_step, cnn.loss, cnn.accuracy, cnn.weight, cnn.scores],
        feed_dict)
    time_str = datetime.datetime.now().isoformat()
    print(grads_and_vars)  # prints the (gradient, variable) graph objects, not their values
    print(scores)
    print(weight)
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

batch_iter = CNN.get_batches()
for batch in batch_iter:
    X_train, y_train = zip(*batch)
    train_step(X_train, y_train)
    current_step = tf.train.global_step(sess, global_step)
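CNN.get_batches() is not shown; from the zip(*batch) unpacking, it presumably yields iterables of (input, label) pairs. A hypothetical stand-in, purely for illustration:

import numpy as np

def get_batches(data, labels, batch_size=64):
    # Hypothetical replacement for CNN.get_batches(): yields lists of
    # (input_matrix, label) pairs, matching the zip(*batch) unpacking above.
    for start in range(0, len(data), batch_size):
        yield list(zip(data[start:start + batch_size],
                       labels[start:start + batch_size]))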

After some training steps (around 48), the weight variable becomes NaN.

At step 47, the weight variable is:

[[[[ 0.10174427  0.03087347  0.04683167 ..., -0.13801393 -0.04721565
    -0.15926319]]

  [[ 0.0639746   0.03126816  0.18144369 ..., -0.03434205 -0.09856486
    -0.00755746]]]]

At step 48, the weight variable is:

[[[[        nan         nan         nan ...,         nan         nan
            nan]]
 [[        nan         nan         nan ...,         nan         nan
            nan]]]]
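A loss that climbs for a few dozen steps and then turns every weight to NaN is often a symptom of exploding gradients or a numerically unstable loss. One way to pinpoint where the first non-finite value appears, assuming the setup sketched above (cnn, sess, train_op):

import tensorflow as tf

# After the graph is built, add a check for every floating-point tensor.
# Running check_op raises InvalidArgumentError the moment any tensor
# contains a NaN or Inf, identifying the op where the blow-up starts.
check_op = tf.add_check_numerics_ops()

def debug_step(x_batch, y_batch):
    feed_dict = {
        cnn.input_x: x_batch,
        cnn.input_y: y_batch,
        cnn.dropout_keep_prob: 1.0,  # disable dropout while debugging
    }
    # Fails loudly at the first op that produces a NaN or Inf.
    sess.run([train_op, check_op], feed_dict)

If the gradients turn out to be the source, clipping them before apply_gradients (e.g. with tf.clip_by_norm) or lowering the learning rate are common mitigations.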

0 Answers:

No answers yet.