ValueError from AdamOptimizer.compute_gradients due to input size

Asked: 2017-01-28 22:54:33

Tags: python python-3.x optimization machine-learning tensorflow

I'm not sure why, but when I call compute_gradients on my AdamOptimizer with the learning rate set to 1e-4, I get the following error:

ValueError: input has 102144 elements, which isn't divisible by 91008

Here is the relevant code snippet:

    optimizer = tf.train.AdamOptimizer(1e-4)
    print(dcnn.loss)
    grads_and_vars = optimizer.compute_gradients(dcnn.loss)

That print statement outputs:

Tensor("loss/Mean:0", shape=(), dtype=float32)

I understand what the error means (the loss is the wrong size). But how do I fix this?

Here is my full code (I am trying to build a dynamic convolutional neural network):

# train.py

import datetime
import time

import numpy as np
import os
import tensorflow as tf
from env.src.sentiment_analysis.dcnn.text_dcnn import TextDCNN
from env.src.sentiment_analysis.cnn import data_helpers as data_helpers
from tensorflow.contrib import learn

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 200, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
tf.flags.DEFINE_string("positive_file", "../rotten_tomatoes/rt-polarity.pos", "Location of the rt-polarity.pos file")
tf.flags.DEFINE_string("negative_file", "../rotten_tomatoes/rt-polarity.neg", "Location of the rt-polarity.neg file")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

print("\nParameters:")

for attr, value in sorted(FLAGS.__flags.items()):
    print("{} = {}".format(attr.upper(), value))

print("")


# Data Preparation

# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_file, FLAGS.negative_file)

# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))

x_arr = np.array(x_text)

seq_lens = []

for s in x_arr:
    seq_lens.append(len(s))

# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# Split train/test set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]

print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement
    )
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        dcnn = TextDCNN(
            sequence_lengths=seq_lens,
            num_classes=y_train.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
        )

        # The training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = optimizer.compute_gradients(dcnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", dcnn.loss)
        acc_summary = tf.scalar_summary("accuracy", dcnn.accuracy)

        # Summaries for training
        train_summary_op = tf.merge_summary([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)

        # Summaries for devs
        dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)

        # Checkpointing
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")

        # TensorFlow assumes this directory already exists so we need to create it
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.all_variables())

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.initialize_all_variables())

        def train_step(x_batch, y_batch):
            """
            A single training step.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values

            Returns: void
            """

            print(dcnn.input_x)
            print(x_batch)
            print(dcnn.input_y)
            print(y_batch)

            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }

            # Execute train_op
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            # Print and save to disk loss and accuracy of the current training batch
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates a model on a dev set.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values.
                writer: The writer to use to record the loss and accuracy

            Returns: void
            """
            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob : 1.0
            }

            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

Here is the second file (sorry about the indentation):

# text_dcnn.py

import tensorflow as tf

class TextDCNN(object):

    """
    A CNN for NLP tasks. Architecture is as follows:
    Embedding layer, conv layer, max-pooling and softmax layer
    """

    def __init__(self, sequence_lengths, num_classes, vocab_size, embedding_size, filter_sizes, num_filters):
        """
        Makes a new CNNClassifier
        Args:
            sequence_lengths: The lengths of the sentences
            num_classes: Number of classes in the output layer (positive and negative would be 2 classes)
            vocab_size: The size of the vocabulary, needed to define the size of the embedding layer
            embedding_size: Dimensionality of the embeddings
            filter_sizes: Number of words the convolutional filters will cover, there will be num_filters for each size
            specified.
            num_filters: The number of filters per filter size.

        Returns: A new CNNClassifier with the given parameters.

        """
        # Define the inputs and the dropout
        self.max_length = max([l for l in sequence_lengths])

        self.input_x = tf.placeholder(tf.int32, [None, self.max_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Runs the operations on the CPU and organizes them into an embedding scope
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            W = tf.Variable(  # W is the embedding matrix, shaped [vocab_size, embedding_size]
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W"
            )

            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Conv layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                # W is the filter matrix
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv"
                )

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Max-pooling layer over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_lengths[i] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool"
                )
                pooled_outputs.append(pooled)

        # Combine all of the pooled features
        num_filters_total = num_filters * len(filter_sizes)

        pooled_outputs = [tf.reshape(out, [-1, 94, 1, self.max_length]) for out in pooled_outputs]

        self.h_pool = tf.concat(3, pooled_outputs)

        # self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            # casted = tf.cast(self.dropout_keep_prob, tf.int32)
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Do raw predictions (no softmax)
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # xw_plus_b(...) is just Wx + b matmul alias
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            # softmax_cross_entropy_with_logits(...) calculates cross-entropy loss
            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses)

        # Calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

The training data I am using is the Rotten Tomatoes dataset of movie reviews labeled as positive and negative.

1 Answer:

Answer 0 (score: 0):

This sounds like a problem with self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x).

The input from the Rotten Tomatoes dataset probably doesn't fit one of your placeholder variables. I would double-check that.
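
One minimal way to do that double-check is a sketch along these lines; it only reuses the variable names already defined in train.py above and assumes it is placed right after dcnn is constructed:

    # Compare what train.py will feed against what the TextDCNN placeholders expect.
    # Note: seq_lens is built from len(s) of the raw review strings (character counts),
    # while x comes from the VocabularyProcessor (token counts), so the two widths can differ.
    print("x (vectorized input) shape:", x.shape)              # (num_examples, max_document_length)
    print("max_document_length (tokens):", max_document_length)
    print("max(seq_lens) (characters):", max(seq_lens))
    print("dcnn.input_x expects:", dcnn.input_x.get_shape())   # [None, max(seq_lens)]
    print("dcnn.input_y expects:", dcnn.input_y.get_shape())   # [None, num_classes]

If the width of x doesn't match what dcnn.input_x expects, that mismatch is worth fixing before looking at the optimizer, since every shape downstream of the embedding lookup depends on it.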