How to use TensorFlow

Date: 2017-01-29 01:10:12

Tags: python python-3.x machine-learning tensorflow conv-neural-network

I know this is a very broad question, but I have asked many other questions and I still cannot correctly implement a simple dynamic k-max pooling convolutional neural network as described in this paper. I am currently trying to modify the code from this tutorial. I believe I have implemented the dynamic-k part successfully. My main problem, however, is that because the value of k is different for each input, the resulting tensors have different shapes. I have tried countless things to fix this (which is why you may see some funny reshaping), but I can't figure it out. I think I need to pad every tensor up to the size of the largest one, but I can't seem to get that working. Here is my code (sorry, it is generally quite sloppy).

# train.py
import datetime
import time

import numpy as np
import os
import tensorflow as tf
from env.src.sentiment_analysis.dcnn.text_dcnn import TextDCNN
from env.src.sentiment_analysis.cnn import data_helpers as data_helpers
from tensorflow.contrib import learn

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 256, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 200, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
tf.flags.DEFINE_string("positive_file", "../rotten_tomatoes/rt-polarity.pos", "Location of the rt-polarity.pos file")
tf.flags.DEFINE_string("negative_file", "../rotten_tomatoes/rt-polarity.neg", "Location of the rt-polarity.neg file")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

print("\nParameters:")

for attr, value in sorted(FLAGS.__flags.items()):
    print("{} = {}".format(attr.upper(), value))

print("")


# Data Preparation

# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_file, FLAGS.negative_file)

# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))

x_arr = np.array(x_text)

seq_lens = []

for s in x_arr:
    seq_lens.append(len(s.split(" ")))

# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# Split train/test set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]

print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement
    )
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        print("HERE")
        print(x_train.shape)
        dcnn = TextDCNN(
            sequence_lengths=seq_lens,
            sequence_length=x_train.shape[1],
            num_classes=y_train.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
        )

        # The training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = optimizer.compute_gradients(dcnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", dcnn.loss)
        acc_summary = tf.scalar_summary("accuracy", dcnn.accuracy)

        # Summaries for training
        train_summary_op = tf.merge_summary([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)

        # Summaries for devs
        dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)

        # Checkpointing
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")

        # TensorFlow assumes this directory already exists, so we need to create it
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.all_variables())

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.initialize_all_variables())

        def train_step(x_batch, y_batch):
            """
            A single training step.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values

            Returns: void
            """

            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }

            # Execute train_op
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            # Print and save to disk loss and accuracy of the current training batch
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates a model on a dev set.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values.
                writer: The writer to use to record the loss and accuracy

            Returns: void
            """
            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob : 1.0
            }

            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

Here is the actual DCNN class:

import tensorflow as tf


class TextDCNN(object):
    """
    A CNN for NLP tasks. Architecture is as follows:
    Embedding layer, conv layer, max-pooling and softmax layer
    """

    def __init__(self, sequence_lengths, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters):
        """
        Makes a new CNNClassifier
        Args:
            sequence_length: The length of each sentence
            num_classes: Number of classes in the output layer (positive and negative would be 2 classes)
            vocab_size: The size of the vocabulary, needed to define the size of the embedding layer
            embedding_size: Dimensionality of the embeddings
            filter_sizes: Number of words the convolutional filters will cover, there will be num_filters for each size
            specified.
            num_filters: The number of filters per filter size.

        Returns: A new CNNClassifier with the given parameters.

        """
        # Define the inputs and the dropout
        print("SEQL")
        print(sequence_length)
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Runs the operations on the CPU and organizes them into an embedding scope
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            W = tf.Variable(  # Make a 4D tensor to store batch, width, height, and channel
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W"
            )

            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Conv layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                # W is the filter matrix
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv"
                )

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Max-pooling layer over the outputs

                print(sequence_lengths[i] - filter_size + 1)
                print(h)

                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_lengths[i] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool"
                )

                pooled = tf.reshape(pooled, [-1, 1, 1, num_filters])

                print(pooled)

                pooled_outputs.append(pooled)

        # Combine all of the pooled features
        num_filters_total = num_filters * len(filter_sizes)

        max_shape = tf.reduce_max(pooled_outputs, 1)
        print("shapes")
        print([p.get_shape() for p in pooled_outputs])

        # pooled_outputs = [tf.pad(p, [[0, int(max_shape.get_shape()[0]) - int(p.get_shape()[0])], [0, 0], [0, 0], [0, 0]]) for p in pooled_outputs]
        # pooled_outputs = [tf.reshape(p, [-1, 1, 1, num_filters]) for p in pooled_outputs]

        # pooled_outputs = [tf.reshape(out, [-1, 1, 1, self.max_length]) for out in pooled_outputs]

        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        print("here")
        print(self.h_pool_flat)
        self.h_pool_flat = tf.reshape(self.h_pool, [max(sequence_lengths), num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            # casted = tf.cast(self.dropout_keep_prob, tf.int32)
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
            self.h_drop = tf.reshape(self.h_drop, [-1, num_filters_total])

        # Do raw predictions (no softmax)
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # xw_plus_b(...) is just Wx + b matmul alias
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            # softmax_cross_entropy_with_logits(...) calculates cross-entropy loss
            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
            '''print("here")
            print(losses.get_shape())
            print(self.scores.get_shape())
            print(self.input_y.get_shape())'''
            self.loss = tf.reduce_mean(losses)

        # Calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

I am using the Rotten Tomatoes sentiment-labelled dataset. The current error I am getting is:

InvalidArgumentError (see above for traceback): input[1,0] mismatch: 5888 vs. 4864
     [[Node: gradients/concat_grad/ConcatOffset = ConcatOffset[N=3, _device="/job:localhost/replica:0/task:0/cpu:0"](concat/concat_dim, gradients/concat_grad/ShapeN, gradients/concat_grad/ShapeN:1, gradients/concat_grad/ShapeN:2)]]

How can I fix this code so that, after pooling, all the tensors are normalized to the same size (while keeping the pooling dynamic) and the code runs to completion?

Sorry for all the randomly commented-out lines, print statements, and so on, but I have been experimenting extensively.
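For reference, the kind of padding I have been trying to describe would look roughly like this. This is only a sketch, not working code; pad_pooled_outputs, pooled_outputs, and k_values are hypothetical stand-ins for my real variables:

import tensorflow as tf

def pad_pooled_outputs(pooled_outputs, k_values):
    # Pad each pooled tensor of shape [batch, k_i, 1, num_filters] with zeros
    # along its second dimension so that they all become
    # [batch, k_max, 1, num_filters] and can be concatenated.
    k_max = max(k_values)
    padded = []
    for pooled, k in zip(pooled_outputs, k_values):
        padded.append(tf.pad(pooled, [[0, 0], [0, k_max - k], [0, 0], [0, 0]]))
    return padded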

2 Answers:

Answer 0 (score: 0)

There are three things to note here.

  1. max-pooling and k-max pooling are two different operations: max-pooling retrieves the maximum activation from the pooling window, whereas k-max pooling retrieves the k largest values from the pooling window.

  2. As of now, TensorFlow does not provide an API for k-max pooling. What you are trying right now is a max-pooling operation, not a k-max pooling operation.

  3. As far as I know, TensorFlow does not provide functionality to handle pooling that produces matrices of different sizes. So you could use bucketing to create batches of sentences of similar length and then apply k-max pooling; a rough sketch of the bucketing idea follows below.
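A minimal sketch of that bucketing step, assuming plain Python lists of sentences and labels (the helper name and bucket_width are made up for illustration):

from collections import defaultdict

def bucket_by_length(sentences, labels, bucket_width=5):
    # Group (sentence, label) pairs by token length so that each batch only has
    # to be padded/pooled against the maximum length within its own bucket.
    buckets = defaultdict(list)
    for sentence, label in zip(sentences, labels):
        length = len(sentence.split(" "))
        buckets[length // bucket_width].append((sentence, label))
    return buckets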

Answer 1 (score: 0)

Although TensorFlow does not provide k-max pooling directly, I think tf.nn.top_k could help you build that operation.
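For example, something along these lines (an untested sketch; note that tf.nn.top_k sorts by value, so this version does not preserve the original order of the selected activations, which the paper's k-max pooling keeps):

import tensorflow as tf

def k_max_pool(x, k):
    # x: [batch, time, channels]; pool the k largest values along the time axis.
    x_t = tf.transpose(x, perm=[0, 2, 1])        # [batch, channels, time]
    values, _ = tf.nn.top_k(x_t, k=k)            # k largest values per channel
    return tf.transpose(values, perm=[0, 2, 1])  # [batch, k, channels]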