TensorFlow 中的 InvalidArgumentError:断言失败:[标签 ID 必须 >= 0] [条件 x >= 0 未按元素成立]

时间:2018-05-16 16:24:49

标签: tensorflow machine-learning

我在 Python 2.7、TensorFlow 1.4.0 环境下实现 DNNLinearCombinedClassifier 时遇到了错误。我从 TensorFlow 的 tf.estimator 快速入门教程中获取了示例代码,想用自己的数据集运行它:输入是评论文本(comment),标签是 82 个不同的类别(字符串标签)。这是我的实现:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import multiprocessing
import six
import tensorflow as tf

# Layout of one CSV record, in file order. Columns that the model does not
# consume are listed here as well so that rows can still be parsed.
CSV_COLUMNS = [
    'comment_english',
    'comment_sentiment',
    'keyword',
    'syntax_lemma',
    'section_name',
    'section_code',
]

# Every column is read as a string; a missing field becomes ''.
CSV_COLUMN_DEFAULTS = [[''] for _ in CSV_COLUMNS]

# The label is the last CSV column.
LABEL_COLUMN = CSV_COLUMNS[-1]

# The 82 class labels. Each value carries a leading space, exactly as written
# here; the strings are compared verbatim, so do not strip them.
LABELS = [
    ' TS4CFS6', ' TS3CFS6', ' TS6CFS6', ' TS7CSQ4', ' TS7CT3', ' TS2CC8', ' TS2CC4',
    ' TS7CT2', ' TS7CSQ2', ' TS2CP5', ' TS7CHP1', ' TS1CSQ4', ' TS2CC9', ' TS7CSQ1',
    ' TS2CC1', ' TS2CC5', ' TS3CFS2', ' TS4CFS2', ' TS6CFS2', ' TS3CFS1', ' TS1CSQ3',
    ' TS6CSQ2', ' TS2CC3', ' TS6CSQ4', ' TS2CC2', ' TS1CPS3', ' TS2CTR2',
    ' TS6CSQ1', ' TS1CPS1', ' TS1CSQ1', ' TS5CSQ2', ' TS3CSQ2', ' TS4CSQ2', ' TS3CTR1',
    ' TS6CFS1', ' TS6CSQ5', ' TS6CTR1', ' TS1CPS2', ' TS4CSQ4', ' TS3CFS3', ' TS6CFS3',
    ' TS4CFS3', ' TS5CFS3', ' TS3CSQ1', ' TS4CTR1', ' TS2CP2', ' TS1CPS5', ' TS7CT1',
    ' TS2CC6', ' TS7CT4', ' TS2CP4', ' TS5CTR1', ' TS4CSQ1', ' TS5CSQ1', ' TS2CP1',
    ' TS5CSQ4', ' TS5CSQ5', ' TS5CFS1', ' TS3CSQ4', ' TS7CSQ3', ' TS5CFS5', ' TS4CSQ3',
    ' TS1CTR1', ' TS2CTR1', ' TS2CC7', ' TS6CSQ3', ' TS5CSQ3', ' TS3CSQ3', ' TS1CSQ2',
    ' TS3CSQ5', ' TS3CFS4', ' TS5CFS4', ' TS4CFS4', ' TS6CFS4', ' TS5CFS2', ' TS4CSQ5',
    ' TS6CFS5', ' TS3CFS5', ' TS4CFS5', ' TS4CFS1', ' TS2CP3', ' TS1CPS4',
]

# Define the initial ingestion of each feature used by the model.
# The order of this list matters: build_estimator unpacks it positionally.
INPUT_COLUMNS = [
    # Free-text column with an open-ended set of values: hash each value
    # into one of a fixed number of buckets.
    tf.feature_column.categorical_column_with_hash_bucket(
        'comment_english', hash_bucket_size=1000, dtype=tf.string),

    # Categorical column whose values are known ahead of time. The
    # vocabulary entries keep their leading space on purpose.
    tf.feature_column.categorical_column_with_vocabulary_list(
        'comment_sentiment', [' NEGATIVE', ' POSITIVE', ' MIXED']),

    # Columns with many or unknown values: hash them into buckets.
    tf.feature_column.categorical_column_with_hash_bucket(
        'keyword', hash_bucket_size=100, dtype=tf.string),
    tf.feature_column.categorical_column_with_hash_bucket(
        'syntax_lemma', hash_bucket_size=100, dtype=tf.string),
]  # This closing bracket was missing, which made the module a syntax error.

# CSV columns that are neither model inputs nor the label.
UNUSED_COLUMNS = set(CSV_COLUMNS) - {col.name for col in INPUT_COLUMNS} - {LABEL_COLUMN}

def build_estimator(config, embedding_size=4, hidden_units=None):
    """Build a wide-and-deep classifier over the comment features.

    The columns in INPUT_COLUMNS are fed to both halves of a
    tf.estimator.DNNLinearCombinedClassifier: directly (plus one crossed
    column) to the linear part, and as indicator/embedding columns to the
    DNN part.

    Args:
      config: run configuration passed through to the estimator as `config`.
      embedding_size: int, the number of dimensions used for the `keyword`
        and `syntax_lemma` embedding columns of the DNN.
      hidden_units: [int], the layer sizes of the DNN. Falls back to
        [100, 70, 50, 25] when None or empty.

    Returns:
      A tf.estimator.DNNLinearCombinedClassifier with one class per entry
      in LABELS.
    """
    # Positional unpacking: must match the order of INPUT_COLUMNS.
    (comment_english, comment_sentiment, keyword, syntax_lemma) = INPUT_COLUMNS

    # Linear ("wide") part: the raw categorical columns plus one feature
    # cross between keyword and syntax_lemma.
    wide_columns = [
        tf.feature_column.crossed_column(
            ['keyword', 'syntax_lemma'], hash_bucket_size=int(1e4)),
        comment_english,
        comment_sentiment,
        keyword,
        syntax_lemma,
    ]

    # DNN ("deep") part: categorical columns must be wrapped in a dense
    # representation before a DNN can consume them.
    deep_columns = [
        # Indicator (multi-hot) encodings.
        tf.feature_column.indicator_column(comment_english),
        tf.feature_column.indicator_column(comment_sentiment),

        # Learned embeddings of size `embedding_size`.
        tf.feature_column.embedding_column(
            keyword, dimension=embedding_size),
        tf.feature_column.embedding_column(syntax_lemma, dimension=embedding_size),
    ]

    # n_classes defaults to 2; the data has one class per entry in LABELS,
    # so it has to be stated explicitly.
    # NOTE(review): if the input_fn yields the raw string labels, also pass
    # label_vocabulary=LABELS here; if it already maps labels to integer ids,
    # check that every label in the data matches an entry of LABELS exactly
    # (including the leading space) - an unmatched label would presumably
    # map to a negative id. Confirm against the input_fn.
    return tf.estimator.DNNLinearCombinedClassifier(
        config=config,
        n_classes=len(LABELS),
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=hidden_units or [100, 70, 50, 25],
    )

0 个答案:

没有答案