python - tensorflow - 输入到StringToHashBucketFast操作类型错误

时间:2016-09-29 20:12:39

标签: python tensorflow

我正在尝试在TensorFlow中训练一个LinearRegressor。我一直在研究网站上的教程,现在正尝试将其应用到我自己的数据集中。

做了许多修改之后,仍然不断出现类似的错误,主要都与"传入的数据类型"和"期望的数据类型"不一致有关。

import pandas as pd
import tempfile
# Column names applied to both files; the files' own header row is skipped
# (skiprows=1) and replaced by these names.
COLUMNS = [
    'imp_time', 'width', 'height', 'geo_region', 'venue_id',
    'seller_member_id', 'site_domain', 'tag_id', 'geo_city',
    'fold_position', 'event_type',
]

# Name of the derived 0/1 target column added to each DataFrame below.
LABEL_COLUMN = "label"

# Feature groups consumed by input_fn; there are no continuous features.
CATEGORICAL_COLUMNS = [
    "width", "height", "geo_region", "venue_id", "seller_member_id",
    "site_domain", "tag_id", "geo_city", "fold_position",
]
CONTINUOUS_COLUMNS = []

train_file = 'imp-train.csv'
test_file = 'imp-test.csv'

# NOTE(review): read_table splits on tabs by default although the files are
# named .csv -- confirm the delimiter matches the data.
# Rows containing any missing value are discarded.
df_train = pd.read_table(train_file, names=COLUMNS, skiprows=1,
                         skipinitialspace=True)
df_train = df_train.dropna()
df_test = pd.read_table(test_file, names=COLUMNS, skiprows=1,
                        skipinitialspace=True)
df_test = df_test.dropna()

# Label is 1 for "click" events and 0 for every other event type.
df_train[LABEL_COLUMN] = df_train["event_type"].apply(
    lambda event: int(event == "click")).astype(int)
df_test[LABEL_COLUMN] = df_test["event_type"].apply(
    lambda event: int(event == "click")).astype(int)
import tensorflow as tf

def input_fn(df):
    """Convert a DataFrame into the (features, label) pair used by the estimator.

    Args:
        df: DataFrame holding every column named in CONTINUOUS_COLUMNS,
            CATEGORICAL_COLUMNS and LABEL_COLUMN.

    Returns:
        A tuple (feature_cols, label): feature_cols maps each feature name to
        a constant Tensor (continuous) or a SparseTensor (categorical), and
        label is a constant Tensor built from the LABEL_COLUMN values.
    """
    # Continuous features: one dense constant Tensor per column.
    dense_features = {}
    for name in CONTINUOUS_COLUMNS:
        dense_features[name] = tf.constant(df[name].values)

    # Categorical features: one SparseTensor of shape [n_rows, 1] per column,
    # with a single entry at position [row, 0] for every row.
    # The values keep whatever dtype pandas inferred; no cast is applied here.
    sparse_features = {}
    for name in CATEGORICAL_COLUMNS:
        column = df[name]
        n_rows = column.size
        sparse_features[name] = tf.SparseTensor(
            indices=[[row, 0] for row in range(n_rows)],
            values=column.values,
            shape=[n_rows, 1])

    # Merge both groups into a single name -> tensor dict.
    feature_cols = dict(dense_features.items() + sparse_features.items())

    # The target column becomes a dense constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label

def train_input_fn():
    """Return the (features, label) tensors built from the training DataFrame."""
    return input_fn(df_train)

def eval_input_fn():
    """Return the (features, label) tensors built from the test DataFrame."""
    return input_fn(df_test)   

# Base categorical feature columns. Each column is looked up by name in the
# feature dict returned by input_fn and is hashed into hash_bucket_size buckets.
# NOTE(review): the hashing is performed by the StringToHashBucketFast op,
# which requires string-typed values -- confirm the dtype of the tensors that
# input_fn builds for these columns.
# NOTE(review): "geo_city" is listed in CATEGORICAL_COLUMNS but gets no feature
# column here, so it is never passed to the model -- confirm this is intended.
width = tf.contrib.layers.sparse_column_with_hash_bucket("width", hash_bucket_size=100)
height = tf.contrib.layers.sparse_column_with_hash_bucket("height", hash_bucket_size=100)
geo_region = tf.contrib.layers.sparse_column_with_hash_bucket("geo_region", hash_bucket_size=10000)
venue_id = tf.contrib.layers.sparse_column_with_hash_bucket("venue_id", hash_bucket_size=10000)
seller_member_id = tf.contrib.layers.sparse_column_with_hash_bucket("seller_member_id", hash_bucket_size=10000)
site_domain = tf.contrib.layers.sparse_column_with_hash_bucket("site_domain", hash_bucket_size=10000)
tag_id = tf.contrib.layers.sparse_column_with_hash_bucket("tag_id", hash_bucket_size=100000)
fold_position = tf.contrib.layers.sparse_column_with_hash_bucket("fold_position", hash_bucket_size=10)

# Cross width and height into one combined feature with CrossedColumn.
width_x_height = tf.contrib.layers.crossed_column([width, height], hash_bucket_size=10000)

# Build the model; model_dir is a freshly created temporary directory.
model_dir = tempfile.mkdtemp()

# width and height are not passed as standalone columns; they reach the model
# only through the width_x_height cross.
m = tf.contrib.learn.LinearRegressor(feature_columns=[
  geo_region, venue_id, seller_member_id, site_domain, tag_id, fold_position, width_x_height],
  optimizer=tf.train.FtrlOptimizer(
    learning_rate=0.1,
    l1_regularization_strength=1.0,
    l2_regularization_strength=1.0),
  model_dir=model_dir)

# Train for 200 steps on the tensors produced by train_input_fn.
m.fit(input_fn=train_input_fn, steps=200)

我的错误跟踪如下:

TypeError                                 Traceback (most recent call last)
<ipython-input-83-4f4e07dac1eb> in <module>()
     11 
     12 # train model
---> 13 m.fit(input_fn=train_input_fn, steps=200)

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    238                              steps=steps,
    239                              monitors=monitors,
--> 240                              max_steps=max_steps)
    241     logging.info('Loss for final step: %s.', loss)
    242     return self

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _train_model(self, input_fn, steps, feed_fn, init_op, init_feed_fn, init_fn, device_fn, monitors, log_every_steps, fail_on_nan_loss, max_steps)
    548       features, targets = input_fn()
    549       self._check_inputs(features, targets)
--> 550       train_op, loss_op = self._get_train_ops(features, targets)
    551 
    552       # Add default monitors.

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.pyc in _get_train_ops(self, features, targets)
    334       raise ValueError("SDCAOptimizer does not currently support regression.")
    335     self._validate_linear_feature_columns(features)
--> 336     return super(LinearRegressor, self)._get_train_ops(features, targets)
    337 
    338   def _get_eval_ops(self, features, targets, metrics=None):

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _get_train_ops(self, features, targets)
    180 
    181     features = self._get_feature_dict(features)
--> 182     logits = self._logits(features, is_training=True)
    183     if self._enable_centered_bias:
    184       centered_bias_step = [self._centered_bias_step(targets, features)]

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _logits(self, features, is_training)
    269       logits = self._dnn_logits(features, is_training)
    270     else:
--> 271       logits = self._linear_logits(features, is_training)
    272 
    273     if self._enable_centered_bias:

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _linear_logits(self, features, is_training)
    231   def _linear_logits(self, features, is_training):
    232     return self._linear_model.build_model(
--> 233         features, self._linear_feature_columns, is_training)
    234 
    235   def _centered_bias(self):

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/composable_model.pyc in build_model(self, features, feature_columns, is_training)
    175           num_outputs=self._num_label_columns,
    176           weight_collections=[self._weight_collection_name],
--> 177           scope=scope)
    178     return logits
    179 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections, trainable, scope)
    176     for column in sorted(set(feature_columns), key=lambda x: x.key):
    177       try:
--> 178         transformed_tensor = transformer.transform(column)
    179         predictions, variable = column.to_weighted_sum(transformed_tensor,
    180                                                        num_outputs,

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in transform(self, feature_column)
    382       return self._columns_to_tensors[feature_column]
    383 
--> 384     feature_column.insert_transformed_feature(self._columns_to_tensors)
    385 
    386     if feature_column not in self._columns_to_tensors:

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column.pyc in insert_transformed_feature(self, columns_to_tensors)
    362         columns_to_tensors[self.name].values,
    363         self.bucket_size,
--> 364         name=self.name + "_lookup")
    365     columns_to_tensors[self] = ops.SparseTensor(
    366         columns_to_tensors[self.name].indices, sparse_id_values,

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_string_ops.pyc in string_to_hash_bucket_fast(input, num_buckets, name)
    183   """
    184   result = _op_def_lib.apply_op("StringToHashBucketFast", input=input,
--> 185                                 num_buckets=num_buckets, name=name)
    186   return result
    187 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords)
    461             if input_arg.type != types_pb2.DT_INVALID:
    462               raise TypeError("%s expected type of %s." %
--> 463                               (prefix, dtypes.as_dtype(input_arg.type).name))
    464             else:
    465               raise TypeError(

TypeError: Input 'input' of 'StringToHashBucketFast' Op has type int64 that does not match expected type of string.

我不太确定传给 StringToHashBucketFast 操作的输入到底是什么。我已经逐个检查过各个部分,但只要一调用 fit 就会出错。

期待一些聪明人帮忙!

1 个答案:

答案 0 :(得分:2)

StringToHashBucketFast 不接受 int64 类型的输入。这条错误信息的意思是:您(通过使用 tf.contrib.layers.sparse_column_with_hash_bucket 隐式地)把一个或多个特征列声明成了字符串类型,但实际提供的特征值却是 int64。例如,可以在 input_fn 中用 df[k].astype(str).values 先把这些列的值转换为字符串,再构造 SparseTensor。