我正在尝试在TensorFlow中训练一个LinearRegressor。我一直在研究网站上的教程,现在正尝试将其应用到我自己的数据集中。
做了许多修改之后,仍然会出现类似的错误,尤其是与“实际传入的数据类型”和“期望的数据类型”不一致有关的错误。
import pandas as pd
import tempfile
# Column names applied to the input files, in file order (passed as `names=`).
COLUMNS = [
    'imp_time', 'width', 'height',
    'geo_region', 'venue_id', 'seller_member_id',
    'site_domain', 'tag_id', 'geo_city', 'fold_position', 'event_type',
]

train_file = 'imp-train.csv'
test_file = 'imp-test.csv'

# The first line of each file is skipped (skiprows=1) and COLUMNS is used as
# the header instead; rows containing any missing value are then dropped.
df_train = pd.read_table(
    train_file, names=COLUMNS, skipinitialspace=True, skiprows=1)
df_train = df_train.dropna()
df_test = pd.read_table(
    test_file, names=COLUMNS, skipinitialspace=True, skiprows=1)
df_test = df_test.dropna()

# Binary target: 1 where event_type is "click", 0 for everything else.
LABEL_COLUMN = "label"
df_train[LABEL_COLUMN] = (df_train["event_type"] == "click").astype(int)
df_test[LABEL_COLUMN] = (df_test["event_type"] == "click").astype(int)

# Every model input is treated as categorical; there are no continuous inputs.
CATEGORICAL_COLUMNS = [
    "width", "height", "geo_region", "venue_id",
    "seller_member_id", "site_domain", "tag_id", "geo_city", "fold_position",
]
CONTINUOUS_COLUMNS = []
import tensorflow as tf
def input_fn(df):
    """Build the feature tensors and the label tensor for one DataFrame.

    Args:
        df: pandas DataFrame containing every column named in
            CONTINUOUS_COLUMNS and CATEGORICAL_COLUMNS, plus LABEL_COLUMN.

    Returns:
        A ``(feature_cols, label)`` tuple. ``feature_cols`` maps each column
        name to a ``tf.constant`` (continuous columns) or a
        ``tf.SparseTensor`` of shape ``[rows, 1]`` (categorical columns);
        ``label`` is a ``tf.constant`` holding the LABEL_COLUMN values.
    """
    # Continuous features: one dense constant Tensor per column.
    continuous_cols = {
        k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS
    }

    # Categorical features: one SparseTensor per column, one entry per row.
    categorical_cols = {}
    for k in CATEGORICAL_COLUMNS:
        num_rows = df[k].size
        # The hash-bucket feature columns declared in this file hash their
        # input with StringToHashBucketFast, which rejects int64 values
        # ("has type int64 that does not match expected type of string"),
        # so numeric categorical columns are converted to strings here.
        string_values = df[k].astype(str).values
        # Arguments are positional (indices, values, shape) because the
        # third keyword was renamed from `shape` to `dense_shape` in
        # TensorFlow 1.0; positional passing works with either name.
        categorical_cols[k] = tf.SparseTensor(
            [[i, 0] for i in range(num_rows)],
            string_values,
            [num_rows, 1])

    # Merge the two dicts without list concatenation of .items(), which
    # only works on Python 2.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)

    # The label column becomes a dense constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label
def train_input_fn():
    """Return the (features, label) pair built from the training DataFrame."""
    features, label = input_fn(df_train)
    return features, label
def eval_input_fn():
    """Return the (features, label) pair built from the test DataFrame."""
    features, label = input_fn(df_test)
    return features, label
# Base categorical feature columns. Each one hashes the values of the named
# input feature into `hash_bucket_size` buckets.
_hashed_column = tf.contrib.layers.sparse_column_with_hash_bucket
width = _hashed_column("width", hash_bucket_size=100)
height = _hashed_column("height", hash_bucket_size=100)
geo_region = _hashed_column("geo_region", hash_bucket_size=10000)
venue_id = _hashed_column("venue_id", hash_bucket_size=10000)
seller_member_id = _hashed_column("seller_member_id", hash_bucket_size=10000)
site_domain = _hashed_column("site_domain", hash_bucket_size=10000)
tag_id = _hashed_column("tag_id", hash_bucket_size=100000)
fold_position = _hashed_column("fold_position", hash_bucket_size=10)

# Crossed column combining width and height; these two are only fed to the
# model through this cross, not as individual columns.
width_x_height = tf.contrib.layers.crossed_column(
    [width, height], hash_bucket_size=10000)

# Build the linear model; checkpoints go to a fresh temporary directory.
model_dir = tempfile.mkdtemp()
_linear_feature_columns = [
    geo_region,
    venue_id,
    seller_member_id,
    site_domain,
    tag_id,
    fold_position,
    width_x_height,
]
_ftrl_optimizer = tf.train.FtrlOptimizer(
    learning_rate=0.1,
    l1_regularization_strength=1.0,
    l2_regularization_strength=1.0)
m = tf.contrib.learn.LinearRegressor(
    feature_columns=_linear_feature_columns,
    optimizer=_ftrl_optimizer,
    model_dir=model_dir)

# Train for 200 steps on the training input function.
m.fit(input_fn=train_input_fn, steps=200)
我的错误跟踪如下:
TypeError Traceback (most recent call last)
<ipython-input-83-4f4e07dac1eb> in <module>()
11
12 # train model
---> 13 m.fit(input_fn=train_input_fn, steps=200)
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
238 steps=steps,
239 monitors=monitors,
--> 240 max_steps=max_steps)
241 logging.info('Loss for final step: %s.', loss)
242 return self
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _train_model(self, input_fn, steps, feed_fn, init_op, init_feed_fn, init_fn, device_fn, monitors, log_every_steps, fail_on_nan_loss, max_steps)
548 features, targets = input_fn()
549 self._check_inputs(features, targets)
--> 550 train_op, loss_op = self._get_train_ops(features, targets)
551
552 # Add default monitors.
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.pyc in _get_train_ops(self, features, targets)
334 raise ValueError("SDCAOptimizer does not currently support regression.")
335 self._validate_linear_feature_columns(features)
--> 336 return super(LinearRegressor, self)._get_train_ops(features, targets)
337
338 def _get_eval_ops(self, features, targets, metrics=None):
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _get_train_ops(self, features, targets)
180
181 features = self._get_feature_dict(features)
--> 182 logits = self._logits(features, is_training=True)
183 if self._enable_centered_bias:
184 centered_bias_step = [self._centered_bias_step(targets, features)]
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _logits(self, features, is_training)
269 logits = self._dnn_logits(features, is_training)
270 else:
--> 271 logits = self._linear_logits(features, is_training)
272
273 if self._enable_centered_bias:
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _linear_logits(self, features, is_training)
231 def _linear_logits(self, features, is_training):
232 return self._linear_model.build_model(
--> 233 features, self._linear_feature_columns, is_training)
234
235 def _centered_bias(self):
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/composable_model.pyc in build_model(self, features, feature_columns, is_training)
175 num_outputs=self._num_label_columns,
176 weight_collections=[self._weight_collection_name],
--> 177 scope=scope)
178 return logits
179
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections, trainable, scope)
176 for column in sorted(set(feature_columns), key=lambda x: x.key):
177 try:
--> 178 transformed_tensor = transformer.transform(column)
179 predictions, variable = column.to_weighted_sum(transformed_tensor,
180 num_outputs,
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in transform(self, feature_column)
382 return self._columns_to_tensors[feature_column]
383
--> 384 feature_column.insert_transformed_feature(self._columns_to_tensors)
385
386 if feature_column not in self._columns_to_tensors:
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column.pyc in insert_transformed_feature(self, columns_to_tensors)
362 columns_to_tensors[self.name].values,
363 self.bucket_size,
--> 364 name=self.name + "_lookup")
365 columns_to_tensors[self] = ops.SparseTensor(
366 columns_to_tensors[self.name].indices, sparse_id_values,
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_string_ops.pyc in string_to_hash_bucket_fast(input, num_buckets, name)
183 """
184 result = _op_def_lib.apply_op("StringToHashBucketFast", input=input,
--> 185 num_buckets=num_buckets, name=name)
186 return result
187
/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords)
461 if input_arg.type != types_pb2.DT_INVALID:
462 raise TypeError("%s expected type of %s." %
--> 463 (prefix, dtypes.as_dtype(input_arg.type).name))
464 else:
465 raise TypeError(
TypeError: Input 'input' of 'StringToHashBucketFast' Op has type int64 that does not match expected type of string.
我不太确定我传递给 StringToHashBucketFast 操作的输入到底是什么。我已经逐一尝试过各个单独的部分,但在调用 fit 时仍然会出错。
期待有高手帮忙!
答案 0 :(得分:2)
StringToHashBucketFast 不接受 int64 类型的输入。
这条错误消息的意思是:您把一个或多个特征列声明成了字符串类型(使用 tf.contrib.layers.sparse_column_with_hash_bucket 时会隐式地这样做),但您实际提供的特征值却是 int64 类型。