Tensorflow-无法将训练数据拟合到TensorForestEstimator

时间:2018-07-18 02:13:39

标签: python tensorflow scikit-learn

当前,我正在尝试使用Tensorflow的TensorForestEstimator实现随机森林回归。我已经成功使用scikit-learn的RandomForestRegressor完成了该任务,并希望使用Tensorflow复现相同的结果。

我使用pandas加载了数据,并使用scikit-learn的train_test_split拆分了训练集和测试集。数据包含4个特征(全部为数值型)。

>>> X_train.shape
(2711, 4)
>>> y_train.shape
(2711,)

我为树设置了参数

num_features = int(np.log2(len(clean_data.columns)))

params = ForestHParams(num_classes=1, num_features=num_features,
                       regression=True,num_trees=447, max_nodes=1000)

regressor = TensorForestEstimator(params)

由于我在原始的scikit-learn实现中将max_features参数设置为log2,因此我将num_features设置为int(np.log2(len(clean_data.columns)))

但是,当尝试拟合训练数据时,我会收到类似这样的错误

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    509                 as_ref=input_arg.is_ref,
--> 510                 preferred_dtype=default_dtype)
    511           except TypeError as err:

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
   1108     if ret is None:
-> 1109       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1110 

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
    945         "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 946         (dtype.name, t.dtype.name, str(t)))
    947   return t

ValueError: Tensor conversion requested dtype float32 for Tensor with dtype float64: 'Tensor("concat:0", shape=(?, 4), dtype=float64)'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-18-79323408f7f7> in <module>()
      1 # from tensorflow import cast, float32
      2 # X_train_cast = cast(X_train, float32)
----> 3 regressor.fit(x=X_train, y=y_train)
      4 
      5 #regressor.score()

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    430                 'in a future version' if date is None else ('after %s' % date),
    431                 instructions)
--> 432       return func(*args, **kwargs)
    433     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    434                                        _add_deprecated_arg_notice_to_docstring(

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    506     _verify_input_args(x, y, input_fn, None, batch_size)
    507     if x is not None:
--> 508       SKCompat(self).fit(x, y, batch_size, steps, max_steps, monitors)
    509       return self
    510 

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, batch_size, steps, max_steps, monitors)
   1525         steps=steps,
   1526         max_steps=max_steps,
-> 1527         monitors=all_monitors)
   1528     return self
   1529 

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    430                 'in a future version' if date is None else ('after %s' % date),
    431                 instructions)
--> 432       return func(*args, **kwargs)
    433     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    434                                        _add_deprecated_arg_notice_to_docstring(

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    522       hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps))
    523 
--> 524     loss = self._train_model(input_fn=input_fn, hooks=hooks)
    525     logging.info('Loss for final step: %s.', loss)
    526     return self

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _train_model(self, input_fn, hooks)
   1039       self._check_inputs(features, labels)
   1040       training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
-> 1041       model_fn_ops = self._get_train_ops(features, labels)
   1042       ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss)
   1043       all_hooks.extend(hooks)

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _get_train_ops(self, features, labels)
   1262       `ModelFnOps` object.
   1263     """
-> 1264     return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
   1265 
   1266   def _get_eval_ops(self, features, labels, metrics):

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _call_model_fn(self, features, labels, mode, metrics, config)
   1225     if 'model_dir' in model_fn_args:
   1226       kwargs['model_dir'] = self.model_dir
-> 1227     model_fn_results = self._model_fn(features, labels, **kwargs)
   1228 
   1229     if isinstance(model_fn_results, model_fn_lib.ModelFnOps):

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/client/random_forest.py in _model_fn(features, labels, mode)
    169 
    170     logits, tree_paths, regression_variance = graph_builder.inference_graph(
--> 171         features)
    172 
    173     summary.scalar('average_tree_size', graph_builder.average_size())

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.py in inference_graph(self, input_data, **inference_args)
    512             data_spec,
    513             sparse_features=processed_sparse_features,
--> 514             **inference_args)
    515         probabilities.append(probs)
    516         paths.append(path)

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.py in inference_graph(self, input_data, data_spec, sparse_features)
    686         sparse_shape,
    687         input_spec=data_spec.SerializeToString(),
--> 688         params=self.params.serialized_params_proto)
    689 
    690   def size(self):

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/ops/gen_model_ops.py in tree_predictions_v4(tree_handle, input_data, sparse_input_indices, sparse_input_values, sparse_input_shape, input_spec, params, name)
    467         sparse_input_values=sparse_input_values,
    468         sparse_input_shape=sparse_input_shape, input_spec=input_spec,
--> 469         params=params, name=name)
    470     _result = _op.outputs[:]
    471     _inputs_flat = _op.inputs

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    531             if input_arg.type != types_pb2.DT_INVALID:
    532               raise TypeError("%s expected type of %s." %
--> 533                               (prefix, dtypes.as_dtype(input_arg.type).name))
    534             else:
    535               # Update the maps with the default, if needed.

TypeError: Input 'input_data' of 'TreePredictionsV4' Op has type float64 that does not match expected type of float32.

我的假设是我必须将特征的数量设置为所有特征的数量(即使用所有特征而不是特征的子集)。但是我仍然得到与上面相同的错误。

我试图直接查看源代码,但无法真正理解问题出在哪里。GitHub上正在讨论一个类似的问题(见此处)。

我想知道我的实现中是否缺少某些内容?预先感谢。

编辑:尝试将X_train和y_train转换为float32

另一种尝试是将输入转换为float32。

regressor.fit(x=X_train.astype("float32"), y=y_train.astype("float32"))

但是,我仍然遇到与上面相同的错误。然后我尝试使用tf.cast

X_train_cast = cast(X_train, float32)
y_train_cast = cast(y_train, float32)
regressor.fit(x=X_train_cast, y=y_train_cast)

但是我得到了一个不同的错误

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-89e4fa057afb> in <module>()
      2 X_train_cast = cast(X_train, float32)
      3 y_train_cast = cast(y_train, float32)
----> 4 regressor.fit(x=X_train_cast, y=y_train_cast)
      5 
      6 #regressor.score()

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    430                 'in a future version' if date is None else ('after %s' % date),
    431                 instructions)
--> 432       return func(*args, **kwargs)
    433     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    434                                        _add_deprecated_arg_notice_to_docstring(

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    504     if (steps is not None) and (max_steps is not None):
    505       raise ValueError('Can not provide both steps and max_steps.')
--> 506     _verify_input_args(x, y, input_fn, None, batch_size)
    507     if x is not None:
    508       SKCompat(self).fit(x, y, batch_size, steps, max_steps, monitors)

~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _verify_input_args(x, y, input_fn, feed_fn, batch_size)
    102 
    103     if tensor_util.is_tensor(x) or y is not None and tensor_util.is_tensor(y):
--> 104       raise ValueError('Inputs cannot be tensors. Please provide input_fn.')
    105 
    106     if feed_fn is not None:

ValueError: Inputs cannot be tensors. Please provide input_fn.

我希望这可以使问题更加清楚。谢谢。

2 个答案:

答案 0 :(得分:0)

看起来TensorForestEstimator.fit期望的是一个输入函数(input_fn),而不是直接传入的数据。参见“Building input functions with tf.estimator”。例如:

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": X_train},
    y=y_train,
    num_epochs=None,
    shuffle=True)

classifier.train(input_fn=train_input_fn, steps=2000)

答案 1 :(得分:0)

使用 regressor.fit(x=X_train.astype("float32").values, y=y_train.astype("float32").values)应该可以解决此问题。