目前,我正在尝试使用Tensorflow的TensorForestEstimator
实现随机森林回归。我已经成功使用scikit-learn的RandomForestRegressor
完成了该任务,并希望使用Tensorflow复现相同的结果。
我使用pandas加载了数据,并使用scikit-learn的train_test_split
拆分了训练集和测试集。数据包含4个特征(全部为数值型)。
>>> X_train.shape
(2711, 4)
>>> y_train.shape
(2711,)
我为树设置了参数
num_features = int(np.log2(len(clean_data.columns)))
params = ForestHParams(num_classes=1, num_features=num_features,
regression=True,num_trees=447, max_nodes=1000)
regressor = TensorForestEstimator(params)
由于我在原始的scikit-learn实现中将max_features参数设置为log2,因此我将特征数量(num_features)设置为int(np.log2(len(clean_data.columns)))。
但是,当尝试拟合训练数据时,我会收到类似这样的错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
509 as_ref=input_arg.is_ref,
--> 510 preferred_dtype=default_dtype)
511 except TypeError as err:
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
1108 if ret is None:
-> 1109 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1110
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
945 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 946 (dtype.name, t.dtype.name, str(t)))
947 return t
ValueError: Tensor conversion requested dtype float32 for Tensor with dtype float64: 'Tensor("concat:0", shape=(?, 4), dtype=float64)'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-18-79323408f7f7> in <module>()
1 # from tensorflow import cast, float32
2 # X_train_cast = cast(X_train, float32)
----> 3 regressor.fit(x=X_train, y=y_train)
4
5 #regressor.score()
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
430 'in a future version' if date is None else ('after %s' % date),
431 instructions)
--> 432 return func(*args, **kwargs)
433 return tf_decorator.make_decorator(func, new_func, 'deprecated',
434 _add_deprecated_arg_notice_to_docstring(
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
506 _verify_input_args(x, y, input_fn, None, batch_size)
507 if x is not None:
--> 508 SKCompat(self).fit(x, y, batch_size, steps, max_steps, monitors)
509 return self
510
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, batch_size, steps, max_steps, monitors)
1525 steps=steps,
1526 max_steps=max_steps,
-> 1527 monitors=all_monitors)
1528 return self
1529
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
430 'in a future version' if date is None else ('after %s' % date),
431 instructions)
--> 432 return func(*args, **kwargs)
433 return tf_decorator.make_decorator(func, new_func, 'deprecated',
434 _add_deprecated_arg_notice_to_docstring(
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
522 hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps))
523
--> 524 loss = self._train_model(input_fn=input_fn, hooks=hooks)
525 logging.info('Loss for final step: %s.', loss)
526 return self
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _train_model(self, input_fn, hooks)
1039 self._check_inputs(features, labels)
1040 training_util._get_or_create_global_step_read() # pylint: disable=protected-access
-> 1041 model_fn_ops = self._get_train_ops(features, labels)
1042 ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss)
1043 all_hooks.extend(hooks)
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _get_train_ops(self, features, labels)
1262 `ModelFnOps` object.
1263 """
-> 1264 return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
1265
1266 def _get_eval_ops(self, features, labels, metrics):
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _call_model_fn(self, features, labels, mode, metrics, config)
1225 if 'model_dir' in model_fn_args:
1226 kwargs['model_dir'] = self.model_dir
-> 1227 model_fn_results = self._model_fn(features, labels, **kwargs)
1228
1229 if isinstance(model_fn_results, model_fn_lib.ModelFnOps):
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/client/random_forest.py in _model_fn(features, labels, mode)
169
170 logits, tree_paths, regression_variance = graph_builder.inference_graph(
--> 171 features)
172
173 summary.scalar('average_tree_size', graph_builder.average_size())
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.py in inference_graph(self, input_data, **inference_args)
512 data_spec,
513 sparse_features=processed_sparse_features,
--> 514 **inference_args)
515 probabilities.append(probs)
516 paths.append(path)
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.py in inference_graph(self, input_data, data_spec, sparse_features)
686 sparse_shape,
687 input_spec=data_spec.SerializeToString(),
--> 688 params=self.params.serialized_params_proto)
689
690 def size(self):
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest/python/ops/gen_model_ops.py in tree_predictions_v4(tree_handle, input_data, sparse_input_indices, sparse_input_values, sparse_input_shape, input_spec, params, name)
467 sparse_input_values=sparse_input_values,
468 sparse_input_shape=sparse_input_shape, input_spec=input_spec,
--> 469 params=params, name=name)
470 _result = _op.outputs[:]
471 _inputs_flat = _op.inputs
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
531 if input_arg.type != types_pb2.DT_INVALID:
532 raise TypeError("%s expected type of %s." %
--> 533 (prefix, dtypes.as_dtype(input_arg.type).name))
534 else:
535 # Update the maps with the default, if needed.
TypeError: Input 'input_data' of 'TreePredictionsV4' Op has type float64 that does not match expected type of float32.
我猜测可能必须将特征数量设置为全部特征的数量(即使用所有特征,而不是特征的子集)。但是这样做之后,我仍然得到与上面相同的错误。
我试图直接查看源代码,但无法真正理解问题出在哪里。GitHub上也正在讨论一个类似的问题(见此处)。
我想知道我的实现中是否缺少某些内容?预先感谢。
编辑:尝试将X_train和y_train转换为float32
我的另一种尝试是先将输入转换为float32:
regressor.fit(x=X_train.astype("float32"), y=y_train.astype("float32"))
但是,我仍然遇到与上面相同的错误。然后我尝试使用tf.cast
X_train_cast = cast(X_train, float32)
y_train_cast = cast(y_train, float32)
regressor.fit(x=X_train_cast, y=y_train_cast)
但是我得到了一个不同的错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-89e4fa057afb> in <module>()
2 X_train_cast = cast(X_train, float32)
3 y_train_cast = cast(y_train, float32)
----> 4 regressor.fit(x=X_train_cast, y=y_train_cast)
5
6 #regressor.score()
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
430 'in a future version' if date is None else ('after %s' % date),
431 instructions)
--> 432 return func(*args, **kwargs)
433 return tf_decorator.make_decorator(func, new_func, 'deprecated',
434 _add_deprecated_arg_notice_to_docstring(
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
504 if (steps is not None) and (max_steps is not None):
505 raise ValueError('Can not provide both steps and max_steps.')
--> 506 _verify_input_args(x, y, input_fn, None, batch_size)
507 if x is not None:
508 SKCompat(self).fit(x, y, batch_size, steps, max_steps, monitors)
~/Desktop/88sparses/recommendation/recom/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _verify_input_args(x, y, input_fn, feed_fn, batch_size)
102
103 if tensor_util.is_tensor(x) or y is not None and tensor_util.is_tensor(y):
--> 104 raise ValueError('Inputs cannot be tensors. Please provide input_fn.')
105
106 if feed_fn is not None:
ValueError: Inputs cannot be tensors. Please provide input_fn.
我希望这可以使问题更加清楚。谢谢。
答案 0 :(得分:0)
看起来TensorForestEstimator.fit期望接收一个输入函数(input_fn),而不是直接接收数据。参见Building input functions with tf.estimator。例如:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": X_train},
y=y_train,
num_epochs=None,
shuffle=True)
classifier.train(input_fn=train_input_fn, steps=2000)
答案 1 :(得分:0)
使用
regressor.fit(x=X_train.astype("float32").values, y=y_train.astype("float32").values)
应该解决此问题。