无法将padded_batch tf.data.Dataset传递给tf.estimator

时间:2019-10-01 18:44:09

标签: python tensorflow

我有一个 tf.data.Dataset 对象,它是与 tf.estimator 一起使用的 input_fn 的输出。下面是一个简单的示例:

import os
import tensorflow as tf
import numpy as np

# Boosted-trees regressor over a single numeric feature 'x' that is declared
# as a fixed-length vector of 3 values; the model directory is
# 'btr_classifier' under the current working directory.
btr = tf.estimator.BoostedTreesRegressor(
    feature_columns=[tf.feature_column.numeric_column('x', shape=(3,))],
    n_batches_per_layer=1,
    model_dir=os.path.join(os.getcwd(), 'btr_classifier'),
)

def input_fn():
    """Return a zero-argument callable that builds the training dataset.

    The returned callable produces a ``tf.data.Dataset`` of
    ``({"x": features}, label)`` padded batches drawn from a random
    generator of variable-length inputs.
    """

    def generate_values():
        """Yield 50000 ``(x, y)`` pairs with variable-length ``x``.

        ``x`` holds 1 to 3 uniform random floats and ``y`` is their sum.
        """
        for _ in range(50000):
            length = np.random.randint(1, 4)
            values = np.random.rand(length)
            yield values, np.sum(values)

    def to_dict(x, y):
        """Wrap the features under the 'x' key expected by feature_columns."""
        return {"x": x}, y

    def make_dataset(batch_size=32):
        """Build the repeating, padded-batch dataset from the generator.

        Args:
            batch_size: number of generated examples per padded batch.
        """
        raw = tf.data.Dataset.from_generator(
            generate_values,
            output_types=(tf.float32, tf.float32),
        )
        # NOTE(review): the ``None`` entry in padded_shapes leaves the feature
        # axis without a static size, so batches come out shaped (?, ?). The
        # 'x' feature column is declared with shape=(3,), and the estimator's
        # unstack along axis 1 needs a known size there -- this looks like the
        # source of "Cannot infer num from shape (?, ?)". Padding to a fixed
        # [3] would presumably resolve it (TODO confirm); it is left unchanged
        # here so the snippet still reproduces the reported error.
        batched = raw.padded_batch(batch_size, padded_shapes=([None], []))
        # Repeat first, then attach the feature key to every batch.
        return batched.repeat().map(to_dict)

    def _input_fn():
        return make_dataset()

    return _input_fn

# input_fn() is called here so that the inner zero-argument _input_fn (not
# input_fn itself) is what gets handed to the estimator; this is the call
# that raises the ValueError shown in the traceback below.
btr.train(input_fn=input_fn())

这将导致以下错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-d7ca9ca451db> in <module>
     31     return _input_fn
     32 
---> 33 btr.train(input_fn=input_fn())

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
    365 
    366       saving_listeners = _check_listeners_type(saving_listeners)
--> 367       loss = self._train_model(input_fn, hooks, saving_listeners)
    368       logging.info('Loss for final step: %s.', loss)
    369       return self

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
   1156       return self._train_model_distributed(input_fn, hooks, saving_listeners)
   1157     else:
-> 1158       return self._train_model_default(input_fn, hooks, saving_listeners)
   1159 
   1160   def _train_model_default(self, input_fn, hooks, saving_listeners):

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
   1186       worker_hooks.extend(input_hooks)
   1187       estimator_spec = self._call_model_fn(
-> 1188           features, labels, ModeKeys.TRAIN, self.config)
   1189       global_step_tensor = training_util.get_global_step(g)
   1190       return self._train_with_estimator_spec(estimator_spec, worker_hooks,

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py in _call_model_fn(self, features, labels, mode, config)
   1144 
   1145     logging.info('Calling model_fn.')
-> 1146     model_fn_results = self._model_fn(features=features, **kwargs)
   1147     logging.info('Done calling model_fn.')
   1148 

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\canned\boosted_trees.py in _model_fn(features, labels, mode, config)
   1932           config,
   1933           weight_column=weight_column,
-> 1934           train_in_memory=train_in_memory)
   1935 
   1936     super(BoostedTreesRegressor, self).__init__(

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\canned\boosted_trees.py in _bt_model_fn(features, labels, mode, head, feature_columns, tree_hparams, n_batches_per_layer, config, closed_form_grad_and_hess_fn, example_id_column_name, weight_column, train_in_memory, name)
   1075     else:
   1076       input_feature_list = _get_transformed_features(
-> 1077           features, sorted_feature_columns, bucket_boundaries_dict)
   1078       if example_id_column_name:
   1079         example_ids = features[example_id_column_name]

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\canned\boosted_trees.py in _get_transformed_features(features, sorted_feature_columns, bucket_boundaries_dict)
    102   return _get_transformed_features_and_merge_with_previously_transformed(
    103       features, sorted_feature_columns, sorted_feature_columns,
--> 104       bucket_boundaries_dict)
    105 
    106 

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow_estimator\python\estimator\canned\boosted_trees.py in _get_transformed_features_and_merge_with_previously_transformed(features, sorted_feature_columns, all_sorted_columns, bucket_boundaries_dict, already_transformed_features)
    171                          'than 2, but column `{}` got: {}'.format(
    172                              source_name, column.shape))
--> 173       unstacked = array_ops.unstack(tensor, axis=1)
    174       if not bucket_boundaries_dict:
    175         result_features.extend(unstacked)

~\AppData\Local\Continuum\anaconda3\envs\tensorflow_gpuenv\lib\site-packages\tensorflow\python\ops\array_ops.py in unstack(value, num, axis, name)
   1198       num = value_shape.dims[axis].value
   1199   if num is None:
-> 1200     raise ValueError("Cannot infer num from shape %s" % value_shape)
   1201   return gen_array_ops.unpack(value, num=num, axis=axis, name=name)
   1202 

ValueError: Cannot infer num from shape (?, ?)

我可以用稍作修改的版本训练 tf.keras 模型,因为那种方式不需要按 feature_columns 参数所要求的格式组织输入。是不是我在 variant_dataset 上给 'x' 加的标签(键)不正确?我原本以为它可以像其他任何 Dataset API 的输出一样,直接传递给估算器。我还不太熟悉、正在努力弄懂的部分是 padded_batch 的用法,也许使用它时还有其他需要注意的地方。

0 个答案:

没有答案