I'm having a problem getting tf.estimator to work with a tf.data.Dataset built from a list of tfrecords files, even though I can get it to work with a dummy dataset.
I can read and parse the data and have confirmed it has the correct dimensions and values. The estimator runs fine with a dummy dataset I created that has the same properties.
Using tensorflow 1.13.1. I'm running in eager mode, so there is no explicit session. My understanding is that the Estimator should not need an explicit iterator (such as dataset.make_one_shot_iterator()) to work, so the input_fn can return the dataset itself.
The Keras model works with an InputLayer of shape=(31, 2323).
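The model itself is not included below; purely for reference, here is a minimal hypothetical stand-in (an assumption, not the actual model: the traceback further down goes through the Keras recurrent-layer build code, so a small RNN over the (31, 2323) input is sketched):

def Keras_model():
    # Hypothetical stand-in for the model used in this post (assumption):
    # a small RNN over inputs of shape (31, 2323), since the traceback
    # below runs through the Keras recurrent-layer build code.
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(31, 2323)),
        tf.keras.layers.SimpleRNN(32),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model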
Here is the failing code:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution(device_policy=tfe.DEVICE_PLACEMENT_SILENT)
import numpy as np
from tensorflow.contrib.data.python.ops import sliding
BATCH_SIZE = 5
EPOCHS = 10
path = 'dummy_data/*.tfrecords'
## checking if dataset is getting parsed and same as generated dummy data:
dataset = dataset_input_fn(path,EPOCHS,BATCH_SIZE)
iterator = dataset.make_one_shot_iterator()
feat, label = iterator.get_next()
print(feat.shape, label.shape)
dataset_dummy = dummy_dataset_fn(EPOCHS, BATCH_SIZE)
iterator = dataset_dummy.make_one_shot_iterator()
feat, label = iterator.get_next()
print(feat.shape, label.shape)
model = Keras_model()
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)
# estimator works with dummy data:
estimator.train(input_fn=lambda: dummy_dataset_fn(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
))
print('successful run for estimator')
estimator.train(input_fn=lambda: dataset_input_fn(
    path,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE
))
Both iterators output datasets of shape (BATCH_SIZE, 31, 2323) as expected. Using the iterators, I have confirmed that the values of feat and label are parsed correctly. (I can post sample data if necessary.)
The dummy_dataset_fn that works:
def dummy_dataset_fn(epochs, batch_size):
    # creates the (?, 31, 2323) (?, 1) data
    n_test_samples = 31 * 100
    feature1 = np.random.rand(n_test_samples, 2323)
    test_dataset = tf.data.Dataset.from_tensor_slices(feature1)
    test_labels = np.random.randint(2, size=100)
    test_labels = np.repeat(test_labels, 31)
    test_labels = tf.data.Dataset.from_tensor_slices(test_labels)
    window = 31
    stride = 31
    test_dataset = test_dataset.apply(sliding.sliding_window_batch(window, stride))
    test_labels = test_labels.apply(sliding.sliding_window_batch(window, stride))
    test_labels = test_labels.map(return_single_val)
    dataset = tf.data.Dataset.zip((test_dataset, test_labels))
    dataset = dataset.batch(batch_size).repeat(epochs)
    return dataset
And the "proper" dataset_input_fn (although the tfrecords still contain dummy data):
def return_single_val(line_batch):
    # each window holds 31 copies of the same label; collapse them to one
    label, _ = tf.unique(line_batch)
    return label

def _parse_function_features(example_proto):
    feature_description = {
        'X': tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True),
        'Y': tf.FixedLenFeature([], tf.int64, default_value=0),
    }
    # Parse the input tf.Example proto using the dictionary above.
    parsed_features = tf.parse_single_example(example_proto, feature_description)
    features = parsed_features["X"]
    return features

def _parse_function_labels(example_proto):
    feature_description = {
        'X': tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True),
        'Y': tf.FixedLenFeature([], tf.int64, default_value=0),
    }
    # Parse the input tf.Example proto using the dictionary above.
    parsed_features = tf.parse_single_example(example_proto, feature_description)
    labels = parsed_features["Y"]
    return labels

def dataset_input_fn(wildcard, epochs, batch_size):
    filelist = tf.data.Dataset.list_files(wildcard)
    raw_dataset = tf.data.TFRecordDataset(filelist)
    parsed_dataset_features = raw_dataset.map(_parse_function_features)
    parsed_dataset_labels = raw_dataset.map(_parse_function_labels)
    # group rows into non-overlapping windows of 31
    window = 31
    stride = 31
    data_features = parsed_dataset_features.apply(sliding.sliding_window_batch(window, stride))
    data_labels = parsed_dataset_labels.apply(sliding.sliding_window_batch(window, stride))
    data_labels = data_labels.map(return_single_val)
    dataset = tf.data.Dataset.zip((data_features, data_labels))
    dataset = dataset.batch(batch_size).repeat(epochs)
    return dataset
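(The script that writes the dummy tfrecords is not part of this post. Purely for reproducibility, a minimal sketch of a writer matching the 'X'/'Y' feature_description above could look like the following; the filename, row count, and label scheme are assumptions.)

# Hypothetical helper (not from the original script): writes dummy records
# matching the feature_description used by the parse functions above.
# Assumes the dummy_data/ directory already exists.
def write_dummy_tfrecords(filename='dummy_data/dummy_0.tfrecords',
                          n_rows=31 * 100, width=2323):
    with tf.python_io.TFRecordWriter(filename) as writer:
        for i in range(n_rows):
            example = tf.train.Example(features=tf.train.Features(feature={
                'X': tf.train.Feature(
                    float_list=tf.train.FloatList(value=np.random.rand(width))),
                'Y': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[(i // 31) % 2])),
            }))
            writer.write(example.SerializeToString())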
The code fails at estimator.train(input_fn=lambda: dataset_input_fn(...)).
It appears to be related to the input shape being None:
File "trainer/reproducible_tensorflow_dummydata.py", line 122, in main
estimator.train(input_fn=lambda:dataset_input_fn(
File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 358, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "{user}lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1124, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1154, in _train_model_default
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/keras.py", line 278, in model_fn
labels)
File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/keras.py", line 201, in _clone_and_build_model
optimizer_iterations=global_step)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 466, in clone_and_build_model
clone = clone_model(model, input_tensors=input_tensors)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 271, in clone_model
return _clone_functional_model(model, input_tensors=input_tensors)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 161, in _clone_functional_model
**kwargs))
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 701, in __call__
return super(RNN, self).__call__(inputs, **kwargs)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 538, in __call__
self._maybe_build(inputs)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1603, in _maybe_build
self.build(input_shapes)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 619, in build
self.cell.build(step_input_shape)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/utils/tf_utils.py", line 151, in wrapper
output_shape = fn(instance, input_shape)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 2022, in build
constraint=self.kernel_constraint)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 349, in add_weight
aggregation=aggregation)
File "{user}/lib/python2.7/site-packages/tensorflow/python/training/checkpointable/base.py", line 607, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 145, in make_variable
aggregation=aggregation)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 213, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 176, in _variable_v1_call
aggregation=aggregation)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 155, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2488, in default_variable_creator
import_scope=import_scope)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 217, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 294, in __init__
constraint=constraint)
File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 406, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 127, in <lambda>
shape, dtype=dtype, partition_info=partition_info)
File "{user}/env/lib/python2.7/site-packages/tensorflow/python/ops/init_ops.py", line 499, in __call__
scale /= max(1., (fan_in + fan_out) / 2.)
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
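To illustrate where a None could come from, the static shapes of the two pipelines can be compared (a diagnostic sketch based on the definitions above, using the TF 1.x Dataset.output_shapes property; the actual printed values are not included here):

# Diagnostic sketch: compare the static (graph-time) shapes of the two
# pipelines. The dummy pipeline is built from in-memory numpy arrays, so the
# feature width 2323 is known statically; the TFRecord pipeline parses a
# variable-length FixedLenSequenceFeature, so dimensions can show up as None.
dummy_ds = dummy_dataset_fn(EPOCHS, BATCH_SIZE)
record_ds = dataset_input_fn(path, EPOCHS, BATCH_SIZE)
print(dummy_ds.output_shapes)
print(record_ds.output_shapes)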
So what could be going wrong here?