我正在使用 TensorFlow 2.0,并通过 tf.data.Dataset.from_tensor_slices() 构建数据集。
我的输入是一个索引数组,其中的索引用于裁剪一个大型numpy数组(60 GB)。
到目前为止,我的数据管道使用np.memmap
读取该大型数组,然后再对其进行裁剪。因此,我创建了一个形状为(n, 4)
的索引数组,其中n是样本数。该(n, 4)
数组被传入tf.data.Dataset.from_tensor_slices()
。
然后我想调用dataset.map()
,其输入是(n, 4)
数组中的一行,形状为[4,]
。但是,在.map()
调用的函数内部,我无法取出该张量中的单个值;而在调用.map()之前,我是可以对张量求值的。
这是出现错误的最小工作示例:
import numpy as np
import tensorflow as tf
# Small random stand-in for the large array that is to be cropped.
large_array = np.random.random((200, 200, 200))
# 10 rows of 4 random integers in [0, 50); slice_from_tensor unpacks each
# row as (heigth, width, heigth_exapnd, width_exapnd).
train_array = np.random.randint(0, 50, (10, 4))
def slice_from_tensor(x):
    """Crop ``large_array`` using the four index values held in ``x``.

    Passed as the mapping function to ``Dataset.map`` further down.

    Args:
        x: One dataset element, indexed here as ``x[0]`` .. ``x[3]`` and
            read as (start row, start column, row extent, column extent).

    Returns:
        The cropped region of ``large_array`` converted to a tensor.
    """
    #heigth, width, heigth_exapnd, width_exapnd = tf.split(x, 4) # Both methods fail
    print(x)
    heigth, width, heigth_exapnd, width_exapnd = x[0], x[1], x[2], x[3]
    # NOTE(review): this is the line the traceback in this post points at.
    # Inside map() the elements of x are tensors, and using them as slice
    # bounds on the NumPy array raises
    # "TypeError: slice indices must be integers or None or have an
    # __index__ method".
    return tf.convert_to_tensor(large_array[heigth: heigth+heigth_exapnd,
                                            width: width+width_exapnd, :])
# Wrap the index array in a tensor and build a dataset from its rows.
train_tensor = tf.convert_to_tensor(train_array)
train_slices_set = tf.data.Dataset.from_tensor_slices(train_tensor)
print(train_slices_set)
# This map() call is where the TypeError shown in the traceback is raised.
train_set = train_slices_set.map(slice_from_tensor)
错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-46-d059560c2557> in <module>
3 train_tensor = tf.convert_to_tensor(train_array)
4 train_slices_set = tf.data.Dataset.from_tensor_slices(train_tensor)
----> 5 train_set = train_slices_set.map(slice_from_tensor)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls)
1021 """
1022 if num_parallel_calls is None:
-> 1023 return MapDataset(self, map_func, preserve_cardinality=True)
1024 else:
1025 return ParallelMapDataset(
/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, use_inter_op_parallelism, preserve_cardinality, use_legacy_function)
3008 self._transformation_name(),
3009 dataset=input_dataset,
-> 3010 use_legacy_function=use_legacy_function)
3011 variant_tensor = gen_dataset_ops.map_dataset(
3012 input_dataset._variant_tensor, # pylint: disable=protected-access
/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
2398 resource_tracker = tracking.ResourceTracker()
2399 with tracking.resource_tracker_scope(resource_tracker):
-> 2400 self._function = wrapper_fn._get_concrete_function_internal()
2401 if add_to_graph:
2402 self._function.add_to_graph(ops.get_default_graph())
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal(self, *args, **kwargs)
1328 """Bypasses error checking when getting a graph function."""
1329 graph_function = self._get_concrete_function_internal_garbage_collected(
-> 1330 *args, **kwargs)
1331 # We're returning this concrete function to someone, and they may keep a
1332 # reference to the FuncGraph without keeping a reference to the
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1322 if self.input_signature:
1323 args, kwargs = None, None
-> 1324 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1325 return graph_function
1326
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
1585 or call_context_key not in self._function_cache.missed):
1586 self._function_cache.missed.add(call_context_key)
-> 1587 graph_function = self._create_graph_function(args, kwargs)
1588 self._function_cache.primary[cache_key] = graph_function
1589 return graph_function, args, kwargs
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
1518 arg_names=arg_names,
1519 override_flat_arg_shapes=override_flat_arg_shapes,
-> 1520 capture_by_value=self._capture_by_value),
1521 self._function_attributes)
1522
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
699 converted_func)
700
--> 701 func_outputs = python_func(*func_args, **func_kwargs)
702
703 # invariant: `func_outputs` contains only Tensors, IndexedSlices,
/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py in wrapper_fn(*args)
2392 attributes=defun_kwargs)
2393 def wrapper_fn(*args): # pylint: disable=missing-docstring
-> 2394 ret = _wrapper_helper(*args)
2395 ret = self._output_structure._to_tensor_list(ret)
2396 return [ops.convert_to_tensor(t) for t in ret]
/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py in _wrapper_helper(*args)
2332 nested_args = (nested_args,)
2333
-> 2334 ret = func(*nested_args)
2335 # If `func` returns a list of tensors, `nest.flatten()` and
2336 # `ops.convert_to_tensor()` would conspire to attempt to stack
<ipython-input-45-9015e98ee7eb> in slice_from_tensor(x)
5
6 return tf.convert_to_tensor(large_array[heigth: heigth+heigth_exapnd,
----> 7 width: width+width_exapnd, :])
8
TypeError: slice indices must be integers or None or have an __index__ method
答案 0 :(得分:3)
只需稍作修改。您现在是先用张量对numpy数组进行切片,然后才把结果转换为张量。正确的顺序恰好相反:需要先将large_array
转换为张量,然后再进行切片。因此,请不要写成:
return tf.convert_to_tensor(large_array[heigth: heigth+heigth_exapnd,
width: width+width_exapnd, :])
而应改为:
return tf.convert_to_tensor(large_array)[heigth: heigth+heigth_exapnd,
width: width+width_exapnd, :]