我试图通过一些改动来运行这个ResNet。https://github.com/tensorflow/models/tree/master/official/resnet
查找错误后,我理解问题是:
如果是初始化的问题——Estimator会自动完成变量初始化并创建会话,在这种情况下我应该如何初始化这些变量?
这是错误:
ValueError: Tensor("IsVariableInitialized:0", shape=(), dtype=bool) must be from the same graph as Tensor("report_uninitialized_variables/IsVariableInitialized:0", shape=(), dtype=bool).
整个代码非常庞大,所以我只提供我所做的更改(因为它在没有这些更改的情况下运行)。其余代码未被触及(上面的链接中的repo)
这是原始的解析器函数(从二进制文件读取):
def parse_record(raw_record, is_training):
    """Turn one raw CIFAR-10 record into an ``(image, label)`` pair.

    Args:
        raw_record: Value handed to ``tf.decode_raw`` and decoded as uint8.
        is_training: Forwarded unchanged to ``preprocess_image``.

    Returns:
        A tuple ``(image, label)``: the output of ``preprocess_image`` applied
        to the float32 image in [height, width, depth] layout, and the label
        one-hot encoded over ``_NUM_CLASSES`` classes.
    """
    # View the record as a flat vector of uint8 values.
    raw_bytes = tf.decode_raw(raw_record, tf.uint8)

    # Element 0 is the class id; widen it to int32 before one-hot encoding.
    class_id = tf.cast(raw_bytes[0], tf.int32)
    one_hot_label = tf.one_hot(class_id, _NUM_CLASSES)

    # Everything after the label byte is pixel data stored depth-major, i.e.
    # as [depth, height, width].
    pixel_bytes = raw_bytes[1:_RECORD_BYTES]
    depth_first = tf.reshape(pixel_bytes, [_NUM_CHANNELS, _HEIGHT, _WIDTH])

    # Move the channel axis last ([height, width, depth]) and switch to
    # float32 ahead of preprocessing.
    depth_last = tf.transpose(depth_first, [1, 2, 0])
    processed = preprocess_image(tf.cast(depth_last, tf.float32), is_training)

    return processed, one_hot_label
这是我从TFRecords读取的替代品:
def parse_record(raw_record, is_training):
    """Parse an image and a one-hot label from a TFRecord source.

    The feature keys are prefixed with ``'train'`` when ``is_training`` is
    true and with ``'val'`` otherwise; the same prefixed keys are used both
    to declare the features and to read them back.

    Args:
        raw_record: String tensor wrapped in a list and passed to
            ``tf.train.string_input_producer``.
        is_training: Selects the key prefix and is forwarded to
            ``preprocess_image``.

    Returns:
        A tuple ``(image, label)``: the output of ``preprocess_image`` for the
        image reshaped to ``[_HEIGHT, _WIDTH, _NUM_CHANNELS]``, and the label
        one-hot encoded over ``_NUM_CLASSES`` classes.
    """
    mode = 'train' if is_training else 'val'
    image_key = mode + '/image'
    label_key = mode + '/label'
    feature = {image_key: tf.FixedLenFeature([], tf.string),
               label_key: tf.FixedLenFeature([], tf.int64)}

    # NOTE(review): this queue-based reader is the likely source of the
    # "must be from the same graph" ValueError reported for this function;
    # the reported workaround drops these three statements and passes
    # raw_record straight to tf.parse_single_example. Confirm against how the
    # input pipeline invokes parse_record before changing it.
    filename_queue = tf.train.string_input_producer([raw_record], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(serialized_example, features=feature)

    # Read back with the mode-specific keys declared above. Hard-coding the
    # 'train/' prefix here raised KeyError whenever is_training was False,
    # because the parsed dict only contains the keys from `feature`.
    label = tf.cast(features[label_key], tf.int32)
    label = tf.one_hot(label, _NUM_CLASSES)

    image = tf.decode_raw(features[image_key], tf.float32)
    image = tf.reshape(image, [_HEIGHT, _WIDTH, _NUM_CHANNELS])
    image = preprocess_image(image, is_training)
    return image, label
这是创建Estimator的地方(这部分代码我没有修改过):
def resnet_main(flags, model_function, input_function):
    """Run the shared train/evaluate cycle for the ResNet models.

    Args:
        flags: Options object. Attributes read here: ``multi_gpu``,
            ``batch_size``, ``inter_op_parallelism_threads``,
            ``intra_op_parallelism_threads``, ``model_dir``, ``resnet_size``,
            ``data_format``, ``version``, ``train_epochs``,
            ``epochs_between_evals``, ``hooks``, ``benchmark_log_dir``,
            ``data_dir``, ``num_parallel_calls`` and ``max_train_steps``.
        model_function: The ``model_fn`` given to ``tf.estimator.Estimator``;
            wrapped with ``replicate_model_fn`` when ``flags.multi_gpu`` is
            set.
        input_function: Callable invoked positionally as
            ``input_function(is_training, flags.data_dir, flags.batch_size,
            num_epochs, flags.num_parallel_calls, flags.multi_gpu)``.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where the optimizer is built).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function,
            loss_reduction=tf.losses.Reduction.MEAN)

    # Session config built from the inter/intra-op thread settings.
    # allow_soft_placement=True is required for multi-GPU and harmless
    # otherwise, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # The RunConfig carries the checkpoint interval and the session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9,
        session_config=session_config)

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params={
            'resnet_size': flags.resnet_size,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
            'version': flags.version,
        })

    def input_fn_train():
        # Training input: runs for epochs_between_evals epochs per cycle.
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        # Evaluation input: a single pass over the data.
        return input_function(False, flags.data_dir, flags.batch_size,
                              1, flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _ in range(num_cycles):
        # Hooks are requested afresh for every training cycle.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is mostly used for testing and profiling,
        # often with synthetic data that never runs out. Passing it as
        # `steps` lets evaluation (generally unimportant in that setting)
        # terminate. Evaluation therefore runs max_train_steps steps on every
        # cycle, regardless of the global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)
        print(eval_results)

        if flags.benchmark_log_dir is not None:
            logger.BenchmarkLogger(
                flags.benchmark_log_dir
            ).log_estimator_evaluation_result(eval_results)
答案 0 :(得分:0)
蛮力解决。我不知道自己在做什么,但决定发布适用于我的解决方案,即使我无法解释,因为这可能有助于另一位冒险家。
删除parse_record函数中的以下行:
filename_queue = tf.train.string_input_producer([raw_record], num_epochs=1)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
然后将serialized_example替换为raw_record(一个tensor,dtype=string,value=path/to/tfrecordfile),作为parse_single_example函数的输入:
features = tf.parse_single_example(raw_record, features=feature)