我尝试按照本教程enter link description here进行转移学习,我使用了自己的数据集,而我尝试使用MobileNet来创建,问题出在那里的MobileNet模型中是3个检查点文件:
mobilenet_v1_0.5_128.ckpt.data-00000-of-00001
mobilenet_v1_0.5_128.ckpt.index
mobilenet_v1_0.5_128.ckpt.meta
当我使用其中一个时出现此错误:
NotFoundError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to find any matching files for C://Users//hp//PycharmProjects//tfSlim/mobilenet_v1_0.5_128//mobilenet_v1_0.5_128.ckpt.meta
[[Node: save/RestoreV2_139 = RestoreV2[dtypes=[DT_INT32], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2_139/tensor_names, save/RestoreV2_139/shape_and_slices)]]
import tensorflow as tf
from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging
#from inception_resnet_v2 import inception_resnet_v2, inception_resnet_v2_arg_scope
from models.research.slim.nets.mobilenet_v1 import mobilenet_v1, mobilenet_v1_arg_scope
import os
import time
import h5py
import numpy as np
slim = tf.contrib.slim
# ================ DATASET INFORMATION ======================
# State dataset directory where the tfrecord files are located
dataset_dir = 'C://Nassima//lymphoma//subs3'
# State where your log file is at. If it doesn't exist, create it.
log_dir = './log'
# State where your checkpoint file is
checkpoint_file = 'C://Users//hp//PycharmProjects//tfSlim/mobilenet_v1_0.5_128//mobilenet_v1_0.5_128.ckpt.meta'
# State the image size you're resizing your images to. We will use the default inception size of 299.
#image_size = 299
#image_size = 128
# State the number of classes to predict:
num_classes = 3
# State the labels file and read it
labels_file = 'C://Nassima//lymphoma//subs3//labels.txt'
labels = open(labels_file, 'r')
# Create a dictionary to refer each label to their string name
labels_to_name = {}
for line in labels:
label, string_name = line.split(':')
string_name = string_name[:-1] # Remove newline
labels_to_name[int(label)] = string_name
print(labels_to_name)
# Create the file pattern of your TFRecord files so that it could be recognized later on
"""
file_pattern = 'flowers_%s_*.tfrecord'
"""
# Create a dictionary that will help people understand your dataset better. This is required by the Dataset class later.
items_to_descriptions = {
'image': 'A 3-channel RGB coloured lymphoma image that is either CLL, FL, MCL.',
'label': 'A label that is as such -- 0:CLL, 1:FL, 2:MCL'
}
# ================= TRAINING INFORMATION ==================
# State the number of epochs to train
num_epochs = 1
# State your batch size
#batch_size = 8
file_mean = "C://Nassima//lymphoma//subs3//train//mean.hdf5"
TRAINING_SET_SIZE = 41860
BATCH_SIZE = 128
IMAGE_SIZE = 144
IMAGE_RESIZE = 128
# Learning rate information and configuration (Up to you to experiment)
initial_learning_rate = 0.0002
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 2
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
class _image_object: # image object from protobuf
def __init__(self):
self.image = tf.Variable([], dtype=tf.string)
self.height = tf.Variable([], dtype=tf.int64)
self.width = tf.Variable([], dtype=tf.int64)
self.filename = tf.Variable([], dtype=tf.string)
self.label = tf.Variable([], dtype=tf.int32)
def read_and_decode(filename_queue, mean):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example, features = {
"image/encoded": tf.FixedLenFeature([], tf.string),
"image/height": tf.FixedLenFeature([], tf.int64),
"image/width": tf.FixedLenFeature([], tf.int64),
"image/filename": tf.FixedLenFeature([], tf.string),
"image/class/label": tf.FixedLenFeature([], tf.int64),})
image_encoded = features["image/encoded"]
image_raw = tf.decode_raw(image_encoded, tf.float32)
image_object = _image_object()
#image_object.image = tf.image.resize_image_with_crop_or_pad(image_raw, IMAGE_SIZE, IMAGE_SIZE)
image_r = tf.reshape(image_raw, [IMAGE_SIZE, IMAGE_SIZE, 3])
#added
image_r = image_r - mean
image_r = tf.random_crop(image_r ,[IMAGE_RESIZE ,IMAGE_RESIZE ,3], seed = 0, name = None)
image_object.image = image_r
image_object.height = features["image/height"]
image_object.width = features["image/width"]
image_object.filename = features["image/filename"]
image_object.label = tf.cast(features["image/class/label"], tf.int64)
return image_object
def flower_input(mean, if_random = True, if_training = True):
if(if_training):
filenames = [os.path.join(dataset_dir, "lymphoma_train_0000%d-of-00005.tfrecord" % i) for i in range(0, 5)]
else:
filenames = [os.path.join(dataset_dir, "lymphoma_validation_0000%d-of-00005.tfrecord" % i) for i in range(0, 5)]
for f in filenames:
if not tf.gfile.Exists(f):
raise ValueError("Failed to find file: " + f)
filename_queue = tf.train.string_input_producer(filenames)
image_object = read_and_decode(filename_queue, mean)
image = tf.image.per_image_standardization(image_object.image)
# image = image_object.image
# image = tf.image.adjust_gamma(tf.cast(image_object.image, tf.float32), gamma=1, gain=1) # Scale image to (0, 1)
filename = image_object.filename
label = image_object.label
if(if_random):
min_fraction_of_examples_in_queue = 0.4
min_queue_examples = int(TRAINING_SET_SIZE * min_fraction_of_examples_in_queue)
print("Filling queue with %d images before starting to train. " "This will take a few minutes." % min_queue_examples)
num_preprocess_threads = 1
image_batch, label_batch, filename_batch = tf.train.shuffle_batch(
[image, label, filename],
batch_size=BATCH_SIZE,
num_threads=num_preprocess_threads,
capacity=min_queue_examples + 3 * BATCH_SIZE,
min_after_dequeue=min_queue_examples)
return image_batch, label_batch, filename_batch
else:
image_batch, label_batch, filename_batch = tf.train.batch(
[image, label, filename],
batch_size=BATCH_SIZE,
num_threads=1)
return image_batch, label_batch, filename_batch
"""
# ============== DATASET LOADING ======================
"""
def run():
# Create the log directory here. Must be done here otherwise import will activate this unneededly.
if not os.path.exists(log_dir):
os.mkdir(log_dir)
# ======================= TRAINING PROCESS =========================
# Now we start to construct the graph and build our model
with tf.Graph().as_default() as graph:
tf.logging.set_verbosity(tf.logging.INFO) # Set the verbosity to INFO level
# ajouter le mean de l'image
hdf5_file = h5py.File(file_mean, "r")
# subtract the training mean
mm = hdf5_file["train_mean"][0, ...]
mm = mm[np.newaxis, ...]
# Total number of samples
mean = tf.convert_to_tensor(mm, np.float32)
# First create the dataset and load one batch
images, labels, _ = flower_input(mean, if_random=True, if_training=True)
# Know the number steps to take before decaying the learning rate and batches per epoch
num_batches_per_epoch = int(TRAINING_SET_SIZE / BATCH_SIZE)
num_steps_per_epoch = num_batches_per_epoch # Because one step is one batch processed
decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
# Create the model inference
with slim.arg_scope(mobilenet_v1_arg_scope()):
logits, end_points = mobilenet_v1(images, num_classes= num_classes, is_training=True)
# Define the scopes that you want to exclude for restoration
#exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
exclude = ['MobilenetV1/Logits', 'MobilenetV1/AuxLogits']
#exclude = ["MobilenetV1/Logits/Conv2d_1c_1x1"]
#exclude = []
variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
# Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
one_hot_labels = slim.one_hot_encoding(labels, num_classes)
# Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
total_loss = tf.losses.get_total_loss() # obtain the regularization losses as well
# Create the global step for monitoring the learning_rate and training.
global_step = get_or_create_global_step()
# Define your exponentially decaying learning rate
lr = tf.train.exponential_decay(
learning_rate=initial_learning_rate,
global_step=global_step,
decay_steps=decay_steps,
decay_rate=learning_rate_decay_factor,
staircase=True)
# Now we can define the optimizer that takes on the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
# Create the train_op.
train_op = slim.learning.create_train_op(total_loss, optimizer)
# State the metrics that you want to predict. We get a predictions that is not one_hot_encoded.
predictions = tf.argmax(end_points['Predictions'], 1)
probabilities = end_points['Predictions']
accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
metrics_op = tf.group(accuracy_update, probabilities)
# Now finally create all the summaries you need to monitor and group them into one summary op.
tf.summary.scalar('losses/Total_Loss', total_loss)
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('learning_rate', lr)
my_summary_op = tf.summary.merge_all()
# Now we need to create a training step function that runs both the train_op, metrics_op and updates the global_step concurrently.
def train_step(sess, train_op, global_step):
'''
Simply runs a session for the three arguments provided and gives a logging on the time elapsed for each global step
'''
# Check the time for each sess run
start_time = time.time()
total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
time_elapsed = time.time() - start_time
# Run the logging to print some results
logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
return total_loss, global_step_count
# Now we create a saver function that actually restores the variables from a checkpoint file in a sess
saver = tf.train.Saver(variables_to_restore)
saver = tf.train.import_meta_graph(checkpoint_file)
#added
def restore_fn(sess):
return saver.restore(sess, 'C://Users//hp//PycharmProjects//tfSlim/mobilenet_v1_0.5_128//mobilenet_v1_0.5_128.ckpt')
# Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)
# Run the managed session
with sv.managed_session() as sess:
for step in range(num_steps_per_epoch * num_epochs):
# At the start of every epoch, show the vital information:
if step % num_batches_per_epoch == 0:
logging.info('Epoch %s/%s', step / num_batches_per_epoch + 1, num_epochs)
learning_rate_value, accuracy_value = sess.run([lr, accuracy])
logging.info('Current Learning Rate: %s', learning_rate_value)
logging.info('Current Streaming Accuracy: %s', accuracy_value)
# optionally, print your logits and predictions for a sanity check that things are going fine.
logits_value, probabilities_value, predictions_value, labels_value = sess.run(
[logits, probabilities, predictions, labels])
print
'logits: \n', logits_value
print
'Probabilities: \n', probabilities_value
print
'predictions: \n', predictions_value
print
'Labels:\n:', labels_value
# Log the summaries every 10 step.
if step % 10 == 0:
loss, _ = train_step(sess, train_op, sv.global_step)
summaries = sess.run(my_summary_op)
sv.summary_computed(sess, summaries)
# If not, simply run the training step
else:
loss, _ = train_step(sess, train_op, sv.global_step)
# We log the final training loss and accuracy
logging.info('Final Loss: %s', loss)
logging.info('Final Accuracy: %s', sess.run(accuracy))
# Once all the training has been done, save the log files and checkpoint model
logging.info('Finished training! Saving model to disk now.')
# saver.save(sess, "./flowers_model.ckpt")
#sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
if __name__ == '__main__':
run()
,错误是
File "C:/Users/hp/PycharmProjects/tfSlim/lympho_mobileNet/train_lymphoma2.py", line 272, in <module>
run()
File "C:/Users/hp/PycharmProjects/tfSlim/lympho_mobileNet/train_lymphoma2.py", line 230, in run
sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\supervisor.py", line 300, in __init__
self._init_saver(saver=saver)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\supervisor.py", line 448, in _init_saver
saver = saver_mod.Saver()
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1218, in __init__
self.build()
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1227, in build
self._build(self._filename, build_save=True, build_restore=True)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1263, in _build
build_save=build_save, build_restore=build_restore)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 729, in _build_internal
saveables = self._ValidateAndSliceInputs(names_to_saveables)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 582, in _ValidateAndSliceInputs
names_to_saveables = BaseSaverBuilder.OpListToDict(names_to_saveables)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 554, in OpListToDict
name)
ValueError: At least two variables have the same name: MobilenetV1/Conv2d_7_depthwise/BatchNorm/gamma
我认为是因为排除了图层或指令
tf.train.import_meta_graph(checkpoint_file)
答案 0 :(得分:0)
您正在加载元文件,而您应该只提供 mobilenet_v1_0.5_128.ckpt
的路径