I am training the flowers dataset on InceptionV1 with the following code, which is provided here:
import os

import tensorflow as tf

from datasets import flowers
from nets import inception
from preprocessing import inception_preprocessing

slim = tf.contrib.slim
image_size = inception.inception_v1.default_image_size

# NOTE: checkpoints_dir, flowers_data_dir and load_batch are defined in the
# tutorial linked above.

def get_init_fn():
    """Returns a function run by the chief worker to warm-start the training."""
    checkpoint_exclude_scopes = ["InceptionV1/Logits", "InceptionV1/AuxLogits"]

    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    return slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
        variables_to_restore)

train_dir = '/tmp/inception_finetuned/'

with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = flowers.get_split('train', flowers_data_dir)
    images, _, labels = load_batch(dataset, height=image_size, width=image_size)

    # Create the model, using the default arg scope to configure the batch
    # norm parameters.
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)

    # Specify the loss function:
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
    slim.losses.softmax_cross_entropy(logits, one_hot_labels)
    total_loss = slim.losses.get_total_loss()

    # Create some summaries to visualize the training process:
    tf.scalar_summary('losses/Total Loss', total_loss)

    # Specify the optimizer and create the train op:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = slim.learning.create_train_op(total_loss, optimizer)

    # Run the training:
    final_loss = slim.learning.train(
        train_op,
        logdir=train_dir,
        init_fn=get_init_fn(),
        number_of_steps=2)

print('Finished training. Last batch loss %f' % final_loss)
I evaluate the model with the following code and get an accuracy of 58.34%:
import numpy as np
import tensorflow as tf

from datasets import flowers
from nets import inception

slim = tf.contrib.slim
image_size = inception.inception_v1.default_image_size
batch_size = 3

with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = flowers.get_split('train', flowers_data_dir)
    images, images_raw, labels = load_batch(dataset, height=image_size, width=image_size)

    # Create the model, using the default arg scope to configure the batch
    # norm parameters.
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)
    predictions = tf.argmax(logits, 1)

    checkpoint_path = tf.train.latest_checkpoint(train_dir)
    init_fn = slim.assign_from_checkpoint_fn(
        checkpoint_path,
        slim.get_variables_to_restore())

    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'eval/Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        'eval/Recall@5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
    })

    # Define the streaming summaries to write:
    for metric_name, metric_value in names_to_values.items():
        tf.summary.scalar(metric_name, metric_value)

    print('Running evaluation Loop...')
    # Load the most recent checkpoint of variables saved
    checkpoint_path = tf.train.latest_checkpoint(train_dir)
    # Evaluate the model at the given checkpoint path
    metric_values = slim.evaluation.evaluate_once(
        master='',
        checkpoint_path=checkpoint_path,
        logdir=train_dir,
        num_evals=100,
        eval_op=list(names_to_updates.values()),
        final_op=list(names_to_values.values()),
        summary_op=tf.summary.merge_all())

    names_to_values = dict(zip(names_to_values.keys(), metric_values))
    for name in names_to_values:
        print('%s: %f' % (name, names_to_values[name]))
Other than configuring the checkpoint and training directories, I only replaced the "V1" in the code with "V2" or "V4" and trained the models again.

First, the training loss of "V2" and "V4" stayed constantly around 4% for all 100 iterations. Second, the evaluation accuracy of both "V2" and "V4" was around 25%.

I am new to TF, so I am definitely missing something here. What am I doing wrong?
Answer 0 (score: 1)
Many things can go wrong when fine-tuning a reasonably large convolutional network such as Inception V3. Here are some pointers on where you could improve your model:

Your code excludes InceptionV1/Logits and InceptionV1/AuxLogits from the tf.Graph. These tensors are the fully connected layers on top of the convolutional base. Essentially, this allows you to train your own InceptionV1/Logits and InceptionV1/AuxLogits. However, the code does not "freeze" the convolutional base, meaning that the convolutional filters remain trainable. This is a bad idea, because large gradients flowing from the randomly initialized fully connected layers can destroy the learned weights in the convolutional base. The effect is more catastrophic on larger networks, which would explain why V2 and V4 do worse than V1; see the sketch after this answer for one way to freeze the base. You can read more about fine-tuning networks here.

TensorFlow also has a documentation section on fine-tuning different models here. It also uses slim, a user-friendly and concise wrapper around TensorFlow. Maybe you can try it out. Good luck.
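To make the "freeze the convolutional base" advice concrete, here is a minimal sketch of how the training script from the question could restrict gradient updates to the new layers. It assumes the same tf.contrib.slim setup as the code above (slim, total_loss and optimizer already defined); trainable_scopes is a name introduced purely for this example.

# Hypothetical sketch: only the freshly initialized layers are trained, so
# the convolutional weights restored from the checkpoint stay frozen.
trainable_scopes = ["InceptionV1/Logits", "InceptionV1/AuxLogits"]

variables_to_train = []
for scope in trainable_scopes:
    variables_to_train.extend(slim.get_variables(scope))

# create_train_op computes gradients only for the variables listed in
# variables_to_train; every other variable is left untouched.
train_op = slim.learning.create_train_op(
    total_loss, optimizer, variables_to_train=variables_to_train)

Once the new layers have converged, a common second phase is to unfreeze the base and continue training the whole network with a learning rate much smaller than the 0.01 used above.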