我正在尝试从头开始训练一个 MobileNet 模型,用于识别日文字符的图像。
我通过自己创建的一些 tfrecord 文件为网络提供输入;读取这些文件时,它们确实包含我期望的数据(图像)。
我的图像是 64x64x1 的,在送入网络之前我会将其转换为 64x64x3 的图像。共有 956 个类别。
使用 GPU 训练网络 8-10 小时(大约 100,000 次迭代)之后,验证集上的准确率始终为 0。
训练损失不断下降并接近 0,但验证数据上的损失先是上升,随后稳定在一个渐近值(约 16)附近。
为了让代码保持简洁,我使用了 TensorFlow 的 Estimator,并使用 rmsprop 作为优化器(我也试过 adam)。
我认为我的代码中应该存在某个明显的错误,但我找不到它。
我想知道在把标签送入网络之前是否必须对其进行 one-hot 编码,以及是否可以使用全零("0 ..... 0")的标签。
非常感谢任何帮助。
问候。
import tensorflow as tf
import os
import numpy as np
import sys
from PIL import Image
import shutil
def imgs_input_fn(filenames, perform_shuffle=False, repeat_count=1, batch_size=1):
    """Build the Estimator input tensors from PNG-encoded TFRecord files.

    Args:
        filenames: TFRecord path(s) passed to ``tf.data.TFRecordDataset``.
        perform_shuffle: When true, shuffle examples with an 8192-element
            buffer before repeating and batching.
        repeat_count: Number of passes over the data (``Dataset.repeat``).
        batch_size: Number of examples per batch.

    Returns:
        The ``(features, labels)`` pair produced by a one-shot iterator:
        ``features`` is a dict mapping the module-level ``input_name`` to a
        float32 image batch, ``labels`` is a one-hot batch with 956 classes.
    """

    def _parse_function(serialized):
        """Decode one serialized tf.Example into (feature dict, one-hot label)."""
        features = {
            'image/encoded': tf.FixedLenFeature([], tf.string),
            'image/width': tf.FixedLenFeature([], tf.int64),
            'image/height': tf.FixedLenFeature([], tf.int64),
            'image/channel': tf.FixedLenFeature([], tf.int64),
            'image/class/label': tf.FixedLenFeature([], tf.int64),
        }
        # Parse the serialized data so we get a dict with our data.
        parsed_example = tf.parse_single_example(serialized=serialized,
                                                 features=features)

        # decode_png produces a [height, width, channels] tensor, so the
        # reshape target must list height first. (The previous
        # [width, height, 3] order only worked because the images are square.)
        image_shape = tf.stack([parsed_example['image/height'],
                                parsed_example['image/width'],
                                3])
        label = tf.cast(parsed_example['image/class/label'], tf.int32)

        # Decode the PNG bytes, forcing 3 output channels.
        image = tf.image.decode_png(parsed_example['image/encoded'], 3)
        image = tf.cast(image, tf.float32)
        image = tf.reshape(image, image_shape)
        # Normalize each image independently.
        image = tf.image.per_image_standardization(image)

        num_classes = 956
        # The model in this file is compiled with 'categorical_crossentropy',
        # which expects one-hot targets, hence the one_hot encoding here.
        return {input_name: image}, tf.one_hot(label, num_classes)

    dataset = tf.data.TFRecordDataset(filenames=filenames)
    # Parse the serialized data in the TFRecords files.
    # This returns TensorFlow tensors for the image and labels.
    dataset = dataset.map(_parse_function)
    if perform_shuffle:
        # Randomizes input using a window of 8192 elements (read into memory).
        dataset = dataset.shuffle(buffer_size=8192)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(batch_size)  # Batch size to use
    return dataset.make_one_shot_iterator().get_next()
# TFRecord shards used for training.
# NOTE(review): only shards 00000-00004 of the 6 named in the file pattern are
# listed here -- confirm that shard 00005 is excluded on purpose.
path_tfrecords_train = [
    '/home/myuser/data_set/dump_train_00000-of-00006.tfrecord',
    '/home/myuser/data_set/dump_train_00001-of-00006.tfrecord',
    '/home/myuser/data_set/dump_train_00002-of-00006.tfrecord',
    '/home/myuser/data_set/dump_train_00003-of-00006.tfrecord',
    '/home/myuser/data_set/dump_train_00004-of-00006.tfrecord',
]
# TFRecord shards used for evaluation (both validation shards).
path_tfrecords_test = [
    '/home/myuser/data_set/dump_validation_00000-of-00002.tfrecord',
    '/home/myuser/data_set/dump_validation_00001-of-00002.tfrecord',
]
def relu6(x):
    """Return ``tf.nn.relu6`` applied to ``x``.

    Thin module-level wrapper; this file registers it under the name
    'relu6' in a Keras ``CustomObjectScope``.
    """
    activated = tf.nn.relu6(x)
    return activated
# Build a MobileNet classifier with randomly initialised weights
# (weights=None) and a 956-way classification head (include_top=True).
# NOTE(review): the original indentation of this script was lost, so the exact
# extent of the CustomObjectScope is unknown; only the constructor is kept
# inside it here, which does not affect summary/input_names/compile below.
# NOTE(review): no input_shape is passed, so Keras falls back to its default
# input size -- confirm this matches the image size produced by the input
# pipeline.
with tf.keras.utils.CustomObjectScope({'relu6': relu6}):
    model = tf.keras.applications.MobileNet(
        include_top=True,
        weights=None,
        input_tensor=None,
        pooling=None,
        classes=956,
        depth_multiplier=1,
        dropout=0.001,
    )

# Name of the model's first input; the input function keys its feature dict
# with this name.
input_name = model.input_names[0]
model.summary()

model.compile(
    optimizer='rmsprop',
    loss=['categorical_crossentropy'],
    metrics=['accuracy', tf.keras.metrics.categorical_accuracy],
)
# Estimator checkpoints and metadata are written to ./model_metadata,
# resolved against the current working directory.
model_dir = os.path.join(os.getcwd(), "model_metadata")
os.makedirs(model_dir, exist_ok=True)

# Wrap the compiled Keras model in a tf.estimator.Estimator.
# NOTE(review): if this conversion has to resolve the 'relu6' activation by
# name, it may need to run inside the CustomObjectScope -- confirm.
estimator_model = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir=model_dir,
)

# Training: shuffled batches of 64, a single pass over the data, capped at
# max_steps=500.
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: imgs_input_fn(
        path_tfrecords_train,
        perform_shuffle=True,
        repeat_count=1,
        batch_size=64,
    ),
    max_steps=500,
)

# Evaluation: unshuffled, one example per batch.
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: imgs_input_fn(
        path_tfrecords_test,
        perform_shuffle=False,
        batch_size=1,
    ),
)

tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
# Output locations inside the estimator's model directory.
model_json_file = os.path.join(model_dir, "model.json")
model_weight_file = os.path.join(model_dir, "model.h5")

# Write the architecture of the Keras model as JSON.
model_json = model.to_json()
with open(model_json_file, "w") as json_file:
    json_file.write(model_json)

# Write the Keras model to HDF5. model.save() stores the whole model, not
# only the weights.
# NOTE(review): training above goes through the estimator; confirm that the
# in-memory `model` object actually holds the trained weights at this point.
model.save(model_weight_file)
print("Saved model to disk")