为了更好地理解在TensorFlow中创建CNN模型,我编写了一个代码,用于在模型上训练,评估和执行图像推理。我有两个输出类。代码运行正常,没有任何错误。
以下是我遇到问题的地方:我在图片 Image_a.jpg 上运行推理功能并获得结果 Class_a 。现在我在图像文件夹上运行推理功能,其中包括 Image_a.jpg 但是我在这里得到的结果 Image_a.jpg 与我得到的结果不符我只测试那个图像。很多图像都会发生这种情况。
我做错了什么?
这是主要代码:
class _image_object:
def __init__(self):
self.image = tf.Variable([], dtype=tf.string)
self.height = tf.Variable([], dtype=tf.int64)
self.width = tf.Variable([], dtype=tf.int64)
self.filename = tf.Variable([], dtype=tf.string)
self.label = tf.Variable([], dtype=tf.int32)
def get_files(data_dir, subset):
"""
Gets the traning/validation image data or tfRecord files
from given data_dir.
Args:
data_dir: directory where the tfRecord files lie.
subset: train or validation
Returns:
python list of all (tfRecord) data set files.
Raises:
ValueError: if there are not data_files matching the subset.
"""
if subset not in ['train', 'validation']:
print ('Invalid subset!')
exit(-1)
# Gets all files whose names start with 'subset' in 'data_dir' path
# and makes a list.
tf_record_pattern = os.path.join(data_dir, '%s-*' % subset)
data_files = tf.gfile.Glob(tf_record_pattern)
print('Data files found: ', data_files)
if not data_files:
print ('No files found for data dir %s at %s' % (subset,
data_dir))
exit(-1)
# Return the list of filenames
return data_files
def read_and_decode(filename_queue):
"""
Parses the tfRecord files and extracts the image data and label data.
NOTE:
The format used for the features dict here to parse the required data
must have the same format that was used when creating the tfRecord files.
Args:
filename_queue: queue of the data files to be read.
Returns:
An object of the _image_object which contains the required image and
label data.
"""
# Creates an object of the TFRecordReader.
reader = tf.TFRecordReader()
(_, serialized_example) = reader.read(filename_queue)
# Parses the required data from the tfRecord files.
features = tf.parse_single_example(serialized_example, features = {
'image/height': tf.FixedLenFeature([], dtype=tf.int64),
'image/width': tf.FixedLenFeature([], dtype=tf.int64),
'image/class/label': tf.FixedLenFeature([], dtype=tf.int64),
'image/encoded': tf.FixedLenFeature([], dtype=tf.string),
'image/filename': tf.FixedLenFeature([], dtype=tf.string),})
# Gets the raw pixel values of the image.
image_encoded = features['image/encoded']
# Decode a JPEG-encoded image to a uint8 tensor.
image_raw = tf.image.decode_jpeg(image_encoded, channels=3)
# Creates an object of the _image_object class.
image_object = _image_object()
# Resizes images used for training/testing to the size expected by the model.
image_object.image = \
tf.image.resize_image_with_crop_or_pad(image_raw,
FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT)
# Stores path and names of the images
image_object.filename = features['image/filename']
# Stores the labels
image_object.label = tf.cast(features['image/class/label'], tf.int64)
return image_object
def batch_inputs(data_dir, is_training):
"""
Parses the image and label data from the tfRecord files.
Stores the image and labels data in batches.
Args:
data_dir: directory where the tfRecord files lie.
is_training: boolean to indicate if training is being performed or validation.
Returns: A list of tensors for image data and labels as batches.
"""
if is_training:
# Gets the list of training data files.
files = get_files(data_dir, 'train')
# Shuffles the filenames and outputs them to a queue for an input pipeline.
filename_queue = tf.train.string_input_producer(files, shuffle=True)
else:
# Gets the list of validation data files.
files = get_files(data_dir, 'validation')
# Outputs the filenames to a queue for an input pipeline.
filename_queue = tf.train.string_input_producer(files, shuffle=False)
# Reads and parses the tfRecrd files to get image and label data.
image_object = read_and_decode(filename_queue)
# Normalizes the image.
image = tf.image.per_image_standardization(image_object.image)
label = image_object.label
filename = image_object.filename
# Store the data as batches
(image_batch, label_batch, filename_batch) = tf.train.batch([image, label, filename], batch_size=FLAGS.BATCH_SIZE, num_threads=1)
return (image_batch, tf.reshape(label_batch, [FLAGS.BATCH_SIZE]), filename_batch)
def model(image_batch, is_training=True):
"""
Defines the CNN architecture.
"""
# Input Layer
# Reshape image_batch to 4-D tensor: [batch_size, IMAGE_WIDTH, IMAGE_HEIGHT, channels]
'''input_layer = tf.reshape(image_batch, [-1, FLAGS.IMAGE_WIDTH,
#FLAGS.IMAGE_HEIGHT, 3])'''
'''the results vary with and without the above statement as well but the input is of the right shape in both cases'''
# Convolutional Layer #1
# Computes 32 features using a 5x5 filter with ReLU activation.
# Padding is added to preserve width and height.
# Input Tensor Shape: [batch_size, 256, 256, 3]
# Output Tensor Shape: [batch_size, 256, 256, 32]
conv1 = tf.layers.conv2d(inputs=image_batch, filters=32,
kernel_size=[5, 5], padding='same',
activation=tf.nn.relu)
# Pooling Layer #1
# First max pooling layer with a 2x2 filter and stride of 2
# Input Tensor Shape: [batch_size, 256, 256, 32]
# Output Tensor Shape: [batch_size, 128, 128, 32]
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
strides=2)
# Convolutional Layer #2
# Computes 64 features using a 5x5 filter.
# Padding is added to preserve width and height.
# Input Tensor Shape: [batch_size, 128, 128, 32]
# Output Tensor Shape: [batch_size, 128, 128, 64]
conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5,
5], padding='same', activation=tf.nn.relu)
# Pooling Layer #2
# Second max pooling layer with a 2x2 filter and stride of 2
# Input Tensor Shape: [batch_size, 128, 128, 64]
# Output Tensor Shape: [batch_size, 64, 64, 64]
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
strides=2)
# Convolutional Layer #3
# Computes 128 features using 5x5 filter.
# Input tensor shape: [batch_size, 64, 64, 64]
# Output Tensor Shape: [batch_size, 64, 64, 128]
conv3 = tf.layers.conv2d(inputs=pool2, filters=128, kernel_size=[5,
5], padding='same', activation=tf.nn.relu)
# Pooling Layer #3
# Third max pooling layer with 2x2 filter and stride of 2
# Input Tensor Shape: [batch_size, 64, 64, 128]
# Output Tensor Shape: [batch_size, 32, 32, 128]
pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2],
strides=2)
# Flatten tensor into a batch of vectors
# Input Tensor Shape: [batch_size, 32, 32, 128]
# Output Tensor Shape: [batch_size, 32 * 32 * 128]
pool3_flat = tf.reshape(pool3, [-1, 32 * 32 * 128])
# Dense Layer or Fully Connected Layer #1
# Densely connected layer with 2048 neurons
# Input Tensor Shape: [batch_size, 32 * 32 * 128]
# Output Tensor Shape: [batch_size, 2048]
fc_Layer_1 = tf.layers.dense(inputs=pool3_flat, units=2048,
activation=tf.nn.relu)
# Add dropout operation; FLAGS.DROP_RATE probability that element will be kept
# eg. if FLAGS.DROP_RATE = 0.4. 40% of elements will be randomly dropped
dropout = tf.layers.dropout(inputs=fc_Layer_1,
rate=FLAGS.DROP_RATE,
training=is_training)
# Dense Layer or Fully Connected Layer #2
# Densely connected layer with 1024 neurons
# Output Tensor Shape: [batch_size, 1024]
fc_Layer_2 = tf.layers.dense(inputs=dropout, units=1024,
activation=tf.nn.relu)
# Dense Layer or Fully Connected Layer #3 or
# Logits layer
# Input Tensor Shape: [batch_size, 1024]
# Output Tensor Shape: [batch_size, 2]
_logits = tf.layers.dense(inputs=fc_Layer_2,
units=FLAGS.NO_OF_CLASSES)
return _logits
def loss_fn(logits, labels):
"""
Finds the loss on the output ofthe final layer
or the logits layer of the CNN model and the actual labels.
Args:
logits: The output of the final layer of the model.
labels: The actual labels of the images
"""
# Convert the data type of the tensor
# Requiredd for the entropy function.
labels = tf.cast(labels, tf.int32)
# Find the loss by calculating the cross entropy
cross_entropy = \
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
labels=labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy,
name='cross_entropy')
return cross_entropy_mean
def optimizer_fn(loss):
"""
Runs an optimizer on the loss function for back propagation.
In this case, Gradient Descent is used.
Args:
loss: the loss function on which to apply the optimizer.
"""
optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.LEARNING_RATE)
_training_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return _training_op
def preproc_jpeg(image_buffer):
"""
Preprocessing function that resizes images
Called only in the export function
"""
image = tf.image.decode_jpeg(image_buffer, channels=3)
crop = tf.image.resize_images(image, (FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT))
image_out = tf.image.per_image_standardization(crop)
return image_out
def train_fn():
"""
Runs the training graph
"""
# Load image and label data and store in batches.
(images, labels, filenames) = batch_inputs(FLAGS.DATA_DIR, is_training=True)
# Load the CNN model.
logits_out = model(images, is_training=True)
# Calculates the loss of the final layer of the model.
loss_out = loss_fn(logits_out, labels)
# Apply optimizer function for back propagation on the loss.
# Final op that is run/executed.
train_op = optimizer_fn(loss_out)
# For saving the checkpoints.
saver = tf.train.Saver()
# Required for effiecient GPU usage
config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True
config.gpu_options.allocator_type = 'BFC'
with tf.Session(config=config) as sess:
with tf.device(FLAGS.device_id):
# Initialize all variables
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
# Run for FLAGS.EPOCH times
for epoch_index in range(FLAGS.EPOCH):
# Run for FLAGS.NO_OF_TRAINING_IMAGES/FLAGS.BATCH_SIZE times
# to go through all batches
for batch_index in range(no_of_batches):
# Run train_op, the computational graph for the CNN;
# loss_out is only run to print the accuracy
(_, loss_value) = sess.run([train_op, loss_out])
# Print results every 10 runs to monitor training.
is_last_step = (epoch_index + 1 == FLAGS.EPOCH)
if epoch_index % 10 == 0 or is_last_step:
print ('epoch: ', epoch_index)
print ('loss: ', loss_value)# loss should ideally keep reducing
# Save checkpoints
saver.save(sess, CHECKPOINTS_PATH)
print('Checkpoints successfully saved to', CHECKPOINTS_PATH)
coord.request_stop()
coord.join(threads)
def model_export():
"""
Exports the model for tensor serving
"""
with tf.Graph().as_default():
serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
feature_configs = {'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string)}
tf_example = tf.parse_example(serialized_tf_example, feature_configs)
jpegs = tf_example['image/encoded']
images = tf.map_fn(preproc_jpeg, jpegs, dtype=tf.float32)
label_list = ['Correct','Defect']
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
# Load the CNN model.
logits_out = model(images, is_training=False)
softmax_output = tf.nn.softmax(logits_out)
values, indices = tf.nn.top_k(softmax_output, 1)
class_tensor = tf.constant(label_list)
table = tf.contrib.lookup.index_to_string_table_from_tensor(class_tensor)
classes = table.lookup(tf.to_int64(indices))
# For restoring the checkpoints.
saver = tf.train.Saver()
# Restore the checkpoints
saver.restore(sess, CHECKPOINTS_PATH)
output_path = os.path.join(tf.compat.as_bytes(FLAGS.export_dir), tf.compat.as_bytes(str(FLAGS.model_version)))
print('Exporting trained model to %s' % output_path)
builder = tf.saved_model.builder.SavedModelBuilder(output_path)
# Build the signature_def_map.
classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(classes)
scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(values)
classification_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs={
tf.saved_model.signature_constants.CLASSIFY_INPUTS:
classify_inputs_tensor_info
},
outputs={
tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
classes_output_tensor_info,
tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
scores_output_tensor_info
},
method_name=tf.saved_model.signature_constants.
CLASSIFY_METHOD_NAME))
predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(jpegs)
prediction_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs={'images': predict_inputs_tensor_info},
outputs={
'classes': classes_output_tensor_info,
'scores': scores_output_tensor_info
},
method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
))
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
builder.add_meta_graph_and_variables(
sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
'predict_images':
prediction_signature,
tf.saved_model.signature_constants.
DEFAULT_SERVING_SIGNATURE_DEF_KEY:
classification_signature,
},
legacy_init_op=legacy_init_op)
builder.save()
print('Successfully exported model to %s' % FLAGS.export_dir)
def eval_fn():
"""
Runs the evaluation graph
"""
# Load image and label data and store in batches.
(images, labels, filenames) = batch_inputs(FLAGS.DATA_DIR, is_training=False)
# Load the CNN model.
logits_out = model(images, is_training=False)
softmax_output = tf.nn.softmax(logits_out)
# Gets the predicted output class index
prediction = tf.argmax(softmax_output, 1)
# Returns if predicted value is correct.
correct_prediction = tf.equal(prediction, labels)
# Calculates the accuracy of each batch
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# For restoring the checkpoints.
saver = tf.train.Saver()
# Required for effiecient GPU usage
config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True
config.gpu_options.allocator_type = 'BFC'
with tf.Session(config=config) as sess:
with tf.device(FLAGS.device_id):
# Initialize all variables
sess.run(tf.global_variables_initializer())
# Restore the checkpoints
saver.restore(sess, CHECKPOINTS_PATH)
print('Checkpoints restored successfully from', CHECKPOINTS_PATH)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
accuracyfinal = 0
for batch_index in range(no_of_batches):
accuracy_out, pred, fname = sess.run([accuracy, correct_prediction, filenames])
accuracyfinal += accuracy_out
print('Accuracy: ', accuracy_out*100)
# Prints the filenames along with predicted class
#print('filenames: ', fname)
#print('Prediction: ', pred)
print('Accuracy: ', (accuracyfinal/no_of_batches)*100)
coord.request_stop()
coord.join(threads)
def resolve_file(fname):
"""
Function that checks if given fname exists
"""
if os.path.exists(fname): return fname
for suffix in ('.jpg', '.png', '.JPG', '.PNG', '.jpeg'):
cand = fname + suffix
if os.path.exists(cand):
return cand
return None
def inference_fn():
files = []
coder_obj = ImageCoder()
if (os.path.isdir(FLAGS.FILENAME)):
for relpath in os.listdir(FLAGS.FILENAME):
abspath = os.path.join(FLAGS.FILENAME, relpath)
if os.path.isfile(abspath) and any([abspath.endswith('.' + ty) for ty in ('jpg', 'png', 'JPG', 'PNG', 'jpeg')]):
#print(abspath)
files.append(abspath)
else:
files.append(FLAGS.FILENAME)
image_placeholder = tf.placeholder(tf.float32, [None, FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT, 3])
# Load the CNN model.
logits_out = model(image_placeholder, is_training=False)
softmax_output = tf.nn.softmax(logits_out)
# Gets the predicted output class index
prediction = tf.argmax(softmax_output, 1)
image_files = list(filter(lambda x: x is not None, [resolve_file(f) for f in files]))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for image_file in image_files:
# Read and get jpeg data from the image filename(s)
image_op = read_image(image_file, coder_obj) # defined in utils.py
saver = tf.train.Saver()
saver.restore(sess, CHECKPOINTS_PATH)
# Run the prediction function on the given image(s)
inference_result = sess.run(prediction, feed_dict={image_placeholder: image_op})
print('Result for image %s is: ' % image_file)
print(inference_result)
def main(argv=None):
if(FLAGS.TRAIN):
# Run training function
train_fn()
# Reset the training graph
tf.reset_default_graph()
if(FLAGS.EVAL):
# Run testing function
eval_fn()
if(FLAGS.EXPORT):
# Run export function
model_export()
if(FLAGS.INFERENCE):
# Run the inference function
inference_fn()
if __name__ == '__main__':
tf.app.run()
以下是依赖 utils.py :
的代码class ImageCoder(object):
def __init__(self):
# Create a single Session to run all image coding calls.
config = tf.ConfigProto(allow_soft_placement=True)
self._sess = tf.Session(config=config)
lf.crop_image = tf.image.resize_images(self._decode_jpeg, (RESIZE_WIDTH, RESIZE_HEIGHT))
self.image_standradisation = tf.image.per_image_standardization(self.crop_image)
self.num_img = 1
self.images_single = tf.placeholder(dtype=tf.float32, shape=(self.num_img, RESIZE_WIDTH, RESIZE_HEIGHT, 3))
self.image_batch_single = tf.stack(self.images_single)
self._images_batch = tf.reshape(self.image_standradisation, [1, RESIZE_WIDTH, RESIZE_HEIGHT, 3])
def png_to_jpeg(self, image_data):
return self._sess.run(self._png_to_jpeg,
feed_dict={self._png_data: image_data})
def decode_jpeg(self, image_data):
#image = self._sess.run(self.image_standradisation, feed_dict={self._decode_jpeg_data: image_data})
image = self._sess.run(self._images_batch, feed_dict={self._decode_jpeg_data: image_data})
return image
def run_stack(self, images_array):
return self._sess.run(self.image_batch_single, feed_dict={self.images_single: images_array})
def _is_png(filename):
"""Determine if a file contains a PNG format image.
Args:
filename: string, path of the image file.
Returns:
boolean indicating if the image is a PNG.
"""
return '.png' in filename
def read_image(filename, coder):
"""Process image file to get jpeg data and resize them.
Args:
filename: string, path to an image file e.g., '/path/to/example.JPG'.
coder: instance of ImageCoder to provide TensorFlow image coding utils.
Returns:
image_buffer: string, JPEG encoding of RGB image.
"""
# Read the image file.
with tf.gfile.FastGFile(filename, 'rb') as f:
image_data = f.read()
# Convert any PNG to JPEG's for consistency.
if _is_png(filename):
print('Converting PNG to JPEG for %s' % filename)
image_data = coder.png_to_jpeg(image_data)
image = coder.decode_jpeg(image_data)
return image
# Initializes function that converts PNG to JPEG data.
self._png_data = tf.placeholder(dtype=tf.string)
image = tf.image.decode_png(self._png_data, channels=3)
self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)
# Initializes function that decodes RGB JPEG data.
self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
self.crop_image = tf.image.resize_images(self._decode_jpeg, (RESIZE_WIDTH, RESIZE_HEIGHT))
self.image_standradisation = tf.image.per_image_standardization(self.crop_image)
self.num_img = 1
self.images_single = tf.placeholder(dtype=tf.float32, shape=(self.num_img, RESIZE_WIDTH, RESIZE_HEIGHT, 3))
self.image_batch_single = tf.stack(self.images_single)
self._images_batch = tf.reshape(self.image_standradisation, [1, RESIZE_WIDTH, RESIZE_HEIGHT, 3])
def png_to_jpeg(self, image_data):
return self._sess.run(self._png_to_jpeg,
feed_dict={self._png_data: image_data})
def decode_jpeg(self, image_data):
#image = self._sess.run(self.image_standradisation, feed_dict={self._decode_jpeg_data: image_data})
image = self._sess.run(self._images_batch, feed_dict={self._decode_jpeg_data: image_data})
return image
def run_stack(self, images_array):
return self._sess.run(self.image_batch_single, feed_dict={self.images_single: images_array})
def _is_png(filename):
"""Determine if a file contains a PNG format image.
Args:
filename: string, path of the image file.
Returns:
boolean indicating if the image is a PNG.
"""
return '.png' in filename
def read_image(filename, coder):
"""Process image file to get jpeg data and resize them.
Args:
filename: string, path to an image file e.g., '/path/to/example.JPG'.
coder: instance of ImageCoder to provide TensorFlow image coding utils.
Returns:
image_buffer: string, JPEG encoding of RGB image.
"""
# Read the image file.
with tf.gfile.FastGFile(filename, 'rb') as f:
image_data = f.read()
# Convert any PNG to JPEG's for consistency.
if _is_png(filename):
print('Converting PNG to JPEG for %s' % filename)
image_data = coder.png_to_jpeg(image_data)
image = coder.decode_jpeg(image_data)
return image