Question

为了更好地理解在TensorFlow中创建CNN模型，我编写了一个代码，用于在模型上训练，评估和执行图像推理。我有两个输出类。代码运行正常，没有任何错误。

以下是我遇到问题的地方：我在图片 Image_a.jpg 上运行推理功能并获得结果 Class_a 。现在我在图像文件夹上运行推理功能，其中包括 Image_a.jpg 但是我在这里得到的结果 Image_a.jpg 与我得到的结果不符我只测试那个图像。很多图像都会发生这种情况。

我做错了什么？

这是主要代码：

class _image_object:    
    def __init__(self):
        self.image = tf.Variable([], dtype=tf.string)
        self.height = tf.Variable([], dtype=tf.int64)
        self.width = tf.Variable([], dtype=tf.int64)
        self.filename = tf.Variable([], dtype=tf.string)
        self.label = tf.Variable([], dtype=tf.int32)

def get_files(data_dir, subset):
    """
    Gets the traning/validation image data or tfRecord files
    from given data_dir.
    Args:
        data_dir: directory where the tfRecord files lie.
        subset: train or validation
    Returns:
        python list of all (tfRecord) data set files.
    Raises:
        ValueError: if there are not data_files matching the subset.
    """

    if subset not in ['train', 'validation']:
        print ('Invalid subset!')
        exit(-1)

    # Gets all files whose names start with 'subset' in 'data_dir' path
    # and makes a list.
    tf_record_pattern = os.path.join(data_dir, '%s-*' % subset)
    data_files = tf.gfile.Glob(tf_record_pattern)

    print('Data files found: ', data_files)

    if not data_files:
        print ('No files found for data dir %s at %s' % (subset,
                data_dir))
        exit(-1)

    # Return the list of filenames
    return data_files    

def read_and_decode(filename_queue):
    """
    Parses the tfRecord files and extracts the image data and label data.
    NOTE:
    The format used for the features dict here to parse the required data 
    must have the same format that was used when creating the tfRecord files.
    Args:
        filename_queue: queue of the data files to be read.
    Returns:
        An object of the _image_object which contains the required image and
        label data.
    """

    # Creates an object of the TFRecordReader.
    reader = tf.TFRecordReader()
    (_, serialized_example) = reader.read(filename_queue)

    # Parses the required data from the tfRecord files.
    features = tf.parse_single_example(serialized_example, features = {
        'image/height': tf.FixedLenFeature([], dtype=tf.int64),
        'image/width': tf.FixedLenFeature([], dtype=tf.int64),
        'image/class/label': tf.FixedLenFeature([], dtype=tf.int64),
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string),
        'image/filename': tf.FixedLenFeature([], dtype=tf.string),})

    # Gets the raw pixel values of the image.
    image_encoded = features['image/encoded']

    # Decode a JPEG-encoded image to a uint8 tensor.
    image_raw = tf.image.decode_jpeg(image_encoded, channels=3)

    # Creates an object of the _image_object class.
    image_object = _image_object()

    # Resizes images used for training/testing to the size expected by the model.
    image_object.image = \
        tf.image.resize_image_with_crop_or_pad(image_raw,
            FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT)

    # Stores path and names of the images
    image_object.filename = features['image/filename']

    # Stores the labels
    image_object.label = tf.cast(features['image/class/label'], tf.int64)

    return image_object    

def batch_inputs(data_dir, is_training):
    """
    Parses the image and label data from the tfRecord files.
    Stores the image and labels data in batches.
    Args:
        data_dir: directory where the tfRecord files lie.
        is_training: boolean to indicate if training is being performed or validation.
    Returns: A list of tensors for image data and labels as batches.
    """

    if is_training:

        # Gets the list of training data files.
        files = get_files(data_dir, 'train')

        # Shuffles the filenames and outputs them to a queue for an input pipeline.
        filename_queue = tf.train.string_input_producer(files, shuffle=True)
    else:

        # Gets the list of validation data files.
        files = get_files(data_dir, 'validation')

        # Outputs the filenames to a queue for an input pipeline.
        filename_queue = tf.train.string_input_producer(files, shuffle=False)

    # Reads and parses the tfRecrd files to get image and label data.
    image_object = read_and_decode(filename_queue)

    # Normalizes the image. 
    image = tf.image.per_image_standardization(image_object.image)
    label = image_object.label

    filename = image_object.filename

    # Store the data as batches
    (image_batch, label_batch, filename_batch) = tf.train.batch([image, label, filename], batch_size=FLAGS.BATCH_SIZE, num_threads=1)

    return (image_batch, tf.reshape(label_batch, [FLAGS.BATCH_SIZE]), filename_batch)    

def model(image_batch, is_training=True):
    """
    Defines the CNN architecture.
    """

    # Input Layer
    # Reshape image_batch to 4-D tensor: [batch_size, IMAGE_WIDTH, IMAGE_HEIGHT, channels]
    '''input_layer = tf.reshape(image_batch, [-1, FLAGS.IMAGE_WIDTH,
                             #FLAGS.IMAGE_HEIGHT, 3])'''

    '''the results vary with and without the above statement as well but the input is of the right shape in both cases'''

    # Convolutional Layer #1
    # Computes 32 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 256, 256, 3]
    # Output Tensor Shape: [batch_size, 256, 256, 32]
    conv1 = tf.layers.conv2d(inputs=image_batch, filters=32,
                             kernel_size=[5, 5], padding='same',
                             activation=tf.nn.relu)

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 256, 256, 32]
    # Output Tensor Shape: [batch_size, 128, 128, 32]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                    strides=2)

    # Convolutional Layer #2
    # Computes 64 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 128, 128, 32]
    # Output Tensor Shape: [batch_size, 128, 128, 64]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5,
                             5], padding='same', activation=tf.nn.relu)

    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 128, 128, 64]
    # Output Tensor Shape: [batch_size, 64, 64, 64]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                    strides=2)

    # Convolutional Layer #3
    # Computes 128 features using 5x5 filter.
    # Input tensor shape: [batch_size, 64, 64, 64]
    # Output Tensor Shape: [batch_size, 64, 64, 128]
    conv3 = tf.layers.conv2d(inputs=pool2, filters=128, kernel_size=[5,
                             5], padding='same', activation=tf.nn.relu)

    # Pooling Layer #3
    # Third max pooling layer with 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 64, 64, 128]
    # Output Tensor Shape: [batch_size, 32, 32, 128]
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2],
                                    strides=2)

    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 32, 32, 128]
    # Output Tensor Shape: [batch_size, 32 * 32 * 128]
    pool3_flat = tf.reshape(pool3, [-1, 32 * 32 * 128])

    # Dense Layer or Fully Connected Layer #1
    # Densely connected layer with 2048 neurons
    # Input Tensor Shape: [batch_size, 32 * 32 * 128]
    # Output Tensor Shape: [batch_size, 2048]
    fc_Layer_1 = tf.layers.dense(inputs=pool3_flat, units=2048,
                                 activation=tf.nn.relu)

    # Add dropout operation; FLAGS.DROP_RATE probability that element will be kept
    # eg. if FLAGS.DROP_RATE = 0.4. 40% of elements will be randomly dropped
    dropout = tf.layers.dropout(inputs=fc_Layer_1,
                                rate=FLAGS.DROP_RATE,
                                training=is_training)

    # Dense Layer or Fully Connected Layer #2
    # Densely connected layer with 1024 neurons
    # Output Tensor Shape: [batch_size, 1024]
    fc_Layer_2 = tf.layers.dense(inputs=dropout, units=1024,
                                 activation=tf.nn.relu)

    # Dense Layer or Fully Connected Layer #3 or 
    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 2]    
    _logits = tf.layers.dense(inputs=fc_Layer_2,
                              units=FLAGS.NO_OF_CLASSES)

    return _logits    

def loss_fn(logits, labels):
    """
    Finds the loss on the output ofthe final layer 
    or the logits layer of the CNN model and the actual labels.
    Args:
        logits: The output of the final layer of the model.
        labels: The actual labels of the images 
    """
    # Convert the data type of the tensor
    # Requiredd for the entropy function.
    labels = tf.cast(labels, tf.int32)

    # Find the loss by calculating the cross entropy
    cross_entropy = \
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
            labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy,
            name='cross_entropy')

    return cross_entropy_mean    

def optimizer_fn(loss):
    """
    Runs an optimizer on the loss function for back propagation.
    In this case, Gradient Descent is used.
    Args:
        loss: the loss function on which to apply the optimizer.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.LEARNING_RATE)

    _training_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())

    return _training_op    

def preproc_jpeg(image_buffer):
    """
    Preprocessing function that resizes images
    Called only in the export function
    """
    image = tf.image.decode_jpeg(image_buffer, channels=3)

    crop = tf.image.resize_images(image, (FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT))

    image_out = tf.image.per_image_standardization(crop)

    return image_out    

def train_fn():
    """
    Runs the training graph 
    """
    # Load image and label data and store in batches.
    (images, labels, filenames) = batch_inputs(FLAGS.DATA_DIR, is_training=True)

    # Load the CNN model.
    logits_out = model(images, is_training=True)

    # Calculates the loss of the final layer of the model.
    loss_out = loss_fn(logits_out, labels)

    # Apply optimizer function for back propagation on the loss.
    # Final op that is run/executed. 
    train_op = optimizer_fn(loss_out)

    # For saving the checkpoints.
    saver = tf.train.Saver()

    # Required for effiecient GPU usage
    config = tf.ConfigProto(allow_soft_placement = True)
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        with tf.device(FLAGS.device_id):
            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            # Run for FLAGS.EPOCH times
            for epoch_index in range(FLAGS.EPOCH):
                # Run for FLAGS.NO_OF_TRAINING_IMAGES/FLAGS.BATCH_SIZE times
                # to go through all batches
                for batch_index in range(no_of_batches):
                    # Run train_op, the computational graph for the CNN; 
                    # loss_out is only run to print the accuracy
                    (_, loss_value) = sess.run([train_op, loss_out])

                # Print results every 10 runs to monitor training.
                is_last_step = (epoch_index + 1 == FLAGS.EPOCH)

                if epoch_index % 10 == 0 or is_last_step:
                    print ('epoch: ', epoch_index)
                    print ('loss: ', loss_value)# loss should ideally keep reducing

            # Save checkpoints
            saver.save(sess, CHECKPOINTS_PATH)

            print('Checkpoints successfully saved to', CHECKPOINTS_PATH)

            coord.request_stop()
            coord.join(threads)    

def model_export():
    """
    Exports the model for tensor serving
    """
    with tf.Graph().as_default():

        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')

        feature_configs = {'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string)}

        tf_example = tf.parse_example(serialized_tf_example, feature_configs)

        jpegs = tf_example['image/encoded']

        images = tf.map_fn(preproc_jpeg, jpegs, dtype=tf.float32)

        label_list = ['Correct','Defect']

        config = tf.ConfigProto(allow_soft_placement=True)

        with tf.Session(config=config) as sess:
            # Load the CNN model.
            logits_out = model(images, is_training=False)

            softmax_output = tf.nn.softmax(logits_out)

            values, indices = tf.nn.top_k(softmax_output, 1)

            class_tensor = tf.constant(label_list)

            table = tf.contrib.lookup.index_to_string_table_from_tensor(class_tensor)

            classes = table.lookup(tf.to_int64(indices))

            # For restoring the checkpoints.
            saver = tf.train.Saver()

            # Restore the checkpoints
            saver.restore(sess, CHECKPOINTS_PATH)

            output_path = os.path.join(tf.compat.as_bytes(FLAGS.export_dir), tf.compat.as_bytes(str(FLAGS.model_version)))

            print('Exporting trained model to %s' % output_path)

            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Build the signature_def_map.
            classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(serialized_tf_example)

            classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(classes)

            scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(values)

            classification_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={
                        tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                    classify_inputs_tensor_info
                    },
                    outputs={
                    tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
                        classes_output_tensor_info,
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                        scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    CLASSIFY_METHOD_NAME))

            predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(jpegs)

            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_inputs_tensor_info},
                    outputs={
                        'classes': classes_output_tensor_info,
                        'scores': scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                ))

            legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images':
                    prediction_signature,
                    tf.saved_model.signature_constants.
                    DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
                },
                legacy_init_op=legacy_init_op)

            builder.save()

            print('Successfully exported model to %s' % FLAGS.export_dir)    

def eval_fn():
    """
    Runs the evaluation graph
    """
    # Load image and label data and store in batches.
    (images, labels, filenames) = batch_inputs(FLAGS.DATA_DIR, is_training=False)

    # Load the CNN model.
    logits_out = model(images, is_training=False)

    softmax_output = tf.nn.softmax(logits_out)

    # Gets the predicted output class index
    prediction = tf.argmax(softmax_output, 1)

    # Returns if predicted value is correct.
    correct_prediction = tf.equal(prediction, labels)

    # Calculates the accuracy of each batch
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # For restoring the checkpoints.
    saver = tf.train.Saver()

    # Required for effiecient GPU usage
    config = tf.ConfigProto(allow_soft_placement = True)
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        with tf.device(FLAGS.device_id):            
            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore the checkpoints
            saver.restore(sess, CHECKPOINTS_PATH)

            print('Checkpoints restored successfully from', CHECKPOINTS_PATH)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            accuracyfinal = 0

            for batch_index in range(no_of_batches):
                accuracy_out, pred, fname = sess.run([accuracy, correct_prediction, filenames])
                accuracyfinal += accuracy_out

                print('Accuracy: ', accuracy_out*100)
                # Prints the filenames along with predicted class
                #print('filenames: ', fname)
                #print('Prediction: ', pred)

            print('Accuracy: ', (accuracyfinal/no_of_batches)*100)

            coord.request_stop()
            coord.join(threads)    

def resolve_file(fname):
    """
    Function that checks if given fname exists
    """
    if os.path.exists(fname): return fname
    for suffix in ('.jpg', '.png', '.JPG', '.PNG', '.jpeg'):
        cand = fname + suffix
        if os.path.exists(cand):
            return cand
    return None    

def inference_fn():
    files = []

    coder_obj = ImageCoder()

    if (os.path.isdir(FLAGS.FILENAME)):
        for relpath in os.listdir(FLAGS.FILENAME):
            abspath = os.path.join(FLAGS.FILENAME, relpath)

            if os.path.isfile(abspath) and any([abspath.endswith('.' + ty) for ty in ('jpg', 'png', 'JPG', 'PNG', 'jpeg')]):
                #print(abspath)
                files.append(abspath)
    else:
        files.append(FLAGS.FILENAME)

    image_placeholder = tf.placeholder(tf.float32, [None, FLAGS.IMAGE_WIDTH, FLAGS.IMAGE_HEIGHT, 3])

    # Load the CNN model.
    logits_out = model(image_placeholder, is_training=False)

    softmax_output = tf.nn.softmax(logits_out)

    # Gets the predicted output class index
    prediction = tf.argmax(softmax_output, 1)

    image_files = list(filter(lambda x: x is not None, [resolve_file(f) for f in files]))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for image_file in image_files:
            # Read and get jpeg data from the image filename(s) 
            image_op = read_image(image_file, coder_obj) # defined in utils.py
            saver = tf.train.Saver()
            saver.restore(sess, CHECKPOINTS_PATH)

            # Run the prediction function on the given image(s)
            inference_result = sess.run(prediction, feed_dict={image_placeholder: image_op})
            print('Result for image %s is: ' % image_file)
            print(inference_result)               

def main(argv=None):

    if(FLAGS.TRAIN):
        # Run training function
        train_fn()

        # Reset the training graph
        tf.reset_default_graph()

    if(FLAGS.EVAL):
        # Run testing function
        eval_fn()

    if(FLAGS.EXPORT):
        # Run export function
        model_export()

    if(FLAGS.INFERENCE):
        # Run the inference function
        inference_fn()            

if __name__ == '__main__':
    tf.app.run()

以下是依赖 utils.py ：

的代码

class ImageCoder(object):        
    def __init__(self):
        # Create a single Session to run all image coding calls.
        config = tf.ConfigProto(allow_soft_placement=True)
        self._sess = tf.Session(config=config)
        lf.crop_image = tf.image.resize_images(self._decode_jpeg, (RESIZE_WIDTH, RESIZE_HEIGHT))

        self.image_standradisation = tf.image.per_image_standardization(self.crop_image)

        self.num_img = 1
        self.images_single = tf.placeholder(dtype=tf.float32, shape=(self.num_img, RESIZE_WIDTH, RESIZE_HEIGHT, 3))
        self.image_batch_single = tf.stack(self.images_single)

        self._images_batch = tf.reshape(self.image_standradisation, [1, RESIZE_WIDTH, RESIZE_HEIGHT, 3])


    def png_to_jpeg(self, image_data):
        return self._sess.run(self._png_to_jpeg,
                              feed_dict={self._png_data: image_data})

    def decode_jpeg(self, image_data):

        #image = self._sess.run(self.image_standradisation, feed_dict={self._decode_jpeg_data: image_data})
        image = self._sess.run(self._images_batch, feed_dict={self._decode_jpeg_data: image_data})            
        return image   

    def run_stack(self, images_array):
        return self._sess.run(self.image_batch_single, feed_dict={self.images_single: images_array})       


def _is_png(filename):
    """Determine if a file contains a PNG format image.
    Args:
    filename: string, path of the image file.
    Returns:
    boolean indicating if the image is a PNG.
    """
    return '.png' in filename

def read_image(filename, coder):
    """Process image file to get jpeg data and resize them.
    Args:
    filename: string, path to an image file e.g., '/path/to/example.JPG'.
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    Returns:
    image_buffer: string, JPEG encoding of RGB image.
    """

    # Read the image file.
    with tf.gfile.FastGFile(filename, 'rb') as f:
        image_data = f.read()

    # Convert any PNG to JPEG's for consistency.
    if _is_png(filename):
        print('Converting PNG to JPEG for %s' % filename)
        image_data = coder.png_to_jpeg(image_data)    

    image = coder.decode_jpeg(image_data)

    return image
        # Initializes function that converts PNG to JPEG data.
        self._png_data = tf.placeholder(dtype=tf.string)
        image = tf.image.decode_png(self._png_data, channels=3)
        self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)

        # Initializes function that decodes RGB JPEG data.
        self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
        self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
        self.crop_image = tf.image.resize_images(self._decode_jpeg, (RESIZE_WIDTH, RESIZE_HEIGHT))

        self.image_standradisation = tf.image.per_image_standardization(self.crop_image)

        self.num_img = 1
        self.images_single = tf.placeholder(dtype=tf.float32, shape=(self.num_img, RESIZE_WIDTH, RESIZE_HEIGHT, 3))
        self.image_batch_single = tf.stack(self.images_single)

        self._images_batch = tf.reshape(self.image_standradisation, [1, RESIZE_WIDTH, RESIZE_HEIGHT, 3])


    def png_to_jpeg(self, image_data):
        return self._sess.run(self._png_to_jpeg,
                              feed_dict={self._png_data: image_data})

    def decode_jpeg(self, image_data):

        #image = self._sess.run(self.image_standradisation, feed_dict={self._decode_jpeg_data: image_data})
        image = self._sess.run(self._images_batch, feed_dict={self._decode_jpeg_data: image_data})            
        return image   

    def run_stack(self, images_array):
        return self._sess.run(self.image_batch_single, feed_dict={self.images_single: images_array})       


def _is_png(filename):
    """Determine if a file contains a PNG format image.
    Args:
    filename: string, path of the image file.
    Returns:
    boolean indicating if the image is a PNG.
    """
    return '.png' in filename

def read_image(filename, coder):
    """Process image file to get jpeg data and resize them.
    Args:
    filename: string, path to an image file e.g., '/path/to/example.JPG'.
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    Returns:
    image_buffer: string, JPEG encoding of RGB image.
    """

    # Read the image file.
    with tf.gfile.FastGFile(filename, 'rb') as f:
        image_data = f.read()

    # Convert any PNG to JPEG's for consistency.
    if _is_png(filename):
        print('Converting PNG to JPEG for %s' % filename)
        image_data = coder.png_to_jpeg(image_data)    

    image = coder.decode_jpeg(image_data)

    return image

以下是我用作编写代码的引用的链接： 1，2，3

TensorFlow CNN模型的推理结果不同

0 个答案: