I am trying to solve a binary image-classification problem with TensorFlow and a CNN. The problem is that the loss does not decrease during training. So, have I chosen and implemented the loss function, and the CNN itself, correctly? Here is the log of the training process:
11:33: step 0, accuracy = 0.21, loss = 12.23 (29.4 examples/sec; 8.718 sec/batch)
11:36: step 100, accuracy = 0.44, loss = 42.59 (159.5 examples/sec; 1.605 sec/batch)
11:39: step 200, accuracy = 0.68, loss = 72.22 (155.5 examples/sec; 1.646 sec/batch)
11:41: step 300, accuracy = 0.39, loss = 35.76 (157.0 examples/sec; 1.631 sec/batch)
11:44: step 400, accuracy = 0.54, loss = 55.42 (155.7 examples/sec; 1.644 sec/batch)
11:47: step 500, accuracy = 0.68, loss = 72.16 (152.5 examples/sec; 1.679 sec/batch)
11:50: step 600, accuracy = 0.34, loss = 29.44 (157.1 examples/sec; 1.629 sec/batch)
11:53: step 700, accuracy = 0.58, loss = 59.97 (152.9 examples/sec; 1.675 sec/batch)
11:55: step 800, accuracy = 0.65, loss = 68.29 (154.2 examples/sec; 1.660 sec/batch)
11:58: step 900, accuracy = 0.36, loss = 32.56 (155.0 examples/sec; 1.651 sec/batch)
12:01: step 1000, accuracy = 0.56, loss = 54.97 (150.0 examples/sec; 1.707 sec/batch)
12:04: step 1100, accuracy = 0.62, loss = 63.46 (155.3 examples/sec; 1.648 sec/batch)
12:06: step 1200, accuracy = 0.36, loss = 31.22 (154.3 examples/sec; 1.659 sec/batch)
12:09: step 1300, accuracy = 0.57, loss = 57.37 (154.7 examples/sec; 1.654 sec/batch)
12:12: step 1400, accuracy = 0.57, loss = 57.37 (154.9 examples/sec; 1.653 sec/batch)
12:15: step 1500, accuracy = 0.31, loss = 25.31 (151.1 examples/sec; 1.695 sec/batch)
12:17: step 1600, accuracy = 0.57, loss = 58.76 (154.5 examples/sec; 1.657 sec/batch)
12:20: step 1700, accuracy = 0.55, loss = 56.54 (157.2 examples/sec; 1.628 sec/batch)
12:23: step 1800, accuracy = 0.34, loss = 28.18 (156.3 examples/sec; 1.637 sec/batch)
12:26: step 1900, accuracy = 0.64, loss = 67.77 (155.4 examples/sec; 1.647 sec/batch)
12:28: step 2000, accuracy = 0.50, loss = 48.81 (157.1 examples/sec; 1.630 sec/batch)
12:31: step 2100, accuracy = 0.30, loss = 23.36 (158.2 examples/sec; 1.618 sec/batch)
12:34: step 2200, accuracy = 0.66, loss = 70.25 (156.4 examples/sec; 1.637 sec/batch)
12:37: step 2300, accuracy = 0.53, loss = 53.08 (157.0 examples/sec; 1.630 sec/batch)
12:39: step 2400, accuracy = 0.30, loss = 24.90 (156.5 examples/sec; 1.636 sec/batch)
12:42: step 2500, accuracy = 0.67, loss = 72.63 (158.5 examples/sec; 1.615 sec/batch)
12:45: step 2600, accuracy = 0.49, loss = 47.38 (156.5 examples/sec; 1.636 sec/batch)
12:48: step 2700, accuracy = 0.32, loss = 26.96 (155.3 examples/sec; 1.648 sec/batch)
12:50: step 2800, accuracy = 0.69, loss = 74.03 (157.8 examples/sec; 1.623 sec/batch)
12:53: step 2900, accuracy = 0.47, loss = 44.35 (157.3 examples/sec; 1.627 sec/batch)
12:56: step 3000, accuracy = 0.36, loss = 32.07 (159.3 examples/sec; 1.607 sec/batch)
12:58: step 3100, accuracy = 0.65, loss = 69.93 (159.2 examples/sec; 1.608 sec/batch)
13:01: step 3200, accuracy = 0.45, loss = 42.89 (158.9 examples/sec; 1.611 sec/batch)
13:04: step 3300, accuracy = 0.43, loss = 40.28 (157.5 examples/sec; 1.625 sec/batch)
13:07: step 3400, accuracy = 0.63, loss = 65.13 (156.6 examples/sec; 1.635 sec/batch)
13:09: step 3500, accuracy = 0.43, loss = 40.59 (157.6 examples/sec; 1.624 sec/batch)
13:12: step 3600, accuracy = 0.40, loss = 37.83 (157.1 examples/sec; 1.630 sec/batch)
Here is the first script, data_converter.py. It resizes and crops every image to the same size and applies a few distortions, then writes each image to a binary file together with its label: [image_label_byte, image_converted_to_bytearray].
import cv2
import numpy as np
import os
import random
def __resize_and_crop(img, size):
    shape = np.shape(img)
    if len(shape) == 3:
        h = shape[0]
        w = shape[1]
        if h < w:
            scale = h / size
            shape = (size, int(w / scale))
        else:
            scale = w / size
            shape = (int(h / scale), size)
        img = cv2.resize(img, shape)
        h = shape[0]
        w = shape[1]
        if h < w:
            delta = (w - size) / 2
            img = img[delta:(w - delta), 0:size]
        else:
            delta = (h - size) / 2
            img = img[0:size, delta:(h - delta)]
        return img
    else:
        print("Wrong shape!")
        return None


def __randomly_rotate(img):
    degrees = [90, 180, 270]
    size = img.shape[0]
    rotation_matrix = cv2.getRotationMatrix2D((size / 2, size / 2), degrees[random.randint(0, 2)], 1.0)
    return cv2.warpAffine(img, rotation_matrix, (size, size))


def preprocess_image(root_folder, outputpath, size, distort):
    """
    Converts images from 'root_folder' into a byte array where first byte is
    a label 0 for cats and 1 for dogs. Afterwards, writes converted images into
    a file with 'outputpath'.
    """
    with open(outputpath, 'w') as output_file:
        output_file.write('')
    counter = 0
    with open(outputpath, 'br+') as output_file:
        for root, dirs, filenames in os.walk(root_folder):
            print(len(filenames))
            for fname in filenames:
                if fname.endswith('.jpg'):
                    img_path = os.path.join(root, fname)
                    img = cv2.imread(img_path)
                    resized = __resize_and_crop(img, size)
                    label = 0 if fname.split('.')[0] == 'cat' else 1
                    images = [resized]
                    if distort:
                        images.append(__randomly_rotate(resized))
                        images.append(cv2.flip(resized, 1))
                    for indx, i in enumerate(images):
                        b_arr = bytearray([label]) + bytearray(np.array(i).flatten())
                        if len(b_arr) == size * size * 3 + 1:
                            output_file.write(b_arr)
                            counter += 1
                            if counter % 1000 == 0:
                                print(counter)
        print("Total images: " + str(counter))


if __name__ == '__main__':
    preprocess_image('data/train', 'train_batch', 64, True)
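For reference, a record written by this script can be read back with plain numpy like this (a minimal sketch, assuming size = 64 as in the call above; not part of the pipeline itself):

import numpy as np

SIZE = 64
RECORD_BYTES = 1 + SIZE * SIZE * 3  # 1 label byte + flattened BGR image

with open('train_batch', 'rb') as f:
    record = np.frombuffer(f.read(RECORD_BYTES), dtype=np.uint8)

label = int(record[0])                     # 0 = cat, 1 = dog
image = record[1:].reshape(SIZE, SIZE, 3)  # flatten() stored the pixels row by row as (h, w, channels)
print('label:', label, 'image shape:', image.shape)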
The second script is cnn_input, which feeds the data to the CNN:
import tensorflow as tf
IMAGE_SIZE = 64
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def read_image(filename_queue):
    class ImageRecord(object):
        pass

    result = ImageRecord()
    label_bytes = 1
    result.height = IMAGE_SIZE
    result.width = IMAGE_SIZE
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)

    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])
    return result


def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
    num_preprocess_threads = 4
    if shuffle:
        images, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        images, label_batch = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)
    print("Finished shuffle batch")
    return images, label_batch


def inputs(data_files, batch_size, train):
    for f in data_files:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)
    filename_queue = tf.train.string_input_producer(data_files)
    image = read_image(filename_queue)
    reshaped_image = tf.cast(image.uint8image, tf.float32)
    if train:
        reshaped_image = tf.image.random_brightness(reshaped_image, max_delta=63)
        reshaped_image = tf.image.random_contrast(reshaped_image, lower=0.2, upper=1.8)
    reshaped_image = tf.image.per_image_whitening(reshaped_image)
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN if train else NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
    min_fraction_of_examples_in_queue = 0.2
    min_queue_examples = int(num_examples_per_epoch *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d images. '
          'This will take a few minutes.' % min_queue_examples)
    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(reshaped_image, image.label,
                                           min_queue_examples, batch_size,
                                           shuffle=train)
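To check what this pipeline returns, something like the following should pull a single batch and print its shapes (a sketch using the same queue-runner API as above; the batch size of 32 is arbitrary):

import tensorflow as tf
import cnn_input

# 'data/train_batch' is the file produced by data_converter.py.
images, labels = cnn_input.inputs(['data/train_batch'], batch_size=32, train=True)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img_batch, lbl_batch = sess.run([images, labels])
    print(img_batch.shape)  # expected (32, 64, 64, 3)
    print(lbl_batch.shape)  # expected (32, 1), one label byte per image
    coord.request_stop()
    coord.join(threads)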
The third one is the CNN part:
import datetime
import tensorflow as tf
import time
import numpy as np
import os
import cnn_input
tf.app.flags.DEFINE_integer('batch_size', 256, """Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('train_dir', 'train/',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 60000, '')

FLAGS = tf.flags.FLAGS


def __conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def __maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


def cnn(x):
    with tf.variable_scope('conv1'):
        w = tf.Variable(tf.random_normal([5, 5, 3, 64]))
        b = tf.Variable(tf.random_normal([64]))
        conv1 = __conv2d(x, w, b)
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm1')

    with tf.variable_scope('conv2'):
        w = tf.Variable(tf.random_normal([5, 5, 64, 64]))
        b = tf.Variable(tf.random_normal([64]))
        conv2 = __conv2d(norm1, w, b)
        # norm2
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')
        # pool2
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # local3
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        dim = reshape.get_shape()[1].value
        w = tf.Variable(tf.random_normal([dim, 1024], stddev=0.04))
        b = tf.Variable(tf.random_normal([1024]))
        local3 = tf.nn.relu(tf.matmul(reshape, w) + b, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        w = tf.Variable(tf.random_normal([1024, 192], stddev=0.04))
        b = tf.Variable(tf.random_normal([192]))
        local4 = tf.nn.relu(tf.matmul(local3, w) + b, name=scope.name)

    # softmax, i.e. softmax(WX + b)
    with tf.variable_scope('softmax_linear') as scope:
        w = tf.Variable(tf.random_normal([192, 1], stddev=1 / 192.0))
        b = tf.Variable(tf.random_normal([1]))
        softmax_linear = tf.add(tf.matmul(local4, w), b, name=scope.name)

    return tf.nn.sigmoid(softmax_linear)


def loss(predicted, labels):
    labels = tf.cast(labels, tf.float32)
    c = tf.nn.l2_loss(predicted - labels)
    tf.add_to_collection('losses', c)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def optimize(total_loss, global_step):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.005)
    optimizer.minimize(total_loss, global_step)
    return tf.no_op()


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = cnn_input.inputs(['data/train_batch'], tf.app.flags.FLAGS.batch_size, True)
        predicted = cnn(images)
        cost = loss(predicted, labels)
        train_op = optimize(cost, global_step)
        saver = tf.train.Saver(tf.all_variables())
        accuracy = tf.reduce_mean(tf.abs(predicted - tf.cast(labels, tf.float32)))
        init = tf.initialize_all_variables()
        sess = tf.Session()
        sess.run(init)
        tf.train.start_queue_runners(sess)
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, acc = sess.run([train_op, cost, accuracy])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % 100 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, accuracy = %.2f, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.datetime.now(), step, acc, loss_value,
                                    examples_per_sec, sec_per_batch))
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)


if __name__ == '__main__':
    train()
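For scale, this is roughly what the loss defined above evaluates to for a toy batch (a plain numpy sketch with hypothetical values, not part of the training code): tf.nn.l2_loss(t) computes sum(t ** 2) / 2 over the whole tensor, so the reported value is summed over the batch rather than averaged.

import numpy as np

# Hypothetical toy batch: sigmoid outputs stuck near 0.5, half cats (0), half dogs (1).
batch_size = 256
predicted = np.full((batch_size, 1), 0.5, dtype=np.float32)
labels = np.concatenate([np.zeros((128, 1)), np.ones((128, 1))]).astype(np.float32)

# Same computation as tf.nn.l2_loss(predicted - labels): sum of squares divided by 2.
l2 = np.sum((predicted - labels) ** 2) / 2.0
print(l2)  # 32.0 for this toy batch, the same order of magnitude as the logged losses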