I am trying to solve a binary image-classification problem with TensorFlow and a CNN. The problem is that the loss does not decrease during training. So, have I chosen and implemented the loss function, and the CNN itself, correctly? Here is the log of the training process:
11:33: step 0, accuracy = 0.21, loss = 12.23 (29.4 examples/sec; 8.718 sec/batch)
11:36: step 100, accuracy = 0.44, loss = 42.59 (159.5 examples/sec; 1.605 sec/batch)
11:39: step 200, accuracy = 0.68, loss = 72.22 (155.5 examples/sec; 1.646 sec/batch)
11:41: step 300, accuracy = 0.39, loss = 35.76 (157.0 examples/sec; 1.631 sec/batch)
11:44: step 400, accuracy = 0.54, loss = 55.42 (155.7 examples/sec; 1.644 sec/batch)
11:47: step 500, accuracy = 0.68, loss = 72.16 (152.5 examples/sec; 1.679 sec/batch)
11:50: step 600, accuracy = 0.34, loss = 29.44 (157.1 examples/sec; 1.629 sec/batch)
11:53: step 700, accuracy = 0.58, loss = 59.97 (152.9 examples/sec; 1.675 sec/batch)
11:55: step 800, accuracy = 0.65, loss = 68.29 (154.2 examples/sec; 1.660 sec/batch)
11:58: step 900, accuracy = 0.36, loss = 32.56 (155.0 examples/sec; 1.651 sec/batch)
12:01: step 1000, accuracy = 0.56, loss = 54.97 (150.0 examples/sec; 1.707 sec/batch)
12:04: step 1100, accuracy = 0.62, loss = 63.46 (155.3 examples/sec; 1.648 sec/batch)
12:06: step 1200, accuracy = 0.36, loss = 31.22 (154.3 examples/sec; 1.659 sec/batch)
12:09: step 1300, accuracy = 0.57, loss = 57.37 (154.7 examples/sec; 1.654 sec/batch)
12:12: step 1400, accuracy = 0.57, loss = 57.37 (154.9 examples/sec; 1.653 sec/batch)
12:15: step 1500, accuracy = 0.31, loss = 25.31 (151.1 examples/sec; 1.695 sec/batch)
12:17: step 1600, accuracy = 0.57, loss = 58.76 (154.5 examples/sec; 1.657 sec/batch)
12:20: step 1700, accuracy = 0.55, loss = 56.54 (157.2 examples/sec; 1.628 sec/batch)
12:23: step 1800, accuracy = 0.34, loss = 28.18 (156.3 examples/sec; 1.637 sec/batch)
12:26: step 1900, accuracy = 0.64, loss = 67.77 (155.4 examples/sec; 1.647 sec/batch)
12:28: step 2000, accuracy = 0.50, loss = 48.81 (157.1 examples/sec; 1.630 sec/batch)
12:31: step 2100, accuracy = 0.30, loss = 23.36 (158.2 examples/sec; 1.618 sec/batch)
12:34: step 2200, accuracy = 0.66, loss = 70.25 (156.4 examples/sec; 1.637 sec/batch)
12:37: step 2300, accuracy = 0.53, loss = 53.08 (157.0 examples/sec; 1.630 sec/batch)
12:39: step 2400, accuracy = 0.30, loss = 24.90 (156.5 examples/sec; 1.636 sec/batch)
12:42: step 2500, accuracy = 0.67, loss = 72.63 (158.5 examples/sec; 1.615 sec/batch)
12:45: step 2600, accuracy = 0.49, loss = 47.38 (156.5 examples/sec; 1.636 sec/batch)
12:48: step 2700, accuracy = 0.32, loss = 26.96 (155.3 examples/sec; 1.648 sec/batch)
12:50: step 2800, accuracy = 0.69, loss = 74.03 (157.8 examples/sec; 1.623 sec/batch)
12:53: step 2900, accuracy = 0.47, loss = 44.35 (157.3 examples/sec; 1.627 sec/batch)
12:56: step 3000, accuracy = 0.36, loss = 32.07 (159.3 examples/sec; 1.607 sec/batch)
12:58: step 3100, accuracy = 0.65, loss = 69.93 (159.2 examples/sec; 1.608 sec/batch)
13:01: step 3200, accuracy = 0.45, loss = 42.89 (158.9 examples/sec; 1.611 sec/batch)
13:04: step 3300, accuracy = 0.43, loss = 40.28 (157.5 examples/sec; 1.625 sec/batch)
13:07: step 3400, accuracy = 0.63, loss = 65.13 (156.6 examples/sec; 1.635 sec/batch)
13:09: step 3500, accuracy = 0.43, loss = 40.59 (157.6 examples/sec; 1.624 sec/batch)
13:12: step 3600, accuracy = 0.40, loss = 37.83 (157.1 examples/sec; 1.630 sec/batch)
Here is the first script, data_converter.py. It resizes and crops every image to the same size and applies a few distortions, then writes each image to a binary file together with its label: [image_label_byte, image_converted_to_bytearray].
import cv2
import numpy as np
import os
import random
def __resize_and_crop(img, size):
    shape = np.shape(img)
    if len(shape) == 3:
        h = shape[0]
        w = shape[1]
        if h < w:
            scale = h / size
            shape = (size, int(w / scale))
        else:
            scale = w / size
            shape = (int(h / scale), size)
        img = cv2.resize(img, shape)
        h = shape[0]
        w = shape[1]
        if h < w:
            delta = (w - size) / 2
            img = img[delta:(w - delta), 0:size]
        else:
            delta = (h - size) / 2
            img = img[0:size, delta:(h - delta)]
        return img
    else:
        print("Wrong shape!")
        return None


def __randomly_rotate(img):
    degrees = [90, 180, 270]
    size = img.shape[0]
    rotation_matrix = cv2.getRotationMatrix2D((size / 2, size / 2), degrees[random.randint(0, 2)], 1.0)
    return cv2.warpAffine(img, rotation_matrix, (size, size))


def preprocess_image(root_folder, outputpath, size, distort):
    """
    Converts images from 'root_folder' into a byte array where first byte is
    a label 0 for cats and 1 for dogs. Afterwards, writes converted images into
    a file with 'outputpath'.
    """
    with open(outputpath, 'w') as output_file:
        output_file.write('')
    counter = 0
    with open(outputpath, 'br+') as output_file:
        for root, dirs, filenames in os.walk(root_folder):
            print(len(filenames))
            for fname in filenames:
                if fname.endswith('.jpg'):
                    img_path = os.path.join(root, fname)
                    img = cv2.imread(img_path)
                    resized = __resize_and_crop(img, size)
                    label = 0 if fname.split('.')[0] == 'cat' else 1
                    images = [resized]
                    if distort:
                        images.append(__randomly_rotate(resized))
                        images.append(cv2.flip(resized, 1))
                    for indx, i in enumerate(images):
                        b_arr = bytearray([label]) + bytearray(np.array(i).flatten())
                        if len(b_arr) == size * size * 3 + 1:
                            output_file.write(b_arr)
                            counter += 1
                            if counter % 1000 == 0:
                                print(counter)
        print("Total images: " + str(counter))


if __name__ == '__main__':
    preprocess_image('data/train', 'train_batch', 64, True)
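For reference, a record written by this script can be read back with plain numpy like this (a minimal sketch, assuming size = 64 as in the call above; not part of the pipeline itself):

import numpy as np

SIZE = 64
RECORD_BYTES = 1 + SIZE * SIZE * 3  # 1 label byte + flattened BGR image

with open('train_batch', 'rb') as f:
    record = np.frombuffer(f.read(RECORD_BYTES), dtype=np.uint8)

label = int(record[0])                     # 0 = cat, 1 = dog
image = record[1:].reshape(SIZE, SIZE, 3)  # flatten() stored the pixels row by row as (h, w, channels)
print('label:', label, 'image shape:', image.shape)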
The second script is cnn_input, which feeds the data to the CNN:
import tensorflow as tf
IMAGE_SIZE = 64
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def read_image(filename_queue):
    class ImageRecord(object):
        pass

    result = ImageRecord()
    label_bytes = 1
    result.height = IMAGE_SIZE
    result.width = IMAGE_SIZE
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)

    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])
    return result


def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
    num_preprocess_threads = 4
    if shuffle:
        images, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        images, label_batch = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)
    print("Finished shuffle batch")
    return images, label_batch


def inputs(data_files, batch_size, train):
    for f in data_files:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)
    filename_queue = tf.train.string_input_producer(data_files)
    image = read_image(filename_queue)
    reshaped_image = tf.cast(image.uint8image, tf.float32)
    if train:
        reshaped_image = tf.image.random_brightness(reshaped_image, max_delta=63)
        reshaped_image = tf.image.random_contrast(reshaped_image, lower=0.2, upper=1.8)
    reshaped_image = tf.image.per_image_whitening(reshaped_image)
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN if train else NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
    min_fraction_of_examples_in_queue = 0.2
    min_queue_examples = int(num_examples_per_epoch *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d images. '
          'This will take a few minutes.' % min_queue_examples)
    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(reshaped_image, image.label,
                                           min_queue_examples, batch_size,
                                           shuffle=train)
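To check what this pipeline returns, something like the following should pull a single batch and print its shapes (a sketch using the same queue-runner API as above; the batch size of 32 is arbitrary):

import tensorflow as tf
import cnn_input

# 'data/train_batch' is the file produced by data_converter.py.
images, labels = cnn_input.inputs(['data/train_batch'], batch_size=32, train=True)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img_batch, lbl_batch = sess.run([images, labels])
    print(img_batch.shape)  # expected (32, 64, 64, 3)
    print(lbl_batch.shape)  # expected (32, 1), one label byte per image
    coord.request_stop()
    coord.join(threads)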
The third one is the CNN part:
import datetime
import tensorflow as tf
import time
import numpy as np
import os
import cnn_input
tf.app.flags.DEFINE_integer('batch_size', 256, """Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('train_dir', 'train/',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 60000, '')

FLAGS = tf.flags.FLAGS


def __conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def __maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


def cnn(x):
    with tf.variable_scope('conv1'):
        w = tf.Variable(tf.random_normal([5, 5, 3, 64]))
        b = tf.Variable(tf.random_normal([64]))
        conv1 = __conv2d(x, w, b)
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm1')

    with tf.variable_scope('conv2'):
        w = tf.Variable(tf.random_normal([5, 5, 64, 64]))
        b = tf.Variable(tf.random_normal([64]))
        conv2 = __conv2d(norm1, w, b)
        # norm2
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')
        # pool2
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # local3
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        dim = reshape.get_shape()[1].value
        w = tf.Variable(tf.random_normal([dim, 1024], stddev=0.04))
        b = tf.Variable(tf.random_normal([1024]))
        local3 = tf.nn.relu(tf.matmul(reshape, w) + b, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        w = tf.Variable(tf.random_normal([1024, 192], stddev=0.04))
        b = tf.Variable(tf.random_normal([192]))
        local4 = tf.nn.relu(tf.matmul(local3, w) + b, name=scope.name)

    # softmax, i.e. softmax(WX + b)
    with tf.variable_scope('softmax_linear') as scope:
        w = tf.Variable(tf.random_normal([192, 1], stddev=1 / 192.0))
        b = tf.Variable(tf.random_normal([1]))
        softmax_linear = tf.add(tf.matmul(local4, w), b, name=scope.name)

    return tf.nn.sigmoid(softmax_linear)


def loss(predicted, labels):
    labels = tf.cast(labels, tf.float32)
    c = tf.nn.l2_loss(predicted - labels)
    tf.add_to_collection('losses', c)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def optimize(total_loss, global_step):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.005)
    optimizer.minimize(total_loss, global_step)
    return tf.no_op()


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = cnn_input.inputs(['data/train_batch'], tf.app.flags.FLAGS.batch_size, True)
        predicted = cnn(images)
        cost = loss(predicted, labels)
        train_op = optimize(cost, global_step)
        saver = tf.train.Saver(tf.all_variables())
        accuracy = tf.reduce_mean(tf.abs(predicted - tf.cast(labels, tf.float32)))
        init = tf.initialize_all_variables()
        sess = tf.Session()
        sess.run(init)
        tf.train.start_queue_runners(sess)
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, acc = sess.run([train_op, cost, accuracy])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % 100 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, accuracy = %.2f, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.datetime.now(), step, acc, loss_value,
                                    examples_per_sec, sec_per_batch))
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)


if __name__ == '__main__':
    train()
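For scale, this is roughly what the loss defined above evaluates to for a toy batch (a plain numpy sketch with hypothetical values, not part of the training code): tf.nn.l2_loss(t) computes sum(t ** 2) / 2 over the whole tensor, so the reported value is summed over the batch rather than averaged.

import numpy as np

# Hypothetical toy batch: sigmoid outputs stuck near 0.5, half cats (0), half dogs (1).
batch_size = 256
predicted = np.full((batch_size, 1), 0.5, dtype=np.float32)
labels = np.concatenate([np.zeros((128, 1)), np.ones((128, 1))]).astype(np.float32)

# Same computation as tf.nn.l2_loss(predicted - labels): sum of squares divided by 2.
l2 = np.sum((predicted - labels) ** 2) / 2.0
print(l2)  # 32.0 for this toy batch, the same order of magnitude as the logged losses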