AlexNet并没有收敛于张量流

时间:2017-10-23 18:17:00

标签: python tensorflow

我最近决定从MATLAB迁移到Tensorflow。我首先在Tensorflow中创建了ALexNet模型。我想从头开始训练我自己的数据上的AlexNet。 (我使用matconvnet在MATLAB中成功完成了这项工作)。但是,我的张量流模型永远不会收敛。损失和准确性保持不变。 我将数据保存在tfrecords文件中并读取它,并可以验证数据是否正确加载。但我无法理解模型deosnt似乎训练的原因。 我在Ubuntu 16.04上使用Tensorflow 1.2.0和Python 2.7。

这是我的代码:Where' val.tfrecords'是包含我的培训日期的tfrecords文件

import numpy as np
import matplotlib.pyplot as plt
import sys
import cv2
from random import shuffle
import random as rand
import glob
import tensorflow as tf
import os
import scipy.misc
import math

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

# Initialize the weights with random numbers
def W_init (w_height,w_width,num_channels,num_filters,method = 'normal', W_name = 'w'):
    if method is 'normal': std =1
    if method is 'xavier': std = std = np.sqrt(2./(w_width*w_height*num_channels))
    return(tf.Variable(tf.truncated_normal([w_height,w_width,num_channels,num_filters], mean=0.0, stddev=std),name = W_name,trainable=True))



# Create wrappers for simplicity

# Convolution layer
def conv_layer(x,W,b,stride,pad):
    x = tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], "CONSTANT")
    y = tf.nn.conv2d(x, W, strides=[1,stride,stride,1], padding='VALID')
    y = tf.nn.bias_add(y,b)
    return y

# Pooling layer
def pool_layer(x,k,stride,method):
    if method is 'max':
        y = tf.nn.max_pool(x, ksize = [1,k,k,1], strides = [1,stride,stride,1], padding='VALID')
    if method is 'avg':
        y = tf.nn.avg_pool(x, ksize = [1,k,k,1], strides = [1,stride,stride,1], padding='VALID')
    return y 


# Create a model AlexNet
def AlexNet(x,y_):

    # input
    x = tf.reshape(x,shape =[-1,224,224,3] )

    # conv1 + relu1
    conv1 = tf.nn.relu(conv_layer(x,W=W_init (11,11,3,96,method = 'xavier',W_name ='w1'), b=tf.Variable(tf.zeros([96]),name='b1',trainable=True),stride=4,pad= 3))
    # maxpool1
    max1 = pool_layer(conv1,k=2,stride=2,method='max')


    # conv2 + relu2
    conv2 = tf.nn.relu(conv_layer(max1,W=W_init (5,5,96,256,method = 'xavier',W_name ='w2'), b=tf.Variable(tf.zeros([256]),name='b2',trainable=True),stride=1,pad= 2))
    # maxpool2
    max2 = pool_layer(conv2,k=2,stride=2,method='max')

    # conv3 + relu3
    conv3 = tf.nn.relu(conv_layer(max2,W=W_init (3,3,256,384,method = 'xavier',W_name ='w3'), b=tf.Variable(tf.zeros([384]),name='b3',trainable=True),stride=1,pad= 1))

    # conv4 + relu4
    conv4 = tf.nn.relu(conv_layer(conv3,W=W_init (3,3,384,384,method = 'xavier',W_name ='w4'), b=tf.Variable(tf.zeros([384]),name='b4',trainable=True),stride=1,pad= 1))

    # conv5 + relu5
    conv5 = tf.nn.relu(conv_layer(conv4,W=W_init (3,3,384,256,method = 'xavier',W_name ='w5'), b=tf.Variable(tf.zeros([256]),name='b5',trainable=True),stride=1,pad= 1))
    # maxpool5
    max5 = pool_layer(conv5,k=2,stride=2,method='max')

    # flatten the convolution output to use in fc layer
    max5_size = np.product([s.value for s in max5.get_shape()[1:]])
    max5_flat = tf.reshape(max5, [-1, max5_size ])    


    #  fc6 + relu6 +drop6
    fc6 = tf.nn.relu(tf.matmul(max5_flat,tf.Variable(tf.truncated_normal([max5_size,4096],mean=0.0, stddev=2./math.sqrt(max5_size)),name='w6',trainable=True))+  tf.Variable(tf.zeros([4096]),name='b6',trainable=True) )
    drop6 = tf.nn.dropout(fc6, 0.5)

    #  fc7 + relu7 +drop7
    fc7 = tf.nn.relu(tf.matmul(drop6, tf.Variable(tf.truncated_normal([4096,4096], mean=0.0, stddev=2./math.sqrt(4096)),name='w7',trainable=True))+ tf.Variable(tf.zeros([4096]),name='b7',trainable=True) )
    drop7 = tf.nn.dropout(fc7, 0.5)

    #  fc8
    fc8 = tf.matmul(drop7, tf.Variable(tf.truncated_normal([4096,23], mean=0.0, stddev=2./math.sqrt(23)),name='w8',trainable=True))+ tf.Variable(tf.zeros([23]),name='b8',trainable=True) 

    y = tf.nn.softmax(fc8) 
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))    
    optimizer = tf.train.AdamOptimizer(0.01).minimize(cost)


    return cost,accuracy



train_filename = '/home/Documents/MyData/val.tfrecords'

# Model parameters
learning_rate = 0.01
Nimages = 1087
mean_image = np.load('mean_image_256.npy') # This is the 
n_input = 224*224*3 # img shape: 224*224*3
n_classes = 23
batch_size = 200 
Num_epochs = 1000
display_step = 3*batch_size

## Read the tfrecord file we've just created #############################################
# 1- Create a list of filenames: In this case there's only a single file 
train_data_path = train_filename


# TF graph Inputs and Placeholders
x_ = tf.placeholder(tf.float32, [None,224,224,3])
y_ = tf.placeholder(tf.float32, [None,n_classes])

# Define out loss and optimizer

with tf.Session() as sess:
    feature = {'train/image' : tf.FixedLenFeature([], tf.string),
               'train/label' : tf.FixedLenFeature([], tf.int64),
               'train/height': tf.FixedLenFeature([], tf.int64),
               'train/width' : tf.FixedLenFeature([], tf.int64)}

    # 2- Create a queue to hold filenames: To do so, we use tf.train.string_input_producer function which hold filenames in a FIFO queue.
    # it gets the list of filnames. It also has some optional arguments including  num_epochs which indicates the number of epoch you want to to load the data,
    # and shuffle which indicates whether to suffle the filenames in the list or not. It is set to True by default.

    train_filename_queue = tf.train.string_input_producer([train_data_path], num_epochs=None)
    val_filename_queue   = tf.train.string_input_producer([val_data_path], num_epochs=None)

    # 3- Define a reader and read the next record
    # For files of TFRecords we need to define a TFRecordReader with reader = tf.TFRecordReader().
    # Now, the reader returns the next record using: reader.read(filename_queue)
    reader = tf.TFRecordReader()

    _, train_serialized_example = reader.read(train_filename_queue)
    _,  val_serialized_example =  reader.read(val_filename_queue)

    # 4- Decode the record read by the reader
    # A decoder is needed to decode the record read by the reader.
    # In case of using TFRecords files the decoder should be tf.parse_single_example. it takes a serialized Example and a dictionary
    # which maps feature keys to FixedLenFeature or VarLenFeature values
    # and returns a dictionary which maps feature keys to Tensor values: features = tf.parse_single_example(serialized_example, features=feature)
    train_features = tf.parse_single_example(train_serialized_example, features=feature)
    val_features = tf.parse_single_example(val_serialized_example, features=feature)

    # 5- Convert the image data from string back to the numbers
    # tf.decode_raw(bytes, out_type) takes a Tensor of type string and convert it to typeout_type.
    # However, for labels which have not been converted to string, we just need to cast them using tf.cast(x, dtype)
    train_image = tf.decode_raw(train_features['train/image'], tf.float32)

    # 6- Cast label data into int32 and Reshape image data into the original shape
    train_label = tf.cast(train_features['train/label'], tf.int32)
    train_label = tf.one_hot(train_label, n_classes)
    train_height = tf.cast(train_features['train/height'], tf.int32)
    train_width = tf.cast(train_features['train/width'], tf.int32)
    train_image = tf.reshape(train_image, tf.stack([train_height, train_width, 3]))



    # 7- Any preprocessing here ...
    train_image = tf.image.central_crop(train_image, 1)
    train_image = tf.image.resize_images(train_image, [256,256])
    train_image = tf.random_crop(train_image, [224, 224, 3])
    train_image = tf.image.random_flip_left_right(train_image)





    # 8- Creates batches by randomly shuffling tensors
    # Batching: Another queue is needed to create batches from the examples. You can create the batch queue using:
    # tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10)
    # where capacity is the maximum size of queue, min_after_dequeue is the minimum size of queue after dequeue,
    # and num_threads is the number of threads enqueuing examples.
    # Using more than one thread, it comes up with a faster reading.
    # The first argument in a list of tensors which you want to create batches from.
    train_images, train_labels = tf.train.shuffle_batch([train_image, train_label], batch_size=batch_size, capacity=3*batch_size, num_threads=1, min_after_dequeue=batch_size,allow_smaller_final_batch=True)




    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    cost,accuracy = AlexNet(x_,y_)

    # 9- Initialize all global and local variables

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op) 
    writer = tf.summary.FileWriter(logdir = '/tmp/tf/foo', graph=tf.get_default_graph())
    writer.flush()

    # 10- Create a coordinator and run all QueueRunner objects
    # Filing the example queue: Some functions of tf.train such as tf.train.shuffle_batch add tf.train.
    # QueueRunner objects to your graph. Each of these objects hold a list of enqueue op for a queue 
    # to run in a thread. Therefore, to fill a queue you need to call tf.train.start_queue_runners
    # which starts threades for all the queue runners in the graph. 
    # However, to manage these threads you need a tf.train.Coordinator to terminate the threads at the proper time.



    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()


    train_acc = np.zeros(Num_epochs)
    val_acc = np.zeros(Num_epochs)

    for epoch in range(Num_epochs):
        im_counter = 0

        for iter in range(Nimages/batch_size):
            im_counter+=batch_size
            # Get a training image batch  and  subtract mean
            t_img, t_lbl = sess.run([train_images, train_labels])
            t_img = (t_img - scipy.misc.imresize(mean_image, (224,224)))/255
            #t_lbl = to_onehot(t_lbl,n_classes)



            # Run session

            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x_: t_img, y_: t_lbl})

            # Display training results
            if (im_counter%display_step)==0:
                print "epoch " + str(epoch) +  " Processed images: " + str(im_counter) + "/" +str(Nimages)+", Minibatch Loss= " + \
                "{:.6f}".format(loss) + ", Accuracy= " + \
                "{:.5f}".format(acc) 

        # After an epoch is trained, save model and run validation
        saver.save(sess, 'AlexNet_saved_model.ckpt')
        train_acc[epoch] = acc

        im_counter = 0


    print "Optimization Finished!"
    plt.plot(train_acc)
    plt.title('Training accuracy')
    plt.show()            
    # Stop the threads
    coord.request_stop()

    # Wait for threads to stop
    coord.join(threads) 
    sess.close()  

1 个答案:

答案 0 :(得分:0)

我在代码中看到至少一个错误:

y = tf.nn.softmax(fc8) 
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

在致电tf.nn.softmax_cross_entropy_with_logits之前,您不应该应用 softmax documentation清楚地说明了这一点:

  

警告:此操作需要未缩放的日志,因为它执行softmax   在logits内部提高效率。不要把这个叫做   输出softmax,因为它会产生不正确的结果。