Very large loss values in AlexNet

Date: 2017-06-29 04:30:33

Tags: machine-learning tensorflow deep-learning

I am using AlexNet to classify my images into two classes. I feed the images to the model in batches of 60, and after each batch the loss I get is 6 to 7 digits large (for example, 1428529.0). I am confused as to why my loss is such a large value, because on the MNIST dataset the loss I got was very small in comparison. Can anyone explain why I am getting such a large loss value? Thanks in advance ;-)

Here is the code:

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os

img_size = 227
num_channels = 1
img_flat_size = img_size * img_size 
num_classes = 2
drop = 0.5

x = tf.placeholder(tf.float32,[None,img_flat_size])
y = tf.placeholder(tf.float32,[None,num_classes])
drop_p = tf.placeholder(tf.float32)

def new_weight(shape):
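    # note: tf.random_normal draws from a standard normal (mean 0.0, stddev 1.0) by default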
    return tf.Variable(tf.random_normal(shape))

def new_bias(size):
    return tf.Variable(tf.random_normal(size))

def new_conv(x,num_input_channels,filter_size,num_filters,stride,padd="SAME"):
    shape = [filter_size,filter_size,num_input_channels,num_filters]

    weight = new_weight(shape)
    bias = new_bias([num_filters])

    conv = tf.nn.conv2d(x,weight,strides=[1,stride,stride,1],padding=padd)

    conv = tf.nn.bias_add(conv,bias)

    return tf.nn.relu(conv)

def new_max_pool(x,k,stride):
    max_pool = tf.nn.max_pool(x,ksize=[1,k,k,1],strides=[1,stride,stride,1],padding="VALID")
    return max_pool

def flatten_layer(layer):
    layer_shape = layer.get_shape()
    num_features = layer_shape[1:4].num_elements()

    flat_layer = tf.reshape(layer,[-1,num_features])

    return flat_layer,num_features

def new_fc_layer(x,num_input,num_output):
    weight = new_weight([num_input,num_output])
    bias = new_bias([num_output])

    fc_layer = tf.matmul(x,weight) + bias

    return fc_layer

def lrn(x, radius, alpha, beta, bias=1.0):
    """Create a local response normalization layer."""
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias)


def AlexNet(x,drop,img_size):
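    # reshape the flat [batch, img_size*img_size] input back into NHWC image tensors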
    x = tf.reshape(x,shape=[-1,img_size,img_size,1])

    conv1 = new_conv(x,num_channels,11,96,4,"VALID")
    max_pool1 = new_max_pool(conv1,3,2)
    norm1 = lrn(max_pool1, 2, 2e-05, 0.75)

    conv2 = new_conv(norm1,96,5,256,1)
    max_pool2 = new_max_pool(conv2,3,2)
    norm2 = lrn(max_pool2, 2, 2e-05, 0.75)    

    conv3 = new_conv(norm2,256,3,384,1)

    conv4 = new_conv(conv3,384,3,384,1)

    conv5 = new_conv(conv4,384,3,256,1)

    max_pool3 = new_max_pool(conv5,3,2)

    layer , num_features = flatten_layer(max_pool3)

    fc1 = new_fc_layer(layer,num_features,4096)
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1,drop)

    fc2 = new_fc_layer(fc1,4096,4096)
    fc2 = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2,drop)

    out = new_fc_layer(fc2,4096,2)

    return out #, tf.nn.softmax(out)

def read_and_decode(tfrecords_file, batch_size):
    '''read and decode tfrecord file, generate (image, label) batches
    Args:
        tfrecords_file: the path to the tfrecord file
        batch_size: number of images in each batch
    Returns:
        image: 2D tensor - [batch_size, width * height * channels]
        label: 1D tensor - [batch_size]
    '''
    # make an input queue from the tfrecord file
    filename_queue = tf.train.string_input_producer([tfrecords_file])

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    img_features = tf.parse_single_example(
                                        serialized_example,
                                        features={
                                               'label': tf.FixedLenFeature([], tf.int64),
                                               'image_raw': tf.FixedLenFeature([], tf.string),
                                               })
    image = tf.decode_raw(img_features['image_raw'], tf.uint8)

    ##########################################################
    # you can put data augmentation here, I didn't use it
    ##########################################################
    # all the images here are 227*227; change the image size if you use another dataset

    image = tf.reshape(image, [227, 227])
    label = tf.cast(img_features['label'], tf.int32)    
    image_batch, label_batch = tf.train.batch([image, label],
                                                batch_size= batch_size,
                                                num_threads= 1, 
                                                capacity = 6000)
    return tf.reshape(image_batch,[batch_size,227*227*1]), tf.reshape(label_batch, [batch_size])

pred = AlexNet(x,drop_p,img_size)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,labels=y))

optimiser = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss)

correct_pred = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))

cost = tf.summary.scalar('loss',loss)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    merge_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter('./AlexNet',graph = tf.get_default_graph())

    tf_record_file = 'train.tfrecords'
    x_val ,y_val = read_and_decode(tf_record_file,20)
    y_val = tf.one_hot(y_val,depth=2,on_value=1,off_value=0)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    x_val = x_val.eval()
    y_val = y_val.eval()
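    # note: this fetches a single batch once; the training loop below reuses it every epoch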
    epoch = 2
    for i in range(epoch):
        _, summary= sess.run([optimiser,merge_summary],feed_dict={x:x_val,y:y_val,drop_p:drop})
        summary_writer.add_summary(summary,i)
        loss_a,accu = sess.run([loss,accuracy],feed_dict={x:x_val,y:y_val,drop_p:1.0})
        print "Epoch "+str(i+1) +', Minibatch Loss = '+ \
            "{:.6f}".format(loss_a) + ', Training Accuracy = '+ \
            '{:.5f}'.format(accu)

    print "Optimization Finished!"

    tf_record_file1 = 'test.tfrecords'
    x_v ,y_v = read_and_decode(tf_record_file1,10)
    y_v = tf.one_hot(y_v,depth=2,on_value=1,off_value=0)
    coord1 = tf.train.Coordinator()
    threads1 = tf.train.start_queue_runners(coord=coord1)
    x_v = sess.run(x_v)
    y_v = sess.run(y_v)
    print "Testing Accuracy : "
    print sess.run(accuracy,feed_dict={x:x_v,y:y_v,drop_p:1.0})

    coord.request_stop()
    coord.join(threads)
    coord1.request_stop()
    coord1.join(threads1)

1 Answer:

Answer 0 (score: 0)

Take a look at what a confusion matrix is; it is a performance evaluation tool. You should also compare your precision with your recall. Precision is the accuracy of the positive predictions, and recall is the ratio of positive instances that the classifier correctly detects. Combining precision and recall gives the F_1 score, F1 = 2 * (precision * recall) / (precision + recall), which you can use to further evaluate the model.
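As a minimal sketch (assuming scikit-learn is installed; y_true and y_pred below are placeholder arrays standing in for your ground-truth labels and the classifier's predictions), these metrics can be computed like this:

import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

y_true = np.array([0, 1, 1, 0, 1, 0, 1, 1])  # hypothetical ground-truth labels
y_pred = np.array([0, 1, 0, 0, 1, 1, 1, 1])  # hypothetical classifier predictions

print(confusion_matrix(y_true, y_pred))   # rows = true class, columns = predicted class
print(precision_score(y_true, y_pred))    # TP / (TP + FP)
print(recall_score(y_true, y_pred))       # TP / (TP + FN)
print(f1_score(y_true, y_pred))           # 2 * P * R / (P + R)

In your code, y_pred would correspond to evaluating tf.argmax(pred,1) on a batch, and y_true to the batch labels before one-hot encoding.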

I suggest you pick up the text Hands-On Machine Learning with Scikit-Learn and TensorFlow. It is a really comprehensive book that covers in detail what I described above.