无法提高Oxford-102上的AlexNet准确性(Tensorflow)

时间:2018-02-04 21:03:17

标签: python tensorflow machine-learning

您好,我尝试在不使用预训练权重的情况下实现AlexNet。我在 Oxford-102 数据集上训练网络,但在整个训练过程中准确率一直停留在0.9%,更改参数也没有帮助。代码如下,有人可以帮助我吗?

我参照的是这篇(this)教程

我把数据集给定的测试集(规模更大)用作训练集,把给定的训练集用作测试集。我使用梯度下降(Gradient Descent)作为优化器。

我构建的AlexNet与该文章中给出的完全相同,会不会是我计算准确率的方式有问题?

以下是我加载数据的方式

import os
import sys
import warnings

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from skimage.io import imread
from skimage.transform import resize

from scipy.io import loadmat

import tensorflow as tf

# Silence UserWarning messages raised from the skimage module.
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Load the dataset split definition; the keys read below are 'trnid' and 'tstid'.
set_ids = loadmat('setid.mat')

# Bare expression: only has a visible effect when run as a notebook cell.
set_ids

# NOTE: the splits are swapped on purpose: 'trnid' feeds the test set and
# 'tstid' feeds the training set (the author states the given test set is larger).
# [0] takes the first row of the nested list; presumably each array is (1, N) —
# TODO confirm against setid.mat.
test_ids = set_ids['trnid'].tolist()[0]
train_ids = set_ids['tstid'].tolist()[0]

def indexes_processing(int_list):
    """Convert image ids to the zero-padded strings used in image file names.

    Each id is rendered as a decimal string left-padded with zeros to a width
    of five characters (``7`` -> ``'00007'``, ``8189`` -> ``'08189'``). The
    result is interpolated into ``'jpg/image_{}.jpg'`` elsewhere in this file.

    Args:
        int_list: Iterable of non-negative integer ids.

    Returns:
        A new list of strings, one per id, in the same order as the input.
    """
    # str.zfill pads to a fixed width, so ids that already have five or more
    # digits are returned unchanged instead of gaining an extra leading zero.
    return [str(element).zfill(5) for element in int_list]

# Zero-padded id strings, later interpolated into the image file names.
raw_train_ids = indexes_processing(train_ids)
raw_test_ids = indexes_processing(test_ids)

# Module-level lists. train_labels / test_labels are filled by
# ProcessImage.set_up_images; train_images / test_images are not referenced
# anywhere else in the visible code.
train_images = []
test_images = []
train_labels = []
test_labels = []

# One class index per image, shifted by -1 (presumably converting 1-based
# labels to 0-based — TODO confirm against imagelabels.mat). [0] takes the
# first row of the nested list.
image_labels = (loadmat('imagelabels.mat')['labels'] - 1).tolist()[0]

# Human-readable class names. Presumably the list position matches the 0-based
# class index stored in image_labels — TODO confirm against the dataset docs.
labels = ['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy', 'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy', 'gazania', 'azalea', 'water lily', 'rose', 'thorn apple', 'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium', 'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose', 'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily', 'hippeastrum ', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea', 'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower', 'trumpet creeper', 'blackberry lily']

# Bare expression: a spot check of one name when run as a notebook cell.
labels[16]

def one_hot_encode(labels, num_classes=102):
    '''
    One hot encode integer class labels as a 2-D numpy array of 0s and 1s.

    Args:
        labels: 1-D sequence of integer class indices; each must be a valid
            column index for an array with ``num_classes`` columns.
        num_classes: Number of columns in the output. Defaults to 102, the
            value this function previously hard-coded.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` where row ``r``
        holds a single 1 in column ``labels[r]`` and 0 everywhere else.

    Raises:
        IndexError: If a label is out of range for ``num_classes``.
    '''
    # An explicit integer dtype keeps an empty input usable as an index array.
    indices = np.asarray(labels, dtype=np.intp)
    out = np.zeros((len(indices), num_classes))
    # Pair every row number with its label column and set those cells at once.
    out[np.arange(len(indices)), indices] = 1
    return out

class ProcessImage():
    """Hold the resized images and one-hot labels in memory and serve batches.

    Relies on module-level names defined earlier in this file: raw_train_ids,
    raw_test_ids, train_ids, test_ids, image_labels, train_labels, test_labels
    and one_hot_encode.
    """

    def __init__(self):
        # Read cursor into the training set; advanced by next_batch.
        self.i = 0

        # Buffers are sized from the id lists rather than hard-coded counts, so
        # a different split cannot leave zero-filled rows or overflow a buffer.
        # NOTE: np.zeros defaults to float64, i.e. 227 * 227 * 3 * 8 bytes
        # (about 1.2 MB) per image.
        self.training_images = np.zeros((len(raw_train_ids), 227, 227, 3))
        self.training_labels = None

        self.testing_images = np.zeros((len(raw_test_ids), 227, 227, 3))
        self.testing_labels = None

    @staticmethod
    def _load_images(ids, out):
        """Read each 'jpg/image_<id>.jpg', resize it to 227x227, store it in out."""
        for row, image_id in enumerate(ids):
            img = imread('jpg/image_{}.jpg'.format(image_id))
            out[row] = resize(img, (227, 227))

    def set_up_images(self):
        """Load every training/testing image from disk and build the label lists.

        Fills ``training_images`` / ``testing_images`` in place and points
        ``training_labels`` / ``testing_labels`` at the module-level
        ``train_labels`` / ``test_labels`` lists.
        """
        print('Processing Training Images...')
        self._load_images(raw_train_ids, self.training_images)
        print('Done!')

        print('Processing Testing Images...')
        self._load_images(raw_test_ids, self.testing_images)
        print('Done!')

        print('Processing Training and Testing Labels...')
        encoded_labels = one_hot_encode(image_labels)
        # Image ids are looked up with "- 1" because row r of encoded_labels
        # belongs to image id r + 1. Slice assignment replaces the contents of
        # the module-level lists, so calling this method twice does not
        # duplicate the labels.
        train_labels[:] = [encoded_labels[train_id - 1] for train_id in train_ids]
        test_labels[:] = [encoded_labels[test_id - 1] for test_id in test_ids]
        self.training_labels = train_labels
        self.testing_labels = test_labels
        print('Done!')

    def next_batch(self, batch_size):
        """Return the next ``(images, labels)`` slice of the training set.

        The final batch of a pass may hold fewer than ``batch_size`` items.
        After it, the cursor restarts at 0 so the next pass covers every
        sample again; a modulo wrap would skip the first few samples.
        """
        start = self.i
        end = start + batch_size
        x = self.training_images[start:end]
        y = self.training_labels[start:end]
        self.i = end if end < len(self.training_images) else 0
        return x, y

# Allocate the (initially all-zero) image buffers.
image_processor = ProcessImage()

# Read and resize every image from disk and build the one-hot label lists.
image_processor.set_up_images()

我的图表

# Helper Functions for AlexNet
def init_weights(filter_height, filter_width, num_channels, num_filters):
    """Create a convolution kernel variable.

    The variable has shape
    [filter_height, filter_width, num_channels, num_filters] and is initialised
    from a truncated normal distribution with a standard deviation of 0.1.
    """
    kernel_shape = [filter_height, filter_width, num_channels, num_filters]
    initial_value = tf.truncated_normal(kernel_shape, stddev=0.1)
    return tf.Variable(initial_value)

def init_bias(shape):
    """Create a bias variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W, stride_y, stride_x, padding='SAME'):
    """Convolve x with kernel W using the given vertical/horizontal strides."""
    strides = [1, stride_y, stride_x, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding=padding)

def max_pool(x, filter_height, filter_width, stride_y, stride_x, padding='SAME'):
    """Max-pool x over filter_height x filter_width windows with the given strides."""
    window = [1, filter_height, filter_width, 1]
    step = [1, stride_y, stride_x, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding)

def conv_layer(input_x, filter_height, filter_width, num_channels, num_filters, stride_y, stride_x, padding='SAME', groups=1):
    """Build a (optionally grouped) convolution followed by bias add and ReLU.

    Args:
        input_x: Input tensor; its last axis (axis 3) is the channel axis that
            gets split when ``groups > 1``.
        filter_height, filter_width: Spatial size of the kernel.
        num_channels: Number of channels in ``input_x``.
        num_filters: Number of output channels.
        stride_y, stride_x: Vertical and horizontal strides.
        padding: ``'SAME'`` or ``'VALID'``, forwarded to the convolution.
        groups: Number of independent channel groups. With ``groups > 1`` the
            input and the kernel are each split into ``groups`` equal parts
            along axis 3, convolved pairwise, and concatenated again.

    Returns:
        The ReLU-activated output tensor.

    Raises:
        ValueError: If ``num_channels`` or ``num_filters`` is not divisible by
            ``groups``.
    """
    # Fail early with a clear message instead of an opaque tf.split error.
    if num_channels % groups or num_filters % groups:
        raise ValueError(
            'num_channels ({}) and num_filters ({}) must both be divisible by '
            'groups ({})'.format(num_channels, num_filters, groups))

    # Each group only sees num_channels // groups input channels.
    W = init_weights(filter_height, filter_width, num_channels // groups, num_filters)
    b = init_bias([num_filters])

    if groups == 1:
        conv = conv2d(input_x, W, stride_y, stride_x, padding=padding)
    else:
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=input_x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W)
        output_groups = [
            conv2d(i, k, stride_y, stride_x, padding=padding)
            for i, k in zip(input_groups, weight_groups)
        ]
        conv = tf.concat(axis=3, values=output_groups)

    # bias_add keeps the shape of conv, so no reshape is needed afterwards.
    return tf.nn.relu(tf.nn.bias_add(conv, b))

def lrn(x, radius, alpha, beta, bias=1.0):
    """Apply local response normalization to x with the given hyper-parameters."""
    settings = dict(depth_radius=radius, alpha=alpha, beta=beta, bias=bias)
    return tf.nn.local_response_normalization(x, **settings)

def fully_connected(input_layer, num_in, num_out, relu=True):
    """Build a dense layer computing ``input_layer @ W + b``.

    W is a [num_in, num_out] variable drawn from a truncated normal with a
    standard deviation of 0.1; b comes from init_bias. When ``relu`` is true
    the result is passed through ReLU, otherwise it is returned as is.
    """
    weights = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=0.1))
    biases = init_bias([num_out])
    pre_activation = tf.nn.xw_plus_b(input_layer, weights, biases)
    return tf.nn.relu(pre_activation) if relu else pre_activation

def drop_out(x, keep_prob):
    """Apply dropout to x, keeping each element with probability keep_prob."""
    dropped = tf.nn.dropout(x, keep_prob=keep_prob)
    return dropped

# Input images: any batch size, 227x227 pixels, 3 channels.
x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
# One-hot targets with 102 columns, one per class.
y_true = tf.placeholder(tf.float32, shape=[None, 102])
# Dropout keep probability; fed as 0.5 for training and 1.0 for evaluation below.
keep_prob = tf.placeholder(tf.float32)

# Create the graph
# Spatial sizes in the comments below follow from the 227x227 input, the
# kernel sizes, strides and padding modes passed in each call.

# 1st Layer: Conv (w ReLu) -> Lrn -> Pool
# 11x11 stride-4 VALID conv: 227 -> 55; 3x3 stride-2 VALID pool: 55 -> 27.
conv_1 = conv_layer(x, filter_height=11, filter_width=11, num_channels=3, num_filters=96, stride_y=4, stride_x=4, padding='VALID')
norm_1 = lrn(conv_1, radius=2, alpha=1e-05, beta=0.75)
pool_1 = max_pool(norm_1, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
# Bare expression: only shows the static shape when run as a notebook cell.
pool_1.get_shape()

# 2nd Layer: Conv (w ReLu) -> Lrn -> Pool
# SAME stride-1 conv keeps 27; the pool reduces 27 -> 13. Two channel groups.
conv_2 = conv_layer(pool_1, filter_height=5, filter_width=5, num_channels=96, num_filters=256, stride_y=1, stride_x=1, groups=2)
norm_2 = lrn(conv_2, radius=2, alpha=1e-05, beta=0.75)
pool_2 = max_pool(norm_2, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 3rd Layer: Conv (w ReLu)
conv_3 = conv_layer(pool_2, filter_height=3, filter_width=3, num_channels=256, num_filters=384, stride_y=1, stride_x=1)

# 4th Layer: Conv (w ReLu)
conv_4 = conv_layer(conv_3, filter_height=3, filter_width=3, num_channels=384, num_filters=384, stride_y=1, stride_x=1, groups=2)

# 5th Layer: Conv (w ReLu) -> Pool
# The pool reduces 13 -> 6, giving the 6*6*256 features flattened below.
conv_5 = conv_layer(conv_4, filter_height=3, filter_width=3, num_channels=384, num_filters=256, stride_y=1, stride_x=1, groups=2)
pool_5 = max_pool(conv_5, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
pool_6_flat = tf.reshape(pool_5, [-1, 6*6*256])
full_6 = fully_connected(pool_6_flat, 6*6*256, 4096)
full_6_dropout = drop_out(full_6, keep_prob)

# 7th Layer: FC (w ReLu) -> Dropout
full_7 = fully_connected(full_6_dropout, 4096, 4096)
full_7_dropout = drop_out(full_7, keep_prob)

# 8th Layer: FC and return unscaled activations
# relu=False: these are the raw logits (102 per example) fed to the softmax loss.
y_pred = fully_connected(full_7_dropout, 4096, 102, relu=False)

损失函数和优化程序

# Mean softmax cross-entropy between the one-hot targets and the raw logits.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

# Plain gradient descent with a fixed learning rate of 0.01.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cross_entropy)

# Initializer for all variables, and a saver used for the checkpoints below.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

运行会话

# Build the evaluation ops once, before the loop. Defining them inside the loop
# adds a fresh set of nodes to the default graph at every evaluation.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    sess.run(init)
    for i in range(15000):
        # One gradient step on the next 128 training samples, with dropout on.
        batches = image_processor.next_batch(128)
        sess.run(train, feed_dict={x: batches[0], y_true: batches[1], keep_prob: 0.5})

        # Every 1000 steps: report accuracy on the whole test set and checkpoint.
        if i % 1000 == 0:
            print('On Step {}'.format(i))
            print('Accuracy is: ')

            # keep_prob=1.0 disables dropout for evaluation.
            print(sess.run(acc, feed_dict={x: image_processor.testing_images, y_true: image_processor.testing_labels, keep_prob: 1.0}))

            print('Saving model...')
            saver.save(sess, 'models/model_iter.ckpt', global_step=i)
            print('Saved at step: {}'.format(i))
            print('\n')
    print('Saving final model...')
    saver.save(sess, 'models/model_final.ckpt')
    print('Saved')

在全部15000次迭代中,我一遍又一遍地得到相同的准确率0.00903922。无论我怎样调整参数,甚至尝试把图像大小从224改为227,得到的准确率仍然是0.00903922。

1 个答案:

答案 0 :(得分:0)

你计算准确率的方式在我看来没有问题,不过每次都在循环里重新定义它有点奇怪。

让我担心的是你只训练了十步。你的训练集似乎包含6149张图像,而你每批用128张图像进行训练。这样训练十次,你只看过约6000张图像中的1280张——太少了,还看不出训练的效果。

相反,你应该遍历全部训练数据——大约48个训练步骤,也就是一个epoch(轮)——而且最好重复多轮。epoch的确切数量取决于数据和网络等多种因素,但你至少需要10个epoch,也就是480个训练步骤。