Question

此VGGNet是使用Tensorflow框架从头开始实现的，其中所有层均在代码中定义。我在这里面临的主要问题是，即使我等待了相当长的时间，训练准确性（更不用说验证准确性）还是会提高。我怀疑现在很少有问题引起此问题。首先，我认为该网络对于cifar-10数据集而言太深和太宽。其次，从整个数据集中提取数据批处理并不是穷举，也就是说，在整个数据集中一遍又一遍地使用批处理选择，而不会消除在进行中的时代中选择的那些示例。

但是，经过数小时和数天的实验，我仍然无法使此代码正常工作。

我希望我可以提取有问题的代码部分来提出问题，但是由于我无法在此处找到确切的部分，因此让我上载整个代码。

import os
import sys
import tensorflow as tf
import numpy as np
import scipy as sci
import math
import matplotlib.pyplot as plt
import time
import random
import imageio
import pickle
import cv2
import json
from pycocotools.coco import COCO


class SVGG:
    def __init__(self, num_output_classes):
        self.input_layer_size = 0
        self.num_output_classes = num_output_classes

        # Data
        self.X = []
        self.Y = []

        self.working_x = []
        self.working_y = []

        self.testX = []
        self.testY = []

        # hard coded for now. Have to change.
        self.input_data_size = 32 # 32 X 32
        self.input_data_size_flat = 3072 # 32 X 32 X 3 == 3072
        self.num_of_channels = 3 # 3 for colour image

        self.input_data_size = 32  # 32 X 32
        self.input_data_size_flat = self.input_data_size * self.input_data_size  # 32 X 32 X 3 == 3072
        self.num_of_channels = 3  # 3 for colour image

        self.convolution_layers = []
        self.convolution_weights = []
        self.fully_connected_layers = []
        self.fully_connected_weights = []

    def feed_examples(self, input_X, input_Y):
        """
        Feed examples to be learned
        :param input_X: Training dataset X
        :param input_Y: Traning dataset label
        :return:
        """

        # Take first input and calculate its size
        # hard code size
        self.X = input_X
        self.Y = input_Y
        self.input_data_size_flat = len(self.X[0]) * len(self.X[0][0]) * len(self.X[0][0][0])

    def feed_test_data(self, test_X, test_Y):
        self.testX = test_X
        self.testY = test_Y

    def run(self):
        x = tf.placeholder(tf.float32, [None, self.input_data_size_flat], name='x')
        x_data = tf.reshape(x, [-1, self.input_data_size, self.input_data_size, 3])

        y_true = tf.placeholder(tf.float32, [None, self.num_output_classes], name='y_true')
        y_true_cls = tf.argmax(y_true, axis=1)


        """
        VGG layers
        """


        # Create layers
        ######################################## Input Layer ########################################
        input_layer, input_weight = self.create_convolution_layer(x_data, num_input_channels=3, filter_size=3, num_filters=64,
                                                        use_pooling=True) # False

        ######################################## Convolutional Layer ########################################
        ############### Conv Layer 1 #################
        conv_1_1, w_1_1 = self.create_convolution_layer(input=input_layer, num_input_channels=64, filter_size=3, num_filters=64, use_pooling=False)
        conv_1_2, w_1_2 = self.create_convolution_layer(input=conv_1_1, num_input_channels=64, filter_size=3, num_filters=128, use_pooling=True)

        ############### Conv Layer 2 #################
        conv_2_1, w_2_1 = self.create_convolution_layer(input=conv_1_2, num_input_channels=128, filter_size=3, num_filters=128, use_pooling=False)
        conv_2_2, w_2_2 = self.create_convolution_layer(input=conv_2_1, num_input_channels=128, filter_size=3, num_filters=256, use_pooling=True)

        ############### Conv Layer 3 #################
        conv_3_1, w_3_1 = self.create_convolution_layer(input=conv_2_2, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_2, w_3_2 = self.create_convolution_layer(input=conv_3_1, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_3, w_3_3 = self.create_convolution_layer(input=conv_3_2, num_input_channels=256, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 4 #################
        conv_4_1, w_4_1 = self.create_convolution_layer(input=conv_3_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_2, w_4_2 = self.create_convolution_layer(input=conv_4_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_3, w_4_3 = self.create_convolution_layer(input=conv_4_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 5 #################
        conv_5_1, w_5_1 = self.create_convolution_layer(input=conv_4_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_2, w_5_2 = self.create_convolution_layer(input=conv_5_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_3, w_5_3 = self.create_convolution_layer(input=conv_5_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        layer_flat, num_features = self.flatten_layer(conv_5_3)

        ######################################## Fully Connected Layer ########################################
        fc_1 = self.create_fully_connected_layer(input=layer_flat, num_inputs=num_features, num_outputs=4096)
        fc_2 = self.create_fully_connected_layer(input=fc_1, num_inputs=4096, num_outputs=4096)
        fc_3 = self.create_fully_connected_layer(input=fc_2, num_inputs=4096, num_outputs=self.num_output_classes, use_dropout=False)


        # Normalize prediction
        y_prediction = tf.nn.softmax(fc_3)

        # The class-number is the index of the largest element
        y_prediction_class = tf.argmax(y_prediction, axis=1)

        # Cost-Fuction to be optimized
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc_3, labels=y_true)
        # => Now we have a measure of how well the model performs on each image individually. But in order to use the
        # Cross entropy to guide the optimization of the model's variable swe need a single value, so we simply take the
        # Average of the cross-entropy for all the image classifications
        cost = tf.reduce_mean(cross_entropy)

        # Optimizer
        optimizer_adam = tf.train.AdamOptimizer(learning_rate=0.002).minimize(cost)

        # Performance measure
        correct_prediction = tf.equal(y_prediction_class, y_true_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        total_iterations = 0
        num_iterations = 100000

        start_time = time.time()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for i in range(num_iterations):
                x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=0, high=40000, batch_size=128)
                feed_dict_train = {x: x_batch, y_true: y_true_batch}
                sess.run(optimizer_adam, feed_dict_train)

                if i % 100 == 99:
                    # Calculate the accuracy on the training-set.
                    x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=40000, high=50000, batch_size=1000)
                    feed_dict_validate = {x: x_batch, y_true: y_true_batch}
                    acc = sess.run(accuracy, feed_dict=feed_dict_validate)
                    # Message for printing.
                    msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
                    # print(sess.run(y_prediction, feed_dict=feed_dict_train))
                    # print(sess.run(y_prediction_class, feed_dict=feed_dict_train))

                    print(msg.format(i + 1, acc))

                if i % 10000 == 9999:
                    oSaver = tf.train.Saver()
                    oSess = sess
                    path = "./model/_" + "iteration_" + str(i) + ".ckpt"
                    oSaver.save(oSess, path)

                if i == num_iterations - 1:
                    x_batch, y_true_batch, _ = self.get_batch(X=self.testX, Y=self.testY, low=0, high=10000, batch_size=10000)
                    feed_dict_test = {x: x_batch, y_true: y_true_batch}
                    test_accuracy = sess.run(accuracy, feed_dict=feed_dict_test)
                    msg = "Test Accuracy: {0:>6.1%}"
                    print(msg.format(test_accuracy))

    def get_batch(self, X, Y, low=0, high=50000, batch_size=128):
        x_batch = []
        y_batch = np.ndarray(shape=(batch_size, self.num_output_classes))
        index = np.random.randint(low=low, high=high, size=batch_size)

        counter = 0
        for idx in index:
            x_batch.append(X[idx].flatten())
            y_batch[counter] = one_hot_encoded(Y[idx], self.num_output_classes)
            y_batch_cls = Y[idx]
            counter += 1

        return x_batch, y_batch, y_batch_cls

    def generate_new_weights(self, shape):
        w = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        return w

    def generate_new_biases(self, shape):
        b = tf.Variable(tf.constant(0.05, shape=[shape]))
        return b

    def create_convolution_layer(self, input, num_input_channels, filter_size, num_filters, use_pooling):
        """

        :param input: The previous layer
        :param num_input_channels: Number of channels in previous layer
        :param filter_size: W and H of each filter
        :param num_filters: Number of filters
        :return:
        """
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        weights = self.generate_new_weights(shape)
        biases = self.generate_new_biases(num_filters)

        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        layer += biases

        # Max Pooling
        if use_pooling:
            layer = tf.nn.max_pool(layer, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

        # ReLu. Using elu for better performance
        layer = tf.nn.elu(layer)

        return layer, weights

    def create_fully_connected_layer(self, input, num_inputs, num_outputs, use_dropout=True):
        weights = self.generate_new_weights(shape=[num_inputs, num_outputs])
        biases = self.generate_new_biases(shape=num_outputs)

        layer = tf.matmul(input, weights) + biases

        layer = tf.nn.elu(layer)

        if use_dropout:
            keep_prob = tf.placeholder(tf.float32)
            keep_prob = 0.5
            layer = tf.nn.dropout(layer, keep_prob)

        return layer

    def flatten_layer(self, layer):
        """
                Flattens dimension that is output by a convolution layer.
                Flattening is need to feed into a fully-connected-layer.
                :param layer:
                :return:
        """
        # shape [num_images, img_height, img_width, num_channels]
        layer_shape = layer.get_shape()

        # Number of features h x w x channels
        num_features = layer_shape[1: 4].num_elements()

        # Reshape
        layer_flat = tf.reshape(layer, [-1, num_features])

        # Shape is now [num_images, img_height * img_width * num_channels]
        return layer_flat, num_features


def unpickle(file):
    with open(file, 'rb') as file:
        dict = pickle.load(file, encoding='bytes')

    return dict

def convert_to_individual_image(flat):

    img_R = flat[0:1024].reshape((32, 32))
    img_G = flat[1024:2048].reshape((32, 32))
    img_B = flat[2048:3072].reshape((32, 32))

    #B G R
    mean = [125.3, 123.0, 113.9]

    img = np.dstack((img_R - mean[0], img_G - mean[1], img_B - mean[2]))
    img = np.array(img)
    # img = cv2.resize(img, (224, 224), img)

    return img


def read_coco_data(img_path, annotation_path):
    coco = COCO(annotation_path)
    ids = list(coco.imgs.keys())
    ann_keys = list(coco.anns.keys())

    print(coco.imgs[ids[0]])
    print(coco.anns[ann_keys[0]])


def one_hot_encoded(class_numbers, num_classes=None):
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1

    return np.eye(num_classes, dtype=float)[class_numbers]


if __name__ == '__main__':
    data = []
    labels = []

    val_data = []
    val_label = []

    # cifar-10
    counter = 0
    for i in range(1, 6):
        unpacked = unpickle("./cifar10/data_batch_" + str(i))
        tmp_data = unpacked[b'data']
        tmp_label = unpacked[b'labels']

        inner_counter = 0
        for flat in tmp_data:
            converted = convert_to_individual_image(flat)
            data.append(converted)
            labels.append(tmp_label[inner_counter])
            counter += 1
            inner_counter += 1
            cv2.imwrite("./img/" + str(counter) + ".jpg", converted)

    # Test data
    unpacked = unpickle("./cifar10/test_batch")
    test_data = []
    test_data_flat = unpacked[b'data']
    test_label = unpacked[b'labels']

    for flat in test_data_flat:
        test_data.append(convert_to_individual_image(flat))

    svgg = SVGG(10)
    svgg.feed_examples(input_X=data, input_Y=labels)
    svgg.feed_test_data(test_X=test_data, test_Y=test_label)
    svgg.run()

VGG16 Tensorflow实现无法在cifar-10上学习

0 个答案: