此VGGNet是使用Tensorflow框架从头开始实现的,其中所有层均在代码中定义。 我在这里面临的主要问题是,即使我等待了相当长的时间,训练准确性(更不用说验证准确性)还是会提高。我怀疑现在很少有问题引起此问题。首先,我认为该网络对于cifar-10数据集而言太深和太宽。其次,从整个数据集中提取数据批处理并不是穷举,也就是说,在整个数据集中一遍又一遍地使用批处理选择,而不会消除在进行中的时代中选择的那些示例。
但是,经过数小时和数天的实验,我仍然无法使此代码正常工作。
我希望我可以提取有问题的代码部分来提出问题,但是由于我无法在此处找到确切的部分,因此让我上载整个代码。
import os
import sys
import tensorflow as tf
import numpy as np
import scipy as sci
import math
import matplotlib.pyplot as plt
import time
import random
import imageio
import pickle
import cv2
import json
from pycocotools.coco import COCO
class SVGG:
def __init__(self, num_output_classes):
self.input_layer_size = 0
self.num_output_classes = num_output_classes
# Data
self.X = []
self.Y = []
self.working_x = []
self.working_y = []
self.testX = []
self.testY = []
# hard coded for now. Have to change.
self.input_data_size = 32 # 32 X 32
self.input_data_size_flat = 3072 # 32 X 32 X 3 == 3072
self.num_of_channels = 3 # 3 for colour image
self.input_data_size = 32 # 32 X 32
self.input_data_size_flat = self.input_data_size * self.input_data_size # 32 X 32 X 3 == 3072
self.num_of_channels = 3 # 3 for colour image
self.convolution_layers = []
self.convolution_weights = []
self.fully_connected_layers = []
self.fully_connected_weights = []
def feed_examples(self, input_X, input_Y):
"""
Feed examples to be learned
:param input_X: Training dataset X
:param input_Y: Traning dataset label
:return:
"""
# Take first input and calculate its size
# hard code size
self.X = input_X
self.Y = input_Y
self.input_data_size_flat = len(self.X[0]) * len(self.X[0][0]) * len(self.X[0][0][0])
def feed_test_data(self, test_X, test_Y):
self.testX = test_X
self.testY = test_Y
def run(self):
x = tf.placeholder(tf.float32, [None, self.input_data_size_flat], name='x')
x_data = tf.reshape(x, [-1, self.input_data_size, self.input_data_size, 3])
y_true = tf.placeholder(tf.float32, [None, self.num_output_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
"""
VGG layers
"""
# Create layers
######################################## Input Layer ########################################
input_layer, input_weight = self.create_convolution_layer(x_data, num_input_channels=3, filter_size=3, num_filters=64,
use_pooling=True) # False
######################################## Convolutional Layer ########################################
############### Conv Layer 1 #################
conv_1_1, w_1_1 = self.create_convolution_layer(input=input_layer, num_input_channels=64, filter_size=3, num_filters=64, use_pooling=False)
conv_1_2, w_1_2 = self.create_convolution_layer(input=conv_1_1, num_input_channels=64, filter_size=3, num_filters=128, use_pooling=True)
############### Conv Layer 2 #################
conv_2_1, w_2_1 = self.create_convolution_layer(input=conv_1_2, num_input_channels=128, filter_size=3, num_filters=128, use_pooling=False)
conv_2_2, w_2_2 = self.create_convolution_layer(input=conv_2_1, num_input_channels=128, filter_size=3, num_filters=256, use_pooling=True)
############### Conv Layer 3 #################
conv_3_1, w_3_1 = self.create_convolution_layer(input=conv_2_2, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
conv_3_2, w_3_2 = self.create_convolution_layer(input=conv_3_1, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
conv_3_3, w_3_3 = self.create_convolution_layer(input=conv_3_2, num_input_channels=256, filter_size=3, num_filters=512, use_pooling=True)
############### Conv Layer 4 #################
conv_4_1, w_4_1 = self.create_convolution_layer(input=conv_3_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_4_2, w_4_2 = self.create_convolution_layer(input=conv_4_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_4_3, w_4_3 = self.create_convolution_layer(input=conv_4_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)
############### Conv Layer 5 #################
conv_5_1, w_5_1 = self.create_convolution_layer(input=conv_4_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_5_2, w_5_2 = self.create_convolution_layer(input=conv_5_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_5_3, w_5_3 = self.create_convolution_layer(input=conv_5_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)
layer_flat, num_features = self.flatten_layer(conv_5_3)
######################################## Fully Connected Layer ########################################
fc_1 = self.create_fully_connected_layer(input=layer_flat, num_inputs=num_features, num_outputs=4096)
fc_2 = self.create_fully_connected_layer(input=fc_1, num_inputs=4096, num_outputs=4096)
fc_3 = self.create_fully_connected_layer(input=fc_2, num_inputs=4096, num_outputs=self.num_output_classes, use_dropout=False)
# Normalize prediction
y_prediction = tf.nn.softmax(fc_3)
# The class-number is the index of the largest element
y_prediction_class = tf.argmax(y_prediction, axis=1)
# Cost-Fuction to be optimized
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc_3, labels=y_true)
# => Now we have a measure of how well the model performs on each image individually. But in order to use the
# Cross entropy to guide the optimization of the model's variable swe need a single value, so we simply take the
# Average of the cross-entropy for all the image classifications
cost = tf.reduce_mean(cross_entropy)
# Optimizer
optimizer_adam = tf.train.AdamOptimizer(learning_rate=0.002).minimize(cost)
# Performance measure
correct_prediction = tf.equal(y_prediction_class, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
total_iterations = 0
num_iterations = 100000
start_time = time.time()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(num_iterations):
x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=0, high=40000, batch_size=128)
feed_dict_train = {x: x_batch, y_true: y_true_batch}
sess.run(optimizer_adam, feed_dict_train)
if i % 100 == 99:
# Calculate the accuracy on the training-set.
x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=40000, high=50000, batch_size=1000)
feed_dict_validate = {x: x_batch, y_true: y_true_batch}
acc = sess.run(accuracy, feed_dict=feed_dict_validate)
# Message for printing.
msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
# print(sess.run(y_prediction, feed_dict=feed_dict_train))
# print(sess.run(y_prediction_class, feed_dict=feed_dict_train))
print(msg.format(i + 1, acc))
if i % 10000 == 9999:
oSaver = tf.train.Saver()
oSess = sess
path = "./model/_" + "iteration_" + str(i) + ".ckpt"
oSaver.save(oSess, path)
if i == num_iterations - 1:
x_batch, y_true_batch, _ = self.get_batch(X=self.testX, Y=self.testY, low=0, high=10000, batch_size=10000)
feed_dict_test = {x: x_batch, y_true: y_true_batch}
test_accuracy = sess.run(accuracy, feed_dict=feed_dict_test)
msg = "Test Accuracy: {0:>6.1%}"
print(msg.format(test_accuracy))
def get_batch(self, X, Y, low=0, high=50000, batch_size=128):
x_batch = []
y_batch = np.ndarray(shape=(batch_size, self.num_output_classes))
index = np.random.randint(low=low, high=high, size=batch_size)
counter = 0
for idx in index:
x_batch.append(X[idx].flatten())
y_batch[counter] = one_hot_encoded(Y[idx], self.num_output_classes)
y_batch_cls = Y[idx]
counter += 1
return x_batch, y_batch, y_batch_cls
def generate_new_weights(self, shape):
w = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
return w
def generate_new_biases(self, shape):
b = tf.Variable(tf.constant(0.05, shape=[shape]))
return b
def create_convolution_layer(self, input, num_input_channels, filter_size, num_filters, use_pooling):
"""
:param input: The previous layer
:param num_input_channels: Number of channels in previous layer
:param filter_size: W and H of each filter
:param num_filters: Number of filters
:return:
"""
shape = [filter_size, filter_size, num_input_channels, num_filters]
weights = self.generate_new_weights(shape)
biases = self.generate_new_biases(num_filters)
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
layer += biases
# Max Pooling
if use_pooling:
layer = tf.nn.max_pool(layer, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
# ReLu. Using elu for better performance
layer = tf.nn.elu(layer)
return layer, weights
def create_fully_connected_layer(self, input, num_inputs, num_outputs, use_dropout=True):
weights = self.generate_new_weights(shape=[num_inputs, num_outputs])
biases = self.generate_new_biases(shape=num_outputs)
layer = tf.matmul(input, weights) + biases
layer = tf.nn.elu(layer)
if use_dropout:
keep_prob = tf.placeholder(tf.float32)
keep_prob = 0.5
layer = tf.nn.dropout(layer, keep_prob)
return layer
def flatten_layer(self, layer):
"""
Flattens dimension that is output by a convolution layer.
Flattening is need to feed into a fully-connected-layer.
:param layer:
:return:
"""
# shape [num_images, img_height, img_width, num_channels]
layer_shape = layer.get_shape()
# Number of features h x w x channels
num_features = layer_shape[1: 4].num_elements()
# Reshape
layer_flat = tf.reshape(layer, [-1, num_features])
# Shape is now [num_images, img_height * img_width * num_channels]
return layer_flat, num_features
def unpickle(file):
with open(file, 'rb') as file:
dict = pickle.load(file, encoding='bytes')
return dict
def convert_to_individual_image(flat):
img_R = flat[0:1024].reshape((32, 32))
img_G = flat[1024:2048].reshape((32, 32))
img_B = flat[2048:3072].reshape((32, 32))
#B G R
mean = [125.3, 123.0, 113.9]
img = np.dstack((img_R - mean[0], img_G - mean[1], img_B - mean[2]))
img = np.array(img)
# img = cv2.resize(img, (224, 224), img)
return img
def read_coco_data(img_path, annotation_path):
coco = COCO(annotation_path)
ids = list(coco.imgs.keys())
ann_keys = list(coco.anns.keys())
print(coco.imgs[ids[0]])
print(coco.anns[ann_keys[0]])
def one_hot_encoded(class_numbers, num_classes=None):
if num_classes is None:
num_classes = np.max(class_numbers) + 1
return np.eye(num_classes, dtype=float)[class_numbers]
if __name__ == '__main__':
data = []
labels = []
val_data = []
val_label = []
# cifar-10
counter = 0
for i in range(1, 6):
unpacked = unpickle("./cifar10/data_batch_" + str(i))
tmp_data = unpacked[b'data']
tmp_label = unpacked[b'labels']
inner_counter = 0
for flat in tmp_data:
converted = convert_to_individual_image(flat)
data.append(converted)
labels.append(tmp_label[inner_counter])
counter += 1
inner_counter += 1
cv2.imwrite("./img/" + str(counter) + ".jpg", converted)
# Test data
unpacked = unpickle("./cifar10/test_batch")
test_data = []
test_data_flat = unpacked[b'data']
test_label = unpacked[b'labels']
for flat in test_data_flat:
test_data.append(convert_to_individual_image(flat))
svgg = SVGG(10)
svgg.feed_examples(input_X=data, input_Y=labels)
svgg.feed_test_data(test_X=test_data, test_Y=test_label)
svgg.run()