Batch training accuracy is always a multiple of 10%

Asked: 2017-06-10 09:30:01

Tags: machine-learning tensorflow deep-learning

So I am training a CNN and computing the training accuracy for every batch. Most batches come out at 100% batch training accuracy, which seemed fine to me, since I am testing the model against the very data I trained it on. But at some iterations I get a batch training accuracy of only 90%, and worst of all, it sometimes drops abruptly to 0% and then bounces straight back to 100%. I used the algorithm from https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/04_Save_Restore.ipynb, where they also compute the batch training accuracy, but they don't get this behaviour: their batch training accuracy starts at around 80% and gradually climbs to 98%. Why is this?

I suspect my network is overfitting.
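(For context on the numbers: the batch training accuracy reported below is just the fraction of correct predictions within one batch, and with the batch_size = 10 used in my code that fraction can only land on multiples of 10%. A minimal arithmetic sketch with a made-up batch of 10 predictions:)

import numpy as np

correct = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0])  # hypothetical batch: 9 of 10 predictions correct
batch_accuracy = correct.mean()                      # 9 / 10 = 0.9
print(batch_accuracy)                                # a 10-sample batch can only yield 0.0, 0.1, ..., 1.0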

Here is my exact code:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import pyfftw
from scipy import signal
import xlrd
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
import time
from datetime import timedelta
import math
import os
from sklearn.metrics import confusion_matrix

##matplotlib inline
plt.style.use('ggplot')


## define funtions
def read_data(file_path):
##    column_names = ['user-id','activity','timestamp', 'x-axis', 'y-axis', 'z-axis']
    column_names = ['activity','timestamp', 'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Mx', 'My', 'Mz'] ## 3 sensors
    data = pd.read_csv(file_path,header = None, names = column_names)
    return data

def feature_normalize(dataset):
    mu = np.mean(dataset,axis = 0)
    sigma = np.std(dataset,axis = 0)
    return (dataset - mu)/sigma

def plot_axis(ax, x, y, title):
    ax.plot(x, y)
    ax.set_title(title)
    ax.xaxis.set_visible(False)
    ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
    ax.set_xlim([min(x), max(x)])
    ax.grid(True)

def plot_activity(activity,data):
    fig, (ax0, ax1, ax2) = plt.subplots(nrows = 3, figsize = (15, 10), sharex = True)
    plot_axis(ax0, data['timestamp'], data['Ax'], 'x-axis')
    plot_axis(ax1, data['timestamp'], data['Ay'], 'y-axis')
    plot_axis(ax2, data['timestamp'], data['Az'], 'z-axis')
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    plt.subplots_adjust(top=0.90)
    plt.show()

def windows(data, size):
    start = 0
    while start < data.count():
        yield start, start + size
        start += (size / 2)

def segment_signal(data, window_size = None, num_channels=None): # edited
    segments = np.empty((0,window_size,num_channels)) #change from 3 to 9 channels for AGM fusion #use variable num_channels=9
    labels = np.empty((0))
    for (n_start, n_end) in windows(data['timestamp'], window_size):
##        x = data["x-axis"][start:end]
##        y = data["y-axis"][start:end]
##        z = data["z-axis"][start:end]
        n_start = int(n_start)
        n_end = int(n_end)
        Ax = data["Ax"][n_start:n_end]
        Ay = data["Ay"][n_start:n_end]
        Az = data["Az"][n_start:n_end]
        Gx = data["Gx"][n_start:n_end]
        Gy = data["Gy"][n_start:n_end]
        Gz = data["Gz"][n_start:n_end]
        Mx = data["Mx"][n_start:n_end]
        My = data["My"][n_start:n_end]
        Mz = data["Mz"][n_start:n_end]
        if(len(dataset['timestamp'][n_start:n_end]) == window_size): # include only windows with size of 90
            segments = np.vstack([segments,np.dstack([Ax,Ay,Az,Gx,Gy,Gz,Mx,My,Mz])])
            labels = np.append(labels,stats.mode(data["activity"][n_start:n_end])[0][0])
    return segments, labels

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.0, shape = shape)
    return tf.Variable(initial)

def depthwise_conv2d(x, W):
    return tf.nn.depthwise_conv2d(x,W, [1, 1, 1, 1], padding='VALID')

def apply_depthwise_conv(x,weights,biases):
    return tf.nn.relu(tf.add(depthwise_conv2d(x, weights),biases))

def apply_max_pool(x,kernel_size,stride_size):
    return tf.nn.max_pool(x, ksize=[1, 1, kernel_size, 1], 
                          strides=[1, 1, stride_size, 1], padding='VALID') 

#------------------------get dataset----------------------#

## run shoaib_dataset.py to generate dataset_shoaib_total.txt

## get data from dataset_shoaib_total.txt
dataset = read_data('dataset_shoaib_total.txt')


#--------------------preprocessing------------------------#

dataset['Ax'] = feature_normalize(dataset['Ax'])
dataset['Ay'] = feature_normalize(dataset['Ay'])
dataset['Az'] = feature_normalize(dataset['Az'])
dataset['Gx'] = feature_normalize(dataset['Gx'])
dataset['Gy'] = feature_normalize(dataset['Gy'])
dataset['Gz'] = feature_normalize(dataset['Gz'])
dataset['Mx'] = feature_normalize(dataset['Mx'])
dataset['My'] = feature_normalize(dataset['My'])
dataset['Mz'] = feature_normalize(dataset['Mz'])


###--------------------plot activity data----------------#

##for activity in np.unique(dataset["activity"]):
##    subset = dataset[dataset["activity"] == activity][:180]
##    plot_activity(activity,subset)


#------------------fixed hyperparameters--------------------#

window_size = 200 #from 90 #FIXED at 4 seconds


#----------------input hyperparameters------------------#

input_height = 1
input_width = window_size
num_labels = 6
num_channels = 9 #from 3 channels #9 channels for AGM


#-------------------sliding time window----------------#

segments, labels = segment_signal(dataset, window_size=window_size, num_channels=num_channels)
labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)
reshaped_segments = segments.reshape(len(segments), (window_size*num_channels)) #use variable num_channels instead of constant 3 channels


#------------divide data into test and training set-----------#

train_test_split = np.random.rand(len(reshaped_segments)) < 0.80
train_x_init = reshaped_segments[train_test_split]
train_y_init = labels[train_test_split]
test_x = reshaped_segments[~train_test_split]
test_y = labels[~train_test_split]

train_validation_split = np.random.rand(len(train_x_init)) < 0.80
train_x = train_x_init[train_validation_split]
train_y = train_y_init[train_validation_split]
validation_x = train_x_init[~train_validation_split]
validation_y = train_y_init[~train_validation_split]


#---------------training hyperparameters----------------#

batch_size = 10
kernel_size = 60 #from 60 #optimal 2
depth = 15 #from 60 #optimal 15
num_hidden = 1000 #from 1000 #optimal 80

learning_rate = 0.0001
training_epochs = 8

total_batches = train_x.shape[0] ##// batch_size
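# note: with the division by batch_size commented out above, total_batches holds the
# number of training rows rather than the number of batches; it is not referenced again below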


#---------define placeholders for input----------#

X = tf.placeholder(tf.float32, shape=[None,input_width * num_channels], name="input")
X_reshaped = tf.reshape(X,[-1,input_height,input_width,num_channels])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])


#---------------------perform convolution-----------------#

# first convolutional layer 
c_weights = weight_variable([1, kernel_size, num_channels, depth])
c_biases = bias_variable([depth * num_channels])

c = apply_depthwise_conv(X_reshaped,c_weights,c_biases)
p = apply_max_pool(c,20,2)

# second convolutional layer
c2_weights = weight_variable([1, 6,depth*num_channels,depth//10])
c2_biases = bias_variable([(depth*num_channels)*(depth//10)])

c = apply_depthwise_conv(p,c2_weights,c2_biases)


#--------------flatten data for fully connected layers----------#

shape = c.get_shape().as_list()
c_flat = tf.reshape(c, [-1, shape[1] * shape[2] * shape[3]])


#------------fully connected layers----------------#

f_weights_l1 = weight_variable([shape[1] * shape[2] * depth * num_channels * (depth//10), num_hidden])
f_biases_l1 = bias_variable([num_hidden])
f = tf.nn.tanh(tf.add(tf.matmul(c_flat, f_weights_l1),f_biases_l1))


#----------------------dropout------------------#

keep_prob = tf.placeholder(tf.float32)
drop_layer = tf.nn.dropout(f, keep_prob)


#----------------------softmax layer----------------#

out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
y_ = tf.nn.softmax(tf.add(tf.matmul(drop_layer, out_weights),out_biases), name="y_")


#-----------------loss optimization-------------#

loss = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)


#-----------------compute accuracy---------------#

correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
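# note: this accuracy op averages correct predictions over whatever is supplied via feed_dict,
# so when it is evaluated on a single batch of batch_size = 10 examples its value can only be
# a multiple of 0.1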

cost_history = np.empty(shape=[1],dtype=float)
saver = tf.train.Saver()

session = tf.Session()
session.run(tf.global_variables_initializer())

#-------------early stopping-----------------#

# Best validation accuracy seen so far.
best_validation_accuracy = 0.0

# Iteration-number for last improvement to validation accuracy.
last_improvement = 0

# Stop optimization if no improvement found in this many iterations.
require_improvement = 1000

# Counter for total number of iterations performed so far.
total_iterations = 0

def validation_accuracy():
    return session.run(accuracy, feed_dict={X: validation_x, Y: validation_y, keep_prob: 1.0})

def next_batch(b, batch_size, train_x, train_y):
    ##for b in range(total_batches):    
    offset = (b * batch_size) % (train_y.shape[0] - batch_size)
    batch_x = train_x[offset:(offset + batch_size), :]
    batch_y = train_y[offset:(offset + batch_size), :]
    return batch_x, batch_y
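# note: next_batch walks through train_x/train_y with a sliding modulo offset, so successive
# batches are consecutive slices of the arrays and the data is never reshuffled between epochs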

def optimize(num_iterations):
    # Ensure we update the global variables rather than local copies.
    global total_iterations
    global best_validation_accuracy
    global last_improvement

    # Start-time used for printing time-usage below.
    start_time = time.time()

    for i in range(num_iterations):

        # Increase the total number of iterations performed.
        # It is easier to update it in each iteration because
        # we need this number several times in the following.
        total_iterations += 1

        # Get a batch of training examples.
        # x_batch now holds a batch of images and
        # y_true_batch are the true labels for those images.
        ##x_batch, y_true_batch = data.train.next_batch(train_batch_size)
        x_batch, y_true_batch = next_batch(i, batch_size, train_x, train_y)

        # Put the batch into a dict with the proper names
        # for placeholder variables in the TensorFlow graph.
        feed_dict_train = {X: x_batch,
                           Y: y_true_batch, keep_prob: 0.5}

        # Run the optimizer using this batch of training data.
        # TensorFlow assigns the variables in feed_dict_train
        # to the placeholder variables and then runs the optimizer.
        session.run(optimizer, feed_dict=feed_dict_train)

        # Print status every 100 iterations and after last iteration.
        if (total_iterations % 100 == 0) or (i == (num_iterations - 1)):

            # Calculate the accuracy on the training-batch.
            acc_train = session.run(accuracy, feed_dict={X: x_batch,
                           Y: y_true_batch, keep_prob: 1.0})

            # Calculate the accuracy on the validation-set.
            # The function returns 2 values but we only need the first.
            ##acc_validation, _ = validation_accuracy()
            acc_validation = validation_accuracy()

            # If validation accuracy is an improvement over best-known.
            if acc_validation > best_validation_accuracy:
                # Update the best-known validation accuracy.
                best_validation_accuracy = acc_validation

                # Set the iteration for the last improvement to current.
                last_improvement = total_iterations

                # Save all variables of the TensorFlow graph to file.
                saver.save(sess=session, save_path="../shoaib-har_agm_es.ckpt")

                # A string to be printed below, shows improvement found.
                improved_str = '*'
            else:
                # An empty string to be printed below.
                # Shows that no improvement was found.
                improved_str = ''

            # Status-message for printing.
            msg = "Iter: {0:>6}, Train-Batch Accuracy: {1:>6.1%}, Validation Acc: {2:>6.1%} {3}"

            # Print it.
            print(msg.format(i + 1, acc_train, acc_validation, improved_str))

        # If no improvement found in the required number of iterations.
        if total_iterations - last_improvement > require_improvement:
            print("No improvement found in a while, stopping optimization.")

            # Break out from the for-loop.
            break

    # Ending time.
    end_time = time.time()

    # Difference between start and end-times.
    time_dif = end_time - start_time

    # Print the time-usage.
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))


optimize(10000)

Output:

[screenshot: printed batch training accuracy during training]

What is the training accuracy really for, anyway? Does it even count for anything? Or is the training accuracy supposed to be computed over the entire training data, and not just over the batch the network was trained with?

[screenshot: batch training accuracy and whole-dataset training accuracy, printed every 20 iterations]

Here I have printed the results so that, at every multiple of 20 iterations, both the batch training accuracy and the training accuracy over the whole dataset are shown.
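(For reference, this is roughly how the whole-dataset training accuracy for that printout can be computed with the placeholders defined above; it is a minimal sketch rather than my exact logging code, and it assumes train_x fits in memory for a single session.run call:)

acc_full_train = session.run(accuracy, feed_dict={X: train_x, Y: train_y, keep_prob: 1.0})
print("training accuracy over the whole training set: {:.1%}".format(acc_full_train))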

0 Answers