I am augmenting my training data with rotated copies of the images and then testing an artificial neural network on it. The idea is to measure how the test accuracy changes as more data is added to the training set.
I am using the MNIST dataset.
This is how I rotate an image:
import numpy as np
import pandas as pd
import scipy.ndimage
import scipy.special
import matplotlib.pyplot as plt

def rotate_image(inputs, degree):
    ## create rotated variations of a flattened 28x28 image
    # rotated anticlockwise by `degree` degrees
    # (scipy.ndimage.rotate replaces the deprecated scipy.ndimage.interpolation.rotate)
    inputs_plusx_img = scipy.ndimage.rotate(inputs.reshape(28, 28), degree, cval=0.01, order=1, reshape=False)
    new_inputs1 = inputs_plusx_img.reshape(784)
    # rotated clockwise by `degree` degrees
    inputs_minusx_img = scipy.ndimage.rotate(inputs.reshape(28, 28), -degree, cval=0.01, order=1, reshape=False)
    new_inputs2 = inputs_minusx_img.reshape(784)
    return (new_inputs1, new_inputs2)
degree = 10
df = pd.read_csv(train_file)
#print(df.head())
idx = 100
instance = df.iloc[idx:(idx+1), 1:].values
#print(instance.reshape(28,28))
new_image1, new_image2 = rotate_image(instance, degree)
# show the anticlockwise-rotated image
image_array = np.asfarray(new_image1).reshape((28, 28))
print(new_image1)
# plot the grid in grey scale
plt.imshow(image_array, cmap='Greys', interpolation='none')
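One thing I noticed while doing this: instance here comes straight from the CSV, so its values are 0-255, while the network (see preprocess_data further down) trains on inputs scaled into 0.01-1.0. I assume the rotation should therefore be applied to the already-scaled vector, so that cval=0.01 matches the background value; a quick sketch of what I mean, purely for illustration:

# scale and shift exactly as preprocess_data does, then rotate the scaled vector
scaled_instance = (np.asfarray(instance) / 255.0 * 0.99) + 0.01
scaled_rot1, scaled_rot2 = rotate_image(scaled_instance, degree)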
What I am not sure about is how to add the new images to the training dataset and then feed them to the ANN class (I have put a rough sketch of what I have in mind at the end of the post).
Here is my neural network:
class neuralNetwork:
    """Artificial Neural Network classifier.

    Parameters
    ------------
    lr : float
        Learning rate (between 0.0 and 1.0)
    ep : int
        Number of epochs
    bs : int
        Size of the training batch used when calculating the gradient descent update.
        batch_size = 1: stochastic (single-instance) gradient descent
        batch_size > 1: mini-batch gradient descent
    inodes : int
        Number of input nodes, normally the number of features in an instance.
    hnodes : int
        Number of hidden nodes in the net.
    onodes : int
        Number of output nodes in the net.

    Attributes
    -----------
    wih : 2d-array
        Input-to-hidden node weights after fitting
    who : 2d-array
        Hidden-to-output node weights after fitting
    E : list
        Sum-of-squares error value in each epoch.
    results : list
        Target and predicted class labels for the test data.

    Functions
    ---------
    activation_function : float (between 0 and 1)
        implements the sigmoid function which squashes the node input
    """
    def __init__(self, inputnodes=784, hiddennodes=200, outputnodes=10, learningrate=0.1, batch_size=1, epochs=10):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # two weight matrices: wih (input to hidden layer) and who (hidden layer to output)
        # a weight on the link from node i to node j is w_ij
        # Draw random samples from a normal (Gaussian) distribution centred around 0.
        # numpy.random.normal(loc to centre the Gaussian = 0.0, scale = 1, size = dimensions of the array we want)
        # scale is usually set to the standard deviation, which is related to the number of incoming links,
        # i.e. 1/sqrt(number of incoming links); we use pow to raise it to the power of -0.5.
        # We have set 0 as the centre of the Gaussian distribution.
        # size is set to the dimensions (hnodes x inodes and onodes x hnodes) of each weight matrix
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        # incoming links into each output node come from the hidden layer, hence pow(hnodes, -0.5)
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))
        # set the learning rate
        self.lr = learningrate
        # set the batch size
        self.bs = batch_size
        # set the number of epochs
        self.ep = epochs
        # store the error at each epoch
        self.E = []
        # store results from testing the model
        # (keeps track of the network performance on each test instance)
        self.results = []
        # define the activation function here:
        # the sigmoid squashing function; scipy.special.expit() provides the sigmoid
        # lambda is a shortcut for an anonymous function defined and used in place (no def needed)
        self.activation_function = lambda x: scipy.special.expit(x)
        pass
    # helper to manage batching for gradient descent;
    # the size of each batch is controlled by self.bs
    def batch_input(self, X, y):
        """Yield consecutive batches of the specified size from the input list."""
        for i in range(0, len(X), self.bs):
            # yield a tuple of the current batch of data and labels
            yield (X[i:i + self.bs], y[i:i + self.bs])
    # train the neural net
    # note: the first part is very similar to the query function because both require the forward pass
    def train(self, train_inputs, targets_list):
        """Train the neural net.

        This includes the forward pass; error computation; backpropagation of the error;
        calculation of the gradients and updating of the weights.

        Parameters
        ----------
        train_inputs : {array-like}, shape = [n_instances, n_features]
            Training vectors, where n_instances is the number of training instances and
            n_features is the number of features.
            Note: these are the scaled pixel features only; the class labels are passed separately in targets_list.
        targets_list : {array-like}, shape = [n_instances, n_outputs]
            One target vector per training instance (0.99 at the true class, 0.01 elsewhere).

        Returns
        -------
        self : object
        """
        for e in range(self.ep):
            print("Training epoch#: ", e)
            sum_error = 0.0
            for (batchX, batchY) in self.batch_input(train_inputs, targets_list):
                # variables to accumulate the gradients over the batch
                delta_who = 0
                delta_wih = 0
                # iterate through the instances in the current batch
                for inputs, targets in zip(batchX, batchY):
                    # convert the inputs list to a 2d column vector
                    inputs = np.array(inputs, ndmin=2).T
                    targets = np.array(targets, ndmin=2).T
                    # calculate signals into the hidden layer
                    hidden_inputs = np.dot(self.wih, inputs)
                    # calculate the signals emerging from the hidden layer
                    hidden_outputs = self.activation_function(hidden_inputs)
                    # calculate signals into the final output layer
                    final_inputs = np.dot(self.who, hidden_outputs)
                    # calculate the signals emerging from the final output layer
                    final_outputs = self.activation_function(final_inputs)
                    # the output error is the element-wise difference between target and actual output
                    output_errors = targets - final_outputs
                    # distribute the error back to the hidden layer: the hidden layer error is
                    # the output_errors, split by the weights, recombined at the hidden nodes
                    hidden_errors = np.dot(self.who.T, output_errors)
                    ## accumulate the gradients from each instance
                    ## delta_who are the gradients between hidden and output weights
                    ## delta_wih are the gradients between input and hidden weights
                    delta_who += np.dot((output_errors * final_outputs * (1.0 - final_outputs)), np.transpose(hidden_outputs))
                    delta_wih += np.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), np.transpose(inputs))
                    sum_error += np.dot(output_errors.T, output_errors)  # sum of squared error accumulated over each batched instance
                    pass  # instance
                # update the weights by multiplying the gradient by the learning rate
                # the deltas are divided by the batch size to obtain the average gradient for the batch
                # (if batch size = 1 we simply divide by 1, since each instance forms a singleton batch)
                self.who += self.lr * (delta_who / self.bs)
                self.wih += self.lr * (delta_wih / self.bs)
                pass  # batch
            self.E.append(np.asfarray(sum_error).flatten())
            print("errors (SSE): ", self.E[-1])
            pass  # epoch
    # query the neural net
    def query(self, inputs_list):
        # convert inputs_list to a 2d column vector
        inputs = np.array(inputs_list, ndmin=2).T
        # propagate the input into the hidden layer; this is the start of the forward pass
        hidden_inputs = np.dot(self.wih, inputs)
        # squash the content of the hidden nodes using the sigmoid function (values between 0 and 1)
        hidden_outputs = self.activation_function(hidden_inputs)
        # propagate into the output layer and then apply the squashing sigmoid function
        final_inputs = np.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        return final_outputs
    # iterate through all the test data to calculate the model accuracy
    def test(self, test_inputs, test_targets):
        self.results = []
        # go through each test instance
        for inputs, target in zip(test_inputs, test_targets):
            # query the network with the test inputs
            # note: this returns 10 output values; the index of the highest value
            # is the network's predicted class label
            outputs = self.query(inputs)
            # the target has 0.99 as its highest value at the index of the actual class
            target_label = np.argmax(target)
            # the index of the highest output node corresponds to the predicted class
            predict_label = np.argmax(outputs)  # this is the class predicted by the ANN
            self.results.append([predict_label, target_label])
            pass
        pass
        self.results = np.asfarray(self.results)  # convert to a float array of [predicted, target] pairs
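To double-check my understanding of the batching: as far as I can tell, batch_input just yields consecutive slices of the training lists. A tiny toy run (arbitrary numbers, not part of the real pipeline):

nn = neuralNetwork(batch_size=2, epochs=1)
X_toy = [[0.1], [0.2], [0.3], [0.4], [0.5]]
y_toy = [[1.0], [0.0], [1.0], [0.0], [1.0]]
for batchX, batchY in nn.batch_input(X_toy, y_toy):
    print(len(batchX), len(batchY))   # prints 2 2, 2 2, then 1 1 - the last batch is smaller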
The functions used to preprocess the data and then train the network:
def preprocess_data(Xy):
    X = []
    y = []
    for instance in Xy:
        # split the record on the ',' commas
        all_values = instance.split(',')
        # scale and shift the inputs from 0-255 into the range 0.01-1.0
        inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
        # create the target output values (all 0.01, except the desired label which is 0.99)
        targets = np.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        X.append(inputs)
        y.append(targets)
        pass
    return (X, y)
mini_training_data = np.random.choice(train_data_list, 60000, replace=False)
print("Percentage of training data used:", (len(mini_training_data) / len(train_data_list)) * 100)
X_train, y_train = preprocess_data(mini_training_data)
X_test, y_test = preprocess_data(test_data_list)
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate, batch_size, epochs)
n.train(X_train, y_train)
n.test(X_test, y_test)
# report network performance as an accuracy metric
correct = 0  # number of predictions that were correct
# iterate through each tested instance and accumulate the number of correct predictions
for result in n.results:
    if result[0] == result[1]:
        correct += 1
        pass
    pass
# print the accuracy on the test set
print("Test set accuracy% = ", (100 * correct / len(n.results)))