I'm fairly new to machine/deep learning. I have a good amount of experience building supervised learning models with APIs such as Scikit-Learn, TensorFlow, and Keras, so I wanted to implement one myself to get a better feel for how they work.
I tried to write a basic deep neural network for a classification problem from scratch. I tested it on the iris dataset, but my implementation gives very poor results: it badly underfits the data. The best accuracy I get on any run is 66%, and the worst is as low as 0%. Even after seeding the randomness of the algorithm, I get a large variance in results between runs.
I chose the tanh activation function, a learning rate of 0.01, softmax activation on the output layer, and StandardScaler normalization of the input variables.
So I'd like to know whether I got the math wrong or whether I'm missing some essential part of the algorithm. I'd be very grateful if someone could run this code and point me to possible changes. Many thanks in advance.
The code is as follows:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

data = load_iris()
X = data.data
y = data.target
class Neural_Network:
    def __init__(self, n_hlayers, n_nodes, lr):
        # No. of hidden layers
        self.n_layers = n_hlayers
        # No. of nodes in each of the hidden layers
        self.n_nodes = n_nodes
        # Learning rate of the algorithm
        self.lr = lr
        # Dictionary to hold the node values of all the layers
        self.layers = {}
        # Dictionary to hold the weight values of all the layers
        self.weights = {}

    def _softmax(self, values):
        '''Performs softmax activation on the node values;
        returns the probability of each class.'''
        exp_scores = np.exp(values)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs

    def _derivate_tanh(self, values):
        '''Derivative of the tanh activation function.'''
        # Derivative of tanh(x) is 1 - tanh^2(x); `values` are already tanh outputs
        return 1 - np.power(values, 2)
    def fit(self, X, y):
        '''Constructs a neural network with the given hyperparameters and runs it
        for the given no. of epochs. For simplicity's sake, every hidden layer has
        the same no. of nodes.
        returns: None'''
        print('Fitting the data')
        try:
            X = np.array(X)
            y = np.array(y)
        except:
            print('Could not make sense of the inputs')
        # No. of examples and the dimensions of each sample
        self.num_examples, self.features = X.shape
        # Setting default layers
        # Input layer
        self.layers['input'] = np.zeros(shape=[1, self.features])
        # Hidden layers
        for i in range(1, self.n_layers + 1):
            self.layers['layer-1' + str(i)] = np.zeros(shape=[1, self.n_nodes])
        # Output layer
        self.layers['output'] = np.zeros(shape=[1, len(np.unique(y))])
        # Setting random weights
        for i in range(1, self.n_layers + 2):
            # Weights between the input layer and the first hidden layer
            if i == 1:
                self.weights['weight-1' + str(i)] = np.random.uniform(
                    low=0.1, high=0.2, size=[self.features, self.n_nodes])
            # Weights between hidden layers
            elif i < self.n_layers + 1:
                self.weights['weight-1' + str(i)] = np.random.uniform(
                    low=0.1, high=0.2, size=[self.n_nodes, self.n_nodes])
            # Weights between the last hidden layer and the output layer
            else:
                self.weights['weight-1' + str(i)] = np.random.uniform(
                    low=0.1, high=0.2, size=[self.n_nodes, len(np.unique(y))])
        # No. of epochs taken from the user
        epochs = int(input('Please choose no.of epochs: '))
        # StandardScaler to normalize the input data
        S_s = StandardScaler()
        self.X = S_s.fit_transform(X)
        self.y = y.reshape(self.num_examples, 1)
        for ep in range(epochs):
            # Forward propagation
            self._Forward_Propogate()
            if ep % 100 == 0:
                # Calculating the accuracy of the predictions
                self.acc = np.sum(self.y.flatten() == np.argmax(
                    self.layers['output'], axis=1)) / self.num_examples
                print('Accuracy in epoch', ep, 'is :', self.acc)
            # Backward propagation
            self._Backward_Propogation()
    def _Forward_Propogate(self):
        '''Performs forward propagation of the input data through the hidden
        layers and the output layer.
        Activations: tanh for all layers except the output layer (softmax).
        returns: None'''
        # Feeding the normalized inputs to the input layer
        self.layers['input'] = self.X
        # Forward propagating
        for i in range(1, len(self.layers.keys())):
            # Input layer dot-product with the first set of weights
            if i == 1:
                dp = self.layers['input'].dot(self.weights['weight-1' + str(i)])
                # Storing the result in the first hidden layer after tanh activation
                self.layers['layer-1' + str(i)] = np.tanh(dp)
            # Hidden-layer dot-product with the weights of the hidden layer
            elif i != len(self.layers.keys()) - 1:
                dp = self.layers['layer-1' + str(i - 1)].dot(self.weights['weight-1' + str(i)])
                # Storing the result in the next hidden layer after tanh activation
                self.layers['layer-1' + str(i)] = np.tanh(dp)
            # Dot-product of the last hidden layer with the last set of weights
            else:
                dp = self.layers['layer-1' + str(i - 1)].dot(self.weights['weight-1' + str(i)])
                # Storing the result in the output layer after softmax activation
                self.layers['output'] = self._softmax(dp)
    def _Backward_Propogation(self):
        '''Performs back propagation with plain gradient descent, from the weights
        of the output layer back through the hidden layers to the input-layer weights.
        returns: None'''
        # Dictionary to hold the delta / error values of each layer
        self.delta = {}
        # Dictionary to hold the gradient / slope values of each layer
        self.gradients = {}
        # Calculating the error
        error = self.y - self.layers['output']
        # Adjusting the weights of the network, starting from the output layer
        for i in reversed(range(1, len(self.weights.keys()) + 1)):
            # Adjusting weights for the last layer
            if i == len(self.weights.keys()):
                # Delta for the output-layer weights
                self.delta['delta_out'] = error * self.lr
                # Gradient or slope for the last layer's weights
                self.gradients['grad_out'] = self.layers['layer-1' + str(i - 1)].T.dot(
                    self.delta['delta_out'])
                # Adjusting the original weights of the output layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (
                    self.lr * self.gradients['grad_out'])
            # Adjusting weights for the last-but-one layer
            elif i == len(self.weights.keys()) - 1:
                # Delta / error values of the first hidden layer as seen from the output layer
                self.delta['delta_1' + str(i)] = self.delta['delta_out'].dot(
                    self.weights['weight-1' + str(i + 1)].T) * self._derivate_tanh(self.layers['layer-1' + str(i)])
                # Gradient / slope for the weights of the first hidden layer seen from the output layer
                self.gradients['grad_1' + str(i)] = self.layers['layer-1' + str(i - 1)].T.dot(
                    self.delta['delta_1' + str(i)])
                # Adjusting the weights of the last-but-one layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (
                    self.lr * self.gradients['grad_1' + str(i)])
            # Adjusting weights for all other hidden layers
            elif i > 1:
                # Delta / error values for the weights in the hidden layers
                self.delta['delta_1' + str(i)] = self.delta['delta_1' + str(i + 1)].dot(
                    self.weights['weight-1' + str(i + 1)]) * self._derivate_tanh(self.layers['layer-1' + str(i)])
                # Gradient / slope values for the weights of the hidden layers
                self.gradients['grad_1' + str(i)] = self.layers['layer-1' + str(i - 1)].T.dot(
                    self.delta['delta_1' + str(i)])
                # Adjusting the weights of the hidden layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (
                    self.lr * self.gradients['grad_1' + str(i)])
            # Adjusting the weights that multiply the input layer
            else:
                # Delta / error values for the weights that come after the input layer
                self.delta['delta_inp'] = self.delta['delta_1' + str(i + 1)].dot(
                    self.weights['weight-1' + str(i + 1)]) * self._derivate_tanh(self.layers['layer-1' + str(i)])
                # Gradient / slope values for the weights that come after the input layer
                self.gradients['grad_1' + str(i)] = self.layers['input'].T.dot(self.delta['delta_inp'])
                # Adjusting the weights
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (
                    self.lr * self.gradients['grad_1' + str(i)])
Here are sample results:
ob = Neural_Network(5, 50, 0.01)
ob.fit(X,y)
Please choose no.of epochs: 800
Accuracy in epoch 0 is : 0.17333333333333334
Accuracy in epoch 100 is : 0.18
Accuracy in epoch 200 is : 0.18
Accuracy in epoch 300 is : 0.18
Accuracy in epoch 400 is : 0.18
Accuracy in epoch 500 is : 0.18
Accuracy in epoch 600 is : 0.18
Accuracy in epoch 700 is : 0.18
The node values of my output layer (the probabilities from the softmax activation) are extreme most of the time; they reach values as small as e^-37. I know this shouldn't be the case. And when I check the weights of the output layer, they are not that extreme; in other words, they haven't changed so drastically that they would have overshot a local minimum. So I can't figure out where the problem is. Once again, I'd be very grateful if anyone could run this program and look into the issue.
Thanks.
Answer 0 (score: 2)
I don't think this is an underfitting problem; you should check your code more carefully. Here are some suggestions.
1. The delta of the output layer is wrong.
error = self.y - self.layers['output']
It should be yHat - y. Also, I don't think you need to multiply it by the learning rate here:
self.delta['delta_out'] = error * self.lr
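A minimal sketch of what this fix could look like inside _Backward_Propogation (the one-hot encoding is my assumption: for softmax with a cross-entropy loss the output delta is probs - onehot(y), and self.y holds integer class labels of shape (num_examples, 1), so subtracting it directly from the (num_examples, n_classes) probability matrix broadcasts incorrectly):
# Sketch, not the original code: one-hot encode the integer labels so the
# shapes match the (num_examples, n_classes) probability matrix.
y_onehot = np.zeros_like(self.layers['output'])
y_onehot[np.arange(self.num_examples), self.y.flatten()] = 1
# yHat - y: the softmax / cross-entropy delta, with no learning-rate factor.
self.delta['delta_out'] = self.layers['output'] - y_onehot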
2. Mind the shapes. IMO, you forgot to transpose the weights here (and maybe elsewhere):
self.delta['delta_1' + str(i)] = self.delta['delta_1' + str(i+1)].dot(
    self.weights['weight-1' + str(i+1)]) * self._derivate_tanh(self.layers['layer-1' + str(i)])
Suggestion: try using a different n_nodes for each layer; in that case you will get a broadcasting error immediately. (It only runs now because all hidden layers happen to have the same width.) A sketch of the same line with the transpose added, mirroring the .T already used in the branch above it:
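# Backpropagate through the *transposed* weight matrix so the shapes line up:
# (num_examples, n_out) . (n_out, n_in) -> (num_examples, n_in)
self.delta['delta_1' + str(i)] = self.delta['delta_1' + str(i + 1)].dot(
    self.weights['weight-1' + str(i + 1)].T) * self._derivate_tanh(self.layers['layer-1' + str(i)])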
3. Update the weights only after all the deltas have been computed.
self.delta['delta_1' + str(i)] = self.delta['delta_out'].dot(
    self.weights['weight-1' + str(i+1)].T) * self._derivate_tanh(self.layers['layer-1' + str(i)])
self.weights['weight-1' + str(i+1)] has already been updated in a previous iteration of the loop, which I think is incorrect.
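One way to follow this suggestion (a sketch of a possible _Backward_Propogation body, not the asker's code; it assumes the one-hot delta from point 1 and the transposed weights from point 2): snapshot the weights first, compute every delta against that snapshot, and only then apply the updates.
# Sketch: two-pass backprop. Deltas are computed against the weights as
# they were at the start of the step; updates are applied afterwards.
old_weights = {k: w.copy() for k, w in self.weights.items()}
deltas = {}
n = len(old_weights)
# Output delta (yHat - y, with y_onehot built as in point 1)
deltas[n] = self.layers['output'] - y_onehot
# Hidden-layer deltas, using the untouched weight snapshot
for i in reversed(range(1, n)):
    deltas[i] = deltas[i + 1].dot(old_weights['weight-1' + str(i + 1)].T) \
        * self._derivate_tanh(self.layers['layer-1' + str(i)])
# Apply all the weight updates in a second pass
for i in range(1, n + 1):
    inputs = self.layers['input'] if i == 1 else self.layers['layer-1' + str(i - 1)]
    self.weights['weight-1' + str(i)] -= self.lr * inputs.T.dot(deltas[i])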