我正在用python训练神经网络。但是准确性很低。
precision recall f1-score support
0 0.10 1.00 0.19 1536
1 0.00 0.00 0.00 1517
2 0.00 0.00 0.00 1464
3 0.00 0.00 0.00 1504
4 0.00 0.00 0.00 1535
5 0.00 0.00 0.00 1468
6 0.00 0.00 0.00 1503
7 0.00 0.00 0.00 1499
8 0.00 0.00 0.00 1503
9 0.00 0.00 0.00 1471
accuracy 0.10 15000
macro avg 0.01 0.10 0.02 15000
weighted avg 0.01 0.10 0.02 15000
这是我的梯度下降纪元迭代:
epoch 1: 64/45000 loss: 3.594067: 0%| | 0/704 [00:00<?, ?it/s]/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: divide by zero encountered in log
# This is added back by InteractiveShellApp.init_path()
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in multiply
# This is added back by InteractiveShellApp.init_path()
epoch 1: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 51.58it/s]
mean epoch 1 loss: train: nan val: 2.362836
epoch 2: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 52.35it/s]
mean epoch 2 loss: train: 2.458662 val: 2.362836
epoch 3: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 51.42it/s]
mean epoch 3 loss: train: 2.458662 val: 2.362836
epoch 4: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 52.19it/s]
mean epoch 4 loss: train: 2.458688 val: 2.362836
...........etc
这是我的动量优化器时代:
epoch 1: 64/45000 loss: 3.594067: 0%| | 0/704 [00:00<?, ?it/s]/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: divide by zero encountered in log
# This is added back by InteractiveShellApp.init_path()
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in multiply
# This is added back by InteractiveShellApp.init_path()
epoch 1: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.13it/s]
0%| | 0/704 [00:00<?, ?it/s] mean epoch 1 loss: train: nan val: 2.362836
epoch 2: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.66it/s]
mean epoch 2 loss: train: 2.458662 val: 2.362836
epoch 3: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.40it/s]
mean epoch 3 loss: train: 2.458662 val: 2.362836
epoch 4: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.02it/s]
mean epoch 4 loss: train: 2.458662 val: 2.362836
etc.......
这是我的一些代码:
class Sigmoid(Layer):
def __init__(self):
pass
def forward(self, _input):
return 1 / (1 + np.exp(-1*_input))
raise NotImplementedError()
def backward(self, _input, grad_output):
s = 1 / (1 + np.exp(-_input))
grad_output = s * (1 - s)
return [grad_output]
raise NotImplementedError()
class Softmax(Layer):
def __init__(self):
pass
def forward(self, _input):
#stable softmax : menghindari overflow /nan value
shiftz = _input - np.max(_input, axis=-1, keepdims=True)
exps = np.exp(shiftz)
output = exps / np.sum(exps, axis=-1, keepdims=True)
return output
raise NotImplementedError()
def backward(self, _input, grad_output):
Sz = forward(_input)
grad_output = -np.outer(Sz, Sz) + np.diag(Sz.flatten())
return [grad_output]
class Dense(Layer):
def __init__(self, input_shape, output_shape, weight_initializer='random',learning_rate=0.1):
if weight_initializer not in ['xavier','random']:
raise ValueError('weight_initializer must be either xavier or random')
if weight_initializer == 'random':
self.weights = np.random.randn(input_shape, output_shape)*1e-2
elif weight_initializer == 'xavier':
self.weights = np.random.randn(input_shape, output_shape)*np.sqrt(2/(input_shape + output_shape))
self.bias = np.random.randn(output_shape)
self.learning_rate = learning_rate
def forward(self, _input):
# input shape: [batch, input_units]
# output shape: [batch, output units]
return np.dot(_input,self.weights) + self.bias
def backward(self, _input, grad_output):
grad_input = np.dot(grad_output, self.weights.T)
# compute gradient w.r.t. weights and biases
grad_weights = np.dot(_input.T, grad_output)
grad_bias = grad_output.mean(axis=0)*_input.shape[0]
assert grad_weights.shape == self.weights.shape and grad_bias.shape == self.bias.shape
# Here we perform a stochastic gradient descent step.
self.weights = self.weights - self.learning_rate * grad_weights
self.bias = self.bias - self.learning_rate * grad_bias
return [grad_input, grad_weights, grad_bias]
#Binary log loss
def binary_log_loss(y_true, y_pred):
"""
input: y_true, y_pred (numpy array with shape [batch_size, 1])
output: bin_log_loss (numpy float64)
bin_log_loss_grad (numpy array with shape [batch_size, 1])
"""
m = y_true.shape[1]
cost = (1/m) * np.sum(-y_true*np.log(y_pred) - (1-y_true)*np.log(1-y_pred))
bin_log_loss = np.squeeze(cost)
bin_log_loss_grad = (1/m) * (-(y_true/y_pred) + ((1-y_true)/(1-y_pred)))
return bin_log_loss, bin_log_loss_grad
def binary_log_loss_with_sigmoid(layer_input_output_cache, y_true):
return binary_log_loss(layer_input_output_cache[-1], y_true)
#Softmax cross entropy
def softmax_cross_entropy(y_true, y_pred):
m, k = y_true.shape
softmax_cross_entropy_loss = 0
for i in range(m):
for j in range(k):
softmax_cross_entropy_loss += y_true[i][j] * np.log(y_pred[i][j])
softmax_cross_entropy_loss /= (-1*m)
softmax_cross_entropy_grad = y_pred - y_true
return softmax_cross_entropy_loss, softmax_cross_entropy_grad
def softmax_cross_entropy_with_logits(layer_input_output_cache, y_true, softmax=Softmax()):
logits = layer_input_output_cache[-1]
y_pred = softmax.forward(logits)
return softmax_cross_entropy(y_true, y_pred)
#Momentum Optimizer
class MomentumOptimizer(Optimizer):
def __init__(self, model, loss_func, learning_rate=1e-4, beta=0.9):
super().__init__(model, loss_func)
self.learning_rate = learning_rate
self.beta = beta
self.__reset_velocity()
def __reset_velocity(self):
self.velocity = {}
def update_layer(self, layer, grad_weights, grad_bias):
# initialize velocity to zero
if id(layer) not in self.velocity:
self.velocity[id(layer)] = 0
self.velocity[id(layer)] = (self.beta * self.velocity[id(layer)]) + ((1-self.beta) * grad_weights)
layer.weights = layer.weights - (self.learning_rate * self.velocity[id(layer)])
layer.bias = layer.bias - self.learning_rate * grad_bias
我使用的数据集具有785个属性(pixel1至pixel784),这些属性表示时尚图像的像素。我已经尝试过规范化数据,因此它只包含从0到1的值。