Question

我正在用python训练神经网络。但是准确性很低。

              precision    recall  f1-score   support

           0       0.10      1.00      0.19      1536
           1       0.00      0.00      0.00      1517
           2       0.00      0.00      0.00      1464
           3       0.00      0.00      0.00      1504
           4       0.00      0.00      0.00      1535
           5       0.00      0.00      0.00      1468
           6       0.00      0.00      0.00      1503
           7       0.00      0.00      0.00      1499
           8       0.00      0.00      0.00      1503
           9       0.00      0.00      0.00      1471

    accuracy                           0.10     15000
   macro avg       0.01      0.10      0.02     15000
weighted avg       0.01      0.10      0.02     15000

这是我的梯度下降纪元迭代：

epoch 1: 64/45000 loss: 3.594067:   0%|          | 0/704 [00:00<?, ?it/s]/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: divide by zero encountered in log
  # This is added back by InteractiveShellApp.init_path()
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in multiply
  # This is added back by InteractiveShellApp.init_path()
epoch 1: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 51.58it/s]
    mean epoch 1 loss: train:  nan val: 2.362836
epoch 2: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 52.35it/s]
    mean epoch 2 loss: train: 2.458662 val: 2.362836
epoch 3: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 51.42it/s]
    mean epoch 3 loss: train: 2.458662 val: 2.362836
epoch 4: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:13<00:00, 52.19it/s]
    mean epoch 4 loss: train: 2.458688 val: 2.362836
...........etc

这是我的动量优化器时代：

epoch 1: 64/45000 loss: 3.594067:   0%|          | 0/704 [00:00<?, ?it/s]/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: divide by zero encountered in log
  # This is added back by InteractiveShellApp.init_path()
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in multiply
  # This is added back by InteractiveShellApp.init_path()
epoch 1: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.13it/s]
  0%|          | 0/704 [00:00<?, ?it/s] mean epoch 1 loss: train:  nan val: 2.362836
epoch 2: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.66it/s]
    mean epoch 2 loss: train: 2.458662 val: 2.362836
epoch 3: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.40it/s]
    mean epoch 3 loss: train: 2.458662 val: 2.362836
epoch 4: 45000/45000 loss: 2.716305: 100%|██████████| 704/704 [00:14<00:00, 48.02it/s]
    mean epoch 4 loss: train: 2.458662 val: 2.362836
etc.......

这是我的一些代码：

    class Sigmoid(Layer):

        def __init__(self):
            pass

        def forward(self, _input):
            return 1 / (1 + np.exp(-1*_input))
            raise NotImplementedError()

        def backward(self, _input, grad_output):
            s = 1 / (1 + np.exp(-_input))
            grad_output = s * (1 - s)
            return [grad_output]
            raise NotImplementedError()

    class Softmax(Layer):

        def __init__(self):
            pass

        def forward(self, _input):
            #stable softmax : menghindari overflow /nan value
            shiftz = _input - np.max(_input, axis=-1, keepdims=True)
            exps = np.exp(shiftz)
            output = exps / np.sum(exps, axis=-1, keepdims=True)
            return output
            raise NotImplementedError()

        def backward(self, _input, grad_output):
            Sz = forward(_input)
            grad_output = -np.outer(Sz, Sz) + np.diag(Sz.flatten())
            return [grad_output]

class Dense(Layer):
    def __init__(self, input_shape, output_shape, weight_initializer='random',learning_rate=0.1):

        if weight_initializer not in ['xavier','random']:
            raise ValueError('weight_initializer must be either xavier or random')

        if weight_initializer == 'random':
            self.weights = np.random.randn(input_shape, output_shape)*1e-2

        elif weight_initializer == 'xavier':
            self.weights = np.random.randn(input_shape, output_shape)*np.sqrt(2/(input_shape + output_shape))

        self.bias = np.random.randn(output_shape)
        self.learning_rate = learning_rate


    def forward(self, _input): 
        # input shape: [batch, input_units]
        # output shape: [batch, output units]
        return np.dot(_input,self.weights) + self.bias 

    def backward(self, _input, grad_output):
        grad_input = np.dot(grad_output, self.weights.T)

        # compute gradient w.r.t. weights and biases
        grad_weights = np.dot(_input.T, grad_output)
        grad_bias = grad_output.mean(axis=0)*_input.shape[0]

        assert grad_weights.shape == self.weights.shape and grad_bias.shape == self.bias.shape

        # Here we perform a stochastic gradient descent step. 
        self.weights = self.weights - self.learning_rate * grad_weights
        self.bias = self.bias - self.learning_rate * grad_bias

        return [grad_input, grad_weights, grad_bias]

#Binary log loss
def binary_log_loss(y_true, y_pred):
    """
        input: y_true, y_pred (numpy array with shape [batch_size, 1])
        output: bin_log_loss (numpy float64)
                bin_log_loss_grad (numpy array with shape [batch_size, 1])
    """
    m = y_true.shape[1]
    cost = (1/m) * np.sum(-y_true*np.log(y_pred) - (1-y_true)*np.log(1-y_pred))
    bin_log_loss = np.squeeze(cost)
    bin_log_loss_grad = (1/m) * (-(y_true/y_pred) + ((1-y_true)/(1-y_pred)))

    return bin_log_loss, bin_log_loss_grad

def binary_log_loss_with_sigmoid(layer_input_output_cache, y_true):
    return binary_log_loss(layer_input_output_cache[-1], y_true)

#Softmax cross entropy
def softmax_cross_entropy(y_true, y_pred):
    m, k = y_true.shape
    softmax_cross_entropy_loss = 0
    for i in range(m):
        for j in range(k):
          softmax_cross_entropy_loss += y_true[i][j] * np.log(y_pred[i][j])
    softmax_cross_entropy_loss /= (-1*m)

    softmax_cross_entropy_grad = y_pred - y_true
    return softmax_cross_entropy_loss, softmax_cross_entropy_grad

def softmax_cross_entropy_with_logits(layer_input_output_cache, y_true, softmax=Softmax()):
    logits = layer_input_output_cache[-1]
    y_pred = softmax.forward(logits)
    return softmax_cross_entropy(y_true, y_pred)

#Momentum Optimizer
class MomentumOptimizer(Optimizer):
    def __init__(self, model, loss_func, learning_rate=1e-4, beta=0.9):

        super().__init__(model, loss_func)
        self.learning_rate = learning_rate
        self.beta = beta
        self.__reset_velocity()

    def __reset_velocity(self):
        self.velocity = {}

    def update_layer(self, layer, grad_weights, grad_bias):
        # initialize velocity to zero
        if id(layer) not in self.velocity:
            self.velocity[id(layer)] = 0

        self.velocity[id(layer)] = (self.beta * self.velocity[id(layer)]) + ((1-self.beta) * grad_weights)

        layer.weights =  layer.weights - (self.learning_rate * self.velocity[id(layer)])

        layer.bias = layer.bias - self.learning_rate * grad_bias

我使用的数据集具有785个属性（pixel1至pixel784），这些属性表示时尚图像的像素。我已经尝试过规范化数据，因此它只包含从0到1的值。

在python中对神经网络的准确性非常低

0 个答案: