Question

我一直在用神经网络对手写数字识别上的经典MNIST数据集进行分类，而且每次初始化模型时，似乎都会严重偏向1-3个随机输出。

至关重要的是，即使没有训练，只是使用随机权重，它似乎仍然存在严重偏差。那可能意味着代码中会出现一些简单的错误，但是我和其他人完全无法发现它。

更改学习率，隐藏节点数等参数不会执行任何操作。来自模型的具有随机权重且没有学习的混淆矩阵显然是不正确的，并且随着学习的进行几乎不会改变。

相关类别：

import numpy as np

class neuralnet:
        """one hidden layer neural network"""
        def __init__(self,inputs,targets,nHid,eta,momentum):
                self.nIn = np.shape(inputs)[1]
                self.nHid = nHid
                self.nOut = np.shape(targets)[1]
                self.eta = eta
                self.nData = np.shape(inputs)[0]
                self.momentum = momentum

                # create random weights, including for bias node
                self.inWeights = np.random.rand(self.nHid,self.nIn+1)*0.1 -0.05
                self.outWeights = np.random.rand(self.nOut,self.nHid+1)*0.1 -0.05

                # for momentum calculation
                self.priorIn = self.inWeights
                self.priorOut = self.outWeights

        def reweight(self):
                # create random weights, including for bias node
                self.inWeights = np.random.rand(self.nHid,self.nIn+1)*0.1 -0.05
                self.OutWeights = np.random.rand(self.nOut,self.nHid+1)*0.1 -0.05

                # for momentum calculation
                self.priorIn = self.inWeights
                self.priorOut = self.outWeights

        def train(self, inputs, targets, eta, rounds, reweight = 0):
                # train for fixed number of rounds

                # reweight
                if reweight:
                        self.reweight()

                # add bias input nodes
                inputs = np.concatenate((inputs,np.ones((self.nData,1))),axis=1)

                # train for # of rounds specified, for each data point
                for _ in range(rounds):
                        for i in range(self.nData):
                                self.outputs = self.step(inputs[i],targets[i])

        def step(self, inputs, targets):
                # one epoch

                # input nodes output
                inOut = np.dot(self.inWeights,inputs.T)

                # hidden node activation function
                hidIn = sigmoid(inOut)

                # add the bias hidden node
                hidIn = np.concatenate((hidIn,[1]),axis=0)

                # hidden nodes output
                hidOut = np.dot(self.outWeights,hidIn)

                # output nodes activation function
                outIn = sigmoid(hidOut)

                outputs = outIn


                # update errors and weights

                #save for momentum calculation
                tempIn = self.priorIn
                tempOut = self.priorOut

                self.priorIn = self.inWeights
                self.priorOut = self.outWeights

                # output node errors
                outErrors = outputs*(1.0-outputs)*(targets - outputs)
                # hidden to output node weight update, including momentum (ignoring bias node)
                self.outWeights += self.eta*np.dot(outErrors,self.outWeights) + self.momentum*(self.outWeights - tempOut)

                # hidden node errors
                hidErrors = hidIn*(1.0-hidIn)*np.dot(self.outWeights.T,outErrors)
                # input to hidden node weight update, including momentum (ignoring bias node)
                self.inWeights += self.eta*np.dot(hidErrors[:-1],self.inWeights) + self.momentum*(self.inWeights - tempIn)

                return outputs

        def eval(self, inputs):
                # run without training

                # add bias node
                inputs = np.concatenate((inputs,[1]))

                # input to hidden
                inOut = np.dot(self.inWeights,inputs.T)
                hidIn = sigmoid(inOut)

                # add bias hidden node
                hidIn = np.concatenate((hidIn,[1]),axis=0)

                # hidden to output
                hidOut = np.dot(self.outWeights,hidIn)
                outIn = sigmoid(hidOut)

                return outIn


        def conMat(self, inputs, targets, p=1):
                # calculate (and print) confusion matrix

                # get number of data points to test
                nTests = np.shape(inputs)[0]

                right = 0
                wrong = 0
                conmat = np.zeros((self.nOut,self.nOut),dtype=int)
                for i in range(nTests):
                        # get result without updating weights
                        output = self.eval(inputs[i])
                        outMax = output.argmax() # one-hot of results
                        targMax = targets[i].argmax() #one-hot of inputs
                        if outMax == targMax:
                                right += 1
                        else:
                                wrong += 1
                        conmat[outMax][targMax] += 1
                if p: #print
                        print(right)
                        print(wrong)
                        print(conmat)
                        print("out/tar")
                        print(output)
                        print(targets[i])

                return right/(right + wrong)

由于没有训练并且权重是随机的，因此我希望在学习之前将混淆矩阵均匀地分布在从输入数字标签到输出数字预测的所有映射中，但是会给出类似的结果：

[[   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   4   26   50   62    5   21    1    0   37    1]
 [   0    0    0    0    0    0    0    0    0    0]
 [5919 6716 5908 6069 5837 5400 5917 6265 5814 5948]
 [   0    0    0    0    0    0    0    0    0    0]]

进行培训，

[[   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0   12    5    5    0    8    0    0    1    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [ 980 1123 1027 1005  982  884  958 1028  973 1009]
 [   0    0    0    0    0    0    0    0    0    0]]

用于测试数据。

尽管具体数字略有变化，但培训基本上不会进行培训。

我已经确认数据准备正确，0值为1，1等于0.9的一个热门值。

如何修复神经网络混淆矩阵代码

0 个答案: