我一直在用神经网络对手写数字识别上的经典MNIST数据集进行分类,而且每次初始化模型时,似乎都会严重偏向1-3个随机输出。
至关重要的是,即使没有训练,只是使用随机权重,它似乎仍然存在严重偏差。那可能意味着代码中会出现一些简单的错误,但是我和其他人完全无法发现它。
更改学习率,隐藏节点数等参数不会执行任何操作。来自模型的具有随机权重且没有学习的混淆矩阵显然是不正确的,并且随着学习的进行几乎不会改变。
相关类别:
import numpy as np
class neuralnet:
"""one hidden layer neural network"""
def __init__(self,inputs,targets,nHid,eta,momentum):
self.nIn = np.shape(inputs)[1]
self.nHid = nHid
self.nOut = np.shape(targets)[1]
self.eta = eta
self.nData = np.shape(inputs)[0]
self.momentum = momentum
# create random weights, including for bias node
self.inWeights = np.random.rand(self.nHid,self.nIn+1)*0.1 -0.05
self.outWeights = np.random.rand(self.nOut,self.nHid+1)*0.1 -0.05
# for momentum calculation
self.priorIn = self.inWeights
self.priorOut = self.outWeights
def reweight(self):
# create random weights, including for bias node
self.inWeights = np.random.rand(self.nHid,self.nIn+1)*0.1 -0.05
self.OutWeights = np.random.rand(self.nOut,self.nHid+1)*0.1 -0.05
# for momentum calculation
self.priorIn = self.inWeights
self.priorOut = self.outWeights
def train(self, inputs, targets, eta, rounds, reweight = 0):
# train for fixed number of rounds
# reweight
if reweight:
self.reweight()
# add bias input nodes
inputs = np.concatenate((inputs,np.ones((self.nData,1))),axis=1)
# train for # of rounds specified, for each data point
for _ in range(rounds):
for i in range(self.nData):
self.outputs = self.step(inputs[i],targets[i])
def step(self, inputs, targets):
# one epoch
# input nodes output
inOut = np.dot(self.inWeights,inputs.T)
# hidden node activation function
hidIn = sigmoid(inOut)
# add the bias hidden node
hidIn = np.concatenate((hidIn,[1]),axis=0)
# hidden nodes output
hidOut = np.dot(self.outWeights,hidIn)
# output nodes activation function
outIn = sigmoid(hidOut)
outputs = outIn
# update errors and weights
#save for momentum calculation
tempIn = self.priorIn
tempOut = self.priorOut
self.priorIn = self.inWeights
self.priorOut = self.outWeights
# output node errors
outErrors = outputs*(1.0-outputs)*(targets - outputs)
# hidden to output node weight update, including momentum (ignoring bias node)
self.outWeights += self.eta*np.dot(outErrors,self.outWeights) + self.momentum*(self.outWeights - tempOut)
# hidden node errors
hidErrors = hidIn*(1.0-hidIn)*np.dot(self.outWeights.T,outErrors)
# input to hidden node weight update, including momentum (ignoring bias node)
self.inWeights += self.eta*np.dot(hidErrors[:-1],self.inWeights) + self.momentum*(self.inWeights - tempIn)
return outputs
def eval(self, inputs):
# run without training
# add bias node
inputs = np.concatenate((inputs,[1]))
# input to hidden
inOut = np.dot(self.inWeights,inputs.T)
hidIn = sigmoid(inOut)
# add bias hidden node
hidIn = np.concatenate((hidIn,[1]),axis=0)
# hidden to output
hidOut = np.dot(self.outWeights,hidIn)
outIn = sigmoid(hidOut)
return outIn
def conMat(self, inputs, targets, p=1):
# calculate (and print) confusion matrix
# get number of data points to test
nTests = np.shape(inputs)[0]
right = 0
wrong = 0
conmat = np.zeros((self.nOut,self.nOut),dtype=int)
for i in range(nTests):
# get result without updating weights
output = self.eval(inputs[i])
outMax = output.argmax() # one-hot of results
targMax = targets[i].argmax() #one-hot of inputs
if outMax == targMax:
right += 1
else:
wrong += 1
conmat[outMax][targMax] += 1
if p: #print
print(right)
print(wrong)
print(conmat)
print("out/tar")
print(output)
print(targets[i])
return right/(right + wrong)
由于没有训练并且权重是随机的,因此我希望在学习之前将混淆矩阵均匀地分布在从输入数字标签到输出数字预测的所有映射中,但是会给出类似的结果:
[[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 4 26 50 62 5 21 1 0 37 1]
[ 0 0 0 0 0 0 0 0 0 0]
[5919 6716 5908 6069 5837 5400 5917 6265 5814 5948]
[ 0 0 0 0 0 0 0 0 0 0]]
进行培训,
[[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 12 5 5 0 8 0 0 1 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 980 1123 1027 1005 982 884 958 1028 973 1009]
[ 0 0 0 0 0 0 0 0 0 0]]
用于测试数据。
尽管具体数字略有变化,但培训基本上不会进行培训。
我已经确认数据准备正确,0值为1,1等于0.9的一个热门值。