I am implementing a neural network in Python using the backpropagation algorithm, following an approach similar to the one Andrew Ng teaches in his machine learning course. However, the network predicts the same class, with nearly identical values, for every input during both training and testing: for every input, the output class with the highest probability is the same.
I have tried adding bias units, normalizing the inputs, and regularization. Nothing seems to help. The method follows what Andrew Ng teaches, and I have vectorized the operations.
import numpy as np
from numpy import zeros, ones, hstack
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data[:,:]
Y = iris.target
a=np.amax(X,axis=0) #column wise maximum of input data
b=np.amin(X,axis=0) #column wise minimum
for row in X: #divide each value by the max-min difference of its column
    row[0]=row[0]/(a[0]-b[0])
    row[1]=row[1]/(a[1]-b[1])
    row[2]=row[2]/(a[2]-b[2])
    row[3]=row[3]/(a[3]-b[3])
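# Note: the loop above is equivalent to this vectorized form (a sketch, left
# commented out so behaviour stays the same; like the loop, it only divides by
# the column range and does not subtract the column minimum):
# X = X / (a - b)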
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)
biasa2=np.random.rand(5,1) #bias nodes for hidden layer a2
biasa3=np.random.rand(3,1) #bias nodes for output layer a3

def sigmoid(z):
    return 1/(1+np.exp(-z)) #returns sigmoid of the given array
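# The derivative used later in backprop is sigmoid(z)*(1-sigmoid(z)); a small
# helper would look like this (a sketch for reference only, not called below):
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))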
def gradient_descent(theta,delta):
    num_iterations=50000
    learning_rate=.07
    for i in range(0,num_iterations):
        theta=np.subtract(theta,(learning_rate*delta)/2)
    return theta
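# One step of the update rule above, worked out for reference (a sketch):
#   theta_new = theta - (learning_rate*delta)/2
#   e.g. theta=0.5, delta=0.1, learning_rate=.07  ->  0.5 - 0.0035 = 0.4965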
def back_prop():
    a1=zeros([4,1]) #input layer
    a2=zeros([5,1]) #hidden layer
    a3=zeros([3,1]) #output layer
    theta1=np.random.rand(5,4) #input to hidden layer weights
    theta2=np.random.rand(3,5) #hidden to output layer weights
    #print(theta1)
    #print(theta2)
    delta1=zeros([5,4]) #differential of cost function wrt every weight in input to hidden layer
    delta2=zeros([3,5]) #differential of cost function in hidden to output layer
    for i in range(0,len(X_train)):
        y=zeros([3,1]) #stores expected output for this input
        a1=X_train[i,:] #input matrix
        a1=a1.reshape(len(X_train[i]),1)
        #a1=a1/np.amax(a1,axis=0)
        print("a1 is")
        print(a1)
        z=np.matmul(theta1,a1)
        z=z+biasa2
        a2=sigmoid(z) #hidden layer node values
        #a2=z
        a2=a2.reshape(len(a2),1)
        print("a2 is ")
        print(z)
        a2[0,0]=1
        z=np.matmul(theta2,a2)
        z=z+biasa3
        a3=sigmoid(z) #output layer values
        #a3=z
        a3=a3.reshape(len(a3),1)
        print("a3 is ")
        print(z)
        y[Y_train[i],0]=1 #set the index of the expected class to 1, the rest stay zero
        prev_del=np.subtract(y,a3) #expected - current output
        prev_del=np.multiply(prev_del,np.multiply(a3,1-a3)) #multiplying with the sigmoid differential
        #print("Error in output is")
        #print(prev_del)
        temp=np.matmul(prev_del,a2.transpose())
        delta2=np.add(delta2,temp) #accumulating the differential for layer 2
        _del=np.matmul(theta2.transpose(),prev_del) #calculating the error for layer 2
        temp2=ones([5,1])
        temp2=np.subtract(temp2,a2)
        _del=np.multiply(_del,np.multiply(temp2,a2)) #error of layer 2
        delta1=np.add(delta1,np.matmul(_del,a1.transpose())) #differential for hidden layer
    d1=zeros([5,4]) #array to store final differentiation of cost wrt hidden layer weights
    d2=zeros([3,5]) #array to store final differentiation of cost wrt output layer weights
    lmbda=.004 #regularization parameter
    d1=(delta1+lmbda*theta1)/len(X_train) #regularized average gradient for layer 1
    d2=(delta2+lmbda*theta2)/len(X_train) #regularized average gradient for layer 2
    theta=hstack((theta1.flatten(),theta2.flatten())) #flattening theta matrices for gradient descent
    delta=hstack((d1.flatten(),d2.flatten())) #flattening delta matrices for gd
    theta=gradient_descent(theta,delta)
    #print(theta)
    return theta #return final theta values after training
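# A prediction for one input can then be read off with a forward pass like this
# (a sketch that mirrors the forward pass inside back_prop above; predict() is a
# hypothetical helper, and it assumes the flattened theta returned by back_prop
# is reshaped back into theta1 (5x4) and theta2 (3x5)):
def predict(x, theta1, theta2):
    a1 = x.reshape(len(x), 1)
    a2 = sigmoid(np.matmul(theta1, a1) + biasa2)
    a2[0, 0] = 1                       #same first-hidden-unit-to-1 step as in back_prop
    a3 = sigmoid(np.matmul(theta2, a2) + biasa3)
    return np.argmax(a3)               #index of the class with the highest value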
If I run the code and print the matrices a1, a2, a3 at every iteration of the training loop, the index with the highest value in the output is the same for every input. Sometimes a3[0] is the highest for every input, sometimes a3[2] is the highest. These are the matrix values before the sigmoid function is applied.