您好,我目前对AI感兴趣,我参加了由Andrew Ng在课程时代教授的AI课程。我目前正在尝试使用numpy和scipy优化在python中构建神经网络。问题是尽管我的反向传播算法没有为我提供2个隐藏层的正确渐变,而只是为我提供了输出层的正确渐变。我试图以向量化的方式进行迭代,但仍然没有任何效果。任何帮助将非常感激。
import numpy as np
from scipy import optimize as opt
import matplotlib.pyplot as plt
def loadData(file):
with open(file) as f:
data = f.readlines()
#Features 1:Survived, 2:Pclass, 5:Sex, 6:Age, 12:Embarked
index = [1, 2, 5, 6, 12]
m = np.zeros([1, len(index)-1])
y = []
temp_y = np.NaN
for d in data[1:]:
temp = d.replace("\n","").split(",")
l = []
valid = True
for i in index:
if temp[i] == "":
valid = False
break
if i == 5:
if temp[i] == "male":
temp[i] = 0
else:
temp[i] = 1
if i == 12:
if temp[i] == "Q":
temp[i] = 0
elif temp[i] == "S":
temp[i] = 1
else:
temp[i] = 2
if i != 1:
l.append(temp[i])
if(valid):
m = np.vstack([m, l])
y.append(temp[1])
return [np.delete(m, 0, 0), np.array(y).astype(float).tolist()]
def Len(x):
print(len(x), len(x[0]))
class NN(object):
#Initializes objects
def __init__(self, fileName):
temp = loadData(fileName)
self.features = temp[0].astype(float)
self.y = temp[1]
self.m = len(self.features)
self.Lambda = 0.5
self.Theta1 = np.random.random((10, 5))
self.Theta2 = np.random.random((10, 11))
self.Theta3 = np.random.random((1, 11))
def __reformat__(self, theta):
size1 = len(self.Theta1)*len(self.Theta1[0])
size2 = len(self.Theta2)*len(self.Theta2[0])
size3 = len(self.Theta3)*len(self.Theta3[0])
theta1 = np.array([theta[i] for i in range(size1)]).reshape(len(self.Theta1), len(self.Theta1[0]))
theta2 = np.array([theta[i] for i in range(size1, size1+size2)]).reshape(len(self.Theta2), len(self.Theta2[0]))
theta3 = np.array([theta[i] for i in range(size2, size2+size3)]).reshape(len(self.Theta3), len(self.Theta3[0]))
return theta1, theta2, theta3
def theta(self):
theta = np.append(self.Theta1, self.Theta2)
theta = np.append(theta, self.Theta3)
return theta
#Sigmoid function
def sigmoid(self, z):
return 1/(1+np.exp(-z))
#Gets the gradient of the sigmoid function
def sigmoidGrad(self, z):
a = self.sigmoid(z)
b = 1 - self.sigmoid(z)
return a * b
#Does the forward propagation
def feedForward(self, features, ax = 1):
a1 = np.insert(features, 0, 1, axis = ax)
z2 = np.dot(a1, self.Theta1.T)
a2 = self.sigmoid(z2)
a2 = np.insert(a2, 0 , 1, axis = ax)
z3 = np.dot(a2, self.Theta2.T)
a3 = self.sigmoid(z3)
a3 = np.insert(a3, 0, 1, axis = ax)
z4 = np.dot(a3, self.Theta3.T)
a4 = self.sigmoid(z4).T[0]
return [a4, z2, a2, z3, a3, z4, a1]
#Regularizes theta
def regularization(self):
k = 0
temp = self.Theta1.tolist() + self.Theta2.tolist() + self.Theta3.tolist()
for row in temp:
for theta in row:
k+=theta**2
return k*(self.Lambda)/(2*self.m)
#Computes the cost function
def cost(self, h_t):
j = 0
i = 0
for h in h_t:
j += (-self.y[i]*np.log(h) - (1.0 - self.y[i])*np.log(1 - h))
i+=1
return j/(i+1) + self.regularization()
#Computes the back propagation algorithm
def nnCost(self, theta):
theta1, theta2, theta3 = self.__reformat__(theta)
'''
a4, z2, a2, z3, a3, z4, a1 = self.feedForward(self.features)
d4 = np.array([(a4[i] - self.y[i]) for i in range(len(a4))]).reshape(len(a4), 1)
d3 = np.dot(d4, theta3) * np.insert(self.sigmoidGrad(z3), 0, 1, axis = 1)
d2 = np.dot(np.delete(d3, 0, axis = 1), theta2) * np.insert(self.sigmoidGrad(z2), 0, 1, axis = 1)
reg3 = (self.Lambda/self.m)*np.insert(np.delete(theta3, 0, axis = 1), 0, 0, axis = 1)
reg2 = (self.Lambda/self.m)*np.insert(np.delete(theta2, 0, axis = 1), 0, 0, axis = 1)
reg1 = (self.Lambda/self.m)*np.insert(np.delete(theta1, 0, axis = 1), 0, 0, axis = 1)
d3 = np.delete(d3, 0, axis = 1)
d2 = np.delete(d2, 0, axis = 1)
theta3_grad = np.dot(d4.T, a3)/self.m + reg3
theta2_grad = np.dot(d3.T, a2)/self.m + reg2
theta1_grad = np.dot(d2.T, a1)/self.m + reg1
grad = np.append(theta1_grad, theta2_grad)
grad = np.append(grad, theta3_grad)
'''
theta3_grad = np.zeros([len(self.Theta3), len(self.Theta3[0])])
theta2_grad = np.zeros([len(self.Theta2), len(self.Theta2[0])])
theta1_grad = np.zeros([len(self.Theta1), len(self.Theta1[0])])
i = 0
for X in self.features:
a4, z2, a2, z3, a3, z4, a1 = self.feedForward(X, ax=0)
d4 = a4 - self.y[i]
d3 = np.dot(d4, theta3)[0] * np.insert(self.sigmoidGrad(z3), 0, 1, axis = 0)
d2 = np.dot(np.delete(d3, 0, axis = 0), theta2)[0] * np.insert(self.sigmoidGrad(z2), 0, 1, axis = 0)
d3 = np.delete(d3, 0, axis = 0)
d2 = np.delete(d2, 0, axis = 0)
theta3_grad += np.dot(d4, a3)
theta2_grad += np.dot(d3.reshape(len(d3), 1), a2.reshape(1, len(a2)))
theta1_grad += np.dot(d2.reshape(len(d2), 1), a1.reshape(1, len(a1)))
i+=1
reg1 = (self.Lambda/self.m)*np.insert(np.delete(theta1, 0, axis = 1), 0, 0, axis = 1)
reg2 = (self.Lambda/self.m)*np.insert(np.delete(theta2, 0, axis = 1), 0, 0, axis = 1)
reg3 = (self.Lambda/self.m)*np.insert(np.delete(theta3, 0, axis = 1), 0, 0, axis = 1)
theta1_grad = (1/self.m)*theta1_grad + reg1
theta2_grad = (1/self.m)*theta2_grad + reg2
theta3_grad = (1/self.m)*theta3_grad + reg3
return theta1_grad, theta2_grad, theta3_grad
def gradCheck(self, theta = 3):
l = []
e = 0.05
if theta == 1:
for i in range(len(self.Theta1)):
m = []
for j in range(len(self.Theta1[0])):
self.Theta1[i][j] += e
h_p = self.feedForward(self.features)[0]
self.Theta1[i][j] -= 2.0*e
h_n = self.feedForward(self.features)[0]
self.Theta1[i][j] += e
j_p = self.cost(h_p)
j_n = self.cost(h_n)
m.append((j_p-j_n)/(2.0*e))
l.append(m)
elif theta == 2:
for i in range(len(self.Theta2)):
m = []
for j in range(len(self.Theta2[0])):
self.Theta2[i][j] += e
h_p = self.feedForward(self.features)[0]
self.Theta2[i][j] -= 2.0*e
h_n = self.feedForward(self.features)[0]
self.Theta2[i][j] += e
j_p = self.cost(h_p)
j_n = self.cost(h_n)
m.append((j_p-j_n)/(2.0*e))
l.append(m)
else:
for i in range(len(self.Theta3)):
m = []
for j in range(len(self.Theta3[0])):
self.Theta3[i][j] += e
h_p = self.feedForward(self.features)[0]
self.Theta3[i][j] -= 2.0*e
h_n = self.feedForward(self.features)[0]
self.Theta3[i][j] += e
j_p = self.cost(h_p)
j_n = self.cost(h_n)
m.append((j_p-j_n)/(2.0*e))
l.append(m)
return np.array(l)
def train(self):
theta = np.append(self.Theta1, self.Theta2)
theta = np.append(theta, self.Theta3)
newTheta = opt.fmin_tnc(self.nnCost, theta)
self.Theta1, self.Theta2, self.Theta3 = self.__reformat__(newTheta[0])
return self.cost(self.feedForward(self.features)[0])
def plotData(x, y):
plt.plot(x, y)
plt.title("Cost vs Iterations")
plt.xlabel("Iterations")
plt.ylabel("Cost(theta)")
plt.show()
nn = NN("train.csv")
t = nn.theta()
"""
iterations = [i for i in range(10)]
cost = []
for i in range(10):
cost.append(nn.train())
plotData(iterations, cost)
"""