Question

您好，我目前对AI感兴趣，我参加了由Andrew Ng在课程时代教授的AI课程。我目前正在尝试使用numpy和scipy优化在python中构建神经网络。问题是尽管我的反向传播算法没有为我提供2个隐藏层的正确渐变，而只是为我提供了输出层的正确渐变。我试图以向量化的方式进行迭代，但仍然没有任何效果。任何帮助将非常感激。

import numpy as np
from scipy import optimize as opt
import matplotlib.pyplot as plt

def loadData(file):
    with open(file) as f:
        data = f.readlines()
        #Features 1:Survived, 2:Pclass, 5:Sex, 6:Age, 12:Embarked
        index = [1, 2, 5, 6, 12]
        m = np.zeros([1, len(index)-1])
        y = []
        temp_y = np.NaN
        for d in data[1:]: 
            temp = d.replace("\n","").split(",")
            l = []
            valid = True
            for i in index:
                if temp[i] == "":
                    valid = False
                    break
                if i == 5:
                    if temp[i] == "male":
                        temp[i] = 0
                    else:
                        temp[i] = 1
                if i == 12:
                    if temp[i] == "Q":
                        temp[i] = 0
                    elif temp[i] == "S":
                        temp[i] = 1
                    else:
                        temp[i] = 2
                if i != 1:
                    l.append(temp[i])
            if(valid):
                m = np.vstack([m, l])
                y.append(temp[1])
    return [np.delete(m, 0, 0), np.array(y).astype(float).tolist()]

def Len(x):
    print(len(x), len(x[0]))

class NN(object):
    #Initializes objects
    def __init__(self, fileName):
        temp = loadData(fileName)
        self.features = temp[0].astype(float)
        self.y = temp[1]
        self.m = len(self.features)
        self.Lambda = 0.5
        self.Theta1 = np.random.random((10, 5))
        self.Theta2 = np.random.random((10, 11))
        self.Theta3 = np.random.random((1, 11))

    def __reformat__(self, theta):
        size1 = len(self.Theta1)*len(self.Theta1[0])
        size2 = len(self.Theta2)*len(self.Theta2[0])
        size3 = len(self.Theta3)*len(self.Theta3[0])

        theta1 = np.array([theta[i] for i in range(size1)]).reshape(len(self.Theta1), len(self.Theta1[0]))
        theta2 = np.array([theta[i] for i in range(size1, size1+size2)]).reshape(len(self.Theta2), len(self.Theta2[0]))
        theta3 = np.array([theta[i] for i in range(size2, size2+size3)]).reshape(len(self.Theta3), len(self.Theta3[0]))
        return theta1, theta2, theta3

    def theta(self):
        theta = np.append(self.Theta1, self.Theta2)
        theta = np.append(theta, self.Theta3)
        return theta

    #Sigmoid function
    def sigmoid(self, z):
        return 1/(1+np.exp(-z))

    #Gets the gradient of the sigmoid function
    def sigmoidGrad(self, z):
        a = self.sigmoid(z)
        b = 1 - self.sigmoid(z)
        return a * b

    #Does the forward propagation
    def feedForward(self, features, ax = 1):
        a1 = np.insert(features, 0, 1, axis = ax)

        z2 = np.dot(a1, self.Theta1.T)
        a2 = self.sigmoid(z2)
        a2 = np.insert(a2, 0 , 1, axis = ax)

        z3 = np.dot(a2, self.Theta2.T)
        a3 = self.sigmoid(z3)
        a3 = np.insert(a3, 0, 1, axis  = ax)

        z4 = np.dot(a3, self.Theta3.T)
        a4 = self.sigmoid(z4).T[0]
        return [a4, z2, a2, z3, a3, z4, a1]

    #Regularizes theta 
    def regularization(self):
        k = 0
        temp = self.Theta1.tolist() + self.Theta2.tolist() + self.Theta3.tolist()
        for row in temp:
            for theta in row:
                k+=theta**2
        return k*(self.Lambda)/(2*self.m)

    #Computes the cost function
    def cost(self, h_t):
        j = 0
        i = 0
        for h in h_t:
            j += (-self.y[i]*np.log(h) - (1.0 - self.y[i])*np.log(1 - h))
            i+=1
        return j/(i+1) + self.regularization() 

    #Computes the back propagation algorithm
    def nnCost(self, theta):
        theta1, theta2, theta3 = self.__reformat__(theta)
        '''
        a4, z2, a2, z3, a3, z4, a1 = self.feedForward(self.features)

        d4 = np.array([(a4[i] - self.y[i]) for i in range(len(a4))]).reshape(len(a4), 1)

        d3 = np.dot(d4, theta3) * np.insert(self.sigmoidGrad(z3), 0, 1, axis = 1)

        d2 = np.dot(np.delete(d3, 0, axis = 1), theta2) * np.insert(self.sigmoidGrad(z2), 0, 1, axis = 1)

        reg3 = (self.Lambda/self.m)*np.insert(np.delete(theta3, 0, axis = 1), 0, 0, axis = 1)
        reg2 = (self.Lambda/self.m)*np.insert(np.delete(theta2, 0, axis = 1), 0, 0, axis = 1)
        reg1 = (self.Lambda/self.m)*np.insert(np.delete(theta1, 0, axis = 1), 0, 0, axis = 1)

        d3 = np.delete(d3, 0, axis = 1)
        d2 = np.delete(d2, 0, axis = 1)

        theta3_grad = np.dot(d4.T, a3)/self.m + reg3
        theta2_grad = np.dot(d3.T, a2)/self.m + reg2
        theta1_grad = np.dot(d2.T, a1)/self.m + reg1

        grad = np.append(theta1_grad, theta2_grad)
        grad = np.append(grad, theta3_grad)
        '''
        theta3_grad = np.zeros([len(self.Theta3), len(self.Theta3[0])])
        theta2_grad = np.zeros([len(self.Theta2), len(self.Theta2[0])])
        theta1_grad = np.zeros([len(self.Theta1), len(self.Theta1[0])])
        i = 0
        for X in self.features:
            a4, z2, a2, z3, a3, z4, a1 = self.feedForward(X, ax=0)
            d4 = a4 - self.y[i]
            d3 = np.dot(d4, theta3)[0] * np.insert(self.sigmoidGrad(z3), 0, 1, axis = 0)
            d2 = np.dot(np.delete(d3, 0, axis = 0), theta2)[0] * np.insert(self.sigmoidGrad(z2), 0, 1, axis = 0)
            d3 = np.delete(d3, 0, axis = 0)
            d2 = np.delete(d2, 0, axis = 0)
            theta3_grad += np.dot(d4, a3)
            theta2_grad += np.dot(d3.reshape(len(d3), 1), a2.reshape(1, len(a2)))
            theta1_grad += np.dot(d2.reshape(len(d2), 1), a1.reshape(1, len(a1)))
            i+=1
        reg1 = (self.Lambda/self.m)*np.insert(np.delete(theta1, 0, axis = 1), 0, 0, axis = 1)
        reg2 = (self.Lambda/self.m)*np.insert(np.delete(theta2, 0, axis = 1), 0, 0, axis = 1)
        reg3 = (self.Lambda/self.m)*np.insert(np.delete(theta3, 0, axis = 1), 0, 0, axis = 1)

        theta1_grad = (1/self.m)*theta1_grad + reg1
        theta2_grad = (1/self.m)*theta2_grad + reg2
        theta3_grad = (1/self.m)*theta3_grad + reg3
        return theta1_grad, theta2_grad, theta3_grad

    def gradCheck(self, theta = 3):
        l = []
        e = 0.05
        if theta == 1:
            for i in range(len(self.Theta1)):
                m = []
                for j in range(len(self.Theta1[0])):
                    self.Theta1[i][j] += e
                    h_p = self.feedForward(self.features)[0]
                    self.Theta1[i][j] -= 2.0*e
                    h_n = self.feedForward(self.features)[0]
                    self.Theta1[i][j] += e
                    j_p = self.cost(h_p)
                    j_n = self.cost(h_n)
                    m.append((j_p-j_n)/(2.0*e))
                l.append(m)

        elif theta == 2:
            for i in range(len(self.Theta2)):
                m = []
                for j in range(len(self.Theta2[0])):
                    self.Theta2[i][j] += e
                    h_p = self.feedForward(self.features)[0]
                    self.Theta2[i][j] -= 2.0*e
                    h_n = self.feedForward(self.features)[0]
                    self.Theta2[i][j] += e
                    j_p = self.cost(h_p)
                    j_n = self.cost(h_n)
                    m.append((j_p-j_n)/(2.0*e))
                l.append(m)

        else:
            for i in range(len(self.Theta3)):
                m = []
                for j in range(len(self.Theta3[0])):
                    self.Theta3[i][j] += e
                    h_p = self.feedForward(self.features)[0]
                    self.Theta3[i][j] -= 2.0*e
                    h_n = self.feedForward(self.features)[0]
                    self.Theta3[i][j] += e
                    j_p = self.cost(h_p)
                    j_n = self.cost(h_n)
                    m.append((j_p-j_n)/(2.0*e))
                l.append(m)

        return np.array(l)

    def train(self):
        theta = np.append(self.Theta1, self.Theta2)
        theta = np.append(theta, self.Theta3)
        newTheta = opt.fmin_tnc(self.nnCost, theta)
        self.Theta1, self.Theta2, self.Theta3 = self.__reformat__(newTheta[0])
        return self.cost(self.feedForward(self.features)[0])

def plotData(x, y):
    plt.plot(x, y)
    plt.title("Cost vs Iterations")
    plt.xlabel("Iterations")
    plt.ylabel("Cost(theta)")
    plt.show()

nn = NN("train.csv")
t = nn.theta()
"""
iterations = [i for i in range(10)]
cost = []
for i in range(10):
    cost.append(nn.train())
plotData(iterations, cost)
"""

反向传播无法正常工作

0 个答案: