I wrote some code to implement a simple neural network in Python + NumPy, using fmin_cg from SciPy for the optimization. I suspect there is a problem in my gradient function. I also tried a smaller network, but still cannot figure out what is wrong: the optimization simply does not converge. I have looked at many similar implementations and cannot find any meaningful difference. Any help would be much appreciated; thanks in advance! (A finite-difference gradient check is sketched below, after the full listing.)
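For reference, the cost function below is meant to implement the standard regularized cross-entropy for a one-hidden-layer sigmoid network, with the bias weights excluded from the penalty:

J(\Theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \Big[ y^{(i)}_k \log\big(h_\Theta(x^{(i)})_k\big) + \big(1 - y^{(i)}_k\big) \log\big(1 - h_\Theta(x^{(i)})_k\big) \Big] + \frac{\lambda}{2m} \Big( \sum_{j,\,k \ge 1} \big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j,\,k \ge 1} \big(\Theta^{(2)}_{j,k}\big)^2 \Big)

where K is the number of labels, h_\Theta is the sigmoid forward pass, and the k = 0 (bias) columns of \Theta^{(1)} and \Theta^{(2)} are left out of the regularization term.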
import numpy as np
from scipy import optimize
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def pack_thetas(t1, t2):
    return np.concatenate((t1.reshape(-1), t2.reshape(-1)))

def unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels):
    t1_start = 0
    t1_end = hidden_layer_size * (input_layer_size + 1)
    t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
    t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
    return t1, t2
def cost(params, x, y, lambdap, n, h):
    m, num_labels = np.shape(y)
    # unroll thetas
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    # forward propagation (x already carries the bias column)
    a2 = sigmoid(x.dot(t1.T))
    x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    yi = sigmoid(x2.dot(t2.T))
    # cross-entropy summed over all examples and labels
    J = -np.sum(y * np.log(yi) + (1 - y) * np.log(1 - yi))
    # regularization; the first columns (bias weights) are not regularized
    J += (np.sum(t1[:, 1:] ** 2) + np.sum(t2[:, 1:] ** 2)) * lambdap / 2
    return J / m
def grad(params, x, y, lambdap, n, h):
    m, num_labels = np.shape(y)
    # unroll thetas
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    t1g = np.zeros(np.shape(t1))
    t2g = np.zeros(np.shape(t2))
    # backpropagation, one training example at a time
    for i in range(m):
        a1 = np.reshape(x[i, :], (1, n + 1))
        z2 = np.dot(a1, t1.T)
        a2 = np.concatenate((np.ones((1, 1)), sigmoid(z2)), axis=1)
        yout = sigmoid(np.dot(a2, t2.T))
        error3 = yout - y[i, :]                          # output-layer delta
        gz2 = sigmoid(z2) * (1 - sigmoid(z2))            # sigmoid gradient at z2
        error2 = np.dot(t2[:, 1:].T, error3.T) * gz2.T   # hidden-layer delta
        t2g = t2g + np.dot(error3.T, a2)
        t1g = t1g + np.dot(error2, a1)
    # average over the training set; the bias columns must be scaled by 1/m too,
    # not only columns 1 and up
    t1g = t1g / m
    t2g = t2g / m
    # regularize everything except the bias columns
    t1g[:, 1:] += (lambdap / m) * t1[:, 1:]
    t2g[:, 1:] += (lambdap / m) * t2[:, 1:]
    return pack_thetas(t1g, t2g)
x = np.loadtxt("x.txt")
y = np.loadtxt("y.txt").astype(int)   # labels must be integers to index with
m, n = np.shape(x)
# adding the bias column x0 to x
xb = np.concatenate((np.ones((m, 1)), x), axis=1)
# one-hot encoding y (labels run from 1 to 10)
yb = np.zeros((m, 10))
for i in range(m):
    yb[i, y[i] - 1] = 1
print("Done loading data...")
h = 25
num_labels = 10
epsilon = 0.12
lambdap = 1.0
# random initialization in [-epsilon, epsilon]
theta1 = np.random.random((h, n + 1)) * 2 * epsilon - epsilon
theta2 = np.random.random((num_labels, h + 1)) * 2 * epsilon - epsilon
params = pack_thetas(theta1, theta2)
# cost at the initial thetas, without and with regularization
print(cost(params, xb, yb, 0.0, n, h))
print(cost(params, xb, yb, lambdap, n, h))
optiparams = optimize.fmin_cg(cost, params, fprime=grad, args=(xb, yb, lambdap, n, h), maxiter=100)
print("Cost at optimum =", cost(optiparams, xb, yb, lambdap, n, h))
# training-set accuracy
t1, t2 = unpack_thetas(optiparams, n, h, num_labels)
a2 = sigmoid(xb.dot(t1.T))
x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
yi = sigmoid(x2.dot(t2.T))
accuracy = 0
for i in range(m):
    # predicted label = output unit with the highest activation
    if yb[i, np.argmax(yi[i])] == 1:
        accuracy += 1
print("Training accuracy:", accuracy / m)
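When a first-order method fails to converge like this, the quickest test is to compare grad against a finite-difference approximation of cost on a tiny random problem. Below is a minimal sketch using scipy.optimize.check_grad; the tiny sizes and the random data are made up purely for the check and are not part of the script above.

import numpy as np
from scipy import optimize

np.random.seed(0)
nc, hc, kc, mc = 4, 5, 3, 10   # tiny check problem: inputs, hidden units, labels, examples
xs = np.concatenate((np.ones((mc, 1)), np.random.random((mc, nc))), axis=1)
ys = np.zeros((mc, kc))
ys[np.arange(mc), np.random.randint(kc, size=mc)] = 1
ps = pack_thetas(np.random.random((hc, nc + 1)) * 0.24 - 0.12,
                 np.random.random((kc, hc + 1)) * 0.24 - 0.12)
# check_grad returns ||analytic - numeric||_2; for a gradient consistent
# with the cost it should be tiny (around 1e-6 or less) at these scales
print(optimize.check_grad(cost, grad, ps, xs, ys, 1.0, nc, hc))

If the number comes out large, perturbing one parameter at a time with (cost(p + e) - cost(p - e)) / (2 * e) and comparing element by element shows whether the disagreement sits in the t1 block or the t2 block of the unrolled parameter vector.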