This is my attempt at implementing multi-class logistic regression in Python, using softmax as the activation function and the MNIST digit dataset as the training and test set.
import numpy as np

def softmax(z):
    # row-wise softmax: each row of z holds the 10 class logits for one sample
    return np.array([np.exp(el) / np.sum(np.exp(el)) for el in z])

def cost(W, F, L):
    m = F.shape[0]  # number of samples in the batch
    probs = softmax(np.dot(F, W))
    # average cross-entropy between the one-hot labels L and the predictions
    return -(1 / m) * np.sum(L * np.log(probs))

def gradient(W, F, L):
    m = F.shape[0]  # number of samples in the batch
    probs = softmax(np.dot(F, W))
    return -(1 / m) * np.dot(F.T, L - probs)
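For reference, the formulas these two functions are meant to implement (writing them out here to make the code easier to check) are the average cross-entropy cost over a batch of m samples and its gradient with respect to W:

$$J(W) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{10} L_{ik}\,\log \mathrm{softmax}(F_i W)_k, \qquad \nabla_W J = -\frac{1}{m} F^\top \big(L - \mathrm{softmax}(F W)\big)$$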
from tensorflow.examples.tutorials.mnist import input_data

# note: input_data ships with the TensorFlow 1.x tutorials (removed in TF 2.x)
mnist = input_data.read_data_sets("./datasets/MNIST_data/", one_hot=True)

W = np.zeros((785, 10))  # 784 pixel features + 1 bias weight per class

for _ in range(10000):
    F, L = mnist.train.next_batch(100)
    F = np.insert(F, 0, values=1, axis=1)  # prepend a bias column of ones
    total_cost = cost(W, F, L)
    print("Total cost is {}".format(total_cost))
    gradients = gradient(W, F, L)
    W = W - 0.1 * gradients  # gradient descent step, learning rate 0.1
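The loop itself is plain mini-batch gradient descent with a fixed learning rate:

$$W \leftarrow W - \alpha\,\nabla_W J(W), \qquad \alpha = 0.1$$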
FU = mnist.test.images
FU = np.insert(FU, 0, values=1, axis=1)  # same bias column for the test set
LU = mnist.test.labels

sm_mulU = softmax(np.dot(FU, W))

OK = 0
NOK = 0
for i in range(10000):  # the MNIST test set has 10,000 images
    if np.argmax(sm_mulU[i]) == np.argmax(LU[i]):
        OK += 1
    else:
        NOK += 1

print("{} OK vs {} NOK".format(OK, NOK))
print("accuracy {}%".format(OK / (NOK + OK) * 100))
What I am basically trying to do is implement this myself and get results similar to the TensorFlow implementation. The problem is that the TensorFlow version reaches a final accuracy of about 91%, while mine only reaches about 70%. On top of that, my model seems to diverge: the cost starts to increase rapidly.
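One thing I am not sure about is the numerical stability of my softmax: once the logits grow, np.exp can overflow and produce inf/nan values, which might explain the diverging cost. A common stabilization is the max-subtraction trick; this is just a sketch of that idea (mathematically equivalent to my softmax above), not necessarily what TensorFlow does internally:

def softmax_stable(z):
    # subtracting the per-row max leaves the result unchanged mathematically
    # but keeps np.exp from overflowing
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)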
Is my implementation wrong, or is the difference due to a more sophisticated algorithm being used in the TensorFlow implementation?