我试图制作一个可以学习玩井字游戏的神经网络。它具有9个输入层,9个隐藏层和9个输出。神经网络获得最大的输出,并使用它来移动工件。我已经使用矩阵对网络进行了初始化,并使用梯度下降来训练网络,但是,即使我对权重进行了更改,经过10000次迭代,代价也没有改变。
我曾尝试在程序的不同部分进行故障排除,但费用似乎从未减少,而且报告不正确
# Creating board and setting up imports
import numpy as np
import matplotlib.pyplot as plt
cell = [[1,1,0],
[1,2,0],
[1,3,0],
[2,1,0],
[2,2,0],
[2,3,0],
[3,1,0],
[3,2,0],
[3,3,0]]
def print_board():
numberord = len(cell)-1
for i in range(3):
print (cell[numberord-2],cell[numberord-1],cell[numberord])
numberord = numberord - 3
def sigmoid(x):
return 1/(1+np.exp(-x))
def sigmoid_p(x):
return sigmoid(x)*(1 -sigmoid(x))
print_board()
import numpy as np
INPUT_LAYER_SIZE = 9
HIDDEN_LAYER_SIZE = 9
OUTPUT_LAYER_SIZE = 9
data = [0,0,0,0,0,0,0,0,0]
answer = 7
def Wh():
Wh = np.random.randn(INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE) * \
np.sqrt(2.0/INPUT_LAYER_SIZE)
return Wh
Wh = Wh()
def Wo():
Wo = np.random.randn(HIDDEN_LAYER_SIZE, OUTPUT_LAYER_SIZE) * \
np.sqrt(2.0/HIDDEN_LAYER_SIZE)
return Wo
Wo = Wo()
def Bh():
Bh = np.full((1, HIDDEN_LAYER_SIZE), 0.1)
return Bh
Bh = Bh()
def Bo():
Bo = np.full((1, OUTPUT_LAYER_SIZE), 0.1)
return Bo
Bo = Bo()
def feed_forward(data,Wh,Wo,Bh,Bo):
Zh = np.dot(data, Wh) + Bh
H = sigmoid(Zh)
Zo = np.dot(H, Wo) + Bo
return Zo
def index():
output = feed_forward(data,Wh,Wo,Bh,Bo)
max_val = 0
for i in range(9):
if output[0][i] > max_val:
max_val = output[0][i]
for i in range(9):
if output[0][i] == max_val:
index = i+1
break
return index
def output_func():
output = sigmoid(feed_forward(data,Wh,Wo,Bh,Bo))
max_val = 0
for i in range(9):
if output[0][i] > max_val:
max_val = output[0][i]
return output
index = index()
output_var = output_func()
target = answer
prediction = index
data = [[0,0,0,0,0,0,0,0,0,5]]
c = 0
for _ in range(10000):
data = [[0,0,0,0,0,0,0,0,0,5]]
costs = []
dz_dWh = [0]*len(data[:-1])
for i in range(iterations):
ri = np.random.randint(len(data))
point = data[ri]
allbutans = point[:-1]
z = feed_forward(allbutans,Wh,Wo,Bh,Bo)
x = sigmoid(z)
max_val = 0
for i in range(9):
if x[0][i] > max_val:
max_val = x[0][i]
for i in range(9):
if x[0][i] == max_val:
index = i+1
break
pred = index
target = point[-1]
cost = np.square(pred - target)
costs.append(cost)
dcost_dpred = 2 * (pred - target)
dpred_dz = sigmoid_p(z)
dz_dWh = point[:-1]
dz_dWo = np.dot(point[:-1],Wh)
dz_dBh = 1
dz_dBo = 1
dcost_dz = dcost_dpred * dpred_dz
dcost_dWh = np.dot(dcost_dz, dz_dWh)
dcost_dWo = np.dot(dcost_dz,dz_dWo)
dcost_dBh = np.dot(dcost_dz,dz_dBh)
dcost_dBo = np.dot(dcost_dz,dz_dBo)
Wh = Wh - np.dot(learning_rate,dcost_dWh)
Wo = Wo - np.dot(learning_rate,dcost_dWo)
Bh = Bh - np.dot(learning_rate,dcost_dBh)
Bo = Bo - np.dot(learning_rate,dcost_dBo)
print("Hidden weights",Wh)
print("Output weights",Wo)
print("Hidden biases",Bh)
print("Output biases",Bo)
print(costs)
I expected there to be many costs but only two values appeared and the cost did not decrease