Question

我试图制作一个可以学习玩井字游戏的神经网络。它具有9个输入层，9个隐藏层和9个输出。神经网络获得最大的输出，并使用它来移动工件。我已经使用矩阵对网络进行了初始化，并使用梯度下降来训练网络，但是，即使我对权重进行了更改，经过10000次迭代，代价也没有改变。

我曾尝试在程序的不同部分进行故障排除，但费用似乎从未减少，而且报告不正确

# Creating board and setting up imports
import numpy as np 
import matplotlib.pyplot as plt
cell = [[1,1,0],
        [1,2,0],
        [1,3,0],
        [2,1,0],
        [2,2,0],
        [2,3,0],
        [3,1,0],
        [3,2,0],
        [3,3,0]]
def print_board():
    numberord = len(cell)-1
    for i in range(3):
        print (cell[numberord-2],cell[numberord-1],cell[numberord])
        numberord = numberord - 3

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_p(x):
    return sigmoid(x)*(1 -sigmoid(x))

print_board()

import numpy as np
INPUT_LAYER_SIZE = 9
HIDDEN_LAYER_SIZE = 9
OUTPUT_LAYER_SIZE = 9

data = [0,0,0,0,0,0,0,0,0]
answer = 7

def Wh():
    Wh = np.random.randn(INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE) * \
                np.sqrt(2.0/INPUT_LAYER_SIZE)
    return Wh

Wh = Wh()

def Wo():
    Wo = np.random.randn(HIDDEN_LAYER_SIZE, OUTPUT_LAYER_SIZE) * \
                np.sqrt(2.0/HIDDEN_LAYER_SIZE)
    return Wo

Wo = Wo()

def Bh():
    Bh = np.full((1, HIDDEN_LAYER_SIZE), 0.1)
    return Bh

Bh = Bh()

def Bo():
    Bo = np.full((1, OUTPUT_LAYER_SIZE), 0.1)
    return Bo

Bo = Bo()

def feed_forward(data,Wh,Wo,Bh,Bo):

    Zh = np.dot(data, Wh) + Bh
    H = sigmoid(Zh)
    Zo = np.dot(H, Wo) + Bo
    return Zo

def index():
    output = feed_forward(data,Wh,Wo,Bh,Bo)
    max_val = 0
    for i in range(9):
        if output[0][i] > max_val:
            max_val = output[0][i]
    for i in range(9):
        if output[0][i] == max_val:
            index = i+1
            break
    return index

def output_func():
    output = sigmoid(feed_forward(data,Wh,Wo,Bh,Bo))
    max_val = 0
    for i in range(9):
        if output[0][i] > max_val:
            max_val = output[0][i]
    return output

index = index()
output_var = output_func()

target = answer
prediction = index




data = [[0,0,0,0,0,0,0,0,0,5]]

c = 0

for _ in range(10000):
    data = [[0,0,0,0,0,0,0,0,0,5]]
    costs = [] 
    dz_dWh = [0]*len(data[:-1])
    for i in range(iterations):
        ri = np.random.randint(len(data))
        point = data[ri]
        allbutans = point[:-1]
        z = feed_forward(allbutans,Wh,Wo,Bh,Bo)
        x = sigmoid(z)
        max_val = 0
        for i in range(9):
            if x[0][i] > max_val:
                max_val = x[0][i]
        for i in range(9):
            if x[0][i] == max_val:
                index = i+1
                break
        pred = index
        target = point[-1]
        cost = np.square(pred - target)
        costs.append(cost)

        dcost_dpred = 2 * (pred - target)
        dpred_dz = sigmoid_p(z)


        dz_dWh = point[:-1]
        dz_dWo = np.dot(point[:-1],Wh)
        dz_dBh = 1
        dz_dBo = 1

        dcost_dz = dcost_dpred * dpred_dz


        dcost_dWh = np.dot(dcost_dz, dz_dWh)
        dcost_dWo = np.dot(dcost_dz,dz_dWo)
        dcost_dBh = np.dot(dcost_dz,dz_dBh)
        dcost_dBo = np.dot(dcost_dz,dz_dBo)

        Wh = Wh - np.dot(learning_rate,dcost_dWh)
        Wo = Wo - np.dot(learning_rate,dcost_dWo)
        Bh = Bh - np.dot(learning_rate,dcost_dBh)
        Bo = Bo - np.dot(learning_rate,dcost_dBo)

print("Hidden weights",Wh)
print("Output weights",Wo)
print("Hidden biases",Bh)
print("Output biases",Bo)
print(costs)

I expected there to be many costs but only two values appeared and the cost did not decrease

如何解决不减少神经网络成本的问题？

0 个答案: