I'm trying to program a neural network to play noughts and crosses (also known as tic-tac-toe). When I train it, it plays against itself and does reduce the loss function, but only up to a point, after which it plateaus. I've tinkered with it a lot, but it still plays barely better than a random bot.
I've tried adjusting the learning rate and the size of the hidden layer. I've also previously tried training it on games against me, and training it purely on wins (interestingly, that minimised the loss function better than my current version, but the bot only ever tried to attack and had no idea it had to block me from winning). On top of that, I've tried decaying the learning rate, dropping it by 5% or 10% every 100 self-play games, as sketched below.
I've looked online, but I couldn't find any Python neural network for tic-tac-toe that I could compare mine against for debugging.
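(The decay is nothing fancy, just a multiplicative schedule applied between blocks of games, roughly like this sketch, where decay stands in for 0.95 or 0.9:)
def decayed_learning_rate(initial_rate, games_played, decay=0.95):
    # drop the learning rate by a fixed fraction after every 100 self-play games
    return initial_rate * decay ** (games_played // 100)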
import math
import random
def bot_go(player_to_move, game_over, board):
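    # asks the network for its favourite square; if that square is taken,
    # asks for the next-best one (iteration) until a legal move is found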
    played = False
    iteration = 0
    while played is False:
        move, input_layer, hidden_layer, output_layer = neural_net_move(iteration, board)
        if board[int(move[0])][int(move[1])] == "-":
            played = True
            board[int(move[0])][int(move[1])] = player_to_move
            if check_for_win(player_to_move, board) is True:
                game_over = True
            elif check_for_draw(board) is True:
                game_over = True
            if player_to_move == "X":
                player_to_move = "O"
            else:
                player_to_move = "X"
        iteration += 1
    return game_over, player_to_move, move, input_layer, hidden_layer, output_layer
def neural_net_move(iteration, board):
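    # forward pass: encode the board, run one sigmoid hidden layer and a
    # 9-unit sigmoid output layer, then return the iteration-th best square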
    neural_network_input = {}
    neural_network_hidden_layer = {}
    neural_network_output = []
    layer_1_weights, layer_2_weights, bias = get_neural_network()
    # initialises the input layer: X -> 0, O -> 1, empty square -> 0.5
    for i in range(9):
        if board[i // 3][i % 3] == "X":
            neural_network_input[i] = 0
        elif board[i // 3][i % 3] == "O":
            neural_network_input[i] = 1
        else:
            neural_network_input[i] = 0.5
    # calculates the hidden layer neuron values (sigmoid activation)
    for i in range(Global_variables.hidden_layer_size):
        net_total = 0
        for j in range(9):
            net_total += neural_network_input[j] * layer_1_weights[str(j) + str(i)]
        neural_network_hidden_layer[i] = 1 / (1 + math.exp(-net_total))
    # calculates the neural network output, one sigmoid unit per square
    for i in range(9):
        net_total = 0
        for j in range(Global_variables.hidden_layer_size):
            net_total += neural_network_hidden_layer[j] * layer_2_weights[str(j) + str(i)]
        net_total += bias * layer_2_weights[str(Global_variables.hidden_layer_size) + str(i)]
        neural_network_output.append(1 / (1 + math.exp(-net_total)))
    # ranks the squares by output value, highest first
    order_of_size = sorted(range(9), key=lambda k: neural_network_output[k], reverse=True)
    move = [order_of_size[iteration] // 3, order_of_size[iteration] % 3]
    return move, neural_network_input, neural_network_hidden_layer, neural_network_output
def train_neural_network(input_layer, hidden_layer, output_layer, actual_move):
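    # one step of backpropagation towards a one-hot target: 1 for the square
    # that was actually played, 0 for the other eight squares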
    layer_1_weights, layer_2_weights, bias = get_neural_network()
    # assuming these live in Global_variables, like the usages elsewhere in this code
    hidden_layer_size = Global_variables.hidden_layer_size
    learning_rate = Global_variables.learning_rate
    new_l1_weights = {}
    new_l2_weights = {}
    # calculates total error (squared error against the one-hot target)
    total_error = 0
    for i in range(len(output_layer)):
        if actual_move[0] * 3 + actual_move[1] == i:
            total_error += ((1 - output_layer[i]) ** 2) / 2
        else:
            total_error += 0.5 * (output_layer[i] ** 2)
    # adjusts second layer weights, keyed str(hidden_index) + str(output_index);
    # hidden_index == hidden_layer_size is the bias unit's weight
    for i in range((hidden_layer_size + 1) * 9):
        if actual_move[0] * 3 + actual_move[1] == i % 9:
            d_error_by_d_output_node = output_layer[i % 9] - 1
        else:
            d_error_by_d_output_node = output_layer[i % 9]
        d_output_node_by_d_node_net_value = output_layer[i % 9] * (1 - output_layer[i % 9])
        if i // 9 != hidden_layer_size:
            d_node_net_value_by_d_weight = hidden_layer[i // 9]
        else:
            d_node_net_value_by_d_weight = bias
        d_error_by_d_weight = d_error_by_d_output_node * d_output_node_by_d_node_net_value * d_node_net_value_by_d_weight
        new_l2_weights[str(i // 9) + str(i % 9)] = \
            layer_2_weights[str(i // 9) + str(i % 9)] - learning_rate * d_error_by_d_weight
    # adjusts the bias unit's value
    d_error_by_d_bias = 0
    for i in range(9):
        d_node_net_value_by_d_bias = layer_2_weights[str(hidden_layer_size) + str(i)]
        d_output_node_by_d_node_net_value = output_layer[i] * (1 - output_layer[i])
        if actual_move[0] * 3 + actual_move[1] == i:
            d_error_by_d_output_node = output_layer[i] - 1
        else:
            d_error_by_d_output_node = output_layer[i]
        d_error_by_d_bias += d_node_net_value_by_d_bias * d_output_node_by_d_node_net_value * d_error_by_d_output_node
    bias = bias - learning_rate * d_error_by_d_bias
    # adjusts first layer weights, keyed str(input_index) + str(hidden_index)
    for i in range(hidden_layer_size * 9):
        d_error_by_d_weight = 0
        hidden_index = i % hidden_layer_size
        # sigmoid derivative of this hidden node's output
        d_output_of_node_by_d_node_net_value = \
            hidden_layer[hidden_index] * (1 - hidden_layer[hidden_index])
        d_node_net_value_by_d_weight = input_layer[i // hidden_layer_size]
        # sums the error contribution of every output node this hidden node feeds
        for j in range(9):
            d_output_node_net_value_by_d_output_of_node = layer_2_weights[str(hidden_index) + str(j)]
            d_output_node_by_d_output_node_net_value = output_layer[j] * (1 - output_layer[j])
            if actual_move[0] * 3 + actual_move[1] == j:
                d_error_by_d_output_node = output_layer[j] - 1
            else:
                d_error_by_d_output_node = output_layer[j]
            d_error_by_d_weight += d_output_of_node_by_d_node_net_value * d_node_net_value_by_d_weight * \
                d_output_node_net_value_by_d_output_of_node * d_output_node_by_d_output_node_net_value * \
                d_error_by_d_output_node
        new_l1_weights[str(i // hidden_layer_size) + str(hidden_index)] = \
            layer_1_weights[str(i // hidden_layer_size) + str(hidden_index)] - \
            d_error_by_d_weight * learning_rate
    network_file = open("neural network", "w")
    line = ""
    for i in range(9 * hidden_layer_size):
        line += str(new_l1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)]) + " "
    network_file.write(line + "\n")
    line = ""
    for i in range(9 * (hidden_layer_size + 1)):
        line += str(new_l2_weights[str(i // 9) + str(i % 9)]) + " "
    network_file.write(line + "\n")
    network_file.write(str(bias))
    network_file.close()
    return total_error
def get_neural_network():
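    # loads the weights written by make_up_neural_net / train_neural_network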
    layer_1_weights = {}
    layer_2_weights = {}
    # opens the text file holding the neural network
    network_file = open("neural network", "r")
    network = network_file.readlines()
    network_file.close()
    # first line: layer 1 weights, keyed str(input_index) + str(hidden_index)
    weight_list = network[0].split()
    for i in range(len(weight_list)):
        layer_1_weights[str(i // Global_variables.hidden_layer_size) + str(i % Global_variables.hidden_layer_size)] = float(weight_list[i])
    # second line: layer 2 weights, keyed str(hidden_index) + str(output_index)
    weight_list = network[1].split()
    for i in range(len(weight_list)):
        layer_2_weights[str(i // 9) + str(i % 9)] = float(weight_list[i])
    # third line: the bias unit's value, which train_neural_network updates
    bias = float(network[2])
    return layer_1_weights, layer_2_weights, bias
def make_up_neural_net():
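    # writes a fresh network of uniform random weights in [0, 1) to the file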
    network_file = open("neural network", "w")
    line = ""
    for i in range(9 * Global_variables.hidden_layer_size):
        line += str(random.random()) + " "
    network_file.write(line + "\n")
    line = ""
    for i in range(9 * (Global_variables.hidden_layer_size + 1)):
        line += str(random.random()) + " "
    network_file.write(line + "\n")
    network_file.write(str(random.random()))
    network_file.close()
def main():
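    # self-play training loop: 100 blocks of 100 games; after each game the
    # winner's moves are reinforced and the loser's moves are "untrained"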
    error = 0
    make_up_neural_net()
    for i in range(100):
        for j in range(100):
            game_over = False
            winner = ""
            player_to_move = "X"
            board = set_up_board()
            o_moves = []
            x_moves = []
            while game_over is False:
                if player_to_move == "X":
                    game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
                    x_moves.append([move, input_layer, hidden_layer, output_layer])
                    if game_over is True:
                        winner = "X"
                else:
                    game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
                    o_moves.append([move, input_layer, hidden_layer, output_layer])
                    if game_over is True:
                        winner = "O"
            # note: winner is set whenever the game ends, so a draw credits the last mover
            if winner == "X":
                for move in x_moves:
                    error = train_neural_network(move[1], move[2], move[3], move[0])
                for move in o_moves:
                    error = un_train_neural_network(move[1], move[2], move[3], move[0])
            else:
                for move in o_moves:
                    error = train_neural_network(move[1], move[2], move[3], move[0])
                for move in x_moves:
                    error = un_train_neural_network(move[1], move[2], move[3], move[0])
        # error here is the loss of the last training step in this block
        print(error)
main()
I expected the code to print the value of the loss function after every 100 self-play games, and for that value to shrink over time. Instead it plateaus at around 0.45 at best, where I'd expect it to end up several orders of magnitude smaller (the loss got down to roughly 10^-5 when I trained it on games against me).
I think that expectation is reasonable, because the network is also no use when it actually plays.
I'd like to know whether this is down to a problem in my code, or whether the network simply isn't complex enough to model the problem and needs another layer.
Note: sorry for the amount of code, but I really couldn't find a way to shorten it. To keep it shorter I've cut the win/draw checks, and also the "untraining" function, which is just the training function but with the learning rate times the derivative added to each weight instead of subtracted. Minimal sketches of the cut board helpers are below, in case anyone wants to test the code without writing those functions themselves.
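(These are rough stand-ins for my real helpers; they match how the rest of the code uses the board, a 3x3 list of lists holding "X", "O" or "-".)
def set_up_board():
    # empty 3x3 board
    return [["-" for _ in range(3)] for _ in range(3)]
def check_for_win(player, board):
    # rows, columns and both diagonals
    lines = [[board[i][j] for j in range(3)] for i in range(3)]
    lines += [[board[i][j] for i in range(3)] for j in range(3)]
    lines += [[board[i][i] for i in range(3)], [board[i][2 - i] for i in range(3)]]
    return any(all(cell == player for cell in line) for line in lines)
def check_for_draw(board):
    # only a draw if every square is filled (call after check_for_win)
    return all(cell != "-" for row in board for cell in row)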