I'm trying to build a Flappy Bird AI in which the agent learns to get through the pipes using a genetic algorithm and a neural network.
My setup: the neural network takes two inputs (the horizontal distance to the pipe and the vertical distance to the pipe opening), feeds them through two hidden layers of five sigmoid neurons each, and produces a single linear output (see initalize_nn() below).
The genetic algorithm evolves the agent by changing one weight of the neural network every generation (based on this GA implementation).
However, I've noticed that the bird agent fails to learn and never even attempts a single flap (the whole time it simply drops straight down at the start of every generation), right up until generation 485, where a "maximum recursion depth exceeded" error occurs.
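For context, CPython's default recursion limit is 1000 stack frames, so the error itself just means the call stack got roughly 1000 frames deep. A minimal sketch of how that can happen when generations are chained through nested calls instead of an outer loop (purely illustrative, restart_generation is a made-up name, this is not my game code):

import sys

print sys.getrecursionlimit()        # 1000 by default in CPython

def restart_generation(gen):
    # hypothetical: every crash starts the next generation via a nested
    # call instead of returning control to an outer while-loop
    print 'generation:', gen
    restart_generation(gen + 1)

restart_generation(0)                # dies just short of generation 1000 with
                                     # "RuntimeError: maximum recursion depth exceeded"

My crash happens at generation 485 rather than around 1000, so if something like this is going on in my code it would mean about two stack frames are being added per generation.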
The genetic algorithm / neural network functions:
import random
import pygame, sys
from pygame.locals import *
from pybrain.structure import FeedForwardNetwork, LinearLayer, SigmoidLayer, FullConnection
from pybrain.datasets import ClassificationDataSet

def flap(playery, playerFlapAcc):
    playerVelY = playerFlapAcc
    playerFlapped = True
    if sound:
        SOUNDS['wing'].play()
    return playerVelY, playerFlapped
def mutate(n):
    # pick one of the three connection layers at random and perturb one of its weights
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    layer_select = random.uniform(0,2)
    print 'Changed:'
    if layer_select == 0:
        selector = int(random.uniform(0,5))
        print in_to_hidden.params[selector]
        in_to_hidden.params[selector] = in_to_hidden.params[selector] + random.uniform(-5,5)
        print in_to_hidden.params[selector]
    elif layer_select == 1:
        selector = int(random.uniform(0,5))
        print hidden_to_hidden2.params[selector]
        hidden_to_hidden2.params[selector] = hidden_to_hidden2.params[selector] + random.uniform(-5,5)
        print hidden_to_hidden2.params[selector]
    else:
        selector = int(random.uniform(0,3))
        print hidden_to_out.params[selector]
        hidden_to_out.params[selector] = hidden_to_out.params[selector] + random.uniform(-5,5)
        print hidden_to_out.params[selector]
    return n
def predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i):
    global jumped
    if i % 10 == 0:
        pred = n.activate([rangex, error]).argmax()
        if pred == 1:
            jumped = True
            playerVelY, playerFlapped = flap(playery, playerFlapAcc)
    return playerVelY, playerFlapped
def initalize_nn():
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    # Old code (regression)
    n = FeedForwardNetwork()
    # n = buildNetwork( 2, 3, data.outdim, outclass=SoftmaxLayer )
    inLayer = LinearLayer(2)
    hiddenLayer = SigmoidLayer(5)
    hiddenLayer2 = SigmoidLayer(5)
    outLayer = LinearLayer(1)
    n.addInputModule(inLayer)
    n.addModule(hiddenLayer)
    n.addModule(hiddenLayer2)
    n.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden2 = FullConnection(hiddenLayer, hiddenLayer2)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_hidden2)
    n.addConnection(hidden_to_out)
    n.sortModules()
    return n
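For reference, here is a quick sanity check I can run on the network this builds (the two input values 120.0 and -35.0 are just arbitrary stand-ins for rangex and error; the weight counts follow from the layer sizes above):

n = initalize_nn()
print n.activate([120.0, -35.0])      # a length-1 array, since outLayer = LinearLayer(1)
print len(in_to_hidden.params)        # 10 weights (2 inputs x 5 hidden neurons)
print len(hidden_to_hidden2.params)   # 25 weights (5 hidden x 5 hidden neurons)
print len(hidden_to_out.params)       # 5 weights  (5 hidden x 1 output neuron)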
def fitness_fun(score, x_distance, error):
    # The fitness function was designed so that the largest distance is
    # the most fit. Before going through the first pipe, total distance traveled is the fitness.
    # Once the agent has passed through the first pipe and earned a point,
    # the number of points it has gained becomes the main determinant of the fitness score.
    if error != 0:
        fitval = abs((100*score) + (x_distance/(2*abs(error))))
    else:
        fitval = abs(100*score) + x_distance*2
    return fitval
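To make the intent concrete, a few hand-checked examples of what this returns (the inputs are arbitrary example numbers, and the divisions are Python 2 integer divisions):

print fitness_fun(0, 200, 50)   # abs(0 + 200/(2*50))   -> 2   (no pipe passed yet: distance term only)
print fitness_fun(3, 500, 10)   # abs(300 + 500/(2*10)) -> 325 (score dominates once pipes are passed)
print fitness_fun(0, 150, 0)    # abs(0) + 150*2        -> 300 (special case when error == 0)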
How this is used inside the game:
def mainGame(movementInfo):
    global fitness
    global old_fitness
    global num_nn_parameters
    global score
    global disx
    global first_time
    global n
    global old_n
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    global generation
    global jumped
    print 'generation: ', generation
    generation = generation + 1
    if first_time:
        ### Initalizing the neural network
        n = initalize_nn()
        ds = ClassificationDataSet(2, nb_classes=2)
        z = 0
        for val in in_to_hidden.params:
            in_to_hidden.params[z] = random.uniform(-2,2)
            z = z + 1
        num_nn_parameters = z
        old_n = n
    else:
        # create new nn (but with old_n saved)
        n = mutate(old_n)
    disx = 0
    score = 0
    first_time = False
    # Print weights
    print_all_weights()
    ####
    '''
    NOTES:
    playerx = player's x position (57)
    playery = player's height
    upper_gap
    lower_gap
    center_cord
    '''
    pipeHeight = IMAGES['pipe'][0].get_height()
    upper_gap = newPipe1[0]['y'] + pipeHeight
    lower_gap = upper_gap + PIPEGAPSIZE
    center_cord = upper_gap + ((lower_gap - upper_gap)/2)

    ########### The main loop ###########
    #playerx = 140
    while True:
        i = i + 1
        disx = disx + 1
        # Error is determined by comparing the agent's y distance from the pipe opening
        error = playery - center_cord
        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
            if event.type == KEYDOWN and (event.key == K_SPACE or event.key == K_UP):
                if playery > -2 * IMAGES['player'][0].get_height():
                    playerVelY = playerFlapAcc
                    playerFlapped = True
                    if sound:
                        SOUNDS['wing'].play()

        # check for crash here
        crashTest = checkCrash({'x': playerx, 'y': playery, 'index': playerIndex},
                               upperPipes, lowerPipes)
        if crashTest[0]:
            fitness = fitness_fun(score, disx, error)
            print '------------------- Game Over ---------------------'
            print 'fitness: [', fitness, ']'
            print 'old fit: [', old_fitness, ']'
            print ''
            print ''
            print 'error: ', error
            #print 'score: ', score
            print 'range_x', rangex
            print 'player_x: ', disx
            print '----------------------------------------------------'
            print ''
            print ''
            print ''
            print ''
            print ''
            # If it turns out the old nn was better
            if old_fitness > fitness:
                # prevents the old but good nn from being overwritten
                n = old_n
                fitness = old_fitness
            else:
                print 'Better fitness discovered'
                # store the good nn as the old_nn
                old_n = n
                old_fitness = fitness
            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
            }

        rangex = upperPipes[0]['x'] - 92
        # Make prediction
        playerVelY, playerFlapped = predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i)
Does anyone know what is causing this and how to fix it?