遗传算法和神经网络无法学习

时间:2015-12-29 04:25:11

标签: neural-network genetic-algorithm pybrain evolutionary-algorithm genetic-programming

我正试图制作一个Flappy Bird AI,代理人试图通过遗传算法和神经网络学习通过管道。

我的实现是我使用的神经网络有两个输入(距管道的水平距离和管道开口的垂直距离),一个隐藏的5个神经元层和一个输出层。

遗传算法通过不断改变每代神经网络的一个权重来演化代理。 (基于this GA implementation

然而,我注意到那个脆弱的鸟类代理人未能学习,甚至从未尝试过一次(整个时间,它始终在每一代的开始下降)直到第485代(其中“超出最大递归深度”错误发生的地方)

遗传算法+神经网络功能:

def flap(playery, playerFlapAcc):

        playerVelY = playerFlapAcc
        playerFlapped = True
        if sound:
            SOUNDS['wing'].play()

        return playerVelY, playerFlapped


def mutate(n):
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out

    layer_select = random.uniform(0,2)

    print 'Changed:'
    if layer_select == 0:
        selector = int(random.uniform(0,5))
        print in_to_hidden.params[selector]
        in_to_hidden.params[selector] = in_to_hidden.params[selector] + random.uniform(-5,5)
        print in_to_hidden.params[selector]
    elif layer_select == 1:
        selector = int(random.uniform(0,5))
        print hidden_to_hidden.params[selector]
        hidden_to_hidden2.params[selector] = hidden_to_hidden2.params[selector] + random.uniform(-5,5)
        print hidden_to_hidden.params[selector]
    else:
        selector = int(random.uniform(0,3))
        print hidden_to_out.params[selector]
        hidden_to_out.params[selector] = hidden_to_out.params[selector] + random.uniform(-5,5)
        print hidden_to_out.params[selector]
    return n

def predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i):
    global jumped


    if i % 10 == 0:
        pred = n.activate([rangex, error]).argmax()
        if pred == 1:
            jumped = True
            playerVelY, playerFlapped = flap(playery, playerFlapAcc)
    return playerVelY, playerFlapped

def initalize_nn():
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out

    # Old code (regression)        
    n = FeedForwardNetwork()
    # n = buildNetwork( 2, 3, data.outdim, outclass=SoftmaxLayer )

    inLayer = LinearLayer(2)
    hiddenLayer = SigmoidLayer(5)
    hiddenLayer2 = SigmoidLayer(5)
    outLayer = LinearLayer(1)

    n.addInputModule(inLayer)
    n.addModule(hiddenLayer)
    n.addModule(hiddenLayer2)
    n.addOutputModule(outLayer)


    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden2 = FullConnection(hiddenLayer, hiddenLayer2)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)

    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_hidden2)
    n.addConnection(hidden_to_out)

    n.sortModules()
    return n

def fitness_fun(score, x_distance, error):
    # Fitness function was designed so that the largest distance is
    # the most fit. Before going through the first pipe, total distance traveled is the fitness. 

    # Once agent passed through the first pipe and earned a point, 
    # the amount of points it gained is the main determinant of the fitness score

    if error != 0:
        fitval = abs((100*score) + (x_distance/(2*abs(error))))
    else:
        fitval = abs(100*score) + x_distance*2
    return fitval

游戏中的示例实现:

def mainGame(movementInfo):
    global fitness
    global old_fitness
    global num_nn_parameters
    global score
    global disx
    global first_time
    global n
    global old_n
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    global generation

    global jumped


    print 'generation: ', generation

    generation = generation + 1


    if first_time:
        ### Initalizing the neural network        
        n = initalize_nn()
        ds = ClassificationDataSet(2, nb_classes=2)
        z = 0
        for val in in_to_hidden.params:

            in_to_hidden.params[z] = random.uniform(-2,2)
            z = z + 1

        num_nn_parameters = z

        old_nn = n

    else:
        # create new nn (but with old_nn saved)
        n = mutate(old_n)

        disx = 0
        score = 0

    first_time = False

    # Print weights
    print_all_weights()



    ####    

    '''
    NOTES:
    playerx = player's x position (57)
    playery = player's height
    upper_gap
    lower_gap
    center_cord


    '''
    pipeHeight = IMAGES['pipe'][0].get_height() 
    upper_gap = newPipe1[0]['y'] + pipeHeight
    lower_gap = upper_gap + PIPEGAPSIZE
    center_cord = upper_gap + ((lower_gap - upper_gap)/2)




 ###########  The main loop ###########

    #playerx = 140

    while True:
        i = i + 1
        disx = disx + 1

        # Error is determined by comparing the agent's y distance from the pipe opening
        error = playery - center_cord





        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
            if event.type == KEYDOWN and (event.key == K_SPACE or event.key == K_UP):
                if playery > -2 * IMAGES['player'][0].get_height():
                    playerVelY = playerFlapAcc
                    playerFlapped = True
                    if sound:
                        SOUNDS['wing'].play()



        # check for crash here
        crashTest = checkCrash({'x': playerx, 'y': playery, 'index': playerIndex},
                               upperPipes, lowerPipes)

        if crashTest[0]:

            fitness = fitness_fun(score, disx, error)






            print '------------------- Game Over ---------------------'
            print 'fitness: [', fitness, ']'
            print 'old fit: [', old_fitness, ']'
            print ''
            print ''
            print 'error: ', error
            #print 'score: ', score
            print 'range_x', rangex
            print 'player_x: ', disx
            print '----------------------------------------------------'
            print ''
            print ''
            print ''
            print ''
            print ''


            # If it turns out the old nn was better
            if old_fitness > fitness:

                # prevents the old but good nn from being overwritten
                n = old_n
                fitness = old_fitness
            else:
                print 'Better fitness discovered'
            # store the good nn as the old_nn


            old_n = n


            old_fitness = fitness



            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
            }



        rangex = upperPipes[0]['x'] - 92



        # Make prediction
        playerVelY, playerFlapped = predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i)

有谁知道这个的原因以及如何解决这个问题?

0 个答案:

没有答案