I hate asking a question like this, because I have no idea what the root of the problem is, but I've been trying to solve it for two weeks and neither I nor my CS teacher have been able to figure it out.

I'm trying to build a TensorFlow model to play Snake. While running the evaluation function or the generate-population function, I've noticed that after roughly 400 steps the window displaying the snake stops responding for about 20 seconds, after which the snake is killed and the run moves on to the next one, even if that snake was doing well.

Specifically, my question is whether using the TensorFlow model to predict moves is what causes it to skip to the next case, and why. If not, is there something wrong with my Python renderer that causes this, or is it just some quirky coding on my part?

Again, I apologize for such a specific question, but I'm genuinely stuck. I've made sure my computer has enough memory allocated, and I've searched online for reasons TensorFlow might lag. I've also run a line profiler, and every call takes a normal amount of time, so any help or suggestions would be greatly appreciated.
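One guess I had, but haven't been able to confirm (which is partly why I'm asking): the OS flags a pygame window as "not responding" when its event queue isn't being serviced, and my loops only call env.render() without ever processing events while model.predict() is busy. Below is a rough sketch of what I mean, not my actual code; env and model stand in for the Game object and tflearn model from the full listing further down, and the pygame.event.pump() call is the only real change. Is something like this likely to be the issue, or am I looking in the wrong place?

import random

import numpy as np
import pygame


def run_one_game_keeping_window_alive(env, model, goal_steps=100):
    # Same structure as my evaluation loop, but the event queue is pumped
    # every frame so the window is not marked "not responding" while
    # model.predict() is busy.
    prev_obs = []
    score = 0
    for _ in range(goal_steps):
        pygame.event.pump()  # service pending window events each frame
        if len(prev_obs) == 0:
            action = random.randrange(0, 3)
        else:
            prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
            action = np.argmax(prediction[0])
        observation, reward, done, info = env.step(action)
        env.render()
        prev_obs = observation
        score += reward
        if done:
            break
    return score

Here is the full code: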
import random
from collections import Counter
from statistics import median, mean

import numpy as np
import pygame
from pygame.locals import *
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

from Snake import Game as game

env = game()
env.reset()
action = -1

LR = 1e-3
goal_steps = 100
score_requirement = 999
initial_games = 5000
def some_random_games_first():
    # play a handful of random games first, one loop iteration per game
    for episode in range(10):
        env = game()
        env.reset()
        first = True
        for _ in range(goal_steps):
            action = random.randrange(0, 3)
            if first:
                first = False
                action = 2
            # render the previous view
            env.render()
            observation, reward, done, info = env.step(action)
            if done:
                break
def generate_population(model):
    # [OBS, MOVES]
    global score_requirement
    global goal_steps
    training_data = []
    # array for scores
    scores = []
    # scores meeting the cutoff threshold
    accepted_scores = []
    # iterate through initial_games games
    for _ in range(initial_games):
        print('Simulation ', _, ' out of ', str(initial_games), '\r', end=' ')
        # reset playing environment
        env.reset()
        score = 0
        # moves specific to this iteration of the game
        game_memory = []
        # previous observation
        prev_observation = []
        # run for up to goal_steps frames (extended when the snake eats)
        iter = goal_steps
        req = 1000
        i = 0
        while i < iter:
            # choose an action (0, 1, or 2)
            if len(prev_observation) == 0:
                action = random.randrange(0, 3)
            else:
                if not model:
                    action = random.randrange(0, 3)
                else:
                    prediction = model.predict(prev_observation.reshape(-1, len(prev_observation), 1))
                    action = np.argmax(prediction[0])
            # run game
            observation, reward, done, info = env.step(action)
            # Observation is preserved in game memory so each step is available as
            # prev_observation on the next pass; each observation is paired with its action
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            # update observation
            prev_observation = observation
            # fitness rating
            score += reward
            if score > req:
                iter += 70   # 70 chosen as it only takes sixty steps for the snake to cross the full screen, 10 more for errors
                req += 1000  # must get more food to get more steps
            if done:
                break
            i += 1
        # save moves which are past the score threshold
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # one-hot encoding for the output layer of the neural network
                action_sample = [0, 0, 0]
                action_sample[data[1]] = 1
                output = action_sample
                # save training data
                training_data.append([data[0], output])
        # save overall scores
        scores.append(score)
    # stats printed for the neural network
    print('Average accepted score:', mean(accepted_scores))
    print('Score requirement (old):', score_requirement)
    print('Median score for accepted scores:', median(accepted_scores))
    print(Counter(accepted_scores))
    # update score requirement
    score_requirement = mean(accepted_scores)  # + 1000
    print('Score requirement (new):', score_requirement)
    # save data in a separate file
    training_data_save = np.array([training_data, score_requirement])
    np.save('saved.npy', training_data_save)
    return training_data
def create_dummy_model(training_data):
    shape_second_parameter = len(training_data[0][0])
    x = np.array([i[0] for i in training_data])
    X = x.reshape(-1, shape_second_parameter, 1)
    y = [i[1] for i in training_data]
    model = create_neural_network_model(input_size=len(X[0]), output_size=len(y[0]))
    return model
def create_neural_network_model(input_size, output_size):
    # input layer
    network = input_data(shape=[None, input_size, 1], name='input')
    # hidden layers
    network = tflearn.fully_connected(network, 32)
    network = tflearn.fully_connected(network, 32)
    # output layer
    network = fully_connected(network, output_size, activation='softmax')
    network = regression(network, name='targets')
    model = tflearn.DNN(network, tensorboard_dir='tflearn_logs')
    return model
def train_model(training_data, model=False):
    shape_second_parameter = len(training_data[0][0])
    x = np.array([i[0] for i in training_data])
    X = x.reshape(-1, shape_second_parameter, 1)
    y = [i[1] for i in training_data]
    model.fit({'input': X}, {'targets': y}, n_epoch=10, batch_size=16, show_metric=True)
    model.save('minisnake_trained.tflearn')
    return model
def evaluate(model, generation):
    # evaluate the actual trained model, showing the played games
    global goal_steps
    scores = []
    choices = []
    for each_game in range(20):
        print('Evaluation ', each_game, ' out of 20 ', '\r', end=' ')
        score = 0
        prev_obs = []
        env.reset()
        iter = goal_steps
        req = 1000
        i = 0
        while i < iter:
            # env.render()  # uncomment to watch the evaluation
            if len(prev_obs) == 0:
                action = random.randrange(0, 3)
            else:
                prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
                action = np.argmax(prediction[0])
            choices.append(action)
            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            score += reward
            # increase the number of steps the snake is allowed based on the food it gets
            if score > req:
                iter += 70
                req += 1000
            if done:
                break
            i += 1
        scores.append(score)
    print('')
    print('Average Score: ', sum(scores) / len(scores))
    print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
    print('Score requirement:', score_requirement)
if __name__ == "__main__":
some_random_games_first()
#initial population
training_data = generate_population(None)
#create a dummy model
model = create_dummy_model(training_data)
#training with first data set
model = train_model(training_data, model)
#evaluating
generation = 1
evaluate(model, generation)
#recursion
generation = 1
while True:
generation += 1 #moves to next generation
print('Generation: ', generation)
#adds more training data by generating new population
training_data = np.append(training_data, generate_population(model), axis=0))
print('generation: ', generation, ' initial population: ', len(training_data))
if len(training_data) == 0:
break
model = train_model(training_data, model)
evaluate(model, generation)