I hate asking a question like this, because I have no idea what the root of the problem is, but I've been trying to solve it for two weeks and neither I nor my CS teacher have been able to figure it out.

I'm trying to build a TensorFlow model to play Snake. While running the evaluation function or the generate-population function, I've noticed that after roughly 400 steps the window displaying the snake stops responding for about 20 seconds, after which the snake is killed and the run moves on to the next one, even if that snake was doing well.

Specifically, my question is whether using the TensorFlow model to predict moves is what causes it to skip to the next case, and why. If not, is there something wrong with my Python renderer that causes this, or is it just some quirky coding on my part?

Again, I apologize for such a specific question, but I'm genuinely stuck. I've made sure my computer has enough memory allocated, and I've searched online for reasons TensorFlow might lag. I've also run a line profiler, and every call takes a normal amount of time, so any help or suggestions would be greatly appreciated.
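One guess I had, but haven't been able to confirm (which is partly why I'm asking): the OS flags a pygame window as "not responding" when its event queue isn't being serviced, and my loops only call env.render() without ever processing events while model.predict() is busy. Below is a rough sketch of what I mean, not my actual code; env and model stand in for the Game object and tflearn model from the full listing further down, and the pygame.event.pump() call is the only real change. Is something like this likely to be the issue, or am I looking in the wrong place?

import random

import numpy as np
import pygame


def run_one_game_keeping_window_alive(env, model, goal_steps=100):
    # Same structure as my evaluation loop, but the event queue is pumped
    # every frame so the window is not marked "not responding" while
    # model.predict() is busy.
    prev_obs = []
    score = 0
    for _ in range(goal_steps):
        pygame.event.pump()  # service pending window events each frame
        if len(prev_obs) == 0:
            action = random.randrange(0, 3)
        else:
            prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
            action = np.argmax(prediction[0])
        observation, reward, done, info = env.step(action)
        env.render()
        prev_obs = observation
        score += reward
        if done:
            break
    return score

Here is the full code: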
import random
from collections import Counter
from statistics import median, mean

import numpy as np
import pygame
from pygame.locals import *
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

from Snake import Game as game

env = game()
env.reset()
action = -1

LR = 1e-3
goal_steps = 100
score_requirement = 999
initial_games = 5000
def some_random_games_first():
    # play a handful of random games first, one loop iteration per game
    for episode in range(10):
        env = game()
        env.reset()
        first = True
        for _ in range(goal_steps):
            action = random.randrange(0, 3)
            if first:
                first = False
                action = 2
            # render the previous view
            env.render()
            observation, reward, done, info = env.step(action)
            if done:
                break
def generate_population(model):
    # [OBS, MOVES]
    global score_requirement
    global goal_steps
    training_data = []
    # array for scores
    scores = []
    # scores meeting the cutoff threshold
    accepted_scores = []
    # iterate through initial_games games
    for _ in range(initial_games):
        print('Simulation ', _, ' out of ', str(initial_games), '\r', end=' ')
        # reset playing environment
        env.reset()
        score = 0
        # moves specific to this iteration of the game
        game_memory = []
        # previous observation
        prev_observation = []
        # run for up to goal_steps frames (extended when the snake eats)
        iter = goal_steps
        req = 1000
        i = 0
        while i < iter:
            # choose an action (0, 1, or 2)
            if len(prev_observation) == 0:
                action = random.randrange(0, 3)
            else:
                if not model:
                    action = random.randrange(0, 3)
                else:
                    prediction = model.predict(prev_observation.reshape(-1, len(prev_observation), 1))
                    action = np.argmax(prediction[0])
            # run game
            observation, reward, done, info = env.step(action)
            # Observation is preserved in game memory so each step is available as
            # prev_observation on the next pass; each observation is paired with its action
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            # update observation
            prev_observation = observation
            # fitness rating
            score += reward
            if score > req:
                iter += 70   # 70 chosen as it only takes sixty steps for the snake to cross the full screen, 10 more for errors
                req += 1000  # must get more food to get more steps
            if done:
                break
            i += 1
        # save moves which are past the score threshold
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # one-hot encoding for the output layer of the neural network
                action_sample = [0, 0, 0]
                action_sample[data[1]] = 1
                output = action_sample
                # save training data
                training_data.append([data[0], output])
        # save overall scores
        scores.append(score)
    # stats printed for the neural network
    print('Average accepted score:', mean(accepted_scores))
    print('Score requirement (old):', score_requirement)
    print('Median score for accepted scores:', median(accepted_scores))
    print(Counter(accepted_scores))
    # update score requirement
    score_requirement = mean(accepted_scores)  # + 1000
    print('Score requirement (new):', score_requirement)
    # save data in a separate file
    training_data_save = np.array([training_data, score_requirement])
    np.save('saved.npy', training_data_save)
    return training_data
def create_dummy_model(training_data):
    shape_second_parameter = len(training_data[0][0])
    x = np.array([i[0] for i in training_data])
    X = x.reshape(-1, shape_second_parameter, 1)
    y = [i[1] for i in training_data]
    model = create_neural_network_model(input_size=len(X[0]), output_size=len(y[0]))
    return model
def create_neural_network_model(input_size, output_size):
    # input layer
    network = input_data(shape=[None, input_size, 1], name='input')
    # hidden layers
    network = tflearn.fully_connected(network, 32)
    network = tflearn.fully_connected(network, 32)
    # output layer
    network = fully_connected(network, output_size, activation='softmax')
    network = regression(network, name='targets')
    model = tflearn.DNN(network, tensorboard_dir='tflearn_logs')
    return model
def train_model(training_data, model=False):
    shape_second_parameter = len(training_data[0][0])
    x = np.array([i[0] for i in training_data])
    X = x.reshape(-1, shape_second_parameter, 1)
    y = [i[1] for i in training_data]
    model.fit({'input': X}, {'targets': y}, n_epoch=10, batch_size=16, show_metric=True)
    model.save('minisnake_trained.tflearn')
    return model
def evaluate(model, generation):
    # evaluate the actual trained model, showing the played games
    global goal_steps
    scores = []
    choices = []
    for each_game in range(20):
        print('Evaluation ', each_game, ' out of 20 ', '\r', end=' ')
        score = 0
        prev_obs = []
        env.reset()
        iter = goal_steps
        req = 1000
        i = 0
        while i < iter:
            # env.render()  # uncomment to watch the evaluation
            if len(prev_obs) == 0:
                action = random.randrange(0, 3)
            else:
                prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
                action = np.argmax(prediction[0])
            choices.append(action)
            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            score += reward
            # increase the number of steps the snake is allowed based on the food it gets
            if score > req:
                iter += 70
                req += 1000
            if done:
                break
            i += 1
        scores.append(score)
    print('')
    print('Average Score: ', sum(scores) / len(scores))
    print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
    print('Score requirement:', score_requirement)
if __name__ == "__main__":
some_random_games_first()
#initial population
training_data = generate_population(None)
#create a dummy model
model = create_dummy_model(training_data)
#training with first data set
model = train_model(training_data, model)
#evaluating
generation = 1
evaluate(model, generation)
#recursion
generation = 1
while True:
generation += 1 #moves to next generation
print('Generation: ', generation)
#adds more training data by generating new population
training_data = np.append(training_data, generate_population(model), axis=0))
print('generation: ', generation, ' initial population: ', len(training_data))
if len(training_data) == 0:
break
model = train_model(training_data, model)
evaluate(model, generation)