我正在尝试用 OpenAI Gym 玩 Space Invaders。下面的代码先通过随机对局生成训练数据,并只在对局得分达到分数要求(score_requirement)时才把该对局用于学习。生成数据的过程没有报错,我也确认过其中的 y(目标值)并不完全相同;训练模型时同样没有报错。但是在 testModel 函数中,模型在游戏里始终执行同一个动作。实际上,即使我给神经网络输入一个尺寸正确、填满随机整数的数组,它预测出的值也始终不变。
如果您对接下来该如何排查有任何建议或想法,我将不胜感激。谢谢。
import gym
import random
import numpy as np
import tflearn
from tqdm import tqdm
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import mean, median
from collections import Counter
# --- Training / data-collection settings -----------------------------------
LR = 1e-3                # learning rate passed to the optimizer
goal_steps = 25000       # upper bound on the number of steps played per game
score_requirement = 300  # minimum final score for a random game to be kept
initial_games = 2000     # how many random games are played to collect data

# --- Shared game environment -------------------------------------------------
# A single environment instance is created at import time and reused by every
# function in this script.
env = gym.make('SpaceInvaders-v0')
env.reset()
def some_random_games_first():
    """Play a single game with sampled actions and print the final frame.

    Actions come from ``env.action_space.sample()``. When the game ends the
    step index is printed; afterwards the last observation is printed twice:
    once as returned by the environment and once flattened.
    """
    env.reset()
    total_reward = 0
    seen_actions = []  # distinct actions sampled so far (inspection aid only)

    for step in range(goal_steps):
        # env.render()
        move = env.action_space.sample()
        if move not in seen_actions:
            seen_actions.append(move)

        frame, reward, finished, _info = env.step(move)
        total_reward += reward

        if finished:
            print("step:", step)
            break

    # NOTE(review): these prints are assumed to run once, after the game loop;
    # confirm that this matches the intended placement.
    print("observation:", frame)
    print(frame.flatten())
def initial_population():
    """Collect training samples by playing games with random actions.

    Plays ``initial_games`` games, each for at most ``goal_steps`` steps,
    choosing every action uniformly at random. A game whose final score is at
    least ``score_requirement`` contributes one sample per step: the flattened
    frame the agent saw *before* acting, paired with the one-hot encoding of
    the action it then took.

    The accepted samples are also written to ``saved.npy``.

    Returns:
        list: ``[flattened_observation, one_hot_action]`` pairs. The list is
        empty when no game reached the score requirement.
    """
    n_actions = env.action_space.n
    training_data = []
    accepted_scores = []

    for _ in tqdm(range(initial_games)):
        # Reset at the start of each game and keep the returned frame, so the
        # very first state/action pair of the game is recorded as well.
        prev_observation = env.reset()
        score = 0
        game_memory = []

        for _step in range(goal_steps):
            action = random.randrange(n_actions)
            # env.render()
            observation, reward, done, _info = env.step(action)
            game_memory.append((prev_observation.flatten(), action))
            prev_observation = observation
            score += reward
            if done:
                break

        if score >= score_requirement:
            accepted_scores.append(score)
            for frame, action in game_memory:
                one_hot = [0] * n_actions
                one_hot[action] = 1
                training_data.append([frame, one_hot])

    # Each row pairs a long pixel vector with a short label list. Filling an
    # explicit object array avoids NumPy trying (and failing) to build a
    # rectangular array out of these ragged rows.
    saved = np.empty((len(training_data), 2), dtype=object)
    for row, (frame, one_hot) in enumerate(training_data):
        saved[row, 0] = frame
        saved[row, 1] = one_hot
    np.save('saved.npy', saved)

    # mean()/median() raise StatisticsError on an empty list, so only report
    # them when at least one game was accepted.
    if accepted_scores:
        print('Average accepted score:', mean(accepted_scores))
        print('Median accepted score:', median(accepted_scores))
    else:
        print('No game reached the score requirement; no training data collected.')
    print(Counter(accepted_scores))

    return training_data
def neural_network_model(input_size):
    """Build the fully connected action classifier.

    The network takes inputs shaped ``[None, input_size, 1]``, applies two
    hidden stages (128 ReLU units, each followed by ``dropout(..., 0.8)``) and
    ends in a 6-way softmax trained with Adam on categorical cross-entropy.

    Args:
        input_size: length of one flattened observation vector.

    Returns:
        A ``tflearn.DNN`` wrapping the network, logging to the ``log``
        directory.
    """
    net = input_data(shape=[None, input_size, 1], name='input')

    # Two identical hidden stages. A deeper variant with additional
    # 512/256/128-unit stages was tried earlier and is currently disabled.
    for _ in range(2):
        net = fully_connected(net, 128, activation='relu')
        net = dropout(net, 0.8)

    net = fully_connected(net, 6, activation='softmax')
    net = regression(
        net,
        optimizer='adam',
        learning_rate=LR,
        loss='categorical_crossentropy',
        name='targets',
    )
    return tflearn.DNN(net, tensorboard_dir='log')
def train_model(training_data, model=False):
    """Fit a model on collected ``[observation, one_hot_action]`` samples.

    Args:
        training_data: non-empty sequence of rows whose first item is a
            flattened observation and whose second item is the one-hot
            action label.
        model: an existing model to keep training. Any falsy value (the
            default) builds a fresh one with ``neural_network_model``.

    Returns:
        The model after one epoch of fitting.

    Raises:
        ValueError: if ``training_data`` is empty.
    """
    # len() rather than truthiness: training_data may be a NumPy array, whose
    # truth value is ambiguous.
    if len(training_data) == 0:
        raise ValueError(
            'training_data is empty; no game met the score requirement, '
            'so there is nothing to train on.'
        )

    input_size = len(training_data[0][0])
    X = np.array([row[0] for row in training_data]).reshape(-1, input_size, 1)
    y = [row[1] for row in training_data]

    if not model:
        model = neural_network_model(input_size=input_size)

    model.fit(
        {'input': X},
        {'targets': y},
        n_epoch=1,
        snapshot_step=500,
        show_metric=True,
        run_id='openaistuff',
    )
    return model
def generateData():
    """Run the random-play collection step and print the first sample."""
    samples = initial_population()
    first_sample = samples[0]
    print("training data:", first_sample)
def countActions():
    """Print and return how often each action appears in ``saved.npy``.

    Each stored row is expected to hold a one-hot action label as its second
    item; the label's arg-max is the action index.

    Returns:
        collections.Counter: mapping of action index (``int``) to the number
        of samples labelled with that action.
    """
    # The file stores Python objects (arrays paired with label lists), which
    # NumPy refuses to load unless allow_pickle=True. Unpickling can execute
    # arbitrary code, so only load files generated by this script.
    training_data = np.load('saved.npy', allow_pickle=True)
    action_counts = Counter(int(np.argmax(row[1])) for row in training_data)
    print(action_counts)
    return action_counts
def testModel(model):
    """Let a trained model play and print its score(s) and action usage.

    For every step the current frame is flattened, reshaped to
    ``(1, n_values, 1)`` and passed to ``model.predict``; the arg-max of the
    prediction is used as the action.

    Args:
        model: object exposing ``predict`` for inputs shaped
            ``(1, n_values, 1)``.
    """
    scores = []
    choices = []

    for each_game in range(1):
        score = 0
        # Keep the frame returned by reset() so the model also chooses the
        # very first action instead of falling back to a random one.
        prev_obs = env.reset()

        for _ in range(goal_steps):
            flat_obs = prev_obs.flatten()
            prediction = model.predict(flat_obs.reshape(-1, len(flat_obs), 1))
            action = int(np.argmax(prediction))
            choices.append(action)

            # env.render()
            prev_obs, reward, done, _info = env.step(action)
            score += reward
            if done:
                break

        print('Score of game {} was {}'.format(each_game, score))
        scores.append(score)

    print('Average Score', sum(scores)/len(scores))
    # Shows at a glance whether the model keeps picking a single action.
    print('Action counts:', Counter(choices))
# Load the previously collected samples; here they are only used to size the
# network's input layer. The file stores Python objects, so NumPy requires
# allow_pickle=True to read it. Unpickling can execute arbitrary code: only
# load files that this script generated.
training_data = np.load('saved.npy', allow_pickle=True)
model = neural_network_model(input_size=len(training_data[0][0]))
model.load('fresh.model')

# To regenerate the data and retrain instead of loading a saved model:
# print(len(training_data))
# training_data = initial_population()
# model = train_model(training_data)

testModel(model)
# model.save('fresh.model')
答案 0 :(得分:0)
你训练了多长时间?根据我使用 OpenAI Gym 的经验,神经网络通常需要很长时间才能学到东西。在那之前,这通常表现为智能体只会一直采取同一个动作。