无论输入是什么,TFLearn DNN 总是给出相同的预测

时间:2017-11-18 22:55:21

标签: python machine-learning tensorflow neural-network tflearn

我正在尝试使用 OpenAI Gym 玩 Space Invaders。下面的代码通过随机对局生成训练数据,并且只从得分不低于分数要求(score_requirement)的对局中学习。生成数据时没有出现任何错误,而且我已经确认数据中包含不同的 y/目标值。训练模型时也没有报错。但是,在 testModel 函数中,模型始终选择同一个动作。实际上,即使我给神经网络输入一个大小正确、填满随机整数的数组,它仍然会预测相同的值。

如果能就下一步该如何排查给出任何建议或想法,我将不胜感激。谢谢。

import gym
import random
import numpy as np
import tflearn
from tqdm import tqdm
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import mean, median
from collections import Counter

# Learning rate handed to the regression layer of the network.
LR  = 1e-3
# Single shared environment used by every function in this script.
env = gym.make('SpaceInvaders-v0')
# Start the first episode so the environment can be stepped right away.
env.reset()
# Upper bound on the number of steps played in one game.
goal_steps = 25000
# Minimum total score a random game must reach for its moves to be kept.
score_requirement = 300
# Number of random games played when generating training data.
initial_games = 2000

def some_random_games_first():
    """Play one game with random actions and print the final observation.

    Debugging helper for inspecting what the environment returns. The game
    runs until the environment reports ``done`` or ``goal_steps`` steps have
    been taken. The last observation is printed twice: first in its native
    shape, then flattened to one dimension.
    """
    # With the 4-tuple ``step`` API used in this file, ``reset`` returns the
    # first observation, so ``observation`` is bound even if no step is taken.
    observation = env.reset()
    for t in range(goal_steps):
        # env.render()
        observation, _reward, done, _info = env.step(env.action_space.sample())
        if done:
            print("step:", t)
            break
    print("observation:", observation)
    observation = observation.flatten()
    print(observation)

def initial_population():
    """Play random games and keep the moves of the games that scored well.

    Plays ``initial_games`` games with uniformly random actions. For every
    game whose total score is at least ``score_requirement``, each recorded
    step becomes one sample ``[flattened_previous_observation, one_hot_action]``.
    All samples are written to ``saved.npy`` and returned.

    Returns:
        list: ``[observation, one_hot_action]`` pairs. Empty when no game
        reached ``score_requirement``.
    """
    n_actions = 6  # must match the width of the network's output layer
    training_data = []
    accepted_scores = []
    for _ in tqdm(range(initial_games)):
        # Reset *before* playing so every game starts from a fresh episode,
        # regardless of the state the environment was left in by the caller.
        env.reset()
        score = 0
        game_memory = []
        prev_observation = None
        for _ in range(goal_steps):
            action = random.randrange(0, n_actions)
            # env.render()
            observation, reward, done, info = env.step(action)

            # Pair the action with the frame that was current when it was
            # chosen; the very first action has no such frame and is dropped.
            if prev_observation is not None:
                game_memory.append([prev_observation.flatten(), action])

            prev_observation = observation
            score += reward
            if done:
                break

        if score >= score_requirement:
            accepted_scores.append(score)
            for frame, action in game_memory:
                output = [0] * n_actions
                output[action] = 1
                training_data.append([frame, output])

    # Every sample pairs a long pixel vector with a 6-element list, so the
    # data is ragged. Recent NumPy versions raise ValueError for ragged input
    # unless dtype=object is given explicitly.
    training_data_save = np.array(training_data, dtype=object)
    np.save('saved.npy', training_data_save)

    # mean()/median() raise StatisticsError on an empty list.
    if accepted_scores:
        print('Average accepted score:', mean(accepted_scores))
        print('Median accepted score:', median(accepted_scores))
    else:
        print('No game reached the score requirement of', score_requirement)
    print(Counter(accepted_scores))

    return training_data

def neural_network_model(input_size):
    """Build the fully connected TFLearn network that scores the six actions.

    Args:
        input_size: Length of one flattened observation; the input layer is
            declared with shape ``[None, input_size, 1]``.

    Returns:
        A ``tflearn.DNN`` wrapping the network, logging to the ``log``
        directory.
    """
    net = input_data(shape=[None, input_size, 1], name='input')

    # Two identical hidden stages: 128 ReLU units, then dropout(0.8).
    for _ in range(2):
        net = fully_connected(net, 128, activation='relu')
        net = dropout(net, 0.8)

    # One softmax output per action.
    net = fully_connected(net, 6, activation='softmax')
    net = regression(
        net,
        optimizer='adam',
        learning_rate=LR,
        loss='categorical_crossentropy',
        name='targets',
    )

    return tflearn.DNN(net, tensorboard_dir='log')

def train_model(training_data, model=False, n_epoch=1):
    """Fit the network on the collected samples.

    Args:
        training_data: Non-empty sequence of ``[observation, one_hot_action]``
            pairs, where every observation is a flat vector of equal length.
        model: Existing model to keep training. When falsy, a new network is
            built with ``neural_network_model``.
        n_epoch: Number of passes over the data. Defaults to 1, the value
            that used to be hard-coded.

    Returns:
        The trained model.
    """
    input_size = len(training_data[0][0])
    # The network's input layer expects shape (samples, input_size, 1).
    X = np.array([sample[0] for sample in training_data]).reshape(-1, input_size, 1)
    y = [sample[1] for sample in training_data]

    if not model:
        model = neural_network_model(input_size=input_size)

    model.fit({'input': X}, {'targets': y}, n_epoch=n_epoch, snapshot_step=500,
              show_metric=True, run_id='openaistuff')

    return model

def generateData():
    """Generate the training set and print its first sample."""
    first_sample = initial_population()[0]
    print("training data:", first_sample)

def countActions():
    """Print how often each action occurs in the saved training data.

    Reads ``saved.npy`` from the current directory, takes the index of the
    largest entry in every sample's one-hot action vector and prints a
    ``Counter`` of those indices.
    """
    # The file stores [observation, one_hot_action] pairs of unequal length,
    # i.e. a pickled object array. NumPy >= 1.16.3 refuses to load such a file
    # unless allow_pickle=True is passed.
    # NOTE: unpickling can execute arbitrary code; only load a file that was
    # generated locally by this script.
    training_data = np.load('saved.npy', allow_pickle=True)
    X = np.array([np.argmax(i[1]) for i in training_data])
    print(Counter(X))

def testModel(model):
    """Play one game with the model choosing the actions and print the score.

    The first action is random because no observation has been seen yet.
    After that, each action is the index of the largest value in the model's
    prediction for the previous observation.

    Args:
        model: Object with a ``predict`` method that accepts an array of
            shape ``(1, input_size, 1)``.
    """
    scores = []

    for each_game in range(1):
        score = 0
        prev_obs = None
        env.reset()
        for _ in range(goal_steps):
            if prev_obs is None:
                action = random.randrange(0, 6)
            else:
                features = prev_obs.flatten()
                action = np.argmax(model.predict(features.reshape(-1, len(features), 1)))

            # env.render()
            # Only the latest frame is kept: storing a flattened copy of every
            # frame, as was done before, grew memory without ever being read.
            prev_obs, reward, done, info = env.step(action)
            score += reward
            if done:
                break

        print('Score of game {} was {}'.format(each_game, score))

        scores.append(score)

    print('Average Score', sum(scores)/len(scores))

# saved.npy holds [observation, one_hot_action] pairs as a pickled object
# array; NumPy >= 1.16.3 refuses to load it unless allow_pickle=True is given.
# NOTE: unpickling can execute arbitrary code; only load a file that was
# generated locally by this script.
training_data = np.load('saved.npy', allow_pickle=True)
model = neural_network_model(input_size = len(training_data[0][0]))
model.load('fresh.model')

# To regenerate the data and retrain instead of loading the saved files,
# enable the lines below.
# print(len(training_data))
# training_data = initial_population()
# model = train_model(training_data)
testModel(model)
# model.save('fresh.model')

1 个答案:

答案 0 :(得分:0)

你训练了多长时间? 根据我使用 OpenAI 的经验,神经网络通常需要很长时间才能学到东西。训练不足时,通常表现为智能体(agent)只会重复采取同一个动作。