You must feed a value for placeholder tensor 'input_1/X' with dtype float

Date: 2019-08-18 14:09:01

Tags: python tensorflow tflearn

I have been trying OpenAI's CartPole problem and ran into an issue. When I run it without any prior model training, it works fine. But when I try it with (model=True) in the main method, it gives me the error below.

I really don't know what to do. I have searched everywhere for this placeholder issue but could not find anything that works.

import gym
import numpy as np
import random
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import pickle
import tensorflow as tf
import operator

tf.logging.set_verbosity(tf.logging.ERROR)

env = gym.make('CartPole-v0')
goal_steps = 10000



def main():
    training_data = get_random_samples(model=False)
    model = train_network(training_data)

    # Uncommenting these two lines (model=True) triggers the error below.
    #training_data = get_random_samples(model=True)
    #model = train_network(training_data)

    input("Press Enter to continue...")
    demo(model)



def get_random_samples(model=False):
    if model:
        model = neural_model()
        model.load("model.tfl")
    values  = 10
    data    = []
    scores  = []
    games = []
    for _ in range(values):
        game_data = []
        previous_observation = []
        score = 0
        env.reset()
        for t in range(1000):
            #env.render()

            # Take a random move roughly 1 time in 6 (~17%) so the model
            # does not overfit to its own predictions.
            chance = random.randrange(0, 6)
            if chance == 0:
                action = random.randrange(0, 2)
            elif len(previous_observation) == 0:
                action = random.randrange(0, 2)
            elif model:
                action = np.argmax(model.predict(previous_observation.reshape(-1, len(previous_observation), 1))[0])
            else:
                action = random.randrange(0, 2)

            observation, reward, done, info = env.step(action)

            # Record the (observation, action) pair and keep the observation
            # up to date even after a random move.
            if len(previous_observation) > 0:
                game_data.append([previous_observation, action])

            previous_observation = observation

            if done:
                score = t
                break

        scores.append(score)
        if len(scores) % 100 == 1:
            print("generated {} values".format(len(scores)))
        games.append(game_data)

    data = get_highest_score(games,scores)
    data = clean_data(data)

    scores.sort()
    scores = scores[int(len(scores)*9/10):]

    print("Number of accepted games:", len(scores))
    print('Average accepted score:', mean(scores))
    print('Median score for accepted scores:', median(scores))
    print(Counter(scores))

    env.close()
    np.save("random_sample.npy", data)

    return data


def neural_model():
    network = input_data(shape=[None, 4, 1], name='input')

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 512, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=1e-3, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network, tensorboard_dir='log')

    return model


def train_network(training_data):
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
    y = [i[1] for i in training_data]


    model = neural_model()

    model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=500, show_metric=True, run_id='openai_learning')

    # Saving model
    model.save("model.tfl")

    return model


def get_highest_score(data, scores):
    # Returns the games from the top 10% of all scores generated
    clean_data = []
    list_of_tuples = list(zip(scores, data))
    list_of_tuples.sort(key=operator.itemgetter(0))
    list_of_tuples = list_of_tuples[int(len(list_of_tuples)*(9/10)):]
    clean_data = [ i[1] for i in list_of_tuples ]
    return clean_data


def clean_data(data):
    # One-hot encodes each action as [0, 1] or [1, 0] and flattens all
    # games into a single list of [observation, one_hot_action] pairs.
    clean_data = []
    for game in data:
        for d in game:
            if d[1] == 1:
                output = [0, 1]
            elif d[1] == 0:
                output = [1, 0]

            clean_data.append([d[0], output])
    return clean_data


def demo(model):
    scores = []
    choices = []
    for each_game in range(5):
        score = 0
        game_memory = []
        prev_obs = []
        env.reset()
        for _ in range(goal_steps):
            env.render()

            if len(prev_obs)==0:
                action = random.randrange(0,2)
            else:
                action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])

            choices.append(action)

            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            game_memory.append([prev_obs, action])
            score+=reward
            if done: break

        scores.append(score)

    print('Average Score:',sum(scores)/len(scores))
    print('choice 1: {}  choice 0: {}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices)))


if __name__ == "__main__":
    main()

As far as I can tell, the expected result is that I use the previous model's predictions to generate new samples and then train a new model on them, which should solve the problem better.
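In other words, the intended bootstrapping loop (what the commented-out lines in main() would do) is:

training_data = get_random_samples(model=False)   # random rollouts
model = train_network(training_data)              # first model

training_data = get_random_samples(model=True)    # rollouts guided by the saved model
model = train_network(training_data)              # retrain -- this is the step that fails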

Error:

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
     [[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
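
For context, a likely cause: tflearn typically raises this error when two copies of the network are built into the same default graph. With model=True, get_random_samples() builds one network to load "model.tfl", and train_network() then builds a second one, so the graph gains an extra 'input_1/X' placeholder that fit() never feeds. A minimal sketch of the common workaround, assuming that diagnosis, is to reset the default graph before each rebuild (fresh_model is a hypothetical helper, not part of the original code):

import tensorflow as tf

def fresh_model(weights_path=None):
    # Wipe all nodes from any previously built network so the new
    # input_data() placeholder is the only 'input/X' in the graph.
    tf.reset_default_graph()
    model = neural_model()
    if weights_path:
        model.load(weights_path)
    return model

get_random_samples() and train_network() would then call fresh_model("model.tfl") and fresh_model() respectively, instead of calling neural_model() directly.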

0 Answers:

No answers yet