I have been working on OpenAI's CartPole problem and have run into an issue. When I run the script without any previously trained model, it works fine. But when I run it with (model = True) in the main method, it gives me an error.
I really don't know what to do about it. I have searched everywhere for this placeholder, but I can't find anything that works.
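Concretely, the run that fails is the commented-out variant of main() in the script below, where the previously saved model is used to generate the new training samples:

def main():
    # this is the variant that crashes for me
    training_data = get_random_samples(model=True)
    model = train_network(training_data)
    input("Press Enter to continue...")
    demo(model)

The full script: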
import gym
import numpy as np
import random
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import pickle
import tensorflow as tf
import operator
tf.logging.set_verbosity(tf.logging.ERROR)
env = gym.make('CartPole-v0')
goal_steps = 10000
def main():
    training_data = get_random_samples(model=False)
    model = train_network(training_data)
    #training_data = get_random_samples(model=True)
    #model = train_network(training_data)
    input("Press Enter to continue...")
    demo(model)
def get_random_samples(model=False):
    if model:
        model = neural_model()
        model.load("model.tfl")
    values = 10
    data = []
    scores = []
    games = []
    for _ in range(values):
        game_data = []
        previous_observation = []
        score = 0
        env.reset()
        for t in range(1000):
            #env.render()
            # Take a random move roughly 1 time in 6 so the model does not overfit.
            chance = random.randrange(0, 6)
            if chance == 0:
                action = random.randrange(0, 2)
                observation, reward, done, info = env.step(action)
            else:
                if len(previous_observation) == 0:
                    action = random.randrange(0, 2)
                elif model:
                    action = np.argmax(model.predict(previous_observation.reshape(-1, len(previous_observation), 1))[0])
                else:
                    action = random.randrange(0, 2)
                observation, reward, done, info = env.step(action)
            if len(previous_observation) > 0:
                game_data.append([previous_observation, action])
            previous_observation = observation
            if done:
                score = t
                break
        scores.append(score)
        if len(scores) % 100 == 1:
            print("generated {} values".format(len(scores)))
        games.append(game_data)
    data = get_highest_score(games, scores)
    data = clean_data(data)
    scores.sort()
    scores = scores[int(len(scores) * 9 / 10):]
    print("Number of generated games: ", len(scores))
    print('Average accepted score:', mean(scores))
    print('Median score for accepted scores:', median(scores))
    print(Counter(scores))
    env.close()
    np.save("random_sample.npy", data)
    return data
def neural_model():
    network = input_data(shape=[None, 4, 1], name='input')
    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, 512, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=1e-3, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network, tensorboard_dir='log')
    return model
def train_network(training_data):
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
    y = [i[1] for i in training_data]
    model = neural_model()
    model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=500, show_metric=True, run_id='openai_learning')
    # Saving model
    model.save("model.tfl")
    return model
def get_highest_score(data, scores):
    # Returns the games belonging to the top 10% of all scores generated
    clean_data = []
    list_of_tuples = list(zip(scores, data))
    list_of_tuples.sort(key=operator.itemgetter(0))
    list_of_tuples = list_of_tuples[int(len(list_of_tuples) * (9 / 10)):]
    clean_data = [i[1] for i in list_of_tuples]
    return clean_data
def clean_data(data):
    # Turns each action into a one-hot [0, 1] or [1, 0] vector and concatenates the games into one list
    clean_data = []
    for game in data:
        for d in game:
            if d[1] == 1:
                output = [0, 1]
            elif d[1] == 0:
                output = [1, 0]
            clean_data.append([d[0], output])
    return clean_data
def demo(model):
    scores = []
    choices = []
    for each_game in range(5):
        score = 0
        game_memory = []
        prev_obs = []
        env.reset()
        for _ in range(goal_steps):
            env.render()
            if len(prev_obs) == 0:
                action = random.randrange(0, 2)
            else:
                action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0])
            choices.append(action)
            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            game_memory.append([prev_obs, action])
            score += reward
            if done:
                break
        scores.append(score)
    print('Average Score:', sum(scores) / len(scores))
    print('choice 1: {} choice 0: {}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
main()
As far as I understand, the expected result is that I use the previous model's predictions to generate new samples and then train a new model on them. That should solve my problem better with each round.
The error:
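In other words, the loop I am aiming for looks roughly like this (just a sketch of the idea, n_iterations is a made-up name):

training_data = get_random_samples(model=False)      # round 0: purely random play
model = train_network(training_data)                 # trains and saves model.tfl
for i in range(n_iterations):                        # however many refinement rounds I want
    training_data = get_random_samples(model=True)   # sample with the saved model plus some random moves
    model = train_network(training_data)             # train a fresh network on the better samples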
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
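From what I have read, my guess is that two networks end up in the same default graph: one is built when get_random_samples(model=True) loads model.tfl, and a second one is built later inside train_network. If that is the cause, would something like resetting the graph before building the second network be the right fix, or is there a better way? For example (just a guess on my part, not code I am confident in):

def train_network(training_data):
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
    y = [i[1] for i in training_data]
    tf.reset_default_graph()  # drop the graph created when model.tfl was loaded (my guess at the problem)
    model = neural_model()
    model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=500, show_metric=True, run_id='openai_learning')
    model.save("model.tfl")
    return model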