I'm running this code to train a model to play Space Invaders, but I keep running out of RAM. It also pins both of my CPU cores at 100% while the GPU only reaches about 40% utilization, and the RAM is not released when training finishes; I have to kill the process to free it.
I've seen people use data loaders for this, but I don't know how to adapt one to my code. Any help would be appreciated.
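
For reference, the "data loader" pattern I keep seeing in other people's code looks roughly like the tf.data sketch below. This is not my code: the (110, 84, 4) frame-stack shape, the dummy data and the batch size of 64 are placeholders I made up, and I don't see how to connect something like this to a replay buffer that is filled while the agent plays:

import numpy as np
import tensorflow as tf

# Sketch of the tf.data pattern I have seen elsewhere, NOT my setup.
# Shapes, dtypes and sizes below are placeholder assumptions.
dummy_frames = np.zeros((1000, 110, 84, 4), dtype=np.float32)

frames_placeholder = tf.placeholder(tf.float32, shape=[None, 110, 84, 4])
dataset = (tf.data.Dataset.from_tensor_slices(frames_placeholder)
           .shuffle(buffer_size=1000)
           .batch(64)
           .prefetch(1))
iterator = dataset.make_initializable_iterator()
next_batch = iterator.get_next()

with tf.Session() as sess:
    # The iterator is initialized with the real data, then batches are pulled
    # from get_next() instead of feeding big arrays through feed_dict each step.
    sess.run(iterator.initializer, feed_dict={frames_placeholder: dummy_frames})
    batch = sess.run(next_batch)
    print(batch.shape)  # (64, 110, 84, 4)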
Here is my training loop:
...
saver = tf.train.Saver()
training = True

if training == True:
    with tf.device("/device:GPU:0"):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            decay_step = 0
            rewards_list = []
            print("1 got here")

            for episode in range(total_episodes):
                print("2 got here")
                step = 0
                episode_rewards = []
                state = env.reset()
                state, stacked_frames = stack_frames(stacked_frames, state, True)

                while step < max_steps:
                    step += 1
                    decay_step += 1

                    # Epsilon-greedy action selection
                    action, explore_probability = predict_action(explore_start, explore_stop,
                                                                 decay_rate, decay_step,
                                                                 state, possible_actions)
                    next_state, reward, done, _ = env.step(action)
                    if episode_render:
                        env.render()
                    episode_rewards.append(reward)

                    if done:
                        print("3 got here")
                        # Episode finished: store a blank next state and end the episode
                        next_state = np.zeros((110, 84), dtype=np.int)
                        next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                        step = max_steps
                        total_reward = np.sum(episode_rewards)
                        print("Episode: {} Total reward: {} Training loss: {:.4f} Explore P: {:.4f}".format(
                            episode, total_reward, loss, explore_probability))
                        rewards_list.append((episode, total_reward))
                        memory.add((state, action, reward, next_state, done))
                    else:
                        next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                        memory.add((state, action, reward, next_state, done))
                        state = next_state

                    # Learning step: sample a minibatch from the replay buffer
                    batch = memory.sample(batch_size)
                    states_mb = np.array([each[0] for each in batch], ndmin=3)
                    actions_mb = np.array([each[1] for each in batch])
                    rewards_mb = np.array([each[2] for each in batch])
                    next_states_mb = np.array([each[3] for each in batch], ndmin=3)
                    dones_mb = np.array([each[4] for each in batch])

                    target_Qs_batch = []
                    # print("Episode: {} Total reward: {} Training loss: {:.4f} Explore P: {:.4f}".format(episode, total_reward, loss, explore_probability))

                    # Q values for the next states, used to build the TD targets
                    Qs_next_state = sess.run(DQNetwork.output,
                                             feed_dict={DQNetwork.inputs_: next_states_mb})

                    for i in range(0, len(batch)):
                        terminal = dones_mb[i]
                        if terminal:
                            target_Qs_batch.append(rewards_mb[i])
                        else:
                            target = rewards_mb[i] + gamma * np.max(Qs_next_state[i])
                            target_Qs_batch.append(target)

                    targets_mb = np.array([each for each in target_Qs_batch])

                    loss, _ = sess.run([DQNetwork.loss, DQNetwork.optimizer],
                                       feed_dict={DQNetwork.inputs_: states_mb,
                                                  DQNetwork.target_Q: targets_mb,
                                                  DQNetwork.actions_: actions_mb})

                    # Second pass over the same minibatch, just to get the TensorBoard summary
                    summary = sess.run(write_op,
                                       feed_dict={DQNetwork.inputs_: states_mb,
                                                  DQNetwork.target_Q: targets_mb,
                                                  DQNetwork.actions_: actions_mb})
                    writer.add_summary(summary, episode)
                    writer.flush()

                print("Episode: {} Total reward: {} Training loss: {:.4f} Explore P: {:.4f}".format(
                    episode, total_reward, loss, explore_probability))

                if episode % 5 == 0:
                    gc.collect()
                    save_path = saver.save(sess, './models/model.ckpt')
                    print('Model Saved!')

            print("training done")
            sess.close()
            cuda.close()
            gc.collect()
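
One thing I noticed while cleaning this up: every training step pushes the same minibatch through the graph twice, once for the loss/optimizer and once more just for the summary. I don't know how much that contributes to the CPU load, but my (untested) idea, using the same names as in the code above, would be to fetch everything in a single sess.run:

# Untested idea: fetch the loss, the train op and the summary together so the
# minibatch only goes through the graph once per step.
loss, _, summary = sess.run([DQNetwork.loss, DQNetwork.optimizer, write_op],
                            feed_dict={DQNetwork.inputs_: states_mb,
                                       DQNetwork.target_Q: targets_mb,
                                       DQNetwork.actions_: actions_mb})
writer.add_summary(summary, episode)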
And this is my Memory class (the memory.add / memory.sample calls in the loop above are how I use it):
from collections import deque
import numpy as np


class Memory():
    def __init__(self, max_size):
        # Fixed-size FIFO buffer of (state, action, reward, next_state, done) tuples
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        # Uniform random sample without replacement
        buffer_size = len(self.buffer)
        index = np.random.choice(np.arange(buffer_size),
                                 size=batch_size,
                                 replace=False)
        return [self.buffer[i] for i in index]
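
To get a feel for whether the replay buffer alone could explain the RAM use, I did a rough back-of-the-envelope calculation. The numbers in it are assumptions (a stack of 4 frames of 110x84 stored as float64, and a buffer max_size of 1,000,000), not necessarily my actual settings:

import numpy as np

# Back-of-the-envelope only; the frame-stack shape, the float64 dtype and the
# buffer size are assumptions, not taken from my real configuration.
frame_stack = np.zeros((110, 84, 4), dtype=np.float64)
bytes_per_experience = 2 * frame_stack.nbytes   # state + next_state dominate the tuple
buffer_max_size = 1_000_000

print("per experience: ~{:.0f} KB".format(bytes_per_experience / 1024))
print("full buffer:    ~{:.0f} GB".format(bytes_per_experience * buffer_max_size / 1024 ** 3))

If that estimate is roughly right, I assume the buffer is where the memory goes, but I would still like to know what the proper way to handle this is.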
I'm a complete beginner with TensorFlow, so I'm obviously doing something wrong.