I am trying to train a DQN on Atari in Google Colab (12.73 GB RAM). During training, the session stops with the message "Your session crashed after using all available RAM".
Here is the agent class:
import random
from collections import deque

import numpy as np
import tqdm
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense
from tensorflow.keras.losses import Huber
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

class my_agent:
    def __init__(self, env):
        self.env = env
        self.state_input = Input((105, 80, 1), name='state_input')
        self.gamma = 0.6
        self.epsilon = 0.1
        self.epsilon_min = 0.01  # used by _update_epsilon but never defined in the original
        self.epsilon_decay = 0.00001
        # Replay buffer of (state, action, reward, next_state, terminated) tuples.
        self.memory = deque(maxlen=50000)
        self.q_net = self.main_model()
        self.target_net = self.main_model()
        self.align_target_model()

    def main_model(self):
        # Two conv layers followed by a dense head with one Q-value per action.
        x = Conv2D(16, 8, strides=(4, 4), activation='relu')(self.state_input)
        x = Conv2D(32, 4, strides=(2, 2), activation='relu')(x)
        x = Flatten()(x)
        x = Dense(256, activation='relu')(x)
        y = Dense(self.env.action_space.n)(x)  # original referenced the global env here
        model = Model(inputs=[self.state_input], outputs=y)
        model.compile(Adam(learning_rate=0.01), loss=Huber())
        return model

    def store(self, state, action, reward, next_state, terminated):
        self.memory.append((state, action, reward, next_state, terminated))

    def _update_epsilon(self):
        self.epsilon = max(self.epsilon_min, self.epsilon - self.epsilon_decay)

    def align_target_model(self):
        # Copy the online network's weights into the target network.
        self.target_net.set_weights(self.q_net.get_weights())

    def act(self, state):
        # Epsilon-greedy: random action with probability epsilon,
        # otherwise the greedy action under the online network.
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        q_values = self.q_net.predict(state)
        return int(np.argmax(q_values[0]))

    def retrain(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, terminated in minibatch:
            # Q-learning target: r for terminal transitions,
            # r + gamma * max_a' Q_target(s', a') otherwise.
            target = self.q_net.predict(state)
            if terminated:
                target[0][action] = reward
            else:
                t = self.target_net.predict(next_state)
                target[0][action] = reward + self.gamma * np.amax(t)
            self.q_net.fit(state, target, epochs=1, verbose=0)
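A likely contributor to the RAM exhaustion is that retrain calls predict twice and fit once per transition inside a Python loop, so a single retrain with batch_size transitions issues dozens of separate TensorFlow calls; repeated Model.predict/fit calls on tiny inputs are slow and are a well-known source of steadily growing memory in TF2. Here is a minimal batched sketch, assuming each stored state is a numpy array of shape (1, 105, 80, 1) and actions are plain ints (batched_retrain is a hypothetical replacement, not part of the original notebook):

def batched_retrain(agent, batch_size):
    # One predict/fit per minibatch instead of one per transition.
    minibatch = random.sample(agent.memory, batch_size)
    states = np.concatenate([m[0] for m in minibatch])   # (B, 105, 80, 1)
    actions = np.array([m[1] for m in minibatch])
    rewards = np.array([m[2] for m in minibatch], dtype=np.float32)
    next_states = np.concatenate([m[3] for m in minibatch])
    dones = np.array([m[4] for m in minibatch], dtype=bool)

    # Calling the model directly sidesteps the per-call overhead
    # of Model.predict on small batches.
    targets = agent.q_net(states, training=False).numpy()
    next_q = agent.target_net(next_states, training=False).numpy()
    targets[np.arange(batch_size), actions] = np.where(
        dones, rewards, rewards + agent.gamma * next_q.max(axis=1))
    agent.q_net.fit(states, targets, epochs=1, verbose=0)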
Here is the training loop:
for e in tqdm.tqdm(range(num_of_episodes)):
    state = env.reset()
    state, stacked_frames = stack_state(stacked_frames, state, True)
    total_reward = 0
    terminated = False
    for timestep in range(timesteps_per_episode):
        env.render()
        action = agent.act(state)
        next_state, reward, terminated, info = env.step(action)
        next_state, stacked_frames = stack_state(stacked_frames, next_state, False)
        agent.store(state, action, reward, next_state, terminated)
        state = next_state
        total_reward += reward
        if terminated:
            # The original printed an undefined `rewards`; accumulate it instead.
            print("Total reward is {}".format(total_reward))
            agent.align_target_model()
            break
        if len(agent.memory) > batch_size:
            agent.retrain(batch_size)
num_of_episodes is 1000 and timesteps_per_episode is 1000, but training stops within the very first episode.
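For scale: if stack_state returns float32 frames, each 105×80×1 state costs 105 * 80 * 4 ≈ 33.6 KB, and every stored transition holds both state and next_state, so 1000 steps add roughly 67 MB to the deque per episode (double that if the preprocessing yields float64). That alone does not explain a first-episode crash, which points at the per-transition predict/fit loop above, but keeping the buffer in uint8 still shrinks each frame to ~8.4 KB. A sketch, assuming frames are uint8 pixel arrays without a batch dimension (store_uint8 and sample_float are illustrative helper names, not from the notebook):

def store_uint8(memory, state, action, reward, next_state, terminated):
    # uint8 keeps a 105x80x1 frame at ~8.4 KB instead of ~33.6 KB (float32).
    memory.append((state.astype(np.uint8), action, reward,
                   next_state.astype(np.uint8), terminated))

def sample_float(memory, batch_size):
    # Cast a single minibatch back to float32 in [0, 1] just before training.
    batch = random.sample(memory, batch_size)
    states = np.stack([b[0] for b in batch]).astype(np.float32) / 255.0
    actions = np.array([b[1] for b in batch])
    rewards = np.array([b[2] for b in batch], dtype=np.float32)
    next_states = np.stack([b[3] for b in batch]).astype(np.float32) / 255.0
    dones = np.array([b[4] for b in batch], dtype=bool)
    return states, actions, rewards, next_states, dones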
Full notebook: https://github.com/abhisheksuran/Atari_DQN/blob/master/Atari_DQN_image.ipynb