我正在尝试将我的 TensorFlow 代码转换为 Eager 模式(eager execution)。问题在于,在 Eager 模式下,前向传递对不同的输入值只会预测出相同的动作,而使用计算图(graph)的普通 TensorFlow 代码可以正常工作。我只更改了网络部分,智能体(agent)与我在普通 TensorFlow 中使用的完全相同。网络可能有什么问题?前向传递位于函数 get_probs() 中。
另一个问题是 Eager 模式下的网络非常慢。我认为计算图(graph)模式的执行速度要快 2-3 倍。
...
[0.31471518 0.33622807 0.34905672]
[0.31472355 0.3363353 0.34894115]
[0.31482834 0.33600125 0.34917045]
[0.31461707 0.33643782 0.34894508]
[0.31466153 0.33620775 0.34913075]
[0.31461093 0.33637658 0.3490125 ]
[0.31452385 0.33623937 0.34923682]
[0.31438416 0.33645296 0.3491629 ]
[0.31471425 0.3363982 0.34888753]
[0.314866 0.33610862 0.34902537]
[0.31489033 0.33622313 0.34888652]
...
...
[0.25704077 0.46056205 0.28239718]
[0.20610097 0.49288744 0.30101162]
[0.24638997 0.5338215 0.2197885 ]
[0.22581507 0.51206875 0.2621162 ]
[0.19064051 0.5398092 0.26955026]
[0.24399564 0.4424694 0.313535 ]
[0.25321653 0.48051655 0.26626688]
[0.2241595 0.43447506 0.3413655 ]
[0.20665398 0.5128011 0.28054494]
[0.2943201 0.39530927 0.3103706 ]
...
import tensorflow as tf
from keras.layers import *
import numpy as np
# Switch TensorFlow (1.x API) to eager execution. This runs at import time,
# before the network class below builds any layers.
tf.enable_eager_execution()
# Print whether eager execution is actually active.
print(tf.executing_eagerly())
class PGEagerAtariNetwork:
    """Convolutional policy-gradient network evaluated with eager execution.

    The model maps a stack of frames to one logit per action. All layers are
    taken from ``tf.keras.layers`` on purpose: layers from the standalone
    ``keras`` package must not be mixed into a ``tf.keras`` model, because the
    two packages keep separate backends and the standalone one does not
    cooperate with TensorFlow eager execution.
    """

    def __init__(self, state_space, action_space, lr):
        """Build the network and its Adam optimizer.

        Args:
            state_space: Stored on the instance only. NOTE(review): the model
                input shape is hard-coded to (84, 84, 4) and does not read
                this value -- confirm that is intended.
            action_space: Number of output logits (one per action).
            lr: Learning rate passed to ``tf.train.AdamOptimizer``.
        """
        self.state_space = state_space
        self.action_space = action_space

        # Use tf.keras layers explicitly instead of whatever the star-import
        # of standalone Keras put into the module namespace.
        layers = tf.keras.layers
        self.model = tf.keras.Sequential([
            layers.InputLayer(input_shape=(84, 84, 4)),
            # Conv
            layers.Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4],
                          activation='relu', name='conv1'),
            layers.Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2],
                          activation='relu', name='conv2'),
            layers.Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2],
                          activation='relu', name='conv3'),
            # Flatten
            layers.Flatten(name='flatten'),
            # Fully connected
            layers.Dense(units=512, activation='relu', name='fc1'),
            # Logits (no activation; softmax is applied by the callers)
            layers.Dense(units=self.action_space, activation=None,
                         name='logits'),
        ])
        self.model.summary()

        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def get_probs(self, s):
        """Return the action probabilities for a single observation.

        Args:
            s: One observation without a batch axis; presumably shaped
                (84, 84, 4) to match the model input -- verify against caller.

        Returns:
            NumPy array of softmax probabilities with size-1 axes squeezed out.
        """
        # Cast to float32 so integer or float64 frames match the float32
        # layer kernels, then prepend the batch axis the model expects.
        batch = np.asarray(s, dtype=np.float32)[np.newaxis, ...]
        logits = self.model(batch)
        return tf.nn.softmax(logits).numpy().squeeze()

    def update_policy(self, s, r, a):
        """Run one gradient step on the policy loss.

        Args:
            s: Batch of observations fed to the model.
            r: Per-sample weights for the loss (presumably returns or
                advantages -- verify against caller).
            a: Labels for the cross-entropy, same shape as the logits.
        """
        # The forward pass must happen inside the tape so it is recorded.
        with tf.GradientTape() as tape:
            loss = self.calc_loss(s, r, a)
        variables = self.model.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(
            zip(grads, variables),
            global_step=tf.train.get_or_create_global_step())

    def calc_loss(self, s, r, a):
        """Compute the scalar policy loss.

        The loss is the mean over the batch of the softmax cross-entropy
        between labels ``a`` and the model logits, weighted by ``r``.

        Args:
            s: Batch of observations fed to the model.
            r: Per-sample weights; wrapped in ``tf.stop_gradient`` so no
                gradient flows through them.
            a: Labels with the same shape as the logits.

        Returns:
            Scalar loss tensor.
        """
        logits = self.model(s)
        neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=a, logits=logits)
        # One reduce_mean is enough: its result is already a scalar.
        return tf.reduce_mean(neg_log_prob * tf.stop_gradient(r))