I am trying to get per-example gradients with TensorFlow, using Ian Goodfellow's idea from here, but I am getting an error, and I know it has to do with this line:

self.per_example_gradients_for_current_weights = tf.gradients(self.cost_function, self.weight_copies)
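For reference, my understanding of the trick on a tiny toy model (all names below are made up for illustration, this is not my real network): each example gets its own tf.identity copy of a weight, the copies are what the forward pass actually uses, and tf.gradients of the summed loss with respect to the copies then yields one gradient per example.

import tensorflow as tf

x = tf.placeholder(tf.float32, [3, 2])            # batch of 3 toy examples
y = tf.placeholder(tf.float32, [3, 1])
w = tf.get_variable("w_toy", [2, 1])

examples = tf.unstack(x, num=3, axis=0)
targets = tf.unstack(y, num=3, axis=0)
w_copies = [tf.identity(w) for _ in examples]     # one copy of w per example

# the forward pass goes through the copies, not through w itself
preds = [tf.matmul(tf.expand_dims(e, 0), wc) for e, wc in zip(examples, w_copies)]
loss = tf.add_n([tf.reduce_sum((p - t) ** 2) for p, t in zip(preds, targets)])

per_example_grads = tf.gradients(loss, w_copies)  # a list of 3 gradients, one per example

My actual code is: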
class PGNetwork:
    def __init__(self, state_size, action_size, learning_rate):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.weights = []
        self.per_example_gradients = []

        with tf.name_scope("inputs"):
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ], name="discounted_episode_rewards_")

        with tf.variable_scope('PGNetwork'):
            self.logits = policy(self.inputs_)

        with tf.name_scope("sample_gradient"):
            self.split_inputs = tf.unstack(self.inputs_, num=1500, axis=0)
            self.split_actions = tf.unstack(self.actions, num=1500, axis=0)
            # expand dims from [84,84,4] to [1,84,84,4]
            self.intermediate = [tf.expand_dims(self.split_inputs[i], 0) for i in range(1500)]

            with tf.variable_scope('PGNetwork', reuse=True):
                self.output = tf.stack([policy(self.intermediate[i]) for i in range(1500)])
                self.cost_function = cross_entropy(labels=self.split_actions, logits=self.output)

            for w in tf.trainable_variables():
                # one copy of the current weight per example
                self.weight_copies = [tf.identity(w) for x in self.split_inputs]
                self.per_example_gradients_for_current_weights = tf.gradients(self.cost_function, self.weight_copies)
                self.per_example_gradients.append(self.per_example_gradients_for_current_weights)
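The second set of calls to policy happens under tf.variable_scope('PGNetwork', reuse=True), which I expect to share the weights created by the first call, along these lines (a toy sketch in a fresh graph, not my real code):

import tensorflow as tf

def layer(x):
    w = tf.get_variable("w", [2, 2])
    return tf.matmul(x, w)

x1 = tf.placeholder(tf.float32, [1, 2])
x2 = tf.placeholder(tf.float32, [1, 2])

with tf.variable_scope("net"):
    out1 = layer(x1)                        # creates net/w
with tf.variable_scope("net", reuse=True):
    out2 = layer(x2)                        # reuses the existing net/w

assert len(tf.trainable_variables()) == 1   # both calls share one variable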
I also define the policy function as:
def policy(inputs_):
    with tf.variable_scope("conv1"):
        # Input is 84x84x4
        conv1 = tf.layers.conv2d(inputs = inputs_,
                                 filters = 32,
                                 kernel_size = [8,8],
                                 strides = [4,4],
                                 padding = "VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv1_batchnorm = tf.layers.batch_normalization(conv1,
                                                        training = True,
                                                        trainable = True,
                                                        epsilon = 1e-5,
                                                        name = 'batch_norm1')
        conv1_out = tf.nn.relu(conv1_batchnorm, name="conv1_out")
        ## --> [20, 20, 32]

    with tf.variable_scope("conv2"):
        conv2 = tf.layers.conv2d(inputs = conv1_out,
                                 filters = 64,
                                 kernel_size = [4,4],
                                 strides = [2,2],
                                 padding = "VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv2_batchnorm = tf.layers.batch_normalization(conv2,
                                                        training = True,
                                                        trainable = False,
                                                        epsilon = 1e-5,
                                                        name = 'batch_norm2')
        conv2_out = tf.nn.relu(conv2_batchnorm, name="conv2_out")
        ## --> [9, 9, 64]

    with tf.variable_scope("conv3"):
        conv3 = tf.layers.conv2d(inputs = conv2_out,
                                 filters = 64,
                                 kernel_size = [3,3],
                                 strides = [1,1],
                                 padding = "VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv3_batchnorm = tf.layers.batch_normalization(conv3,
                                                        training = True,
                                                        trainable = False,
                                                        epsilon = 1e-5,
                                                        name = 'batch_norm3')
        conv3_out = tf.nn.relu(conv3_batchnorm, name="conv3_out")
        ## --> [7, 7, 64]

    with tf.name_scope("flatten"):
        flatten = tf.contrib.layers.flatten(conv3_out)
        ## --> [3136]

    with tf.name_scope("fc1"):
        fc = tf.layers.dense(inputs = flatten,
                             units = 512,
                             activation = tf.nn.relu,
                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                             name="fc1")

    with tf.name_scope("logits"):
        logits = tf.layers.dense(inputs = fc,
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 units = action_size,
                                 activation=None, name = "logits")

    return logits
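For completeness, this is roughly how I instantiate the network and how I would like to evaluate the per-example gradients (the action_size, learning_rate, and feed values below are just dummies for this sketch; 1500 matches the num in tf.unstack above):

import numpy as np

tf.reset_default_graph()
action_size = 3                              # example value for this sketch
network = PGNetwork(state_size=[84, 84, 4],
                    action_size=action_size,
                    learning_rate=1e-4)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    states = np.zeros((1500, 84, 84, 4), dtype=np.float32)   # dummy batch
    acts = np.zeros((1500, action_size), dtype=np.int32)
    grads = sess.run(network.per_example_gradients,
                     feed_dict={network.inputs_: states, network.actions: acts})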
Does anyone know where the problem is?