TypeError: Fetch argument None has invalid type <class 'NoneType'> in TensorFlow

Asked: 2018-08-20 18:24:47

Tags: python tensorflow

I am trying to get per-example (single-sample) gradients in TensorFlow using Ian Goodfellow's idea from here, but I get this error, and I know it comes from the line self.per_example_gradients_for_current_weights = tf.gradients(self.cost_function, self.weight_copies).
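To show where I think the message itself comes from: in a toy graph (the names below are mine, not from my network), tf.gradients returns None when the target does not depend on the source, and fetching that None in session.run raises exactly this TypeError:

import tensorflow as tf

x = tf.constant(1.0)
y = tf.constant(2.0) * tf.constant(3.0)   # y does not depend on x

grads = tf.gradients(y, [x])              # -> [None]

with tf.Session() as sess:
    sess.run(grads)  # TypeError: Fetch argument None has invalid type <class 'NoneType'>

Here is my class: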

class PGNetwork:
    def __init__(self, state_size, action_size, learning_rate):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.weights = []
        self.per_example_gradients = []

        with tf.name_scope("inputs"):
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ], name="discounted_episode_rewards_")

        with tf.variable_scope('PGNetwork'):
            self.logits = policy(self.inputs_)

        with tf.name_scope("sample_gradient"):
            self.split_inputs = tf.unstack(self.inputs_, num=1500, axis=0)
            self.split_actions = tf.unstack(self.actions, num=1500, axis=0)
            # expand dims from [84, 84, 4] to [1, 84, 84, 4]
            self.intermediate = [tf.expand_dims(self.split_inputs[i], 0) for i in range(1500)]
            with tf.variable_scope('PGNetwork', reuse=True):
                self.output = tf.stack([policy(self.intermediate[i]) for i in range(1500)])
                self.cost_function = cross_entropy(labels=self.split_actions, logits=self.output)
            for w in tf.trainable_variables():
                self.weight_copies = [tf.identity(w) for x in self.split_inputs]
                self.per_example_gradients_for_current_weights = tf.gradients(self.cost_function, self.weight_copies)
                self.per_example_gradients.append(self.per_example_gradients_for_current_weights)
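As a sanity check (my own debugging lines, placed directly after the tf.gradients call inside the loop), I can count how many of the returned gradients are None instead of trying to fetch them:

                # Debugging sketch (mine): tf.gradients returns None for every
                # copy that self.cost_function does not depend on, and fetching
                # a None is what raises the TypeError in session.run.
                num_none = sum(g is None for g in self.per_example_gradients_for_current_weights)
                print(w.name, "None gradients:", num_none, "of", len(self.weight_copies))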

I also define the function policy as:

def policy(inputs_):

    with tf.variable_scope("conv1"):
        # Input is 84x84x4
        conv1 = tf.layers.conv2d(inputs=inputs_,
                                 filters=32,
                                 kernel_size=[8, 8],
                                 strides=[4, 4],
                                 padding="VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv1_batchnorm = tf.layers.batch_normalization(conv1,
                                                        training=True,
                                                        trainable=True,
                                                        epsilon=1e-5,
                                                        name='batch_norm1')
        conv1_out = tf.nn.relu(conv1_batchnorm, name="conv1_out")
        ## --> [20, 20, 32]

    with tf.variable_scope("conv2"):
        conv2 = tf.layers.conv2d(inputs=conv1_out,
                                 filters=64,
                                 kernel_size=[4, 4],
                                 strides=[2, 2],
                                 padding="VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv2_batchnorm = tf.layers.batch_normalization(conv2,
                                                        training=True,
                                                        trainable=False,
                                                        epsilon=1e-5,
                                                        name='batch_norm2')
        conv2_out = tf.nn.relu(conv2_batchnorm, name="conv2_out")
        ## --> [9, 9, 64]

    with tf.variable_scope("conv3"):
        conv3 = tf.layers.conv2d(inputs=conv2_out,
                                 filters=64,
                                 kernel_size=[3, 3],
                                 strides=[1, 1],
                                 padding="VALID",
                                 kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv3_batchnorm = tf.layers.batch_normalization(conv3,
                                                        training=True,
                                                        trainable=False,
                                                        epsilon=1e-5,
                                                        name='batch_norm3')
        conv3_out = tf.nn.relu(conv3_batchnorm, name="conv3_out")
        ## --> [7, 7, 64]

    with tf.name_scope("flatten"):
        flatten = tf.contrib.layers.flatten(conv3_out)
        ## --> [3136]

    with tf.name_scope("fc1"):
        fc = tf.layers.dense(inputs=flatten,
                             units=512,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                             name="fc1")

    with tf.name_scope("logits"):
        logits = tf.layers.dense(inputs=fc,
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 units=action_size,
                                 activation=None,
                                 name="logits")
    return logits
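For reference, my understanding of the trick (on a toy linear model, with names that are mine, not from the code above) is that each example's forward pass has to go through its own weight copy, so that the cost actually depends on the copies; I am not sure my reused policy(...) calls above satisfy this:

import tensorflow as tf

batch = 4
x = tf.placeholder(tf.float32, [batch, 3])
y = tf.placeholder(tf.float32, [batch, 1])

w = tf.get_variable("w", [3, 1])
w_copies = [tf.identity(w) for _ in range(batch)]   # one copy per example

examples = tf.unstack(x, num=batch, axis=0)
targets = tf.unstack(y, num=batch, axis=0)

# Each prediction is wired through its own copy; if tf.matmul used w directly,
# tf.gradients below would return [None, None, None, None].
preds = [tf.matmul(tf.expand_dims(ex, 0), wc) for ex, wc in zip(examples, w_copies)]
total_loss = tf.add_n([tf.reduce_sum(tf.square(p - t)) for p, t in zip(preds, targets)])

per_example_grads = tf.gradients(total_loss, w_copies)  # one [3, 1] gradient per example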

Does anyone know where the problem is?

0 Answers:

No answers yet.