I am trying to build an Implicit Quantile Network. I wrote a custom loss function, but it does not work: I get a "No gradients provided" error, even though I believe I only use operations that should provide gradients, such as tf.tile and the like, and I do not explicitly cast anything inside my loss_kv_iq() function.
Below is the code for my custom layer (IQNlayer), the network that uses it (IQN), and my custom loss function. The short snippet in the main program should reproduce the error.
TF version: 2.1.0
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

class IQN(keras.Model):
    def __init__(self, quantile_dims, fc_dims, n_actions, n_quantiles):
        super(IQN, self).__init__()
        self.n_quantiles = n_quantiles
        initializer = keras.initializers.he_uniform()
        self.iq = IQNlayer(quantile_dims, n_quantiles)
        self.dense = keras.layers.Dense(fc_dims, activation='relu', kernel_initializer=initializer)
        self.out = keras.layers.Dense(n_actions, activation=None)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        x = self.iq(state, tau)
        x = self.dense(x)
        x = self.out(x)
        x = tf.transpose(tf.split(x, batch_size, axis=0), perm=[0, 2, 1])
        return x
class IQNlayer(keras.layers.Layer):
    def __init__(self, quantile_dims, n_quantiles):
        super(IQNlayer, self).__init__()
        self.quantile_dims = quantile_dims
        self.n_quantiles = n_quantiles
        self.fc1 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.selu)
        self.fc2 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.relu)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        # repeat each state n_quantiles times: (batch, state) -> (batch * n_quantiles, state)
        state_tile = tf.tile(state, [1, self.n_quantiles])
        state_reshape = tf.reshape(state_tile, [-1, state_size])
        state_net = self.fc1(state_reshape)
        # cosine embedding of the sampled quantile fractions tau
        tau = tf.reshape(tau, [-1, 1])
        pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
        cos_tau = tf.cos(tf.matmul(tau, pi_mtx))
        phi = self.fc2(cos_tau)
        net = tf.multiply(state_net, phi)
        return net
def loss_kv_iq(x, tau, action_hot, theta_target):
    expand_dim_action = tf.expand_dims(action_hot, -1)
    main_support = tf.reduce_sum(x * expand_dim_action, axis=1)
    theta_loss_tile = tf.tile(tf.expand_dims(main_support, axis=2), [1, 1, N_QUANTILES])
    logit_valid_tile = tf.tile(tf.expand_dims(theta_target, axis=1), [1, N_QUANTILES, 1])
    Huber_loss = hloss(logit_valid_tile, theta_loss_tile)
    inv_tau = 1 - tau
    tau = tf.tile(tf.expand_dims(tau, axis=1), [1, N_QUANTILES, 1])
    inv_tau = tf.tile(tf.expand_dims(inv_tau, axis=1), [1, N_QUANTILES, 1])
    error_loss = logit_valid_tile - theta_loss_tile
    Loss = tf.where(tf.less(error_loss, 0.0), inv_tau * Huber_loss, tau * Huber_loss)
    loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1))
    return loss
if __name__ == '__main__':
    hloss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
    N_QUANTILES = 10
    BATCH_SIZE = 2
    ACTION_SIZE = 5
    STATE_SIZE = 16

    # FOR EXAMPLE: RANDOM BATCH
    cs = np.random.rand(BATCH_SIZE, STATE_SIZE)
    a = np.random.randint(0, 5, size=(2))
    r = np.random.randint(0, 500, size=(2))
    ns = np.random.rand(BATCH_SIZE, STATE_SIZE)
    tau = np.random.uniform(size=(BATCH_SIZE, N_QUANTILES))
    tau = tau.astype('float32')

    iq = IQN(128, 128, ACTION_SIZE, N_QUANTILES)

    action_hot = np.zeros((BATCH_SIZE, ACTION_SIZE), dtype=np.float32)
    action_hot[np.arange(BATCH_SIZE), a] = 1

    Q = iq(ns, tau)

    theta_target = np.random.rand(BATCH_SIZE, N_QUANTILES)
    theta_target = theta_target.astype('float32')

    optimizer = tf.keras.optimizers.Adam(lr=1e-3)

    with tf.GradientTape() as tape:
        loss = loss_kv_iq(Q, tau, action_hot, theta_target)
    grads = tape.gradient(loss, iq.trainable_weights)
    optimizer.apply_gradients(zip(grads, iq.trainable_weights))
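For reference, these are the tensor shapes that go into loss_kv_iq in this repro (the values themselves are random; this check is only for inspection and not part of the failing code path):

    print(Q.shape)             # (BATCH_SIZE, ACTION_SIZE, N_QUANTILES) -> (2, 5, 10)
    print(tau.shape)           # (BATCH_SIZE, N_QUANTILES)              -> (2, 10)
    print(action_hot.shape)    # (BATCH_SIZE, ACTION_SIZE)              -> (2, 5)
    print(theta_target.shape)  # (BATCH_SIZE, N_QUANTILES)              -> (2, 10)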
Error:
Traceback (most recent call last):
File "C:\Users\rensj\.spyder-py3\Thesis\test.py", line 106, in <module>
optimizer.apply_gradients(zip(grads,iq.trainable_weights))
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 426, in apply_gradients
grads_and_vars = _filter_grads(grads_and_vars)
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 1039, in _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['iqn_4/iq_nlayer_4/dense_16/kernel:0', 'iqn_4/iq_nlayer_4/dense_16/bias:0', 'iqn_4/iq_nlayer_4/dense_17/kernel:0', 'iqn_4/iq_nlayer_4/dense_17/bias:0', 'iqn_4/dense_18/kernel:0', 'iqn_4/dense_18/bias:0', 'iqn_4/dense_19/kernel:0', 'iqn_4/dense_19/bias:0'].
EDIT: As pointed out by Mr. Agrawal, I was using numpy operations for pi_mtx. I changed these to their TensorFlow counterparts and, together with some other small changes on the same line, it became:
pi_mtx = tf.constant(tf.expand_dims(tf.constant(np.pi) * tf.range(0, 64, dtype=tf.float32), axis=0), dtype=tf.float32)
However, I still get the same ValueError: No gradients provided for any variable.
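To narrow it down, I added a small debugging sketch (only for inspection, not part of the training code) that prints which variables come back without a gradient:

    grads = tape.gradient(loss, iq.trainable_weights)
    for g, v in zip(grads, iq.trainable_weights):
        if g is None:
            print('no gradient for', v.name)

Every variable is printed as having no gradient, which matches the ValueError above.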
Answer (score: 1)
In the line
pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
you are using numpy functions. Change them to their TensorFlow counterparts:
np.expand_dims -> tf.expand_dims
np.arange -> tf.keras.backend.arange or tf.range
You can keep np.pi, since it is a constant, not an operation.
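For example, that line could be built from TensorFlow ops only, along these lines (an untested sketch; np.pi stays as a plain Python constant):

    pi_mtx = tf.expand_dims(np.pi * tf.range(0, 64, dtype=tf.float32), axis=0)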