I am trying to convert Pat-Coady's code (https://github.com/pat-coady/trpo) from a model subclass to a functional or sequential model, because model subclassing creates a bottleneck for a number of other things I want to do. However, I think something in my code is preventing the gradients from being computed. Here is the error:
Traceback (most recent call last):
File "train.py", line 346, in <module>
main(**vars(args))
File "train.py", line 313, in main
policy.update(observes, actions, advantages, logger) # update policy
File "/home/ryan/trpo_fractal8NN/trpo/policy.py", line 87, in update
old_means, old_logvars, old_logp])
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 973, in train_on_batch
class_weight=class_weight, reset_metrics=reset_metrics)
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py", line 264, in train_on_batch
output_loss_metrics=model._output_loss_metrics)
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 311, in train_on_batch
output_loss_metrics=output_loss_metrics))
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 272, in _process_single_batch
model.optimizer.apply_gradients(zip(grads, trainable_weights))
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 427, in apply_gradients
grads_and_vars = _filter_grads(grads_and_vars)
File "/home/ryan/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 1025, in _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['dense_4/kernel:0', 'dense_4/bias:0', 'dense_5/kernel:0', 'dense_5/bias:0', 'dense_6/kernel:0', 'dense_6/bias:0', 'dense_7/kernel:0', 'dense_7/bias:0', 'Variable:0'].
Here is the modified version of the Pat-Coady code that I have been working with:
"""
NN Policy with KL Divergence Constraint
Written by Patrick Coady (pat-coady.github.io)
"""
import tensorflow.keras.backend as K
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Layer, Input
from tensorflow.keras.optimizers import Adam
import numpy as np
class Policy(object):
def __init__(self, obs_dim, act_dim, kl_targ, hid1_mult, init_logvar):
"""
Args:
obs_dim: num observation dimensions (int)
act_dim: num action dimensions (int)
kl_targ: target KL divergence between pi_old and pi_new
hid1_mult: size of first hidden layer, multiplier of obs_dim
init_logvar: natural log of initial policy variance
"""
self.beta = 1.0 # dynamically adjusted D_KL loss multiplier
eta = 50 # multiplier for D_KL-kl_targ hinge-squared loss
self.kl_targ = kl_targ
self.epochs = 20
self.lr_multiplier = 1.0 # dynamically adjust lr when D_KL out of control
self.trpo = TRPO(obs_dim, act_dim, hid1_mult, kl_targ, init_logvar, eta)
self.policy = self.trpo.get_policy()
self.pol_model = self.policy.get_pol_model()
self.lr = self.policy.get_lr() # lr calculated based on size of PolicyNN
self.logprob_calc = LogProb()
self.run_trpo_once = 'run'
def sample(self, obs):
"""Draw sample from policy."""
act_means, act_logvars = self.policy.call_PolNN(obs)
act_stddevs = np.exp(act_logvars / 2)
return np.random.normal(act_means, act_stddevs).astype(np.float32)
def update(self, observes, actions, advantages, logger):
""" Update policy based on observations, actions and advantages
Args:
observes: observations, shape = (N, obs_dim)
actions: actions, shape = (N, act_dim)
advantages: advantages, shape = (N,)
logger: Logger object, see utils.py
"""
if self.run_trpo_once == 'run':
# run to initialize loss in pol_model first
old_means = np.ones(len(observes))
old_means = old_means[:,np.newaxis]
old_logvars = [[-1.]]
old_logp = np.ones(len(observes))
old_logp = old_logp[:,np.newaxis]
kl, entropy = self.trpo.call_TRPO([observes, actions, advantages,
old_means, old_logvars, old_logp])
# then compile
self.pol_model.compile(optimizer=Adam(self.lr * self.lr_multiplier))
            self.run_trpo_once = 'ran'
K.set_value(self.pol_model.optimizer.lr, self.lr * self.lr_multiplier)
# K.set_value(self.pol_model.beta, self.beta)
old_means, old_logvars = self.policy.call_PolNN(observes)
old_means = old_means.numpy()
old_logvars = old_logvars.numpy()
old_logp = self.logprob_calc.call_LogP([actions, old_means, old_logvars])
old_logp = old_logp.numpy()
loss, kl, entropy = 0, 0, 0
for e in range(self.epochs):
# observes - states
# actions - the 8x1 vector of values
# advantages - how much better a particular policy surface is from the old one
# old_means, old_logvars - parameters that specify the old policy surface
# old_logp - the old log probs for the actions
# train_on_batch - Single gradient update or model evaluation over one batch of samples
print('trainable')
print(self.pol_model.trainable_variables)
input('')
loss = self.pol_model.train_on_batch([observes, actions, advantages,
old_means, old_logvars, old_logp])
# predict_on_batch - Returns predictions for a single batch of samples - output of the model
kl, entropy = self.pol_model.predict_on_batch([observes, actions, advantages,
old_means, old_logvars, old_logp])
kl, entropy = np.mean(kl), np.mean(entropy)
if kl > self.kl_targ * 4: # early stopping if D_KL diverges badly
break
# TODO: too many "magic numbers" in next 8 lines of code, need to clean up
if kl > self.kl_targ * 2: # servo beta to reach D_KL target
self.beta = np.minimum(35, 1.5 * self.beta) # max clip beta
if self.beta > 30 and self.lr_multiplier > 0.1:
self.lr_multiplier /= 1.5 # if kl is too large, reduce the learning rate so that the new weights move at a slower rate
elif kl < self.kl_targ / 2:
self.beta = np.maximum(1 / 35, self.beta / 1.5) # min clip beta
if self.beta < (1 / 30) and self.lr_multiplier < 10:
self.lr_multiplier *= 1.5 # if kl is too small, increase the learning rate so that the new weights move at a faster rate
logger.log({'PolicyLoss': loss,
'PolicyEntropy': entropy,
'KL': kl,
'Beta': self.beta,
'_lr_multiplier': self.lr_multiplier})
class PolicyNN():
""" Neural net for policy approximation function.
Policy parameterized by Gaussian means and variances. NN outputs mean
action based on observation. Trainable variables hold log-variances
for each action dimension (i.e. variances not determined by NN).
"""
def __init__(self, obs_dim, act_dim, hid1_mult, init_logvar, **kwargs):
super(PolicyNN, self).__init__(**kwargs)
self.obs_dim = obs_dim
self.act_dim = act_dim
self.hid1_mult = hid1_mult
self.batch_sz = 1
self.init_logvar = init_logvar
self.pol_model = self.build_model()
def build(self, input_shape):
self.batch_sz = input_shape[0]
def build_model(self):
obs = Input(shape=(self.obs_dim,), dtype='float32')
hid1_units = self.obs_dim * self.hid1_mult
hid3_units = self.act_dim * 10 # 10 empirically determined
hid2_units = int(np.sqrt(hid1_units * hid3_units))
self.lr = 9e-4 / np.sqrt(hid2_units) # 9e-4 empirically determined
# heuristic to set learning rate based on NN size (tuned on 'Hopper-v1')
self.dense1 = Dense(hid1_units, activation='tanh', input_shape=(self.obs_dim,))
self.dense2 = Dense(hid2_units, activation='tanh', input_shape=(hid1_units,))
self.dense3 = Dense(hid3_units, activation='tanh', input_shape=(hid2_units,))
self.dense4 = Dense(self.act_dim, input_shape=(hid3_units,))
y = self.dense1(obs)
y = self.dense2(y)
y = self.dense3(y)
means = self.dense4(y)
self.model = Model(inputs=obs, outputs=means)
# logvar_speed increases learning rate for log-variances.
# heuristic sets logvar_speed based on network size.
logvar_speed = (10 * hid3_units) // 48
self.logvars = self.model.add_weight(shape=(logvar_speed, self.act_dim),
trainable=True, initializer='zeros')
print('Policy Params -- h1: {}, h2: {}, h3: {}, lr: {:.3g}, logvar_speed: {}'
.format(hid1_units, hid2_units, hid3_units, self.lr, logvar_speed))
return self.model
def call_PolNN(self, inputs, **kwargs):
# y = self.dense1(inputs)
# y = self.dense2(y)
# y = self.dense3(y)
# means = self.dense4(y)
means = self.model(inputs)
logvars = K.sum(self.logvars, axis=0, keepdims=True) + self.init_logvar
logvars = K.tile(logvars, (self.batch_sz, 1))
return [means, logvars]
def get_pol_model(self):
return self.pol_model
def get_lr(self):
return self.lr
class KLEntropy():
"""
Layer calculates:
1. KL divergence between old and new distributions
2. Entropy of present policy
https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Kullback.E2.80.93Leibler_divergence
https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Entropy
"""
def __init__(self, **kwargs):
super(KLEntropy, self).__init__(**kwargs)
self.act_dim = None
def build(self, input_shape):
self.act_dim = input_shape[0][1]
def call_KLE(self, inputs, **kwargs):
old_means, old_logvars, new_means, new_logvars = inputs
log_det_cov_old = K.sum(old_logvars, axis=-1, keepdims=True)
log_det_cov_new = K.sum(new_logvars, axis=-1, keepdims=True)
trace_old_new = K.sum(K.exp(old_logvars - new_logvars), axis=-1, keepdims=True)
kl = 0.5 * (log_det_cov_new - log_det_cov_old + trace_old_new +
K.sum(K.square(new_means - old_means) /
K.exp(new_logvars), axis=-1, keepdims=True) -
np.float32(self.act_dim))
entropy = 0.5 * (np.float32(self.act_dim) * (np.log(2 * np.pi) + 1.0) +
K.sum(new_logvars, axis=-1, keepdims=True))
return [kl, entropy]
class LogProb():
"""Layer calculates log probabilities of a batch of actions."""
def __init__(self, **kwargs):
super(LogProb, self).__init__(**kwargs)
def call_LogP(self, inputs, **kwargs):
# actions - the actual values for actions
# act_means - the current guess for the mean of the Gaussian dist for actions at a specific state
# act_logvars - the current guess for the variance of the Gaussian dist for actions at a specific state
actions, act_means, act_logvars = inputs
logp = -0.5 * K.sum(act_logvars, axis=-1, keepdims=True)
logp += -0.5 * K.sum(K.square(actions - act_means) / K.exp(act_logvars),
axis=-1, keepdims=True)
return logp
class TRPO():
def __init__(self, obs_dim, act_dim, hid1_mult, kl_targ, init_logvar, eta, **kwargs):
super(TRPO, self).__init__(**kwargs)
self.kl_targ = kl_targ
self.eta = eta
self.policy = PolicyNN(obs_dim, act_dim, hid1_mult, init_logvar)
self.logprob = LogProb()
self.kl_entropy = KLEntropy()
self.pol_model = self.policy.get_pol_model()
# self.beta = self.pol_model.add_weight('beta', initializer='zeros', trainable=False)
self.beta = 1.0
def call_TRPO(self, inputs):
# obs - states
# act - action (vector of length 8)
# adv - advantage of the new policy surface compared to the old one
# old_means, old_logvars - the old policy surface (states X actions X probability of that state/action pair)
# old_logp - log probabilities from the old policy surface
obs, act, adv, old_means, old_logvars, old_logp = inputs
new_means, new_logvars = self.policy.call_PolNN(obs) # the new policy surface
new_logp = self.logprob.call_LogP([act, new_means, new_logvars]) # the log probabilities of the new actions
kl, entropy = self.kl_entropy.call_KLE([old_means, old_logvars, # kl is the distance from the old policy surface to the new policy surface
new_means, new_logvars])
loss1 = -K.mean(adv * K.exp(new_logp - old_logp)) # rewards for if there are advantages for the new policy surface
loss2 = K.mean(self.beta * kl) # we are putting higher cost on a higher change in the policy surface
# TODO - Take mean before or after hinge loss?
loss3 = self.eta * K.square(K.maximum(0.0, K.mean(kl) - 2.0 * self.kl_targ)) # more loss on the kl distance
self.pol_model.add_loss(lambda: loss1 + loss2 + loss3)
return [kl, entropy]
def get_policy(self):
return self.policy
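For reference, and only restating what the KLEntropy and LogProb classes above already compute (nothing here is new code), these are the standard closed forms for diagonal Gaussians, with d = act_dim and the variances given by exp(logvars):

$$ D_{\mathrm{KL}}(\pi_{\mathrm{old}}\,\|\,\pi_{\mathrm{new}}) = \frac{1}{2}\left[\sum_i \log\sigma^2_{\mathrm{new},i} - \sum_i \log\sigma^2_{\mathrm{old},i} + \sum_i \frac{\sigma^2_{\mathrm{old},i}}{\sigma^2_{\mathrm{new},i}} + \sum_i \frac{(\mu_{\mathrm{new},i}-\mu_{\mathrm{old},i})^2}{\sigma^2_{\mathrm{new},i}} - d\right] $$

$$ H(\pi_{\mathrm{new}}) = \frac{1}{2}\left[d\,(\log 2\pi + 1) + \sum_i \log\sigma^2_{\mathrm{new},i}\right], \qquad \log p(a) = -\frac{1}{2}\sum_i \log\sigma^2_i - \frac{1}{2}\sum_i \frac{(a_i-\mu_i)^2}{\sigma^2_i} + \mathrm{const} $$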
Can someone tell me how to get the gradients working?

Update

I read somewhere that tf.trainable_variables can be used as a sanity check. So I did that; this is what I got, and it looks fine:
[<tf.Variable 'dense_4/kernel:0' shape=(5, 50) dtype=float32, numpy=
array([[-0.09056644, -0.1542418 , 0.14267331, -0.24423055, 0.1723414 ,
0.3041622 , -0.0463421 , 0.21160346, -0.21636385, 0.1460262 ,
-0.22153336, -0.01990247, -0.0657078 , -0.06834307, 0.09444442,
0.31193972, 0.25180137, 0.18253523, -0.07325894, -0.01935831,
0.23026413, 0.15255731, 0.31393945, 0.22253716, -0.30187184,
0.2828905 , 0.2921043 , -0.0833555 , -0.06862688, 0.09303367,
-0.12205629, 0.2264409 , 0.11403796, 0.01715118, -0.21517408,
-0.00991654, -0.3283836 , 0.1230278 , -0.16390005, -0.24062645,
-0.22984275, -0.06327075, 0.00160274, -0.0950896 , -0.3208313 ,
-0.0777106 , 0.01054186, 0.06994534, -0.1572856 , -0.25565267],
[-0.29657018, 0.10311651, 0.25462747, 0.28720784, 0.20768481,
0.08098608, -0.04480952, -0.2515493 , 0.10938856, 0.13960958,
-0.11424798, -0.07550961, -0.23944163, 0.17502749, -0.20357312,
-0.10799168, 0.23189247, 0.01713836, -0.21623212, 0.09898609,
-0.01473641, -0.28967652, -0.07317525, -0.07820797, 0.00804615,
-0.30267325, 0.23593342, 0.3260886 , 0.2622831 , -0.11407788,
-0.15899323, -0.1558573 , -0.17527379, -0.12064067, -0.04928762,
-0.30735955, -0.04887021, -0.03110918, 0.16053236, 0.3232907 ,
-0.15790062, -0.2053532 , 0.3083428 , -0.15291262, -0.13507862,
-0.31072426, -0.20016117, 0.3182075 , 0.05987966, 0.0068413 ],
[ 0.31588495, -0.15379554, -0.30932415, 0.11509198, 0.09408471,
0.11176044, 0.108096 , 0.10760236, 0.19085187, -0.11106887,
0.2734455 , 0.12039426, -0.19658205, -0.187198 , -0.02033138,
-0.3271312 , -0.30559656, 0.10069346, -0.06773978, 0.00541088,
-0.3004477 , 0.10801697, -0.22464041, 0.2935514 , -0.19635628,
0.08925194, 0.01256189, 0.27345437, 0.09166899, -0.2508183 ,
0.09553751, 0.11365145, 0.27909482, 0.04935196, -0.23593573,
-0.24586502, -0.0267795 , -0.22389066, -0.23130517, -0.12579764,
0.10728317, -0.22764713, -0.2434549 , 0.24598336, 0.0659402 ,
-0.22934839, -0.02001011, -0.01827681, 0.04900295, -0.22410737],
[ 0.07093802, 0.18189949, 0.1480552 , 0.28981942, -0.07200962,
-0.12835346, 0.25633413, 0.2548911 , -0.09774235, 0.12453139,
0.17621398, -0.21271431, 0.18934709, 0.21672541, -0.01774147,
0.29658848, 0.08517903, -0.28381458, -0.29605904, 0.24040258,
0.18663913, -0.27694625, -0.02210006, -0.1107527 , -0.04736763,
0.02987686, 0.19644791, -0.10151976, 0.26521844, -0.20637487,
0.27239293, 0.24622858, 0.26905107, -0.2069313 , -0.29157495,
-0.12751064, -0.16774015, 0.26694185, -0.28063375, -0.27921575,
-0.11992846, 0.25850093, -0.26377344, 0.08367363, 0.11008573,
-0.05474818, 0.20202738, -0.07400334, -0.11110282, -0.26100045],
[-0.30549955, 0.1639213 , -0.05721235, -0.29220885, -0.00241897,
-0.25379792, -0.11406669, -0.15763249, 0.3283986 , -0.14822206,
0.26523232, -0.13218679, -0.09447199, -0.17987877, -0.2925096 ,
-0.10008036, 0.32219553, -0.27833134, -0.2046496 , 0.26184374,
-0.30065283, 0.2289908 , 0.24528086, -0.19125906, -0.16941053,
-0.04826045, -0.21731395, 0.17271656, -0.06635788, 0.14376953,
0.05973405, 0.01962817, -0.21876532, -0.03228575, -0.11053056,
-0.20984864, 0.29313022, 0.19884443, -0.08914514, -0.17515655,
0.30815238, 0.30967182, -0.0381991 , 0.23873138, 0.15161055,
-0.1815734 , 0.00992835, -0.29296932, -0.16643286, 0.31162924]],
dtype=float32)>, <tf.Variable 'dense_4/bias:0' shape=(50,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
dtype=float32)>, <tf.Variable 'dense_5/kernel:0' shape=(50, 22) dtype=float32, numpy=
array([[-0.07641463, 0.10372171, 0.10552439, ..., 0.11076915,
0.14088157, 0.16406503],
[ 0.2394414 , 0.20726404, -0.21690719, ..., 0.27921987,
0.22056776, 0.13663283],
[-0.11289462, -0.01605877, -0.16357526, ..., -0.14471851,
0.10905829, -0.1720635 ],
...,
[-0.10842827, -0.01651275, 0.27903205, ..., -0.12529065,
0.15989399, -0.21786682],
[-0.2625265 , 0.21676713, -0.01570749, ..., -0.24360116,
-0.21392182, 0.09337282],
[ 0.19884145, 0.03818652, 0.07829756, ..., -0.06611854,
0.08981985, -0.12613025]], dtype=float32)>, <tf.Variable 'dense_5/bias:0' shape=(22,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'dense_6/kernel:0' shape=(22, 10) dtype=float32, numpy=
array([[-0.40856412, -0.17267984, -0.06019008, 0.24091133, 0.11042729,
0.1819137 , 0.41578355, -0.0113236 , 0.07515469, -0.3979596 ],
[ 0.33533886, -0.0071061 , 0.02106577, -0.1920557 , -0.05181074,
-0.02865377, 0.36775115, -0.14140442, -0.3922563 , -0.19282979],
[-0.40323848, -0.25250965, -0.12841642, -0.10407037, 0.09953728,
-0.3219217 , 0.03381827, -0.32069772, -0.42095384, 0.31233302],
[ 0.04473892, 0.1319389 , -0.01422426, -0.15412244, 0.31174704,
0.42587462, -0.23316012, 0.02517083, -0.4313327 , -0.33657917],
[ 0.33222046, 0.03015807, 0.34203622, 0.33677903, -0.23909096,
0.2565439 , 0.3234037 , 0.26996323, 0.08275196, -0.30814457],
[ 0.02566928, 0.13890317, -0.2744131 , 0.4059628 , -0.22979487,
0.3754643 , -0.0935671 , -0.2432846 , 0.19283941, 0.3187817 ],
[-0.0143365 , 0.3315598 , -0.16881037, -0.27712643, 0.00921267,
-0.24051926, -0.37810996, -0.3584606 , 0.35849884, 0.23191777],
[-0.11688998, 0.42203775, 0.27100554, 0.2886248 , 0.24194714,
0.2009575 , -0.29112542, -0.15900955, 0.4291233 , 0.07567766],
[ 0.21306023, -0.25940856, 0.08226374, -0.43016136, -0.30990994,
0.0405336 , -0.23634988, 0.41889063, -0.2543061 , 0.27451584],
[ 0.16138205, -0.16484591, 0.4219564 , -0.11415985, 0.24130508,
0.2740794 , 0.12109253, -0.08202952, 0.25594994, 0.08034489],
[ 0.17085841, 0.2799497 , 0.09622118, 0.25506493, -0.35615116,
0.3786846 , 0.19689342, -0.01329184, 0.41111007, 0.1168994 ],
[-0.16537306, -0.19038779, 0.10380569, 0.2214146 , 0.40884617,
0.38833138, 0.03334215, 0.05614519, -0.40251178, -0.34908825],
[ 0.25611022, 0.3102651 , 0.20395175, 0.3488123 , 0.4308013 ,
-0.2004465 , 0.41541925, 0.01705289, -0.2788509 , 0.02355093],
[ 0.09964207, 0.37552074, 0.31556478, 0.19723824, -0.21952018,
0.0967997 , 0.37155876, 0.02843317, -0.41630563, -0.13705778],
[ 0.25888482, 0.08312878, 0.12222609, -0.3417793 , -0.08671033,
0.29865667, 0.3769764 , 0.4186385 , -0.21739729, 0.10427055],
[ 0.28892097, -0.20018087, 0.2822549 , 0.07999769, 0.07810709,
-0.3895913 , 0.01223671, -0.21305199, 0.08630905, -0.00646561],
[ 0.03103518, 0.27201387, 0.4280201 , -0.37634748, 0.32758763,
-0.42886654, -0.32346493, 0.12441936, 0.08367553, 0.34698758],
[-0.13605791, 0.22087207, 0.05319434, -0.26699317, 0.3439472 ,
-0.02392432, -0.05433875, -0.18744622, -0.09437671, -0.15985653],
[-0.30821744, -0.42376068, -0.10014907, 0.28974316, 0.31066623,
-0.41475388, -0.15639979, -0.09500295, -0.3692003 , 0.00055945],
[-0.02606261, -0.02383772, 0.11596909, -0.0998278 , -0.39468765,
-0.02615437, 0.18446007, -0.34470618, 0.26801637, -0.40834618],
[ 0.32470265, 0.41725615, 0.40010652, 0.33240584, -0.34855223,
0.18146905, 0.34161422, 0.32351837, -0.42381912, 0.38510588],
[-0.195674 , -0.28567111, -0.29372156, -0.29233867, 0.28171977,
-0.0916695 , 0.4139649 , -0.3569014 , -0.06345901, 0.33335575]],
dtype=float32)>, <tf.Variable 'dense_6/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'dense_7/kernel:0' shape=(10, 1) dtype=float32, numpy=
array([[ 0.27339226],
[ 0.48728067],
[-0.14849728],
[-0.23854476],
[ 0.32655948],
[-0.16893917],
[-0.2876924 ],
[-0.7253326 ],
[ 0.17003566],
[-0.41358858]], dtype=float32)>, <tf.Variable 'dense_7/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=
array([[0.],
[0.]], dtype=float32)>]
Update 2

I'm not sure whether this is relevant, but when I run Patrick's code as-is, without my modifications, I get the following warnings:
WARNING:tensorflow:Output output_1 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to output_1.
WARNING:tensorflow:Output output_2 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to output_2.
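As far as I understand (this note and the snippet are my addition, not something from Patrick's repo), these warnings just mean that a multi-output model was compiled with an optimizer but no loss attached to its outputs, which is the situation here because the whole loss comes in through add_loss. A minimal setup of the same shape, assuming the TF 2.0/2.1 versions shown in the traceback above:

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input

# Two-output functional model whose only loss is supplied via add_loss;
# compiling it with just an optimizer leaves output_1/output_2 without a
# per-output loss, which is what the warnings are reporting.
x = Input(shape=(4,))
out1 = Dense(1, name='output_1')(x)
out2 = Dense(1, name='output_2')(x)
model = Model(inputs=x, outputs=[out1, out2])
model.add_loss(tf.reduce_mean(out1) + tf.reduce_mean(out2))
model.compile(optimizer='adam')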
Update 3

I'm not sure, but I wonder whether using the lambda term in add_loss, as in add_loss(lambda: loss1 + loss2 + loss3), to avoid this error could be what is causing my problem.
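One way to check that suspicion (a minimal diagnostic sketch, my addition rather than Patrick's original training path; it reuses the trpo object and the batch arrays from update() above) is to recompute the three loss terms under a tf.GradientTape and see which trainable variables come back with a gradient of None:

import tensorflow as tf

def debug_gradients(trpo, observes, actions, advantages,
                    old_means, old_logvars, old_logp):
    # Recompute the TRPO loss eagerly and report which weights receive gradients.
    model = trpo.pol_model
    adv = tf.reshape(tf.cast(advantages, tf.float32), (-1, 1))
    with tf.GradientTape() as tape:
        new_means, new_logvars = trpo.policy.call_PolNN(observes)
        new_logp = trpo.logprob.call_LogP([actions, new_means, new_logvars])
        kl, entropy = trpo.kl_entropy.call_KLE([old_means, old_logvars,
                                                new_means, new_logvars])
        loss1 = -tf.reduce_mean(adv * tf.exp(new_logp - old_logp))
        loss2 = tf.reduce_mean(trpo.beta * kl)
        loss3 = trpo.eta * tf.square(
            tf.maximum(0.0, tf.reduce_mean(kl) - 2.0 * trpo.kl_targ))
        loss = loss1 + loss2 + loss3
    grads = tape.gradient(loss, model.trainable_variables)
    for grad, var in zip(grads, model.trainable_variables):
        print(var.name, 'NO GRADIENT' if grad is None else 'ok')
    return loss, grads

Calling this right before train_on_batch inside update() should show whether the loss is connected to the weights at all, or whether it is only the add_loss(lambda: ...) path that breaks the connection.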
Update 4

OK, I found one problem. My self.act_dim was returning nan, so loss2 was returning nans. I put in the correct act_dim (1 for CartPole), but it still gives the same error.