I am trying to train my model with a custom loss function. Here is my loss function:
def softargmax(x, beta=1e10):
    x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
    return tf.cast(tf.reduce_sum(tf.nn.softmax(x * beta) * x_range, axis=-1), tf.int32)
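For context, a minimal standalone check (toy values, and a small beta so the softmax does not saturate) suggests that the tf.cast to tf.int32 cuts the gradient path:

import tensorflow as tf

x = tf.Variable([[0.1, 2.0, 0.3]])
with tf.GradientTape(persistent=True) as tape:
    x_range = tf.range(3, dtype=x.dtype)
    soft = tf.reduce_sum(tf.nn.softmax(x * 10.0) * x_range, axis=-1)
    hard = tf.cast(soft, tf.int32)  # integer cast is not differentiable

print(tape.gradient(soft, x))  # a float gradient tensor
print(tape.gradient(hard, x))  # None: the cast severed the gradient path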
def convert2(lang, tensor):
    result = ''
    for t in tensor:
        if t == 1 or t == 2:
            continue
        if t != 0:
            result += lang.index_word[t]
    return result
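convert2 maps token IDs back to text; in my vocabulary 0 is padding and 1/2 are the start/end tokens. A quick sanity check with a stand-in tokenizer (FakeLang is hypothetical; the real code uses my fitted tokenizer's index_word):

class FakeLang:
    # stand-in for the fitted tokenizer; only index_word is needed here
    index_word = {3: 'h', 4: 'i'}

print(convert2(FakeLang(), [1, 3, 4, 2, 0, 0]))  # -> 'hi'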
def getScalar(inp, tar):
    inpS, tarS, losses = [], [], []
    for i in inp.numpy():
        inpS.append(convert2(lang, i))
    for i in tar.numpy():
        tarS.append(convert2(lang, i))
    for i, t in zip(inpS, tarS):
        losses.append(1 - Levenshtein.ratio(i, t))
    return tf.convert_to_tensor(sum(losses) / len(losses), dtype=tf.float32)
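So the per-pair loss is 1 minus the Levenshtein similarity ratio, averaged over the batch. A quick worked check with plain strings (using the python-Levenshtein package):

import Levenshtein

# 'hello' vs 'hallo' differ by one substitution
print(Levenshtein.ratio('hello', 'hallo'))      # 0.8
print(1 - Levenshtein.ratio('hello', 'hallo'))  # 0.2, the per-pair loss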
def loss(inp, tar):
    return tf.py_function(getScalar, [inp, tar], tf.float32)
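Calling the loss eagerly does return a scalar (the toy IDs below assume the same vocabulary as the FakeLang example above, and that lang is the fitted tokenizer). But as far as I understand, everything inside getScalar runs in plain Python on decoded strings, so there are no TensorFlow ops for the tape to differentiate through:

ids_a = tf.constant([[1, 3, 4, 2, 0]])
ids_b = tf.constant([[1, 3, 3, 2, 0]])
# A float scalar comes back, but no gradient can flow through it,
# because the Levenshtein ratio is computed outside the TF graph.
print(loss(ids_a, ids_b))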
Then I train my model with:
@tf.function
def train_step(inp, tar):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        predictions, _ = gen(inp, tar_inp,
                             True,
                             enc_padding_mask,
                             combined_mask,
                             dec_padding_mask)
        predicted_id = softargmax(predictions)
        pred_score = dis(predicted_id)
        real_score = dis(tar_real)
        # loss_g = distLoss(tar_real, predicted_id)
        loss_g = loss(tar_real, predicted_id)
        loss_d = discriminator_loss(real_score, pred_score)
    g_gradients = g_tape.gradient(loss_g, gen.trainable_variables)
    d_gradients = d_tape.gradient(loss_d, dis.trainable_variables)
    g_optimizer.apply_gradients(zip(g_gradients, gen.trainable_variables))
    d_optimizer.apply_gradients(zip(d_gradients, dis.trainable_variables))
    return loss_g, loss_d
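For debugging, I also tried printing which variables receive no gradient (a small sketch placed just before apply_gradients, not part of the model):

# Debugging aid: list every generator variable whose gradient is None
for g, v in zip(g_gradients, gen.trainable_variables):
    if g is None:
        print('no gradient for', v.name)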
At training time I get this error:
ValueError: in user code:
<ipython-input-161-be38d41d405a>:30 train_step *
g_optimizer.apply_gradients(zip(g_gradients, gen.trainable_variables))
C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:513 apply_gradients **
grads_and_vars = _filter_grads(grads_and_vars)
C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:1271 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['transformer_18/encoder_18/embedding_46/embeddings:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_886/kernel:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_886/bias:0', ... (the list continues through every trainable variable of the generator) ..., 'transformer_18/dense_982/kernel:0', 'transformer_18/dense_982/bias:0'].
I don't know how to debug this. I tried removing the .numpy() calls, but that didn't help. Can anyone help me? Thanks.
Update: I removed the tf.cast from softargmax:
def softargmax(x, beta=1e10):
    x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
    return tf.reduce_sum(tf.nn.softmax(x * beta) * x_range, axis=-1)
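Sanity check: without the cast, softargmax itself is differentiable (small beta here; with beta=1e10 the softmax saturates to a hard one-hot and the gradient is effectively zero):

x = tf.Variable([[0.1, 2.0, 0.3]])
with tf.GradientTape() as tape:
    idx = softargmax(x, beta=10.0)
print(idx)                    # ~1.0, the argmax position as a float
print(tape.gradient(idx, x))  # now a real float gradient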
But when I train, I now get this warning:
WARNING:tensorflow:The dtype of the source tensor must be floating (e.g. tf.float32) when calling GradientTape.gradient
and this error:
TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
[[node gradient_tape/EagerPyFunc (defined at <ipython-input-311-3c818a224e1b>:31) ]]
(1) Invalid argument: TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
I'm confused about the dtypes between the loss and the gradients.
Full traceback:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
Traceback (most recent call last):
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 242, in __call__
return func(device, token, args)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in __call__
for (x, dtype) in zip(ret, self._out_dtypes)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in <listcomp>
for (x, dtype) in zip(ret, self._out_dtypes)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 119, in _convert
return constant_op.constant(0.0, dtype=dtype)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
allow_broadcast=True)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 275, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 300, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
[[node gradient_tape/EagerPyFunc (defined at <ipython-input-468-88e1bc3a3f24>:30) ]]
(1) Invalid argument: TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
(traceback identical to error (0) above)
[[node gradient_tape/EagerPyFunc (defined at <ipython-input-468-88e1bc3a3f24>:30) ]]
[[GroupCrossDeviceControlEdges_0/Adam/Adam/Const/_85]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_step_1355962]
Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/EagerPyFunc:
EagerPyFunc (defined at <ipython-input-459-043bdf351e86>:20)
Function call stack:
train_step -> train_step