For reference, I noticed that the NaNs appear only in the batch_norm layers. Why does this happen?
Is there some issue with training, such as vanishing or exploding gradients? Or is NaN just a placeholder for the bias?
(Just for context, I am trying to train a Siamese network with triplet loss, with the architecture and loss as described here: Keras Model Weights for some layers become all "NaNs".)
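One way to probe the vanishing/exploding-gradient hypothesis would be to inspect per-variable gradient norms on a single batch. This is only a sketch: `model`, `anchor`, `positive`, and `negative` are assumed names for the compiled Siamese network and one training batch, and it assumes the model returns the triplet loss directly.

import tensorflow as tf

# Hypothetical setup: `model` is the compiled Siamese network and
# `anchor`, `positive`, `negative` form one training batch.
with tf.GradientTape() as tape:
    # Assumes the network outputs the triplet loss itself; adapt if the
    # loss is computed outside the model.
    loss = tf.reduce_mean(model([anchor, positive, negative], training=True))

grads = tape.gradient(loss, model.trainable_variables)
for var, grad in zip(model.trainable_variables, grads):
    if grad is not None:
        # Very large or NaN norms suggest exploding gradients;
        # consistently near-zero norms suggest vanishing ones.
        print(var.name, float(tf.norm(grad)))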
Sample weights are listed below.
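A per-layer check along the following lines would produce this kind of "Yes <layer name>" listing. This is a minimal sketch, assuming `model` is the loaded Keras model; the actual script may differ.

import numpy as np

# Print "Yes <layer name>" plus the weight arrays for every layer
# whose weights contain at least one NaN.
for layer in model.layers:
    weights = layer.get_weights()
    if any(np.isnan(w).any() for w in weights):
        print("Yes", layer.name)
        print(weights)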
Yes batch_normalization_11
[array([-0.08248743, -0.19357444, 0.06189843, -0.03510274, 0.00484655,
-0.00720728, 0.04206916, -0.08482166, 0.04828389, -0.07205907,
-0.06575502, -0.08964766, -0.05986991, -0.03338304, -0.11991593,
0.02784944, 0.0775785 , -0.15297122, 0.07871342, 0.12960596,
0.0105051 , 0.00689312, -0.03168776, -0.01650888, -0.01664159,
-0.0392631 , 0.01485326, 0.01542591, 0.04976993, 0.03047125,
-0.06873457, -0.00606309, -0.01566394, -0.00729234, 0.02118564,
-0.02180263, 0.04011146, -0.0738162 , -0.06278081, 0.00753566,
-0.06096083, -0.04877877, -0.01127802, 0.04446675, -0.02236561,
0.01035857, -0.02206145, -0.03292053, -0.01618384, -0.03100098,
0.0676478 , -0.10707849, -0.01150786, 0.04351562, -0.01791094,
-0.00431769, -0.0049645 , 0.04900745, 0.03982552, -0.03878992,
0.00659816, -0.02593656, -0.02570266, -0.12747312, 0.0494753 ,
-0.01329002, -0.00264208, 0.03973205, -0.100038 , -0.01836757,
0.0236165 , 0.02873622, 0.02456198, -0.02947933, -0.03124348,
-0.0517611 , -0.03143985, -0.04440191, -0.00843053, -0.00296288,
0.01514868, 0.01892053, 0.01062289, 0.00799673, 0.17570479,
-0.03454979, 0.04009602, 0.0713095 , -0.12413061, 0.00034242,
-0.06435894, 0.0845928 , 0.03802553, -0.1751569 , 0.01174091,
-0.08430453], dtype=float32), array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan], dtype=float32), array([138.71233 , 143.4597 , 52.366314, 239.61154 , 105.63001 ,
169.46217 , 168.73875 , 190.50938 , 188.52414 , 75.47664 ,
165.98553 , 169.36244 , 152.27237 , 66.28865 , 71.25762 ,
194.51697 , 100.27181 , 120.084465, 252.51112 , 108.817085,
127.83531 , 184.69302 , 52.401028, 172.47392 , 178.41951 ,
158.10571 , 107.729454, 239.42154 , 153.06093 , 210.01903 ,
171.95633 , 172.42186 , 94.53224 , 171.9558 , 98.768486,
107.27287 , 167.34947 , 121.79589 , 139.91867 , 185.08607 ,
113.509705, 156.48535 , 173.54575 , 74.16201 , 168.05954 ,
159.63252 , 96.90468 , 175.31023 , 111.47005 , 169.68703 ,
146.08734 , 76.32107 , 150.7184 , 197.71414 , 111.61184 ,
159.9483 , 153.95746 , 201.43568 , 141.50627 , 141.01636 ,
126.97432 , 42.3665 , 165.11559 , 165.68098 , 160.95749 ,
176.41151 , 147.87408 , 119.34812 , 51.771393, 105.82823 ,
182.31734 , 219.16847 , 136.30508 , 225.81941 , 158.1635 ,
193.53983 , 140.47253 , 142.7074 , 135.03156 , 129.84628 ,
150.3206 , 156.24223 , 190.24785 , 114.009544, 132.7888 ,
224.1297 , 105.6914 , 138.42787 , 213.81845 , 103.33176 ,
202.3564 , 161.14067 , 187.62227 , 176.96579 , 211.05275 ,
113.52303 ], dtype=float32)]
Yes batch_normalization_12
[array([ 0.01501185, 0.29032254, 0.03455493, 0.07597651, -0.07218402,
-0.16735457, 0.03332845, -0.00645222, 0.02006806, -0.03635798,
0.07917645, -0.05444119, -0.16789341, -0.086788 , 0.12365555,
-0.19256745, 0.03382557, 0.04357202, -0.07722911, 0.03080065,
-0.13690165, 0.02521633, 0.04587894, -0.025393 , -0.1004908 ,
-0.04962835, 0.09113129, 0.00759747, 0.2795353 , -0.23438764,
-0.1480339 , -0.02406131], dtype=float32), array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan], dtype=float32),