I'm trying to figure out why tf.GradientTape().gradient sometimes returns None, so I tested the three loss functions below (mmd0(), mmd1(), mmd2()). Although mmd0 and mmd1 are written slightly differently, both still yield gradients; for mmd2, however, the gradients are None. I printed the loss from each of the three functions. Can anyone explain why it behaves this way?
def mmd0(x, y):  # x and y are lists of arbitrary length
    return x

def mmd1(x1, x2):  # x1 and x2 are lists of arbitrary length
    dis = sum([x**2 for x in x1])/len(x1) - sum([x**2 for x in x2])/len(x2)
    return dis**2

def mmd2(x, y):
    dis = x - y
    return [tf.convert_to_tensor(elem) for elem in dis]

def get_MMD_norm(errors, sigma=0.1):
    x2 = np.random.normal(0, sigma, len(errors))
    loss0 = mmd0(errors, x2)
    loss1 = mmd1(errors, x2)
    loss2 = mmd2(errors, x2)
    print("loss0:", loss0)
    print("loss1:", loss1)
    print("loss2:", loss2)
    return tf.cast(loss2, tf.float32)

def loss(model, x, y, sigma=0.1):
    y_ = model(x)  # y_.shape is (batch_size, 3) for the Iris dataset
    losses = []
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    for i in range(y.shape[0]):
        loss = loss_object(y_true=y[i], y_pred=y_[i])
        losses.append(loss)
    batch_loss = get_MMD_norm(losses)
    single_losses_list = [loss.numpy() for loss in losses]
    return tf.convert_to_tensor(batch_loss, dtype=np.float32), single_losses_list

def grad(model, inputs, targets, sigma=0.1):
    with tf.GradientTape() as tape:
        tape.watch(model.trainable_variables)
        batch_loss, single_losses = loss(model, inputs, targets, sigma=0.1)
    return tape.gradient(batch_loss, model.trainable_variables), batch_loss, single_losses

grads, batch_loss, single_losses = grad(model, features, labels)
print("grads:", grads)
print("batch_loss:", batch_loss)
##########################################################
loss0: [<tf.Tensor: id=39621, shape=(), dtype=float32, numpy=2.1656876>, <tf.Tensor: id=39659, shape=(), dtype=float32, numpy=2.057112>, <tf.Tensor: id=39697, shape=(), dtype=float32, numpy=2.2769136>, <tf.Tensor: id=39735, shape=(), dtype=float32, numpy=2.0263004>, <tf.Tensor: id=39773, shape=(), dtype=float32, numpy=2.1568372>, <tf.Tensor: id=39811, shape=(), dtype=float32, numpy=0.7392154>, <tf.Tensor: id=39849, shape=(), dtype=float32, numpy=0.7742219>, <tf.Tensor: id=39887, shape=(), dtype=float32, numpy=2.2176154>, <tf.Tensor: id=39925, shape=(), dtype=float32, numpy=1.0187237>, <tf.Tensor: id=39963, shape=(), dtype=float32, numpy=2.160415>, <tf.Tensor: id=40001, shape=(), dtype=float32, numpy=0.80997854>, <tf.Tensor: id=40039, shape=(), dtype=float32, numpy=0.70803094>, <tf.Tensor: id=40077, shape=(), dtype=float32, numpy=0.8207226>, <tf.Tensor: id=40115, shape=(), dtype=float32, numpy=0.82957774>, <tf.Tensor: id=40153, shape=(), dtype=float32, numpy=0.88732547>, <tf.Tensor: id=40191, shape=(), dtype=float32, numpy=0.90633464>, <tf.Tensor: id=40229, shape=(), dtype=float32, numpy=0.7932346>, <tf.Tensor: id=40267, shape=(), dtype=float32, numpy=2.1767666>, <tf.Tensor: id=40305, shape=(), dtype=float32, numpy=0.80166155>, <tf.Tensor: id=40343, shape=(), dtype=float32, numpy=0.7831647>, <tf.Tensor: id=40381, shape=(), dtype=float32, numpy=0.77431095>, <tf.Tensor: id=40419, shape=(), dtype=float32, numpy=0.82067406>, <tf.Tensor: id=40457, shape=(), dtype=float32, numpy=0.74510425>, <tf.Tensor: id=40495, shape=(), dtype=float32, numpy=2.1666338>, <tf.Tensor: id=40533, shape=(), dtype=float32, numpy=0.7922478>, <tf.Tensor: id=40571, shape=(), dtype=float32, numpy=0.73235756>, <tf.Tensor: id=40609, shape=(), dtype=float32, numpy=2.1792874>, <tf.Tensor: id=40647, shape=(), dtype=float32, numpy=0.919183>, <tf.Tensor: id=40685, shape=(), dtype=float32, numpy=0.761979>, <tf.Tensor: id=40723, shape=(), dtype=float32, numpy=2.1664479>, <tf.Tensor: id=40761, shape=(), dtype=float32, numpy=0.77892226>, <tf.Tensor: id=40799, shape=(), dtype=float32, numpy=0.99058735>]
loss1: tf.Tensor(4.158007, shape=(), dtype=float32)
loss2: [<tf.Tensor: id=40935, shape=(), dtype=float64, numpy=2.325676997771268>, <tf.Tensor: id=40936, shape=(), dtype=float64, numpy=1.9988182000798667>, <tf.Tensor: id=40937, shape=(), dtype=float64, numpy=2.303379813455908>, <tf.Tensor: id=40938, shape=(), dtype=float64, numpy=2.0615775258879356>, <tf.Tensor: id=40939, shape=(), dtype=float64, numpy=2.2949723624257774>, <tf.Tensor: id=40940, shape=(), dtype=float64, numpy=0.7019287657319235>, <tf.Tensor: id=40941, shape=(), dtype=float64, numpy=0.8522054859739794>, <tf.Tensor: id=40942, shape=(), dtype=float64, numpy=2.0819949907118125>, <tf.Tensor: id=40943, shape=(), dtype=float64, numpy=1.065878291073558>, <tf.Tensor: id=40944, shape=(), dtype=float64, numpy=2.1225998300026805>, <tf.Tensor: id=40945, shape=(), dtype=float64, numpy=0.9485520218242218>, <tf.Tensor: id=40946, shape=(), dtype=float64, numpy=0.7221746903906889>, <tf.Tensor: id=40947, shape=(), dtype=float64, numpy=0.9985009994522388>, <tf.Tensor: id=40948, shape=(), dtype=float64, numpy=0.9143119687525019>, <tf.Tensor: id=40949, shape=(), dtype=float64, numpy=0.9230117922853999>, <tf.Tensor: id=40950, shape=(), dtype=float64, numpy=1.0220225043292934>, <tf.Tensor: id=40951, shape=(), dtype=float64, numpy=0.8735972169951878>, <tf.Tensor: id=40952, shape=(), dtype=float64, numpy=2.1279260795512753>, <tf.Tensor: id=40953, shape=(), dtype=float64, numpy=0.9597649765787801>, <tf.Tensor: id=40954, shape=(), dtype=float64, numpy=0.8338326272407959>, <tf.Tensor: id=40955, shape=(), dtype=float64, numpy=0.6674084331022461>, <tf.Tensor: id=40956, shape=(), dtype=float64, numpy=0.8679296826013285>, <tf.Tensor: id=40957, shape=(), dtype=float64, numpy=0.8174893483228802>, <tf.Tensor: id=40958, shape=(), dtype=float64, numpy=2.212290299049252>, <tf.Tensor: id=40959, shape=(), dtype=float64, numpy=0.7304098620074719>, <tf.Tensor: id=40960, shape=(), dtype=float64, numpy=0.8463413221121661>, <tf.Tensor: id=40961, shape=(), dtype=float64, numpy=2.3081013094190443>, <tf.Tensor: id=40962, shape=(), dtype=float64, numpy=1.0314178020997722>, <tf.Tensor: id=40963, shape=(), dtype=float64, numpy=0.774951045805575>, <tf.Tensor: id=40964, shape=(), dtype=float64, numpy=2.127838465488091>, <tf.Tensor: id=40965, shape=(), dtype=float64, numpy=0.909498425717612>, <tf.Tensor: id=40966, shape=(), dtype=float64, numpy=1.0217239989370837>]
grads: [None, None, None, None, None, None]
batch_loss: tf.Tensor(
[2.325677 1.9988182 2.3033798 2.0615776 2.2949724 0.7019288
0.8522055 2.081995 1.0658783 2.1225998 0.948552 0.7221747
0.998501 0.91431195 0.9230118 1.0220225 0.8735972 2.127926
0.95976496 0.8338326 0.6674084 0.8679297 0.8174893 2.2122903
0.73040986 0.8463413 2.3081014 1.0314178 0.77495104 2.1278384
0.90949845 1.021724 ], shape=(32,), dtype=float32)
Answer 0 (score: 0)
Have you seen this answer? I think I ran into a similar problem, and I believe yours may be related. It comes down to how the loss is computed at some step along the way, such that the target tensors get "lost" between the start and the end of the tape. The answer linked there points out that the original poster's loss function returned numpy arrays rather than TensorFlow tensors, which prevented GradientTape from computing the gradients.
I may be wrong, since I'm far from a TensorFlow expert, but this is something that kept coming up while I was searching for a solution to a similar problem.
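As a minimal sketch of what that could look like here (mmd2_tf is a hypothetical name, not the poster's code, and this assumes the goal is a single scalar loss): if the difference and the reduction are built from TensorFlow ops on the stacked per-sample losses, instead of re-wrapping a Python list with tf.convert_to_tensor, the tape can keep the connection back to model.trainable_variables:

import numpy as np
import tensorflow as tf

def mmd2_tf(losses, sigma=0.1):
    # losses: Python list of scalar tf.Tensors from the cross-entropy loop
    noise = tf.constant(np.random.normal(0, sigma, len(losses)), dtype=tf.float32)
    errors = tf.stack(losses)               # keeps the graph connection to the model
    dis = errors - noise                    # elementwise difference, still a tf.Tensor
    return tf.reduce_mean(tf.square(dis))   # scalar loss the tape can differentiate

Returning this scalar from get_MMD_norm (instead of the list built by mmd2) should give non-None gradients, since nothing along the way is converted to numpy or re-created as a fresh tensor.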