我正在尝试利用TensorFlow 2.0的自动微分功能来自动计算金融工具中的某些梯度。通常,这涉及各种“基准点”之间的分段插值方案。最简单的示例如下:
import tensorflow as tf

# Benchmark maturities (in years) of the yield-curve knot points.
MATURITIES = tf.constant([1.0, 2.0, 3.0, 5.0, 7.0, 10.0, 12.0, 15.0, 20.0, 25.0])
# Semi-annual cash-flow times: 0.5, 1.0, ..., 25.0 years.
CASH_FLOW_TIMES = tf.constant([n * 0.5 for n in range(1, 51)])
# Knot yields -- the tf.Variable we differentiate with respect to.
YIELDS = tf.Variable([0.04153733, 0.0425888, 0.04662959, 0.05406879, 0.05728735, 0.0606996, 0.06182699, 0.05854381, 0.05376556, 0.0531946])

@tf.function
def linear(knot_y, knot_x, x):
    """Linear interpolation"""
    # Index of the left knot for each query point x, clamped to
    # [0, len(knot_x) - 2] so extrapolation reuses the edge segments.
    i = tf.maximum(tf.minimum(tf.searchsorted(knot_x, x, side="right") - 1, knot_x.shape[0] - 2), 0)
    y_i = tf.gather(knot_y, i)
    x_i = tf.gather(knot_x, i)
    # NOTE(review): the slope used here is y_i / x_i (a ray through the
    # origin), not (y_{i+1} - y_i) / (x_{i+1} - x_i); the printed output
    # below matches this formula -- confirm whether true knot-to-knot
    # interpolation was intended.
    return y_i + y_i / x_i * (x - x_i)

with tf.GradientTape() as tape:
    tape.watch(YIELDS)  # make sure ops on YIELDS are recorded
    y = linear(YIELDS, MATURITIES, CASH_FLOW_TIMES)
dydx = tape.gradient(y, YIELDS)  # comes back as IndexedSlices (from tf.gather)
y, dydx
输出以下内容:
(<tf.Tensor: id=1249, shape=(50,), dtype=float32, numpy=
array([0.02076866, 0.04153733, 0.06230599, 0.0425888 , 0.053236 ,
0.04662959, 0.05440119, 0.06217279, 0.06994438, 0.05406879,
0.05947567, 0.06488255, 0.07028943, 0.05728735, 0.0613793 ,
0.06547125, 0.06956321, 0.07365517, 0.07774712, 0.0606996 ,
0.06373458, 0.06676956, 0.06980454, 0.06182699, 0.06440312,
0.06697924, 0.06955536, 0.07213148, 0.07470761, 0.05854381,
0.06049527, 0.06244673, 0.06439819, 0.06634965, 0.06830111,
0.07025257, 0.07220403, 0.07415549, 0.07610695, 0.05376556,
0.0551097 , 0.05645384, 0.05779798, 0.05914212, 0.06048626,
0.06183039, 0.06317453, 0.06451868, 0.06586281, 0.06720695],
dtype=float32)>,
<tensorflow.python.framework.indexed_slices.IndexedSlices at 0x203027345c0>)
问题是,由于(我怀疑)tf.searchsorted
或tf.gather
的调用,得到的梯度是IndexedSlices
而不是张量。当我需要将多个tf.function
链接在一起以对证券进行估值时,这会带来问题。例如,假设我想先对YIELDS
变量做一个变换,再把结果传入linear
函数:
import tensorflow as tf

# Benchmark maturities (in years) of the yield-curve knot points.
MATURITIES = tf.constant([1.0, 2.0, 3.0, 5.0, 7.0, 10.0, 12.0, 15.0, 20.0, 25.0])
# Semi-annual cash-flow times: 0.5, 1.0, ..., 25.0 years.
CASH_FLOW_TIMES = tf.constant([n * 0.5 for n in range(1, 51)])
# Knot yields -- the tf.Variable we differentiate with respect to.
YIELDS = tf.Variable([0.04153733, 0.0425888, 0.04662959, 0.05406879, 0.05728735, 0.0606996, 0.06182699, 0.05854381, 0.05376556, 0.0531946])

@tf.function
def logdf_from_yields(yields, times):
    # log discount factor: log(DF) = -y * t (continuous compounding).
    return tf.math.multiply(yields, times) * -1.0

@tf.function
def linear(knot_y, knot_x, x):
    """Linear interpolation"""
    # Index of the left knot for each query point x, clamped to
    # [0, len(knot_x) - 2] so extrapolation reuses the edge segments.
    i = tf.maximum(tf.minimum(tf.searchsorted(knot_x, x, side="right") - 1, knot_x.shape[0] - 2), 0)
    y_i = tf.gather(knot_y, i)
    x_i = tf.gather(knot_x, i)
    # NOTE(review): slope is y_i / x_i (ray through the origin), not the
    # knot-to-knot slope -- see the first snippet; confirm intent.
    return y_i + y_i / x_i * (x - x_i)

with tf.GradientTape() as tape:
    tape.watch(YIELDS)  # make sure ops on YIELDS are recorded
    # Chaining two tf.functions: the IndexedSlices gradient emitted by
    # linear's tf.gather triggers the AssertionError shown below.
    y = linear(logdf_from_yields(YIELDS, MATURITIES), MATURITIES, CASH_FLOW_TIMES)
dydx = tape.gradient(y, YIELDS)
y, dydx
此代码引发以下异常:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-9-2bc3457894ea> in <module>
20 tape.watch(YIELDS)
21 y = linear(logdf_from_yields(YIELDS, MATURITIES), MATURITIES, CASH_FLOW_TIMES)
---> 22 dydx = tape.gradient(y, YIELDS)
23 y, dydx
~\Anaconda3\lib\site-packages\tensorflow\python\eager\backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
1000 output_gradients=output_gradients,
1001 sources_raw=flat_sources_raw,
-> 1002 unconnected_gradients=unconnected_gradients)
1003
1004 if not self._persistent:
~\Anaconda3\lib\site-packages\tensorflow\python\eager\imperative_grad.py in imperative_grad(tape, target, sources, output_gradients, sources_raw, unconnected_gradients)
74 output_gradients,
75 sources_raw,
---> 76 compat.as_str(unconnected_gradients.value))
~\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py in backward_function(*args)
904 if a is not None and i not in skip_positions]
905 return self._backward_graph_function._call_flat( # pylint: disable=protected-access
--> 906 list(args) + side_outputs)
907
908 tape.record_operation(self._forward_function.signature.name, real_outputs,
~\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args)
610 if any(isinstance(a, composite_tensor.CompositeTensor) for a in args):
611 raise AssertionError("Expected all args to be Tensors or Variables; "
--> 612 "but got CompositeTensor: %r" % args)
613
614 if (tape.could_possibly_record() or
AssertionError: Expected all args to be Tensors or Variables; but got CompositeTensor: [<tensorflow.python.framework.indexed_slices.IndexedSlices object at 0x00000203013C2128>, <tf.Tensor: id=1024, shape=(), dtype=float32, numpy=-1.0>, <tf.Tensor: id=1025, shape=(10,), dtype=float32, numpy=
array([0.04153733, 0.0851776 , 0.13988876, 0.27034396, 0.40101147,
0.606996 , 0.74192387, 0.87815714, 1.0753112 , 1.329865 ],
dtype=float32)>, <tf.Tensor: id=1026, shape=(10,), dtype=float32, numpy=array([ 1., 2., 3., 5., 7., 10., 12., 15., 20., 25.], dtype=float32)>, <tf.Tensor: id=1027, shape=(10,), dtype=float32, numpy=
array([0.04153733, 0.0425888 , 0.04662959, 0.05406879, 0.05728735,
0.0606996 , 0.06182699, 0.05854381, 0.05376556, 0.0531946 ],
dtype=float32)>]
检查回溯后,我发现唯一不是张量或变量的项就是IndexedSlices,我同样相信它来自linear
函数。
有没有办法重写这个分段linear
函数(例如,换用其他算子),使它能与梯度带(GradientTape)一起正常使用?
我尝试了什么?
我看了this question,但似乎作者正在尝试使索引成为变量。在这里,我应该提前知道所有索引(例如,对于问题的任何给定实例,MATURITIES
和CASH_FLOW_TIMES
都是固定的……只有YIELDS
是tf.Variable
)。所以我不确定它如何应用。
This question也很有趣,因为它推荐使用tf.convert_to_tensor
,但是我不确定如何在我的情况下应用它。
答案 0 :(得分:3)
你的梯度本身没有问题。在某些情况下(例如tf.gather
),TensorFlow会使用tf.IndexedSlices
来表示稀疏梯度,但你可以像下面这样轻松地将其转换为常规张量(示例在图模式下,但该函数在eager模式下同样适用):
import tensorflow as tf

def convert_indexed_slices_to_tensor(idx_slices):
    """Densify a tf.IndexedSlices gradient into a regular dense tensor.

    scatter_nd writes each row of `values` at its corresponding index
    into a zero tensor of the full `dense_shape`; rows that were never
    gathered stay zero.
    """
    return tf.scatter_nd(tf.expand_dims(idx_slices.indices, 1),
                         idx_slices.values, idx_slices.dense_shape)

# Test
with tf.Graph().as_default(), tf.Session() as sess:
    a = tf.constant([1., 2., 3., 4.])
    b = tf.gather(a, [0, 2])
    # Gradient of a gather is sparse by default.
    g = tf.gradients(b, a)[0]
    print(g)
    # IndexedSlices(indices=..., values=..., dense_shape=...)
    g_dense = convert_indexed_slices_to_tensor(g)
    # Tensor(...)
    print(sess.run(g_dense))
    # [1. 0. 1. 0.]
如果想强制tf.gather
产生常规张量形式的梯度,可以用tf.custom_gradient
将它包装起来,如下所示:
import tensorflow as tf

@tf.custom_gradient
def gather_dense_gradient(params, indices, validate_indices=None, name=None):
    """tf.gather whose gradient w.r.t. params is a dense tensor.

    Same forward behavior as tf.gather; the custom grad scatters the
    upstream gradients back into params' dense shape instead of
    returning IndexedSlices.
    """
    def grad(ys):
        # Second return value None: no gradient flows to the integer
        # `indices` argument.
        return tf.scatter_nd(tf.expand_dims(indices, 1), ys, tf.shape(params)), None
    return tf.gather(params, indices, validate_indices, name), grad

# Test
with tf.Graph().as_default(), tf.Session() as sess:
    a = tf.constant([1., 2., 3., 4.])
    b = gather_dense_gradient(a, [0, 2])
    g = tf.gradients(b, a)[0]
    print(sess.run(g))
    # [1. 0. 1. 0.]
请注意,这里假设axis=0
且indices
是一维的;其他情况同样可以处理,只是需要更多的工作。