My goal is to create a custom layer in TensorFlow 2 with a predefined, fixed, and sparse weight structure. For memory reasons, we need to keep the weight matrix as a trainable SparseTensor.
Our solution so far is to represent the SparseTensor's weights/values as a standard trainable object in a custom layer ("self.w") and to handle the sparse matrix multiplication and backpropagation with a custom gradient ("matmul_dense_sparse").
My question is: how can I backpropagate through a sparse network architecture with a custom gradient?
Here is a toy example (TF 2.1.0; eager execution):
import numpy as np
import tensorflow as tf

tf.__version__
# 2.1.0

@tf.custom_gradient
def matmul_dense_sparse(dense, sparse_mat):
    sparse_indices = sparse_mat.indices
    ta = tf.transpose(dense)
    b = sparse_mat
    tb = tf.sparse.transpose(b)
    # forward pass: res = dense @ sparse_mat, computed via the transposed product
    res = tf.transpose(tf.sparse.sparse_dense_matmul(tb, ta))

    def grad_fn(grad_res):
        print(grad_res)
        # gradient w.r.t. the dense input: grad_res @ sparse_mat^T
        tgrad = tf.transpose(grad_res)
        grad_dense = tf.transpose(tf.sparse.sparse_dense_matmul(b, tgrad))
        # gradient w.r.t. the stored values: for each nonzero entry (i, j),
        # sum dense[:, i] * grad_res[:, j] over the batch
        dense_edge_starts = tf.gather(dense, sparse_indices[:, 0], axis=1)
        grad_res_edge_ends = tf.gather(grad_res, sparse_indices[:, 1], axis=1)
        grad_values = tf.reduce_sum(tf.multiply(dense_edge_starts, grad_res_edge_ends), axis=0)
        return grad_dense, grad_values

    return res, grad_fn
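
# A quick sanity check (my own sketch, not part of the failing example): verify
# the hand-derived grad_values against autodiff on a dense stand-in for the
# kernel, with grad_res taken as all-ones (i.e. loss = sum of the outputs).
chk_idx = np.array([[0, 2], [1, 0]], dtype=np.int64)
chk_vals = tf.Variable([0.5, -1.5])
chk_x = tf.constant(np.random.rand(4, 3), dtype=tf.float32)
with tf.GradientTape() as tape:
    chk_kernel = tf.scatter_nd(chk_idx, chk_vals, [3, 5])  # dense stand-in
    chk_out = tf.reduce_sum(tf.matmul(chk_x, chk_kernel))
ref_grad = tape.gradient(chk_out, chk_vals)
ones = tf.ones((4, 5))
manual_grad = tf.reduce_sum(tf.gather(chk_x, chk_idx[:, 0], axis=1) *
                            tf.gather(ones, chk_idx[:, 1], axis=1), axis=0)
print(np.allclose(ref_grad.numpy(), manual_grad.numpy()))  # expect True
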
# custom layer with sparse weight architecture
class SparseLinear(tf.keras.layers.Layer):
    def __init__(self, indices, units=32):
        super(SparseLinear, self).__init__()
        self.units = units
        self.indices = indices

    def build(self, input_shape):
        # self.w corresponds to the trainable "values" in the sparse tensor
        self.w = self.add_weight(shape=(self.indices.shape[0],),
                                 initializer='random_normal',
                                 trainable=True)
        self.sparse_mat = tf.sparse.reorder(
            tf.sparse.SparseTensor(indices=self.indices, values=self.w,
                                   dense_shape=[input_shape[-1], self.units]))

    def call(self, x):
        return matmul_dense_sparse(x, self.sparse_mat)
# non-zero weights in the SparseTensor
sp_idxs = np.array([[0, 2], [1, 2], [8, 1], [9, 3], [10, 5]])
train_x = np.random.rand(20, 100)
train_y = np.random.rand(20, 1)

# build the keras model
n_features = train_x.shape[1]
inputs = tf.keras.Input(shape=(n_features,), name='snp_input')
layers = SparseLinear(indices=sp_idxs, units=32)(inputs)
layers = tf.keras.layers.Dense(12, activation='linear')(layers)
layers = tf.keras.layers.Dense(1, activation='linear')(layers)
model = tf.keras.Model(inputs=inputs, outputs=layers, name='model')

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])

history = model.fit(train_x, train_y)
weights = model.get_weights()
And the corresponding error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-34c364b12112> in <module>()
51 n_features = train_x.shape[1]
52 inputs = tf.keras.Input(shape=(n_features,), name='snp_input')
---> 53 layers = Linear(indices=sp_idxs, units=32)(inputs)
54 layers = tf.keras.layers.Dense(12, activation='linear')(layers)
~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
771 not base_layer_utils.is_in_eager_or_tf_function()):
772 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 773 outputs = call_fn(cast_inputs, *args, **kwargs)
774 # Wrap Tensors in `outputs` in `tf.identity` to avoid
775 # circular dependencies.
~/.local/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
<ipython-input-11-34c364b12112>:37 call *
return matmul_dense_sparse(x, self.sparse_mat)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:256 __call__
return self._d(self._f, a, k)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:212 decorated
return _graph_mode_decorator(wrapped, args, kwargs)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 _graph_mode_decorator
args = [ops.convert_to_tensor(x) for x in args]
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 <listcomp>
args = [ops.convert_to_tensor(x) for x in args]
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1314 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:317 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:258 constant
allow_broadcast=True)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:296 _constant_impl
allow_broadcast=allow_broadcast))
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_util.py:547 make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=tf.Tensor(
[[ 0 2]
[ 1 2]
[ 8 1]
[ 9 3]
[10 5]], shape=(5, 2), dtype=int64), values=tf.Tensor([ 0.03837506 -0.07365214 -0.02256368 -0.05631712 0.05937713], shape=(5,), dtype=float32), dense_shape=tf.Tensor([100 32], shape=(2,), dtype=int64)). Consider casting elements to a supported type.
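Reading the traceback, the call fails before matmul_dense_sparse ever executes: the graph-mode path of tf.custom_gradient (_graph_mode_decorator) runs ops.convert_to_tensor over every positional argument, and a SparseTensor is a composite tensor with no such conversion. One workaround I can think of (a minimal sketch under that assumption; matmul_dense_sparse_v2 is a hypothetical rewrite, not tested code) is to pass only plain tensors across the custom_gradient boundary and rebuild the SparseTensor inside:

@tf.custom_gradient
def matmul_dense_sparse_v2(dense, indices, values, dense_shape):
    # rebuild the SparseTensor from plain-tensor components, which
    # convert_to_tensor can handle individually
    sparse_mat = tf.SparseTensor(indices, values, dense_shape)
    res = tf.transpose(tf.sparse.sparse_dense_matmul(
        tf.sparse.transpose(sparse_mat), tf.transpose(dense)))

    def grad_fn(grad_res):
        # grad w.r.t. the dense input: grad_res @ kernel^T
        grad_dense = tf.transpose(tf.sparse.sparse_dense_matmul(
            sparse_mat, grad_res, adjoint_b=True))
        # grad w.r.t. the stored values, same formula as grad_fn above
        grad_values = tf.reduce_sum(
            tf.gather(dense, indices[:, 0], axis=1) *
            tf.gather(grad_res, indices[:, 1], axis=1), axis=0)
        # indices and dense_shape are not differentiable
        return grad_dense, None, grad_values, None

    return res, grad_fn

# in SparseLinear.call this would become something like:
#     return matmul_dense_sparse_v2(x, self.indices, self.w,
#                                   [input_n_feat, self.units])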
Answer 0 (score: 0)
Posting a solution that seems to work well:
class SparseLinear(tf.keras.layers.Layer):
    def __init__(self, indices, units=32):
        super(SparseLinear, self).__init__()
        self.units = units
        self.indices = indices

    def build(self, input_shape):
        self.n_feat = input_shape[-1]
        self.w = self.add_weight(shape=(len(self.indices),),
                                 initializer=tf.keras.initializers.he_normal(),
                                 trainable=True)

    def call(self, x):
        # assemble the SparseTensor from the trainable values inside call(),
        # so TensorFlow's built-in gradient for sparse_dense_matmul handles
        # backpropagation into self.w -- no custom gradient needed
        kernel = tf.SparseTensor(self.indices, self.w, [self.n_feat, self.units])
        return tf.sparse.sparse_dense_matmul(x, kernel)
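
This sidesteps tf.custom_gradient entirely: because the kernel is rebuilt from self.w on every forward pass, the gradient already registered for tf.sparse.sparse_dense_matmul flows back into the trainable values. A minimal usage sketch against the toy data from the question (the names sp_idxs, train_x, and train_y are assumed from above):

inputs = tf.keras.Input(shape=(train_x.shape[1],), name='snp_input')
hidden = SparseLinear(indices=sp_idxs, units=32)(inputs)
hidden = tf.keras.layers.Dense(12, activation='linear')(hidden)
outputs = tf.keras.layers.Dense(1, activation='linear')(hidden)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(loss='mse', optimizer=tf.keras.optimizers.RMSprop(0.001))
model.fit(train_x, train_y, epochs=2)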
Answer 1 (score: 0)
I have successfully implemented such a layer in tensorflow 1.13.1:
https://github.com/ArnovanHilten/GenNet/blob/master/utils/LocallyDirectedConnected.py
And for tensorflow 2:
https://github.com/ArnovanHilten/GenNet/blob/master/utils/LocallyDirectedConnected_tf2.py