我试图验证卷积定理,以将其用作我项目的一部分。我实现了一个自定义Keras层来执行基于FFT的卷积(Fourier_Conv2D()),但它的运行速度比Keras的Conv2D层慢。我认为这可能是因为Conv2D层的内部实现得到了很好的优化。因此,我又编写了另一个自定义层,以朴素的方式执行空间卷积(Spatial_convolution())。然而基于FFT的层甚至比这种朴素实现还要慢。据我所知,基于FFT的卷积只有在内核较大时才更快。我在28x28大小的图像数据集(MNIST)上尝试了从3x3、5x5、7x7……到19x19的内核。在所有内核大小中,每个训练步骤所花费的最长时间为40毫秒,而基于FFT的方法则需要60毫秒(请注意,基于FFT的方法所用时间与内核大小无关,因为在这种方法中我初始化的是与图像同样大小的复数内核)。
这是两层的代码-
FOURIER CONVOLUTION
class Fourier_Conv2D(Layer):
    """Trainable element-wise filtering in the 2-D Fourier domain.

    The layer rFFTs a real-valued input, multiplies the spectrum by a
    trainable complex kernel of the same size, reduces over axis 3, and
    stacks one complex output map per kernel.

    NOTE(review): no inverse FFT is applied, so the output is a complex64
    spectrum rather than a spatial feature map — confirm downstream layers
    expect that. Also, tf.spectral.rfft2d transforms the innermost two
    axes, while the kernel shape is taken verbatim from input_shape[1:4];
    verify the intended data layout (channels-first vs channels-last).
    """

    def __init__(self, no_of_kernels, **kwargs):
        # Halving mirrors the one-sided length of a real FFT (n//2 + 1),
        # but here it shrinks the *number of kernels*, not a spectral
        # dimension — presumably intentional; TODO confirm.
        self.no_of_kernels = int(no_of_kernels / 2) + 1
        super(Fourier_Conv2D, self).__init__(**kwargs)

    def build(self, input_shape):
        # One complex weight per input position per kernel, stored as two
        # real trainable variables (real + imaginary parts).
        self.kernel_shape = (int(input_shape[1]), int(input_shape[2]),
                             int(input_shape[3]), self.no_of_kernels)
        # Fix: give the two variables distinct names — both were 'kernel',
        # which produces ambiguous/uniquified variable names in Keras.
        self.kernel_r = self.add_weight(name='kernel_r',
                                        shape=self.kernel_shape,
                                        initializer='uniform',
                                        trainable=True)
        self.kernel_i = self.add_weight(name='kernel_i',
                                        shape=self.kernel_shape,
                                        initializer='uniform',
                                        trainable=True)
        super(Fourier_Conv2D, self).build(input_shape)

    def call(self, x):
        # Fix: assemble the complex kernel here instead of in build().
        # Under eager execution, tf.complex() in build() would snapshot the
        # *initial* weight values forever; in TF1 graph mode this move is
        # behaviorally equivalent.
        self.kernel = tf.complex(self.kernel_r, self.kernel_i)
        if x.dtype == 'float32':
            # One-sided 2-D FFT over the innermost two axes.
            x = tf.spectral.rfft2d(x)
        outputs = tf.TensorArray(dtype='complex64',
                                 size=self.no_of_kernels,
                                 element_shape=[None, self.kernel_shape[0],
                                                self.kernel_shape[1]])
        index = tf.constant(0)

        def condition(i, outputs):
            return i < self.no_of_kernels

        def body(i, outputs):
            # Element-wise product with the i-th complex kernel, then
            # reduction over axis 3.
            op = tf.reduce_sum(tf.multiply(x, self.kernel[:, :, :, i]), 3)
            return i + 1, outputs.write(i, op)

        _, output = tf.while_loop(condition, body, loop_vars=[index, outputs])
        output = output.stack()                    # (kernels, batch, d1, d2)
        output = tf.transpose(output, perm=[1, 2, 3, 0])  # channels-last
        output.set_shape([None, self.kernel_shape[0], self.kernel_shape[1],
                          self.no_of_kernels])
        # Fix: removed stray debug print(output).
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[2],
                self.no_of_kernels)
SPATIAL CONVOLUTION
class Spatial_convolution(Layer):
    """Naive 'valid' 2-D convolution built from nested tf.while_loops.

    Works for square images and square kernels only. Each output scalar is
    produced by slicing a k x k x C window from the input and reducing its
    element-wise product with one kernel. Output shape:
    (batch, n-k+1, n-k+1, no_of_kernels).

    NOTE(review): the row/column loops start at k//2 and stop at n-k+1, so
    TensorArray slots 0..k//2-1 are never written (they stack as zeros) and
    the last window positions are never visited — this looks like an
    off-by-offset bug, preserved here unchanged to keep behavior identical.
    """

    def __init__(self, no_of_kernels, kernel_size, **kwargs):
        self.no_of_kernels = no_of_kernels
        self.kernel_size = kernel_size  # (k, k); square kernels only
        super(Spatial_convolution, self).__init__(**kwargs)

    def build(self, input_shape):
        # Kernel laid out channels-last: (k, k, in_channels, no_of_kernels).
        self.kernel_shape = (int(self.kernel_size[0]), int(self.kernel_size[1]),
                             int(input_shape[3]), self.no_of_kernels)
        self.kernel = self.add_weight(name='kernel',
                                      shape=self.kernel_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.inputshape = input_shape
        # Fix: compute the 'valid' output size here so that
        # compute_output_shape() works even before call() has ever run
        # (previously self.outshape was only assigned inside call(), so an
        # early shape query raised AttributeError).
        self.outshape = int(input_shape[1]) - int(self.kernel_size[0]) + 1
        super(Spatial_convolution, self).build(input_shape)

    def call(self, x):
        k = int(self.kernel_size[0])
        n = int(self.inputshape[1])  # assumes height == width
        self.outshape = n - k + 1    # same value already set in build()

        out = tf.TensorArray(dtype='float32', size=self.no_of_kernels,
                             element_shape=[None, self.outshape, self.outshape])
        o = tf.constant(0)

        # Innermost loop: scan columns j for a fixed row i and kernel o.
        def bodyj(j, i, o, row):
            # k x k x C window centred at (i, j), fully reduced against
            # kernel o -> one scalar per batch element.
            op = tf.reduce_sum(
                tf.multiply(x[:, i - int(k / 2):i + int(k / 2) + 1,
                              j - int(k / 2):j + int(k / 2) + 1, :],
                            self.kernel[:, :, :, o]),
                [1, 2, 3])
            return j + 1, i, o, row.write(j, op)

        def condj(j, i, o, row):
            return j < self.outshape

        # Middle loop: scan rows i for a fixed kernel o.
        def bodyi(i, o, col):
            j = tf.constant(int(k / 2))
            row = tf.TensorArray(dtype='float32', size=self.outshape,
                                 element_shape=[None])
            j, i, o, row = tf.while_loop(condj, bodyj,
                                         loop_vars=[j, i, o, row])
            row = row.stack()                      # (outshape, batch)
            row = tf.transpose(row, perm=[1, 0])   # -> (batch, outshape)
            row.set_shape([None, self.outshape])
            return i + 1, o, col.write(i, row)

        def condi(i, o, col):
            return i < self.outshape

        # Outer loop: one full image scan per kernel o.
        def bodyo(o, out):
            i = tf.constant(int(k / 2))
            col = tf.TensorArray(dtype='float32', size=self.outshape,
                                 element_shape=[None, self.outshape])
            i, o, col = tf.while_loop(condi, bodyi, loop_vars=[i, o, col])
            col = col.stack()                        # (outshape, batch, outshape)
            col = tf.transpose(col, perm=[1, 0, 2])  # -> (batch, H, W)
            col.set_shape([None, self.outshape, self.outshape])
            return o + 1, out.write(o, col)

        def condo(o, out):
            return o < self.no_of_kernels

        o, out = tf.while_loop(condo, bodyo, loop_vars=[o, out])
        out = out.stack()                            # (kernels, batch, H, W)
        out = tf.transpose(out, perm=[1, 2, 3, 0])   # channels-last
        out.set_shape([None, self.outshape, self.outshape, self.no_of_kernels])
        return out

    def compute_output_shape(self, input_shape):
        # self.outshape is guaranteed to exist once build() has run.
        return (input_shape[0], self.outshape, self.outshape,
                self.no_of_kernels)
有人能解释一下为什么会出现这种情况吗?