Convolution with a function as the kernel

Time: 2018-09-21 14:14:57

Tags: python tensorflow

I need to experiment with a convolution whose kernel is not constant but instead depends on the input patch (I will call it a filter to distinguish the two) and is computed by a function f.

So what I need is something like this:

conv2d :: 
    R^(batch x height x width x in_channel) x 
    f : 
        R ^ (filter_height x filter_width x in_channels) 
        -> R ^ (filter_height x filter_width x in_channels x out_channels)
    -> out

What I do not want is to merely evaluate f on each patch of the convolution. It is crucial for my application that f only generates the filter for each patch of the input, and that the filter is then applied to that patch.
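To make the requirement concrete, here is a minimal NumPy sketch (my own illustration, not part of the question) of the desired semantics: a hypothetical filter-generating function `f` maps each patch to a filter, and that filter is then applied to the same patch. All sizes and the particular `f` are made up for demonstration.

```python
import numpy as np

H, W, C_IN, C_OUT = 8, 9, 3, 5  # hypothetical sizes
FH, FW = 3, 3

def f(patch):
    # Hypothetical filter-generating function: the filter depends on the patch.
    # Here it is simply a constant filter filled with the patch mean.
    return np.full((FH, FW, C_IN, C_OUT), patch.mean())

def conv2d_with_filter_fn(img, f):
    # "Valid" convolution where the kernel at each output position
    # is produced by f from the input patch at that position.
    out_h, out_w = img.shape[0] - FH + 1, img.shape[1] - FW + 1
    out = np.empty((out_h, out_w, C_OUT))
    for i in range(out_h):
        for j in range(out_w):
            patch = img[i:i + FH, j:j + FW]  # (FH, FW, C_IN)
            filt = f(patch)                  # (FH, FW, C_IN, C_OUT)
            out[i, j] = np.einsum('pqc,pqcd->d', patch, filt)
    return out

img = np.random.random((H, W, C_IN))
out = conv2d_with_filter_fn(img, f)
print(out.shape)  # (6, 7, 5)
```

This loop version is far too slow for real use, but it pins down exactly what the operation should compute.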

Can this be hacked together somehow using tf? If not, is there a reasonable way to extend tf to give me the functionality I need? I have never written CUDA, but I don't shy away from it.

1 answer:

Answer 0 (score: 2)

Edit:

As the OP suggested, given the complexity of the operation, using tf.map_fn may make it more tractable:

import tensorflow as tf
import numpy as np

mode_same = True  # True to make output same size as input
BATCH_SIZE = 10
HEIGHT = 100
WIDTH = 200
IN_CHANNELS = 3
FILTER_HEIGHT = 10
FILTER_WIDTH = 7
OUT_CHANNELS = 5

def make_img_filters(img):
    # Dummy filters function
    img_shape = tf.shape(img)
    img_height = img_shape[0]
    img_width = img_shape[1]
    img_ch = img_shape[2]
    filters_shape = (img_height, img_width, FILTER_HEIGHT, FILTER_WIDTH, img_ch, OUT_CHANNELS)
    return tf.zeros(filters_shape, dtype=img.dtype)

def filter_img(img, mode_same=True):
    img_filters = make_img_filters(img)
    # Shapes
    img_shape = tf.shape(img)
    img_height = img_shape[0]
    img_width = img_shape[1]
    filters_shape = tf.shape(img_filters)
    filter_height = filters_shape[2]
    filter_width = filters_shape[3]
    # Image margins to pad or crop
    margin_bottom = filter_height // 2
    margin_top = tf.maximum(filter_height - margin_bottom - 1, 0)
    margin_right = filter_width // 2
    margin_left = tf.maximum(filter_width - margin_right - 1, 0)
    # Pad or crop depending on "same" or "valid" mode
    img_pad = img
    img_filters_crop = img_filters
    if mode_same:
        img_pad = tf.pad(img, [[margin_top, margin_bottom], [margin_left, margin_right], [0, 0]])
        img_height += margin_top + margin_bottom
        img_width += margin_left + margin_right
    else:
        img_filters_crop = img_filters[margin_top:img_height - margin_bottom, margin_left:img_width - margin_right]
    # Make tensor of image patches
    # This could be replaced with tf.while_loop and tf.TensorArray
    img_extend = tf.stack([img_pad[i:(img_height - (FILTER_HEIGHT - i - 1))] for i in range(FILTER_HEIGHT)], axis=2)
    img_extend = tf.stack([img_extend[:, i:(img_width - (FILTER_WIDTH - i - 1))] for i in range(FILTER_WIDTH)], axis=3)
    # Compute "convolution" result with einsum
    img_result = tf.einsum('hwpqc,hwpqcd->hwd', img_extend, img_filters_crop)
    # Equivalently, with broadcast multiplication and reduction
    # (this would overwrite the einsum result, so it is left commented out):
    # img_result = tf.reduce_sum(img_extend[..., tf.newaxis] * img_filters_crop, axis=(2, 3))
    return img_result

# Input
imgs = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, IN_CHANNELS])
# Note: this placeholder is unused in this version, since the per-image
# filters are produced inside filter_img by make_img_filters
filters = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS])
# Compute "convolution" with mapping
result = tf.map_fn(lambda img: filter_img(img, mode_same), imgs)

# Test
with tf.Session() as sess:
    imgs_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, IN_CHANNELS))
    filters_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS))
    value = sess.run(result, feed_dict={imgs: imgs_random, filters: filters_random})
    print(value.shape)
    # (10, 91, 194, 5) with mode_same=False, (10, 100, 200, 5) with mode_same=True
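As a sanity check on the margin arithmetic and the "valid" output size in the comment above, here is the same computation in plain Python: the two margins along each dimension sum to filter size minus one, which is exactly how much that dimension shrinks without padding.

```python
# Margin arithmetic from the code above: margins sum to filter size - 1,
# so "valid" mode shrinks each spatial dimension by that amount.
HEIGHT, WIDTH = 100, 200
FILTER_HEIGHT, FILTER_WIDTH = 10, 7

margin_bottom = FILTER_HEIGHT // 2                       # 5
margin_top = max(FILTER_HEIGHT - margin_bottom - 1, 0)   # 4
margin_right = FILTER_WIDTH // 2                         # 3
margin_left = max(FILTER_WIDTH - margin_right - 1, 0)    # 3

out_h = HEIGHT - (margin_top + margin_bottom)  # 100 - 9 = 91
out_w = WIDTH - (margin_left + margin_right)   # 200 - 6 = 194
print(out_h, out_w)  # 91 194
```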

If I have understood you correctly, this should do what you need. It may not be the most efficient way, but I am not sure it can be done any faster with standard TensorFlow operations.

import tensorflow as tf
import numpy as np

mode_same = False  # True to make output same size as input
BATCH_SIZE = 10
HEIGHT = 100
WIDTH = 200
IN_CHANNELS = 3
FILTER_HEIGHT = 10
FILTER_WIDTH = 7
OUT_CHANNELS = 5
# Input
imgs = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, IN_CHANNELS])
filters = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS])
# Shapes
imgs_shape = tf.shape(imgs)
img_height = imgs_shape[1]
img_width = imgs_shape[2]
filters_shape = tf.shape(filters)
filter_height = filters_shape[3]
filter_width = filters_shape[4]
# Image margins to pad or crop
margin_bottom = filter_height // 2
margin_top = tf.maximum(filter_height - margin_bottom - 1, 0)
margin_right = filter_width // 2
margin_left = tf.maximum(filter_width - margin_right - 1, 0)
# Pad or crop depending on "same" or "valid" mode
imgs_pad = imgs
filters_crop = filters
if mode_same:
    imgs_pad = tf.pad(imgs, [[0, 0], [margin_top, margin_bottom], [margin_left, margin_right], [0, 0]])
    img_height += margin_top + margin_bottom
    img_width += margin_left + margin_right
else:
    filters_crop = filters[:, margin_top:img_height - margin_bottom, margin_left:img_width - margin_right]
# Make tensor of image patches
# This could be replaced with tf.while_loop and tf.TensorArray
imgs_extend = tf.stack([imgs_pad[:, i:(img_height - (FILTER_HEIGHT - i - 1))] for i in range(FILTER_HEIGHT)], axis=3)
imgs_extend = tf.stack([imgs_extend[:, :, i:(img_width - (FILTER_WIDTH - i - 1))] for i in range(FILTER_WIDTH)], axis=4)
# Compute "convolution" result
result = tf.einsum('ahwpqc,ahwpqcd->ahwd', imgs_extend, filters_crop)
# Test
with tf.Session() as sess:
    imgs_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, IN_CHANNELS))
    filters_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS))
    value = sess.run(result, feed_dict={imgs: imgs_random, filters: filters_random})
    print(value.shape)
    # (10, 91, 194, 5) with mode_same=False, (10, 100, 200, 5) with mode_same=True
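The patch-stacking-plus-einsum trick above can be cross-checked in plain NumPy (an independent sketch with small made-up sizes, not the TensorFlow graph itself): building the `(h, w, FH, FW, C)` patch tensor from shifted slices and contracting it against the per-position filters gives the same result as applying each filter to its patch in an explicit loop.

```python
import numpy as np

H, W, FH, FW, C_IN, C_OUT = 6, 7, 3, 2, 3, 4  # small hypothetical sizes

img = np.random.random((H, W, C_IN))
filters = np.random.random((H - FH + 1, W - FW + 1, FH, FW, C_IN, C_OUT))

# Patch tensor built from shifted slices, mirroring the tf.stack calls above.
ext = np.stack([img[i:H - (FH - i - 1)] for i in range(FH)], axis=2)
ext = np.stack([ext[:, i:W - (FW - i - 1)] for i in range(FW)], axis=3)
# ext shape: (H - FH + 1, W - FW + 1, FH, FW, C_IN)

result = np.einsum('hwpqc,hwpqcd->hwd', ext, filters)

# Explicit reference: apply each per-position filter to its own patch.
ref = np.empty_like(result)
for i in range(result.shape[0]):
    for j in range(result.shape[1]):
        ref[i, j] = np.einsum('pqc,pqcd->d', img[i:i + FH, j:j + FW], filters[i, j])

print(np.allclose(result, ref))  # True
```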