Question

我有一个张量，例如一批灰度图像（所以是BxHxW张量）。

我想将张量沿H和W方向分别切成N个和M个间隔（大致）均匀的窗口，并对每个窗口应用tf.math.bincount。（抱歉，这句话表述得有些拗口。）

下面是我目前的做法，但是构建起来很慢，并且会生成一个臃肿的TensorFlow计算图。有更好的方法吗？

def makeHistBins(inPut, N, M=None):
    """Build a 256-bin histogram for each cell of an N x M grid over every image.

    The graph is assembled with plain Python loops: one ``tf.math.bincount``
    op per (image, grid row, grid column), and the partial results are grown
    one ``tf.concat`` at a time.

    Args:
        inPut: tensor indexed as [image, row, column]; all three dimensions
            must be statically known because they are read with ``int(...)``.
        N: number of windows along axis 1.
        M: number of windows along axis 2; defaults to ``N``.

    Returns:
        The concatenated histograms, laid out as [image, N, M, 256].
    """
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    # NOTE: HistH and HistW are computed but never used below.
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                # Window bounds use floor for the start and ceil for the end, so
                # neighbouring windows share a row/column whenever ImgH*i/N
                # (or ImgW*j/M) is not an integer.
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                # Grow the current grid row: [M, 256] once the j loop finishes.
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            # Grow the current image: [N, M, 256] once the i loop finishes.
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        # Grow the batch: [LEN, N, M, 256] once the k loop finishes.
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out

示例

# Images is 10x100x100 (batch x height x width)
Out = makeHistBins(Images, 5)
# Out should be 10x5x5x256: one 256-bin count per 20x20 window of each image.

Answer 1

您应该使用tf.extract_image_patches。此操作会从输入图像中提取图像块（patch），方式与卷积的滑动窗口相同，并且支持窗口滑动和重叠。

def imagemakeHistBins(inPut, N, M=None):
    """Histogram every cell of an N x M grid over each image with one bincount.

    The images are padded so that both spatial sizes are multiples of the
    window size, cut into non-overlapping windows with
    ``tf.extract_image_patches``, and all windows are counted in a single
    ``tf.bincount`` call by giving each window its own disjoint id range.

    Args:
        inPut: integer tensor of rank 3, [batch, height, width], with static
            height and width.
        N: number of windows along the height.
        M: number of windows along the width; defaults to ``N``.

    Returns:
        Tensor of shape [batch, N, M, K] holding per-window value counts.
        K is 256 for inputs in 0..255 (and larger only if the input contains
        larger values).
    """
    if M is None:
        M = N
    _, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)

    # Shift all values by +1 so that 0 is free to mark padding, then pad each
    # spatial axis up to a multiple of the window size, splitting the extra
    # rows/columns between both sides.
    extra_h = HistH * N - H
    extra_w = HistW * M - W
    inPut = tf.pad(inPut + 1, [[0, 0],
                               [math.ceil(extra_h / 2), math.floor(extra_h / 2)],
                               [math.ceil(extra_w / 2), math.floor(extra_w / 2)]])

    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                               ksizes=[1, HistH, HistW, 1],
                               strides=[1, HistH, HistW, 1],
                               rates=[1, 1, 1, 1],
                               padding='VALID')

    # following codes are referenced from https://stackoverflow.com/a/50883668
    # Bin 0 counts padding and bins 1..256 count the pixel values 0..255, so
    # at least 257 bins per window are required. Without the lower bound the
    # size of the last output dimension would depend on the largest value
    # that happens to be present in the batch.
    max_arr_plus = tf.maximum(tf.reduce_max(inPut_new) + 1, 257)
    new_shape = tf.shape(inPut_new)
    num_windows = new_shape[0] * new_shape[1] * new_shape[2]
    # Offset the values of window g by g * max_arr_plus so that every window
    # lands in its own block of bins.
    group = max_arr_plus * tf.reshape(tf.range(num_windows),
                                      [new_shape[0], new_shape[1], new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * num_windows)
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    # Drop bin 0 (the padding counter).
    return out[:, :, :, 1:]

实验比较

您可以同时比较三种方法的时间消耗。

import tensorflow as tf
import math
import time
import numpy as np

def makeHistBins(inPut, N, M=None):
    """Build a 256-bin histogram for each cell of an N x M grid over every image.

    The graph is assembled with plain Python loops: one ``tf.math.bincount``
    op per (image, grid row, grid column), and the partial results are grown
    one ``tf.concat`` at a time.

    Args:
        inPut: tensor indexed as [image, row, column]; all three dimensions
            must be statically known because they are read with ``int(...)``.
        N: number of windows along axis 1.
        M: number of windows along axis 2; defaults to ``N``.

    Returns:
        The concatenated histograms, laid out as [image, N, M, 256].
    """
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    # NOTE: HistH and HistW are computed but never used below.
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                # Window bounds use floor for the start and ceil for the end, so
                # neighbouring windows share a row/column whenever ImgH*i/N
                # (or ImgW*j/M) is not an integer.
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                # Grow the current grid row: [M, 256] once the j loop finishes.
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            # Grow the current image: [N, M, 256] once the i loop finishes.
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        # Grow the batch: [LEN, N, M, 256] once the k loop finishes.
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out

def makeHistBins2(inPut, N, M=None):
    """Histogram every cell of an N x M grid over each image via split + map_fn.

    Each image is cut into contiguous, non-overlapping windows with
    ``tf.split`` and a 256-bin ``tf.math.bincount`` is taken of every window;
    ``tf.map_fn`` applies this to each image of the batch.

    Args:
        inPut: integer tensor of rank 3, [batch, height, width]; height and
            width must be statically known, the batch size need not be.
        N: number of windows along the height.
        M: number of windows along the width; defaults to ``N``.

    Returns:
        Tensor of shape [batch, N, M, 256] with the per-window counts.
    """
    if M is None:
        M = N
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])

    # Window i covers rows floor(ImgH*i/N) .. floor(ImgH*(i+1)/N), so the
    # sizes always add up to ImgH. Integer division keeps this exact.
    splits_h = [(ImgH * (i + 1)) // N - (ImgH * i) // N for i in range(N)]
    splits_w = [(ImgW * (j + 1)) // M - (ImgW * j) // M for j in range(M)]

    def _image_histograms(image):
        # One histogram per window, in row-major window order: [N*M, 256].
        return tf.stack([
            tf.math.bincount(tf.reshape(window, [-1]), minlength=256, maxlength=256)
            for band in tf.split(image, splits_h, axis=0)
            for window in tf.split(band, splits_w, axis=1)
        ])

    out = tf.map_fn(_image_histograms, inPut)
    return tf.reshape(out, [-1, len(splits_h), len(splits_w), 256])

def imagemakeHistBins(inPut, N, M=None):
    """Histogram every cell of an N x M grid over each image with one bincount.

    The images are padded at the bottom/right so that both spatial sizes are
    multiples of the window size, cut into non-overlapping windows with
    ``tf.extract_image_patches``, and all windows are counted in a single
    ``tf.bincount`` call by giving each window its own disjoint id range.

    Args:
        inPut: integer tensor of rank 3, [batch, height, width], with static
            height and width.
        N: number of windows along the height.
        M: number of windows along the width; defaults to ``N``.

    Returns:
        Tensor of shape [batch, N, M, K] holding per-window value counts.
        K is 256 for inputs in 0..255 (and larger only if the input contains
        larger values).
    """
    if M is None:
        M = N
    _, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)

    # Shift all values by +1 so that 0 is free to mark padding, then pad the
    # end of each spatial axis up to a multiple of the window size.
    # The width padding must be derived from W (it previously used H, which
    # is only correct for square images).
    inPut = tf.pad(inPut + 1, [[0, 0], [0, HistH * N - H], [0, HistW * M - W]])

    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                               ksizes=[1, HistH, HistW, 1],
                               strides=[1, HistH, HistW, 1],
                               rates=[1, 1, 1, 1],
                               padding='VALID')

    # following codes are referenced from https://stackoverflow.com/a/50883668
    # Bin 0 counts padding and bins 1..256 count the pixel values 0..255, so
    # at least 257 bins per window are required. Without the lower bound the
    # size of the last output dimension would depend on the largest value
    # that happens to be present in the batch.
    max_arr_plus = tf.maximum(tf.reduce_max(inPut_new) + 1, 257)
    new_shape = tf.shape(inPut_new)
    num_windows = new_shape[0] * new_shape[1] * new_shape[2]
    # Offset the values of window g by g * max_arr_plus so that every window
    # lands in its own block of bins.
    group = max_arr_plus * tf.reshape(tf.range(num_windows),
                                      [new_shape[0], new_shape[1], new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * num_windows)
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    # Drop bin 0 (the padding counter).
    return out[:, :, :, 1:]

# Build the three graphs on the same placeholder so they can be fed identical data.
Images = tf.placeholder(shape=(10, 100, 100), dtype=tf.int32)
out1 = makeHistBins(Images, 5)
out2 = makeHistBins2(Images, 5)
out3 = imagemakeHistBins(Images, 5)

with tf.Session() as sess:
    example = np.random.randint(0, 256, size=(10, 100, 100))

    # Sanity check: all three implementations must produce the same histograms.
    value1, value2, value3 = sess.run([out1, out2, out3], feed_dict={Images: example})
    print(np.all(value1 == value2))
    print(np.all(value1 == value3))

    # Time 10 runs of each implementation.
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # standard-library clock intended for measuring short intervals.
    for name, op in (('makeHistBins', out1),
                     ('makeHistBins2', out2),
                     ('imagemakeHistBins', out3)):
        time1 = time.perf_counter()
        for _ in range(10):
            sess.run(op, feed_dict={Images: example})
        time2 = time.perf_counter()
        print('%s cost time(ms) : %.2f' % (name, (time2 - time1) * 1000))

结果：

# True
# True
# makeHistBins cost time(ms) : 1187.79
# makeHistBins2 cost time(ms) : 568.32
# imagemakeHistBins cost time(ms) : 74.57

imagemakeHistBins的运行时间比makeHistBins和makeHistBins2少了一个数量级。

注释

也请注意您的提取策略。

根据您的逻辑，分块策略如下：

ImgH = 100
N = 6
# Window i, as sliced in the question, spans rows
# floor(ImgH*i/N) up to (not including) ceil(ImgH*(i+1)/N).
start_h = [math.floor(ImgH * lo / N) for lo in range(0, N)]  # [0, 16, 33, 50, 66, 83]
end_h = [math.ceil(ImgH * hi / N) for hi in range(1, N + 1)]  # [17, 34, 50, 67, 84, 100]
# Resulting row ranges: [0:17, 16:34, 33:50, 50:67, 66:84, 83:100]

tf.split和tf.extract_image_patches都不支持这样的块。

# Spread ImgH rows over N windows whose sizes differ by at most one;
# the first ImgH % N windows receive the extra row.
splits_h = np.full(N, ImgH // N)
splits_h[:ImgH % N] += 1
# [17 17 17 17 16 16]

所以我的策略是用0填充最后一个块。因为我先把输入整体加1（像素值0..255变为1..256），填充的0只会落入第0个bin，而最后通过out[:,:,:,1:]把第0个bin丢弃，所以不会影响最终结果的正确性。

# tf.extract_image_patches
# [17 17 17 17 17 15]

均匀分块的解决方案

def imagemakeHistBins(inPut, N, M=None):
    """Histogram an evenly split N x M grid over each image with one bincount.

    Height and width are divided into windows whose sizes differ by at most
    one (larger windows first). Every window is shifted by +1, zero-padded to
    the size of the largest window, and all windows are then counted in a
    single ``tf.bincount`` call by giving each window its own id range.

    Args:
        inPut: integer tensor of rank 3, [batch, height, width], with static
            height and width.
        N: number of windows along the height.
        M: number of windows along the width; defaults to ``N``.

    Returns:
        Tensor of shape [batch, N, M, K] holding per-window value counts.
        K is 256 for inputs in 0..255 (and larger only if the input contains
        larger values).
    """
    if M is None:
        M = N
    _, H, W = inPut.get_shape().as_list()

    def _boundaries(total, parts):
        # Cumulative window boundaries for `parts` windows covering `total`.
        sizes = np.array([total // parts for _ in range(parts)])
        if total - sum(sizes) > 0:
            sizes[:(total - sum(sizes))] += 1
        return np.cumsum([0] + sizes.tolist())

    splits_h = _boundaries(H, N)
    splits_w = _boundaries(W, M)
    # splits_w=[  0  17  34  51  68  84 100] if inPut is (10,100,100) and M=N=6
    print(splits_w)

    # The first window along each axis is (one of) the largest; every other
    # window is padded up to that size.
    max_h = splits_h[1]
    max_w = splits_w[1]

    inPut_new = []
    for i in range(N):
        for j in range(M):
            hist = inPut[:, splits_h[i]:splits_h[i + 1], splits_w[j]:splits_w[j + 1]]
            # +1 keeps real values away from 0, which marks the padding.
            hist = tf.pad(hist + 1, [[0, 0],
                                     [0, max_h - (splits_h[i + 1] - splits_h[i])],
                                     [0, max_w - (splits_w[j + 1] - splits_w[j])]])
            inPut_new.append(hist)

    # [N*M, batch, max_h, max_w] -> [batch, N, M, max_h*max_w]
    inPut_new = tf.reshape(tf.transpose(tf.stack(inPut_new), [1, 0, 2, 3]), [-1, N, M, max_h * max_w])

    # following codes are referenced from https://stackoverflow.com/a/50883668
    # Bin 0 counts padding and bins 1..256 count the pixel values 0..255, so
    # at least 257 bins per window are required. Without the lower bound the
    # size of the last output dimension would depend on the largest value
    # that happens to be present in the batch.
    max_arr_plus = tf.maximum(tf.reduce_max(inPut_new) + 1, 257)
    new_shape = tf.shape(inPut_new)
    num_windows = new_shape[0] * new_shape[1] * new_shape[2]
    # Offset the values of window g by g * max_arr_plus so that every window
    # lands in its own block of bins.
    group = max_arr_plus * tf.reshape(tf.range(num_windows),
                                      [new_shape[0], new_shape[1], new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * num_windows)
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    # Drop bin 0 (the padding counter).
    return out[:, :, :, 1:]

Answer 2

以下方法可行。但请务必同时指定minlength参数，因为如果各窗口返回的bin数量不同，就无法将它们堆叠起来。基本上，我们用两层for循环（一层沿高度拆分，另一层沿宽度拆分），再用map_fn遍历批次中的每张图像，最终得到形状为[LEN, N, M, 256]的张量（对示例输入即10×5×5×256；如@giser_yugang所指出，输出的最后一维是256）。此外，要让它支持不均匀的拆分，还需要一些额外处理。

def makeHistBins(inPut, N, M=None):
    """Histogram every cell of an N x M grid over each image via split + map_fn.

    Height and width are divided into windows whose sizes differ by at most
    one (larger windows first). Each image is cut up with ``tf.split`` and a
    256-bin ``tf.math.bincount`` is taken of every window.

    Args:
        inPut: integer tensor of rank 3, [batch, height, width], with a fully
            static shape.
        N: number of windows along the height.
        M: number of windows along the width; defaults to ``N``.

    Returns:
        Tensor of shape [batch, N, M, 256] with the per-window counts.
    """
    M = N if M is None else M
    LEN, ImgH, ImgW = (int(inPut.shape[axis]) for axis in range(3))

    def _window_sizes(total, parts):
        # Equal shares first, then hand the remainder out one unit at a time
        # to the leading windows.
        sizes = np.array([total // parts for _ in range(parts)])
        leftover = total - sum(sizes)
        if leftover > 0:
            sizes[:leftover] += 1
        return sizes

    splits_h = _window_sizes(ImgH, N)
    splits_w = _window_sizes(ImgW, M)

    print(splits_w)

    def _image_histograms(image):
        # Row-major walk over the windows of one image: [N*M, 256].
        hists = []
        for band in tf.split(image, splits_h, axis=0):
            for window in tf.split(band, splits_w, axis=1):
                flat = tf.reshape(window, [-1])
                hists.append(tf.math.bincount(flat, minlength=256, maxlength=256))
        return tf.stack(hists)

    out = tf.map_fn(_image_histograms, inPut)
    return tf.reshape(out, [LEN, N, M, 256])

如何高效地对张量切片应用TensorFlow函数？

2 个答案: