奖金

Question

我对图像卷积很感兴趣。下面是我使用 3x3 卷积核执行卷积的代码。我想知道有没有什么办法能让它运行得更快。

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
# Load the source image. The filtered pixels are written into a separate copy
# so that every output pixel is computed from unmodified input neighbours.
img = mpimg.imread('benfrank.png')
imgCopy = img.copy()
# Read the dimensions (rows, columns) from the loaded array instead of
# hard-coding them, so the bounds checks stay valid for any image size.
Height, Width = img.shape[:2]
x1 = 0
y1 = 0
cWidth = 3
cHeight = 3
# 3x3 filter kernel, written row by row.
convul = np.array([[0,0,-5],
                  [0,1,0],
                  [-5,0,0]])

# Sum of all kernel weights, used to normalise each filtered pixel.
summ = convul.sum()

def convulute3x3(x,y):
    """Filter one pixel of ``img`` with the 3x3 kernel ``convul``.

    The 3x3 neighbourhood centred on column ``x`` / row ``y`` of ``img`` is
    multiplied element-wise with ``convul`` (kernel rows line up with image
    rows, kernel columns with image columns), summed, divided by the kernel
    weight sum ``summ`` and stored in ``imgCopy[y, x]`` for the first three
    channels. Only ``img`` is read, so earlier results never feed into later
    ones.

    The kernel is applied as written (no flipping). Previously the kernel
    indices were transposed relative to the image offsets, which only went
    unnoticed for symmetric kernels.

    Pixels on the outermost rows/columns (as bounded by ``Width`` and
    ``Height``) have no complete neighbourhood and are left untouched.

    Args:
        x: Column index of the pixel.
        y: Row index of the pixel.

    Returns:
        None. The result is written into the module-level ``imgCopy``.
    """
    # No ``global`` statement is needed: the module-level names are only read
    # or mutated in place, never rebound.
    if x < 1 or x > Width - 2 or y < 1 or y > Height - 2:
        return

    # A kernel whose weights sum to zero (typical for edge detectors) cannot
    # be normalised; skip the division instead of producing inf/nan.
    norm = summ if summ != 0 else 1

    for c in range(3):
        patch = img[y - 1:y + 2, x - 1:x + 2, c]
        imgCopy[y, x, c] = (patch * convul).sum() / norm
# Visit every pixel position; convulute3x3 itself skips the border pixels.
# The indices come straight from the loops, so re-running this section does
# not depend on counters that were initialised elsewhere.
for y1 in range(img.shape[0]):
    for x1 in range(img.shape[1]):
        convulute3x3(x1, y1)
plt.imshow(imgCopy)
plt.show()

Answer 1

正如@Reti43 在评论中提到的那样，已经存在可以这样做的库，但我怀疑您只是想尝试一些自制的实现。

我也对如何在 Python 中手动实现卷积感兴趣。 Python 循环非常慢，如果您关心速度，则应远离纯 Python 循环，而应坚持使用更多矢量化方法。

到目前为止，我找到的最好办法是使用 numpy.lib.stride_tricks.as_strided，它可以创建高度自定义的 numpy 数组视图。我先用 as_strided 得到图像的滑动窗口视图，然后用 np.tensordot 将其与卷积核做“更一般的矩阵乘法”（参见文档）。另外，numpy 1.20（如果我没记错的话）提供了 numpy.lib.stride_tricks.sliding_window_view；就目前而言，它是我下面这段代码的一个不太通用的版本，因为它不支持自定义步幅。

import numpy as np 
from numpy.lib.stride_tricks import as_strided


def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Return a strided view of every kernel-sized window of ``x``.

    No data is copied: the result aliases the memory of ``x``, so writing to
    a window writes to ``x`` (and overlapping windows share elements).

    Args:
        x: Image array of shape (C, N, M) -- channels, rows, columns.
        k: Kernel array of shape (C, P, Q); only its shape is used.
        rowstride: Vertical step, in pixels, between successive windows.
        colstride: Horizontal step, in pixels, between successive windows.

    Returns:
        A view of shape (out_rows, out_cols, C, P, Q) where
        ``out_rows = (N - P) // rowstride + 1`` and
        ``out_cols = (M - Q) // colstride + 1``. Element ``[i, j]`` is the
        window ``x[:, i*rowstride:i*rowstride+P, j*colstride:j*colstride+Q]``.
    """
    imgChannels, imgRows, imgCols = x.shape
    _, kernelRows, kernelCols = k.shape
    # as_strided expects strides in bytes. Reusing the array's own strides
    # (rather than deriving them from the shape and itemsize) keeps the view
    # correct for non-contiguous inputs such as transposed or sliced arrays.
    channelStep, rowStep, colStep = x.strides
    return as_strided(x,
        shape=((imgRows-kernelRows)//rowstride+1, (imgCols-kernelCols)//colstride+1, imgChannels, kernelRows, kernelCols),
        strides=(rowStep*rowstride, colStep*colstride, channelStep, rowStep, colStep)
    )


def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """
    Slide the kernel over a multi-channel image and return, for each window
    position, the sum of the element-wise products of window and kernel.
    The kernel is applied as given (it is not flipped).

    x: np.ndarray, image of shape (C x N x M), C being the channel count
    k: np.ndarray, kernel of shape (C x P x Q), C being the channel count
    rowstride: int, step between windows along the row ("vertical") axis
    colstride: int, step between windows along the column ("horizontal") axis

    Returns an array with one value per window position, indexed as
    [window row, window column].
    """
    windows = get_sliding_window(x, k, rowstride, colstride)
    # Contract the three trailing window axes (channel, row, column) against
    # the three kernel axes; only the two window-position axes remain.
    return np.tensordot(windows, k, axes=([2, 3, 4], [0, 1, 2]))


# Two-channel 4x4 sample image, laid out as (channels, rows, columns).
x = np.array([
    [[1, 1, 1, 1], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]],
    [[1, 1, 2, 2], [1, 1, 2, 2], [4, 4, 8, 8], [4, 4, 8, 8]],
])

# 2x2 averaging kernel across both channels: eight equal weights of 1/8.
k = np.ones((2, 2, 2)) / 8

# Step of 1 in both directions -> 3x3 window positions.
print(conv2d(x,k,1,1))
#[[1.    1.25  1.5  ]
# [2.    2.625 3.25 ]
# [3.    4.    5.   ]]

# Step of 2 in both directions -> 2x2 non-overlapping window positions.
print(conv2d(x,k,2,2))
#[[1.  1.5]
# [3.  5. ]]

附加内容

我实现了一个 ascii 可视化的东西来检查滑动窗口是否正确：

import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Animate the sliding-window computation in the terminal, one window per frame.

    For every window position the function computes the same per-window
    tensordot as a plain strided convolution, stores it in the result array,
    prints the image with the current window highlighted together with the
    results so far, then pauses briefly.

    x: np.ndarray, image array; it is copied, so the caller's array is not modified
    k: np.ndarray, kernel, passed to get_sliding_window and used in the tensordot
    rowstride: int, forwarded to get_sliding_window
    colstride: int, forwarded to get_sliding_window

    Returns the float array of per-window results.
    """
    # Work on an object-dtype copy so cells can temporarily hold the
    # highlighted strings produced below.
    x = x.copy().astype(object)
    # The windows are views into the local copy ``x`` (get_sliding_window uses
    # as_strided), so assigning into a window changes what ``x`` prints.
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    # Wrap each element in ANSI escape codes: "\x1b[33m" selects yellow text,
    # "\x1b[0m" resets. (The lambda's ``x`` shadows the outer ``x``.)
    highlighter = np.vectorize(lambda x: f"\x1b[33m{x}\x1b[0m")
    # Result grid, one cell per window position, starting as NaN.
    r = np.full(sliding_window_view.shape[:2], np.nan)
    # Print every element via str() so the escape codes are emitted raw.
    # NOTE(review): nanstr="" is presumably meant to blank out cells not yet
    # computed, but the "all" formatter may take precedence -- confirm.
    with np.printoptions(nanstr="", formatter={"all":lambda x: str(x)}):
        for i, row in enumerate(sliding_window_view):
            for j, window in enumerate(row):
                # Keep the original values so the highlight can be undone.
                temp = window.copy()
                # Compute the result before the window is overwritten with
                # strings.
                r[i,j] = np.tensordot(window, k, axes=3)
                window[...] = highlighter(window)
                # "\x1b[J" erases from the cursor to the end of the screen;
                # the trailing "\x1b[<n>A" moves the cursor up n lines so the
                # next frame overwrites this one. n is presumably the number
                # of lines just printed -- TODO confirm for other shapes.
                print(f"\x1b[JChannels:\n{x}\n\nResult:\n{str(r)}\x1b[{x.shape[0]*x.shape[1]+len(r)+4}A")
                # Restore the un-highlighted values before the next window.
                window[...] = temp
                # Pause so each frame is visible.
                time.sleep(0.69)
    # "\x1b[<n>B" moves the cursor back down past the last frame.
    print(f"\x1b[{x.shape[0]*x.shape[1]+len(r)+4}B")
    return r

# NOTE(review): this calls conv2d, not the conv2d_asciiviz visualiser defined
# just above -- confirm which one the demo was meant to run.
print("Output:\n",conv2d(x,k,1,1))

python 2d卷积优化

1 个答案:

奖金