python 2d卷积优化

我对图像卷积很感兴趣。下面是我用 3x3 卷积核执行卷积的代码。我想知道有什么办法可以让它运行得更快。

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
# Load the source image and keep a second copy of it; convulute3x3 reads
# from img and writes its results into imgCopy.
img = mpimg.imread('benfrank.png')
imgCopy = img.copy()
# Dimensions used by the border checks in convulute3x3.
# NOTE(review): hard-coded; presumably these match benfrank.png — confirm.
Width = 1200
Height = 1464
# Running column (x1) and row (y1) counters for the driver loop.
x1 = 0
y1 = 0
# presumably the kernel width/height; not referenced by the visible code.
cWidth = 3
cHeight = 3
# NOTE(review): this kernel literal is cut off after its first row in this
# copy of the file; the remaining rows and the closing brackets are missing
# and must be restored from the original before the script can run.
convul = np.array([[0,0,-5],

# Sum of all nine kernel weights; convulute3x3 divides each result by it.
summ = convul[2,2]+convul[2,1]+convul[2,0]+convul[1,2]+convul[1,1]+convul[1,0]+convul[0,2]+convul[0,1]+convul[0,0]

def convulute3x3(x,y):
    """Apply the 3x3 kernel ``convul`` to the pixel at column ``x``, row ``y``.

    Reads the module-level ``img`` and, for each of the first three
    channels, writes the kernel-weighted sum of the pixel's 3x3
    neighbourhood divided by ``summ`` into ``imgCopy[y, x, c]``.

    Pixels on the outermost border (as defined by the module-level ``Width``
    and ``Height``) have no complete 3x3 neighbourhood and are left untouched.

    The kernel is indexed as ``convul[column_offset, row_offset]``, i.e.
    transposed relative to the image's ``[row, column]`` indexing.

    NOTE(review): if the kernel weights sum to zero, the division by
    ``summ`` cannot produce a meaningful value — confirm the intended
    normalisation for such kernels.
    """
    # Border pixels are skipped: their neighbourhood would fall outside the
    # image (and negative indices would silently wrap around).
    if x < 1 or x > Width - 2 or y < 1 or y > Height - 2:
        return
    for c in range(3):
        total = 0
        # Row-major walk over the neighbourhood, top-left to bottom-right.
        for dy in (-1, 0, 1):
            for dx in (-1, 0, 1):
                total = total + img[y + dy, x + dx, c] * convul[dx + 1, dy + 1]
        imgCopy[y, x, c] = total / summ
# Run the 3x3 convolution over every pixel of img; convulute3x3 itself skips
# the border pixels. x1 is the column index and y1 the row index.
# NOTE(review): in this copy the loop only incremented x1/y1 — it never
# called convulute3x3 and never reset x1 at the start of a row. The call is
# restored here; confirm against the original script.
for y1, row in enumerate(img):
    for x1 in range(len(row)):
        convulute3x3(x1, y1)

正如@Reti43 在评论中提到的那样,已经存在可以这样做的库,但我怀疑您只是想尝试一些自制的实现。

我也对如何在 Python 中手动实现卷积感兴趣。 Python 循环非常慢,如果您关心速度,则应远离纯 Python 循环,而应坚持使用更多矢量化方法。

到目前为止,我找到的最好办法是使用 numpy.lib.stride_tricks.as_strided,它可以为 numpy 数组创建高度自定义的视图。我先用 as_strided 得到图像的滑动窗口视图,再用 np.tensordot 将其与卷积核做“更一般的矩阵乘法”(参见 numpy 文档)。此外,numpy 1.20(如果我没记错的话)新增了 numpy.lib.stride_tricks.sliding_window_view;就目前而言,它不如我下面的代码通用,因为它不支持自定义步幅。

import numpy as np 
from numpy.lib.stride_tricks import as_strided

def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Return a strided view containing every kernel-sized window of ``x``.

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, convolution kernel of shape (C x P x Q); only its shape is used
    rowstride: int, "vertical" step size between consecutive windows
    colstride: int, "horizontal" step size between consecutive windows

    Returns an array of shape
    ((N - P) // rowstride + 1, (M - Q) // colstride + 1, C, P, Q) whose
    element [i, j, c, p, q] is x[c, i * rowstride + p, j * colstride + q].

    No data is copied: the result aliases the memory of ``x``, so overlapping
    windows share elements and writing to the view writes to ``x``.
    """
    imgChannels, imgRows, imgCols = x.shape
    _, kernelRows, kernelCols = k.shape
    # as_strided expects strides in bytes. Taking them from x.strides (instead
    # of deriving them from itemsize) keeps the view correct when x is not
    # C-contiguous, e.g. a slice or a transposed array.
    chanStep, rowStep, colStep = x.strides
    return as_strided(
        x,
        shape=(
            (imgRows - kernelRows) // rowstride + 1,
            (imgCols - kernelCols) // colstride + 1,
            imgChannels,
            kernelRows,
            kernelCols,
        ),
        strides=(rowStep * rowstride, colStep * colstride, chanStep, rowStep, colStep),
    )

def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """
    Performs 2d convolution on images with arbitrary number of channels where you can
    specify the strides as well.

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, convolution kernel of shape (C x P x Q), where C is number of channels
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size

    Returns a 2d array with one value per window position: the sum over all
    channels of the element-wise product of the window and the kernel. The
    kernel is applied as given (it is not flipped).
    """
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    # axes=3 contracts the last three axes of the window view (C, P, Q) with
    # the three axes of the kernel, leaving only the two window-position axes.
    return np.tensordot(sliding_window_view, k, axes=3)

# NOTE(review): the contents of both array literals below are missing in this
# copy of the file (the brackets do not even balance), as are, presumably,
# the conv2d calls that produced the outputs shown further down. The code is
# left exactly as found; restore it from the original before running.
x = np.array([


k = np.array([

]) / 8

# presumably the printed result of conv2d(x, k, 1, 1) — TODO confirm
#[[1.    1.25  1.5  ]
# [2.    2.625 3.25 ]
# [3.    4.    5.   ]]

# presumably the printed result of conv2d(x, k, 2, 2) — TODO confirm
#[[1.  1.5]
# [3.  5. ]]


我还写了一个 ASCII 可视化的小工具,用来检查滑动窗口是否正确:

import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Compute the convolution window by window, highlighting each window in turn.

    x: np.ndarray, image array; it is copied, so the caller's array is not modified
    k: np.ndarray, convolution kernel
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size

    Returns a 2d array holding np.tensordot(window, k, axes=3) for every
    window position of the sliding-window view.

    NOTE(review): in the visible code the highlighted window is restored
    immediately and nothing is printed; presumably the print/sleep statements
    (cf. the ``import time`` above and the printoptions context) were lost
    from this copy — confirm against the original.
    """
    # Object dtype lets individual cells temporarily hold highlighted strings.
    x = x.copy().astype(object)
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    # Wraps every element in ANSI escape codes (33 = yellow foreground).
    highlighter = np.vectorize(lambda x: f"\x1b[33m{x}\x1b[0m")
    # Result grid, one cell per window position; NaN marks "not computed yet".
    r = np.full(sliding_window_view.shape[:2], np.nan)
    # Render NaN as an empty string and every other element via str().
    with np.printoptions(nanstr="", formatter={"all":lambda x: str(x)}):
        for i, row in enumerate(sliding_window_view):
            for j, window in enumerate(row):
                # Keep the numeric values so the highlight can be undone.
                temp = window.copy()
                # Computed before highlighting, while the window is still numeric.
                r[i,j] = np.tensordot(window, k, axes=3)
                # window is a view, so this writes the highlighted strings into x.
                window[...] = highlighter(window)
                # Undo the highlight before moving on to the next window.
                window[...] = temp
    return r


