我对图像卷积感兴趣。这是我使用 3x3 内核执行卷积的代码。我正在寻找有关如何使其运行更快的任何想法。
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
img = mpimg.imread('benfrank.png')
imgCopy = img.copy()
Width = 1200
Height = 1464
x1 = 0
y1 = 0
cWidth = 3
cHeight = 3
convul = np.array([[0,0,-5],
[0,1,0],
[-5,0,0]])
summ = convul[2,2]+convul[2,1]+convul[2,0]+convul[1,2]+convul[1,1]+convul[1,0]+convul[0,2]+convul[0,1]+convul[0,0]
def convulute3x3(x,y):
global convul
global img,imgCopy, Width, Height, summ
i = x
j = y
if(i < 1 or i > Width-2 ):
return
elif(j < 1 or j > Height-2 ):
return
for c in range(3):
n11 = img[j-1,i-1,c]*convul[0,0]
n22 = img[j-1,i,c]*convul[1,0]
n33 = img[j-1,i+1,c]*convul[2,0]
n44= img[j,i-1,c]*convul[0,1]
n55 = img[j,i,c]*convul[1,1]
n66 = img[j,i+1,c]*convul[2,1]
n77 = img[j+1,i-1,c]*convul[0,2]
n88 = img[j+1,i,c]*convul[1,2]
n99 = img[j+1,i+1,c]*convul[2,2]
color = (n11+n22+n33+n44+n55+n66+n77+n88+n99)/summ
imgCopy[j,i,c] = color
for x in img:
x1=0
for y in x:
convulute3x3(x1,y1)
x1 = x1+1
y1 = y1+1
plt.imshow(imgCopy)
plt.show()
答案 0 :(得分:0)
正如@Reti43 在评论中提到的那样,已经存在可以这样做的库,但我怀疑您只是想尝试一些自制的实现。
我也对如何在 Python 中手动实现卷积感兴趣。 Python 循环非常慢,如果您关心速度,则应远离纯 Python 循环,而应坚持使用更多矢量化方法。
到目前为止我所拥有的最好的是使用 numpy.lib.stride_tricks.as_strided
,它允许您获得非常定制的 numpy 数组视图。我使用 as_strided
获得图像的滑动窗口视图,然后使用 np.tensordot
与内核进行“更一般的矩阵乘法”(docs)。此外,numpy 1.20 (iirc) 有 numpy.lib.stride_tricks.sliding_window_view
,这是我下面代码的不太通用的版本(截至目前),因为它不能进行自定义步幅。
import numpy as np
from numpy.lib.stride_tricks import as_strided
def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
imgChannels, imgRows, imgCols = x.shape
_, kernelRows, kernelCols = k.shape
u = np.array(x.itemsize) # Used to scale stride size, as_astrided wants stride sizes in bits
return as_strided(x,
shape=((imgRows-kernelRows)//rowstride+1, (imgCols-kernelCols)//colstride+1, imgChannels, kernelRows, kernelCols),
strides=u*(imgCols*rowstride, colstride, imgRows*imgCols, imgCols, 1)
)
def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
"""
Performs 2d convolution on images with arbitrary number of channels where you can
specify the strides as well.
x: np.ndarray, image array of shape (C x N x M), where C is number of channels
k: np.ndarray, convolution kernel of shape (C x P x Q), where C is number of channels
rowstride: int, "vertical" step size
colstride: int, "horizontal" step size
"""
sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
return np.tensordot(sliding_window_view, k, axes=3)
x = np.array([
[[1,1,1,1],
[1,1,1,1],
[2,2,2,2],
[2,2,2,2]],
[[1,1,2,2],
[1,1,2,2],
[4,4,8,8],
[4,4,8,8]]
])
k = np.array([
[[1,1],
[1,1]],
[[1,1],
[1,1]]
]) / 8
print(conv2d(x,k,1,1))
#[[1. 1.25 1.5 ]
# [2. 2.625 3.25 ]
# [3. 4. 5. ]]
print(conv2d(x,k,2,2))
#[[1. 1.5]
# [3. 5. ]]
我实现了一个 ascii 可视化的东西来检查滑动窗口是否正确:
import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
x = x.copy().astype(object)
sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
highlighter = np.vectorize(lambda x: f"\x1b[33m{x}\x1b[0m")
r = np.full(sliding_window_view.shape[:2], np.nan)
with np.printoptions(nanstr="", formatter={"all":lambda x: str(x)}):
for i, row in enumerate(sliding_window_view):
for j, window in enumerate(row):
temp = window.copy()
r[i,j] = np.tensordot(window, k, axes=3)
window[...] = highlighter(window)
print(f"\x1b[JChannels:\n{x}\n\nResult:\n{str(r)}\x1b[{x.shape[0]*x.shape[1]+len(r)+4}A")
window[...] = temp
time.sleep(0.69)
print(f"\x1b[{x.shape[0]*x.shape[1]+len(r)+4}B")
return r
print("Output:\n",conv2d(x,k,1,1))