(对不起,如果已经发布了类似的问题 - 我不确定要搜索什么。)
您好,
我使用PyOpenCl并且当我把它放在一个函数中时,我的OpenCl内核代码的行为不同(错误),而不是它的主内核程序的一部分。它可以在我戴有Nvidia GPU的戴尔笔记本电脑上正常工作,但不能在戴尔GPU的另一台笔记本电脑上正常工作。两者都在使用Ubuntu 16.04。 我正在编写一个非常粗略的图像过滤功能,以平滑地将像素偏移一个非整数值,例如。通过(.25,0)像素翻译图像会产生这种效果:`newPixelHere = oldPixelHere * .75 + oldPixelToTheLeft * .25。
我附上了代码的redux版本,其中一个简单的3d数组用作彩色图像的替代品。正如我在代码注释中指出的那样,函数filterImg()包含与主体中注释掉的代码相同的代码,但是主体工作,而函数没有 - 它只是设置" ret& #34;如果我改变" i< 4"而且,该函数的行为与预期的一致。到"我< 3"在主循环中,或者如果我避免使用整数/和%。
我刚刚开始使用OpenCl编程,但我确实不在其中,但我已经阅读了一些关于在OpenCl中谨慎使用循环的内容。同样,这适用于我的Nvidia GPU,但不适用于我的戴尔。
最后,尽管OpenCl在我的戴尔GPU上运行正常直到出现此问题,但在运行OpenCl时我收到了以下警告:
beignet-opencl-icd: no supported GPU found, this is probably the wrong opencl-icd package for this hardware
(If you have multiple ICDs installed and OpenCL works, you can ignore this message)
由于我认为可能安装了多个ICD,所以我并不担心这个问题,因为/ etc / OpenCL / vendors /中有多个条目:
~:ls -1 /etc/OpenCL/vendors/
intel-beignet-x86_64-linux-gnu.icd
mesa.icd
~:
......但是,我真的不知道自己在做什么。任何帮助表示感谢,非常感谢。
杰里米。
python代码:
#!/usr/bin/python
import pyopencl as cl
import numpy as np
def printLs3d(ls3d):
# Utility to print "red" channel of 3d array.
for y in range(len(ls3d[0])):
print
for x in range(len(ls3d)):
if ls3d[x][y][0] == 0:
print "...",
else:
print "%03d" % ls3d[x][y][0],
def shadeImg(lsIn):
printLs3d(lsIn)
cntxt = cl.create_some_context()
queue = cl.CommandQueue(cntxt)
res = (len(lsIn)-1, len(lsIn[0])-1)
print
# Inputs
srcImgAr_buf = cl.Buffer(cntxt, cl.mem_flags.READ_ONLY |
#cl.mem_flags.COPY_HOST_PTR,hostbuf=np.array(list(pygame.surfarray.array3d(srcImg))))
cl.mem_flags.COPY_HOST_PTR,hostbuf=np.array(lsIn))
# Outputs
shadedImg = np.zeros((len(lsIn), len(lsIn[0]), len(lsIn[0][0])), dtype=np.uint8)
shadedImg_buf = cl.Buffer(cntxt, cl.mem_flags.WRITE_ONLY |
cl.mem_flags.COPY_HOST_PTR,hostbuf=shadedImg)
kernelPath = "/home/jeremy/dev/warp/testOpenClLoops/testOpenClLoops.c"
with open(kernelPath) as f:
kernel = "".join(f.readlines())
bld = cl.Program(cntxt, kernel).build()
launch = bld.krShadeImg(
queue,
#srcImgAr.shape,
res,
None,
np.int32(res[0]),
np.int32(res[1]),
srcImgAr_buf,
shadedImg_buf)
launch.wait()
cl.enqueue_read_buffer(queue, shadedImg_buf, shadedImg).wait()
printLs3d(shadedImg)
testIn = [
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [100, 100, 100], [100, 100, 100], [100, 100, 100], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [100, 100, 100], [100, 100, 100], [100, 100, 100], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [100, 100, 100], [100, 100, 100], [100, 100, 100], [0, 0, 0], [100, 100, 100], [100, 100, 100], [0, 0, 0]],
[[0, 0, 0], [100, 100, 100], [100, 100, 100], [100, 100, 100], [0, 0, 0], [100, 100, 100], [100, 100, 100], [0, 0, 0]],
[[0, 0, 0], [100, 100, 100], [100, 100, 100], [100, 100, 100], [0, 0, 0], [100, 100, 100], [100, 100, 100], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [100, 100, 100], [100, 100, 100], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
testAr = np.array(testIn, dtype=np.uint8)
shadeImg(testAr)
" /home/jeremy/dev/warp/testOpenClLoops/testOpenClLoops.c"中包含的C ++内核代码:
void setArrayCell(int x, int y, int xres, int yres,
uchar* val,
__global uchar* ret)
{
if (x >= 0 && x < xres && y >= 0 && y < yres) {
int i = (x * yres + y) * 3;
ret[i] = val[0];
ret[i+1] = val[1];
ret[i+2] = val[2];
}
}
void filterImg (unsigned int x, unsigned int y, int xres, int yres,
__global uchar* img,
uchar* ret) {
// This function contains identical code to the commented-out
// block in the main body, except some comments.
// The main body code works; this function doesn't. Why???
// Offset xy lookup to weigh influence of each neighbour.
// If xOfs == yOfs == 0, pixel (x,y) gets full weight.
// If xOfs == yOfs == 1, pixel (x+1,y+1) gets full weight.
float xOfs = .75f;
float yOfs = .5f;
for (int i=0; i<4; i++) { // ***WORKS IF YOU CHANGE 4 TO 3!
// Sample 2x2 grid of neighbouring pixels in this order:
// (x,y), (x+1,y), (x,y+1), (x+1,y+1)
int dx = i%2; // ***WORKS IF YOU SET dx AND dy TO CONSTANT 0 OR 1
int dy = i/2;
int xx = x + dx;
int yy = y + dy;
// Calculate weight of this pixel (not 100% sure this is correct)
float wx = dx == 0 ? xOfs : 1.0f-xOfs;
float wy = dy == 0 ? yOfs : 1.0f-yOfs;
float k = wy*wx;
int address = (xx * yres + yy) * 3;
for (int j=0; j<3; j++) {
ret[j] += img[address+j]*k;
}
}
}
__kernel void krShadeImg(
int xres,
int yres,
__global uchar* img,
__global uchar* shadedImg)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
if (x < xres-1 && y < yres-1) {
uchar ret[3] = {0, 0, 0};
filterImg(x, y, xres, yres, img, ret);
// *** TO MAKE THE CODE WORK, comment out the above
// *** line and uncomment the following block.
/*
// Offset xy lookup to weigh influence of each neighbour.
// If xOfs == yOfs == 0, pixel (x,y) gets full weight.
// If xOfs == yOfs == 1, pixel (x+1,y+1) gets full weight.
float xOfs = .75f;
float yOfs = .5f;
for (int i=0; i<4; i++) {
// Sample 2x2 grid of neighbouring pixels in this order:
// (x,y), (x+1,y), (x,y+1), (x+1,y+1)
int dx = i%2;
int dy = i/2;
int xx = x + dx;
int yy = y + dy;
// Calculate weight of this pixel (not 100% sure this is correct)
float wx = dx == 0 ? xOfs : 1.0f-xOfs;
float wy = dy == 0 ? yOfs : 1.0f-yOfs;
float k = wy*wx;
int address = (xx * yres + yy) * 3;
for (int j=0; j<3; j++) {
ret[j] += img[address+j]*k;
}
}
*/
setArrayCell(x, y, xres, yres, ret, shadedImg);
}
}
使用filterImg函数(已损坏)时的输出:
Before OpenCl process:
... ... ... ... ... ... ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... ... ... ... ... ... ... ... ... ...
... ... ... 100 100 100 100 ... ... ...
... ... ... 100 100 100 100 ... ... ...
... ... ... ... ... ... ... ... ... ...
After OpenCl process:
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
049 049 049 049 049 049 049 049 049 ...
... ... ... ... ... ... ... ... ... ...
使用主体代码时的输出(正确):
Before OpenCl process:
... ... ... ... ... ... ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... 100 100 100 100 100 ... ... ... ...
... ... ... ... ... ... ... ... ... ...
... ... ... 100 100 100 100 ... ... ...
... ... ... 100 100 100 100 ... ... ...
... ... ... ... ... ... ... ... ... ...
After OpenCl process:
012 049 049 049 049 037 ... ... ... ...
024 098 098 098 098 074 ... ... ... ...
024 098 098 098 098 074 ... ... ... ...
012 049 049 049 049 037 ... ... ... ...
... ... 012 049 049 049 037 ... ... ...
... ... 024 098 098 098 074 ... ... ...
... ... 012 049 049 049 037 ... ... ...
... ... ... ... ... ... ... ... ... ...