编辑
我对向内核传入 char 数组做了一些测试,并注意到一个相当奇怪的现象。请看下面的内核程序以及随附的 PyOpenCL 代码:
#!/usr/bin/env python3
import pyopencl as cl
import numpy as np
import seq
# OpenCL C source for the kernel, kept as a multiline string.
# NOTE: every work-item with idx < N prints the same element, AplusB[12],
# and nothing is ever written to `output`.
kernel = """
__kernel void dragon(
const int N,
__global char *AplusB,
__global char *AminusB,
__global char *plusMinus,
__global char *minusMinus,
__global char *output
)
{
int idx = get_global_id(0);
if (idx < N){
char b =AplusB[12];
printf("\\ %c \\n",b);
}
}
"""
# Declare constants.
number_of_expansions = 4
total_probelem_size = 7
resulting_str_size = 62  # size of the output buffer, in bytes

# Step 1: Create a context.
# create_some_context() may ask the user to select the device to be used.
context = cl.create_some_context()
# Create a queue to the device.
queue = cl.CommandQueue(context)
# Build the program from the kernel source.
program = cl.Program(context, kernel).build()

# Create the input strings as arrays of single bytes.
# np.array('...') on a Python str yields a 0-d '<U14' array that stores each
# character in 4 bytes, so a `char *` on the device only finds a character at
# byte offsets 0, 4, 8, 12.  Building int8 arrays from byte strings gives the
# one-byte-per-character layout that `__global char *` expects.
# .copy() makes the arrays writable and owned (frombuffer returns a read-only
# view of the bytes object).
AplusB = np.frombuffer(b'FX+YF++-FX-YF+', dtype=np.int8).copy()
AminusB = np.frombuffer(b'FX+YF+--FX-YF+', dtype=np.int8).copy()
plusMinus = np.frombuffer(b'+-', dtype=np.int8).copy()
minusMinus = np.frombuffer(b'--', dtype=np.int8).copy()

# Copy the input data to device memory.
mf = cl.mem_flags
AplusBBuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=AplusB)
AminusBBuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=AminusB)
plusMinusBuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=plusMinus)
minusMinusBuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=minusMinus)

# Create the memory on the device to put the result into.
out_buf = cl.Buffer(context, mf.WRITE_ONLY, size=resulting_str_size)
# The host-side destination must have the same byte size as out_buf; the
# default float64 dtype would make it 8x larger than the device buffer.
copied_str = np.zeros(resulting_str_size, dtype=np.int8)

# Initiate the kernel: the first argument is a 32-bit int scalar, the
# remaining five are buffers.
dragon = program.dragon
dragon.set_scalar_arg_dtypes([np.int32, None, None, None, None, None])
global_work_size = total_probelem_size

# Launch one work-item per problem element.
dragon(
    queue,
    (global_work_size,),
    None,
    total_probelem_size,
    AplusBBuf,
    AminusBBuf,
    plusMinusBuf,
    minusMinusBuf,
    out_buf,
)
# Wait for the queue to be completely processed.
queue.finish()

# Read the output buffer back from the device.
# NOTE(review): the kernel shown never stores to `output`, so the contents
# read back here are presumably unspecified -- confirm once the kernel is
# completed.
cl.enqueue_copy(queue, copied_str, out_buf).wait()
print(copied_str)
"https://stackoverflow.com/questions/17603740/how-to-pass-a-list-of-strings-to-an-opencl-kernel-using-pyopencl"
请注意,在内核中,我尝试打印出缓冲区 AplusB 中的字符。看来我只能打印出索引为 0、4、8 和 12 的字符,而 AplusB 的长度为 14。这种行为的原因可能是什么?