`main` 函数如下(摘自 http://arrayfire.org/docs/interop_opencl.htm):
功能 int main() {
size_t length = 10;
// Create ArrayFire array objects:
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
// ... additional ArrayFire operations here
// 2. Obtain the device, context, and queue used by ArrayFire
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
// For the sake of readability, we have omitted error checking.
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_x and d_y, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
return 0;
}
这段代码运行良好:B 的最终值与 A 一致,即 `af_print(B);` 的输出与 A 相同。但是,当我把代码拆分成单独的 `arraycopy` 函数和 `main` 函数时,如下所示:
`arraycopy` 函数:
// Copies the contents of A into B by running a small OpenCL kernel on the
// context, device and command queue that ArrayFire is using.
//   A      - source array; its buffer is bound to kernel argument gA
//   B      - destination array; its buffer is bound to kernel argument gB
//   length - global work size for the launch (one work-item per element)
// NOTE(review): A and B are taken by value, so this function operates on its
// own af::array objects rather than the caller's. Whether writes made through
// B.device<cl_mem>() are visible in the caller's array depends on ArrayFire's
// copy semantics — confirm before relying on the caller's B being updated.
void arraycopy(af::array A, af::array B,size_t length) {
// 2. Obtain the device, context, and queue used by ArrayFire
// (function-local statics: initialised on the first call and reused afterwards)
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
// For the sake of readability, we have omitted error checking.
// (status is written by the OpenCL calls below but never read.)
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel. This happens on every call to arraycopy.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
// 1-dimensional launch with `length` work-items; local size left to the runtime.
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_A and d_B, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
}
`main` 函数:
int main()
{
size_t length = 10;
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
arraycopy(A, B, length);
af_print(B);//does not match A
}
B的最终值没有改变,为什么会这样?我该怎么做才能使它正常工作?
答案 0 :(得分:2)
您是按值而不是按引用将 `af::array` 传递给 `arraycopy` 的,因此无论您在 `arraycopy` 内部做什么,`main` 中的 `A` 和 `B` 都保持不变。您可以在参数列表中按引用传递 `B`:`af::array &B`。我还建议按照惯例通过 const 引用传递 `A`(`const af::array &A`),以避免不必要的复制。
答案 1 :(得分:1)
您看到的行为背后的原因是引用计数。但这确实不是 bug,而且与 C++ 语言本身的行为一致。
通过赋值或等效操作创建的 `af::array` 对象只复制元数据,并持有一个指向数据的共享指针。
在您把代码写成函数的版本中,B 是按值传递的,因此 `arraycopy` 函数内部的 B 只是 `main` 函数中 B 的元数据副本,并与 `main` 中的数组 B 共享指向数据的指针。此时,如果用户调用 `device` 来获取指针,我们会假定这是为了向该指针所指的位置写入数据。因此,当对一个共享指针引用计数大于 1 的数组对象调用 `device` 时,我们会先复制原始数组(即 `main` 中的 B),然后返回指向这块新内存的指针。所以,如果在 `arraycopy` 内部执行 `af_print(B)`,您会看到正确的值。这本质上就是写时复制(copy-on-write)——由于 B 是按值传递的,您在 `main` 中看不到 `arraycopy` 函数对 B 所做的修改。
我在第一行说它与 C++ 的行为一致,是因为:如果需要在函数中修改对象 B,就必须按引用传递它。按值传递只会让修改在函数内部生效——这正是 ArrayFire 处理 `af::array` 对象的方式。
希望这能消除您的困惑。
Pradeep。 ArrayFire开发团队。