我正在使用OpenCL,而我只有一颗i3双核CPU,因此我只拥有一个设备(我的CPU)。所以基本上,我猜我的HOST(CPU)同时也是DEVICE。我尝试启动内核,但分配给DEVICE(也就是主机)的任务永远不会结束。考虑这个问题之后,我觉得让HOST等待DEVICE(也就是它自己)完成似乎是不可能的。有人知道解决这个问题的方法吗?也许可以使用clCreateSubDevices,把我唯一的设备细分为主机和真正的设备?
答案 0 :(得分:1)
下面是我的Java(JOCL)代码,请帮我指出其中的错误。实际上,当我在没有clFinish(commandQueue);(位于代码底部)的情况下运行以下代码时,得到如下输出:
I use the platform Intel(R) OpenCL / Enqueueing kernels... / Pause for 15000 ms. / Task INCOMPLETE
如果我添加clFinish(commandQueue),我有输出,我的任务完成了:
I use the platform Intel(R) OpenCL / Enqueueing kernels... / Event kernel status: CL_COMPLETE event ID: 10 runtime: 2.631 ms. / Pause for 15000 ms. / Task COMPLETE
那么为什么单个clFinish()指令允许我完成任务?提前谢谢你的解释。
/**
 * JOCL sample that runs an element-wise vector addition kernel on a CPU
 * OpenCL device and reports, via an event callback and a final status
 * query, whether the enqueued kernel completed.
 */
public class Test_CPU
{
    /** OpenCL C source of the kernel computing {@code c[i] = a[i] + b[i]}. */
    private static final String programSource0 =
        "__kernel void vectorAdd(" +
        " __global const float *a,"+
        " __global const float *b, " +
        " __global float *c)"+
        "{"+
        " int gid = get_global_id(0);"+
        " c[gid] = a[gid]+b[gid];"+
        "}";

    /**
     * The entry point of this sample.
     *
     * @param args Not used
     * @throws IllegalStateException if the configured platform or device
     *         index is not available on this machine
     */
    public static void main(String args[])
    {
        // Callback invoked when the kernel event reaches the registered
        // status. It prints the kernel execution time in milliseconds.
        // user_data is the integer tag passed to clSetEventCallback; it is
        // used to match the profiling output to the associated kernel.
        EventCallbackFunction kernelCommandEvent = new EventCallbackFunction()
        {
            @Override
            public void function(cl_event event, int event_status, Object user_data)
            {
                int evID = (int)user_data;
                long[] startTime = new long[1];
                long[] endTime = new long[1];
                // Measure START..END (actual execution on the device) rather
                // than QUEUED..END, which would also include the time the
                // command spent waiting in the queue.
                clGetEventProfilingInfo(event, CL.CL_PROFILING_COMMAND_START, Sizeof.cl_long, Pointer.to(startTime), null);
                clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, Sizeof.cl_long, Pointer.to(endTime), null);
                // Profiling counters are in nanoseconds; 1.0e-6 converts to ms.
                double run_time = (double)(endTime[0] - startTime[0]);
                System.out.println("Event kernel status: " + CL.stringFor_command_execution_status(event_status) + " event ID: " + evID + " runtime: " + String.format("%8.3f", (run_time*1.0e-6)) + " ms.");
            }
        };

        // Initialize the input data
        int n = 1000000;
        float srcArrayA[] = new float[n];
        float srcArrayB[] = new float[n];
        for (int i = 0; i < n; i++)
        {
            srcArrayA[i] = i;
            srcArrayB[i] = i;
        }
        Pointer srcA = Pointer.to(srcArrayA);
        Pointer srcB = Pointer.to(srcArrayB);

        // The platform, device type and device number that will be used
        final int platformIndex = 1;
        final long deviceType = CL_DEVICE_TYPE_CPU;
        final int deviceIndex = 0;

        // Enable exceptions and subsequently omit error checks in this sample
        CL.setExceptionsEnabled(true);

        // Obtain the number of platforms
        int numPlatformsArray[] = new int[1];
        clGetPlatformIDs(0, null, numPlatformsArray);
        int numPlatforms = numPlatformsArray[0];
        if (platformIndex >= numPlatforms)
        {
            throw new IllegalStateException("Platform index " + platformIndex
                + " is not available: only " + numPlatforms + " OpenCL platform(s) found");
        }

        // Obtain a platform ID
        cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
        clGetPlatformIDs(platforms.length, platforms, null);
        cl_platform_id platform = platforms[platformIndex];

        // Query the platform name: first its size, then the bytes themselves
        long size[] = new long[1];
        clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, null, size);
        byte buffer[] = new byte[(int)size[0]];
        clGetPlatformInfo(platform, CL_PLATFORM_NAME, buffer.length, Pointer.to(buffer), null);
        // Create a string from the buffer (excluding the trailing \0 byte)
        System.out.println("I use the platform " + new String(buffer, 0, buffer.length-1));

        // Initialize the context properties
        cl_context_properties contextProperties = new cl_context_properties();
        contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);

        // Obtain the number of devices for the platform
        int numDevicesArray[] = new int[1];
        clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
        int numDevices = numDevicesArray[0];
        if (deviceIndex >= numDevices)
        {
            throw new IllegalStateException("Device index " + deviceIndex
                + " is not available: only " + numDevices + " matching device(s) found");
        }

        // Obtain a device ID
        cl_device_id devices[] = new cl_device_id[numDevices];
        clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
        cl_device_id device = devices[deviceIndex];

        // Create a context for the selected device
        cl_context context = clCreateContext(contextProperties, 1, new cl_device_id[]{device}, null, null, null);

        // Create a command-queue on the SAME device the context was created
        // for, with profiling info enabled (required by the callback above)
        long properties = CL.CL_QUEUE_PROFILING_ENABLE;
        cl_command_queue commandQueue = CL.clCreateCommandQueue(context, device, properties, null);

        // Allocate the buffer memory objects
        cl_mem srcMemA = CL.clCreateBuffer(context, CL.CL_MEM_READ_ONLY | CL.CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcA, null);
        cl_mem srcMemB = CL.clCreateBuffer(context, CL.CL_MEM_READ_ONLY | CL.CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcB, null);
        cl_mem dstMem0 = CL.clCreateBuffer(context, CL.CL_MEM_READ_WRITE, Sizeof.cl_float * n, null, null);

        // Create and build the program, then create the kernel
        cl_program program0 = CL.clCreateProgramWithSource(context, 1, new String[]{ programSource0 }, null, null);
        CL.clBuildProgram(program0, 0, null, null, null, null);
        cl_kernel kernel0 = CL.clCreateKernel(program0, "vectorAdd", null);

        // Set the arguments
        CL.clSetKernelArg(kernel0, 0, Sizeof.cl_mem, Pointer.to(srcMemA));
        CL.clSetKernelArg(kernel0, 1, Sizeof.cl_mem, Pointer.to(srcMemB));
        CL.clSetKernelArg(kernel0, 2, Sizeof.cl_mem, Pointer.to(dstMem0));

        // Set work-item dimensions and execute the kernel
        long globalWorkSize[] = new long[]{n};
        System.out.println("Enqueueing kernels...");
        cl_event kernelEvent = new cl_event();
        clEnqueueNDRangeKernel(commandQueue, kernel0, 1, null, globalWorkSize, null, 0, null, kernelEvent);

        // Tag handed to the callback as user_data
        final int eventTag = 10;
        clSetEventCallback(kernelEvent, CL_COMPLETE, kernelCommandEvent, eventTag);

        // Block until every command previously enqueued on the queue has
        // completed.
        clFinish(commandQueue);

        System.out.println("Pause for 15000 ms.");
        try
        {
            Thread.sleep(15000);
        }
        catch(InterruptedException iEx)
        {
            iEx.printStackTrace();
            // Restore the interrupt flag so the interruption is not lost
            Thread.currentThread().interrupt();
        }

        // See if task completed
        int[] ok = new int[1];
        clGetEventInfo(kernelEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, Sizeof.cl_int, Pointer.to(ok), null);
        if (ok[0] == CL_COMPLETE)
        {
            System.out.println("Task COMPLETE");
        }
        else
        {
            System.out.println("Task INCOMPLETE");
        }

        // Release every OpenCL object created above
        CL.clReleaseEvent(kernelEvent);
        CL.clReleaseMemObject(srcMemA);
        CL.clReleaseMemObject(srcMemB);
        CL.clReleaseMemObject(dstMem0);
        CL.clReleaseKernel(kernel0);
        CL.clReleaseProgram(program0);
        CL.clReleaseCommandQueue(commandQueue);
        CL.clReleaseContext(context);
    }
}
答案 1 :(得分:-1)
我认为我的想法并不是那么糟糕,因为实际上,你需要以编程方式强制主机切换到DEVICE工作,在这种情况下,HOST和DEVICE都是相同的硬件。
实际上,可以将HOST同时作为DEVICE,但是为了让DEVICE开始工作,你需要调用至少一个阻塞函数(clFinish()或clEnqueueReadBuffer(..., CL_TRUE, ...))。否则,HOST会一直忙于自己的工作,永远不会切换到DEVICE的工作。我试过添加sleep()函数,但它不起作用,你确实需要添加一个阻塞的OpenCL函数。
无论如何,谢谢!