对于以下代码,OpenCL在我的虚拟机上不起作用

时间:2019-11-06 09:45:17

标签: virtual-machine gpu opencl

我在一台HP机器上的Oracle虚拟机中使用Ubuntu 18.04。我尝试安装OpenCL并运行下面的代码,但收到了下文列出的错误信息。这段代码对每个 i 计算 $\sin^2(i) + \cos^2(i)$,然后对所有结果取平均值。预期结果应为1.000,但可能由于安装或机器本身的某些问题,我得到了很多错误,实际输出为0。

我尝试过安装和卸载beignet,但都没有解决问题。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <CL/opencl.h>

// OpenCL C source handed to clCreateProgramWithSource at run time.
// It defines the vecAdd kernel: the work item with global id `id` stores
// a[id] + b[id] into c[id], provided id < n. The cl_khr_fp64 pragma is there
// because the kernel operates on doubles.
// The text is spelled as adjacent string literals, which the compiler joins
// into one string; the padding inside each literal is part of the source text.
const char *kernelSource =
    "\n"
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable                    \n"
    "__kernel void vecAdd(  __global double *a,                       \n"
    "                       __global double *b,                       \n"
    "                       __global double *c,                       \n"
    "                       const unsigned int n)                    \n"
    "{                                                               \n"
    "    //Get our global thread ID                                  \n"
    "    int id = get_global_id(0);                                  \n"
    "                                                                \n"
    "    //Make sure we do not go out of bounds                      \n"
    "    if (id < n)                                                 \n"
    "        c[id] = a[id] + b[id];                                  \n"
    "}                                                               \n"
    "\n";

int main( int argc, char* argv[] )
{
    // Length of vectors
   unsigned int n = 100000;

// Host input vectors
double *h_a;
double *h_b;
// Host output vector
double *h_c;

// Device input buffers
cl_mem d_a;
cl_mem d_b;
// Device output buffer
cl_mem d_c;

cl_platform_id cpPlatform;        // OpenCL platform
cl_device_id device_id;           // device ID
cl_context context;               // context
cl_command_queue queue;           // command queue
cl_program program;               // program
cl_kernel kernel;                 // kernel

// Size, in bytes, of each vector
size_t bytes = n*sizeof(double);

// Allocate memory for each vector on host
h_a = (double*)malloc(bytes);
h_b = (double*)malloc(bytes);
h_c = (double*)malloc(bytes);

// Initialize vectors on host
int i;
for( i = 0; i < n; i++ )
{
    h_a[i] = sinf(i)*sinf(i);
    h_b[i] = cosf(i)*cosf(i);
}

size_t globalSize, localSize;
cl_int err;

// Number of work items in each local work group
localSize = 64;

// Number of total work items - localSize must be devisor
globalSize = ceil(n/(float)localSize)*localSize;

// Bind to platform
err = clGetPlatformIDs(1, &cpPlatform, NULL);

// Get ID for the device
err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

// Create a context 
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);

// Create a command queue
queue = clCreateCommandQueue(context, device_id, 0, &err);

// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1,
                        (const char **) & kernelSource, NULL, &err);

// Build the program executable
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

// Create the compute kernel in the program we wish to run
kernel = clCreateKernel(program, "vecAdd", &err);

// Create the input and output arrays in device memory for our calculation
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);

// Write our data set into the input array in device memory
err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
                               bytes, h_a, 0, NULL, NULL);
err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
                               bytes, h_b, 0, NULL, NULL);

// Set the arguments to our compute kernel
err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);

// Execute the kernel over the entire range of the data set 
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
                                                          0, NULL, NULL);

// Wait for the command queue to get serviced before reading back results
clFinish(queue);

// Read the results from the device
clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
                            bytes, h_c, 0, NULL, NULL );

//Sum up vector c and print result divided by n, this should equal 1 within error
double sum = 0;
for(i=0; i<n; i++)
    sum += h_c[i];
printf("final result: %f\n", sum/n);

// release OpenCL resources
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);

//release host memory
free(h_a);
free(h_b);
free(h_c);

return 0;
}

这些是我收到的错误消息

    DRM_IOCTL_I915_GEM_APERTURE failed: Invalid argument
Assuming 131072kB available aperture size.
May lead to reduced performance or incorrect rendering.
get chip id failed: -1 [22]
param: 4, val: 0
DRM_IOCTL_I915_GEM_APERTURE failed: Invalid argument
Assuming 131072kB available aperture size.
May lead to reduced performance or incorrect rendering.
get chip id failed: -1 [22]
param: 4, val: 0
beignet-opencl-icd: no supported GPU found, this is probably the wrong opencl-icd package for this hardware
(If you have multiple ICDs installed and OpenCL works, you can ignore this message)
    final result: 0.000000

0 个答案:

没有答案