我目前正在使用OpenCL和Ibex来尝试实现并行化的区间矩阵乘法。但是,每当我使用命令“cclc clTest”进行编译时,都会收到以下错误:
clTest.cpp:78:68: error: ‘oclLoadProgSource’ was not declared in this scope
clTest.cpp:105:112: error: ‘clEnqueueNdRangeKernel’ was not declared in this scope
我参考了下面这个网站,认为需要用到它给出的这些函数才能完成编译。
http://gpgpu-computing4.blogspot.com/2009/09/matrix-multiplication-2-opencl.html
clTest.cpp文件如下。
// Name of the OpenCL kernel source file. main() currently repeats this
// literal instead of using the macro.
#define PROGRAM_FILE "clMult.cl"
// NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
#define KERNEL_FUNC1 "genMatrix"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <OclUtils.hpp>
//#include <CLUtil.hpp>
#include "ibex.h"
#include <time.h>
#include <iostream>
// Matrix dimension (matrices are N x N); main() also uses it as the srand() seed.
#define N 16
// NOTE(review): presumably an upper bound on the kernel source size; unused in the visible code.
#define MAX_SRC_SIZE (0x100000)
using namespace std;
using namespace ibex;
// Fills the leading n x n entries of m with random intervals whose bounds are
// integers in [0, n-1], and returns the filled matrix (defined after main()).
IntervalMatrix genMatrix(int n, IntervalMatrix m);
int main(){
srand(N);
//Allocate memory for host a and x
unsigned int size = N * N;
unsigned int memSize = sizeof(IntervalMatrix) * size;
IntervalMatrix a(N, N); //= (IntervalMatrix) malloc(memSize);
IntervalMatrix x(N, N); //= (IntervalMatrix) malloc(memSize);
//Initialize a and b
a = genMatrix(N, a);
x = genMatrix(N, x);
//Allocate memory for host b
IntervalMatrix b(N, N); //= (IntervalMatrix) malloc(memSize);
//Need to initialize values of b to [0, 0], incorrect otherwise
for(int i = 0; i < N; i++){
for(int j = 0; j < N; j++){
b[i][j] = Interval(0, 0);
}
}
//Initialize OpenCL variables
cl_context clGPUContext;
cl_command_queue clCommandQue;
cl_program clProgram;
cl_kernel clKernel;
size_t dataBytes;
size_t kernelLength;
cl_int errcode;
cl_mem deviceA;
cl_mem deviceX;
cl_mem deviceB;
//Initialize OpenCL
clGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode);
//shrCheckError(errcode, CL_SUCCESS);
//Get list of GPU devices associated with context
errcode = clGetContextInfo(clGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes);
cl_device_id *clDevices = (cl_device_id *) malloc(dataBytes);
errcode |= clGetContextInfo(clGPUContext, CL_CONTEXT_DEVICES, dataBytes, clDevices, NULL);
//shrCheckError(errcode, CL_SUCCESS);
//Create command queue
clCommandQue = clCreateCommandQueue(clGPUContext, clDevices[0], 0, &errcode);
//shrCheckError(errcode, CL_SUCCESS);
//Device memory
deviceB = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, memSize, NULL, &errcode);
deviceA = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, memSize, &a, &errcode);
deviceX = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, memSize, &x, &errcode);
//Load and build kernel
char *clCl_Mult = oclLoadProgSource("clMult.cl", "", &kernelLength);
//shrCheckError(clCl_Mult != NULL, shrTRUE);
clProgram = clCreateProgramWithSource(clGPUContext, 1, (const char **)&clCl_Mult, &kernelLength, &errcode);
//shrCheckError(errcode, CL_SUCCESS);
errcode = clBuildProgram(clProgram, 0, NULL, NULL, NULL, NULL);
//shrCheckError(errcode, CL_SUCCESS);
clKernel = clCreateKernel(clProgram, "cl_mult", &errcode);
//shrCheckError(errcode, CL_SUCCESS);
size_t localWorkSize[2], globalWorkSize[2];
int n = N;
errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&n);
errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&deviceA);
errcode |= clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void *)&deviceX);
errcode |= clSetKernelArg(clKernel, 3, sizeof(int), (void *)&deviceB);
//shrCheckError(errcode, CL_SUCCESS);
localWorkSize[0] = 16;
localWorkSize[1] = 16;
globalWorkSize[0] = 1024;
globalWorkSize[1] = 1024;
errcode = clEnqueueNdRangeKernel(clCommandQue, clKernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
//shrCheckError(errcode, CL_SUCCESS);
//Retrieve result from device
errcode = clEnqueueReadBuffer(clCommandQue, deviceB, CL_TRUE, 0, memSize, &b, 0, NULL, NULL);
//shrCheckError(errcode, CL_SUCCESS);
//Print results
//Free memory
free(&a);
free(&x);
free(&b);
clReleaseMemObject(deviceA);
clReleaseMemObject(deviceB);
clReleaseMemObject(deviceX);
free(clDevices);
free(clCl_Mult);
clReleaseContext(clGPUContext);
clReleaseKernel(clKernel);
clReleaseProgram(clProgram);
clReleaseCommandQueue(clCommandQue);
}
// Fills the leading n x n entries of m with random intervals and returns m.
// For each entry two values are drawn with rand() % n; the smaller becomes the
// lower bound and the larger the upper bound. m is taken by value, so the
// caller's matrix is only updated through the returned copy.
IntervalMatrix genMatrix(int n, IntervalMatrix m){
    for(int row = 0; row < n; ++row){
        for(int col = 0; col < n; ++col){
            // Draw order matters for reproducibility under a fixed seed.
            const double first = rand() % n;
            const double second = rand() % n;
            const bool ordered = (first <= second);
            const double lo = ordered ? first : second;
            const double hi = ordered ? second : first;
            m[row][col] = Interval(lo, hi);
        }
    }
    return m;
}
我不确定我做错了什么。该站点没有列出任何有关用户创建的oclLoadProgSource方法或clEnqueueNdRangeKernel方法的信息。我认为这些可能是库函数。包含乘法方法的文件名是clMult.cl,如有必要,我可以发布。我该怎么办?