oclLoadProgSource问题

时间:2015-05-08 22:37:33

标签: c++ opencl

我目前正在使用OpenCL和Ibex来尝试实现并行化的区间矩阵乘法。但是,每当我执行命令“cclc clTest”进行编译时,都会收到以下错误:

clTest.cpp:78:68: error: ‘oclLoadProgSource’ was not declared in this scope
clTest.cpp:105:112: error: ‘clEnqueueNdRangeKernel’ was not declared in this scope

我参考了下面这个网站,并认为需要用到其中给出的那些函数才能通过编译。

http://gpgpu-computing4.blogspot.com/2009/09/matrix-multiplication-2-opencl.html

clTest.cpp文件如下。

#define PROGRAM_FILE "clMult.cl"
#define KERNEL_FUNC1 "genMatrix"

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <OclUtils.hpp>
//#include <CLUtil.hpp>
#include "ibex.h"
#include <time.h>
#include <iostream>

// Dimension of the square interval matrices; main also passes it to srand() as the seed.
#define N 16
// Not referenced by any code visible in this file.
#define MAX_SRC_SIZE (0x100000)

using namespace std;
using namespace ibex;

// Fills the first n x n entries of m (received by value) with random intervals
// and returns the filled copy. Defined after main.
IntervalMatrix genMatrix(int n, IntervalMatrix m);

int main(){
        srand(N);

        //Allocate memory for host a and x
        unsigned int size = N * N;
        unsigned int memSize = sizeof(IntervalMatrix) * size;
        IntervalMatrix a(N, N); //= (IntervalMatrix) malloc(memSize);
        IntervalMatrix x(N, N); //= (IntervalMatrix) malloc(memSize);

        //Initialize a and b
        a = genMatrix(N, a);
        x = genMatrix(N, x);

        //Allocate memory for host b
        IntervalMatrix b(N, N); //= (IntervalMatrix) malloc(memSize);

        //Need to initialize values of b to [0, 0], incorrect otherwise
        for(int i = 0; i < N; i++){
                for(int j = 0; j < N; j++){
                        b[i][j] = Interval(0, 0);
                }
        }

        //Initialize OpenCL variables
        cl_context clGPUContext;
        cl_command_queue clCommandQue;
        cl_program clProgram;
        cl_kernel clKernel;

        size_t dataBytes;
        size_t kernelLength;
        cl_int errcode;

        cl_mem deviceA;
        cl_mem deviceX;
        cl_mem deviceB;

        //Initialize OpenCL
        clGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode);
        //shrCheckError(errcode, CL_SUCCESS);

        //Get list of GPU devices associated with context
        errcode = clGetContextInfo(clGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes);
        cl_device_id *clDevices = (cl_device_id *) malloc(dataBytes);
        errcode |= clGetContextInfo(clGPUContext, CL_CONTEXT_DEVICES, dataBytes, clDevices, NULL);
        //shrCheckError(errcode, CL_SUCCESS);

        //Create command queue
        clCommandQue = clCreateCommandQueue(clGPUContext, clDevices[0], 0, &errcode);
        //shrCheckError(errcode, CL_SUCCESS);

        //Device memory
        deviceB = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, memSize, NULL, &errcode);
        deviceA = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, memSize, &a, &errcode);
        deviceX = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, memSize, &x, &errcode);

        //Load and build kernel
        char *clCl_Mult = oclLoadProgSource("clMult.cl", "", &kernelLength);
        //shrCheckError(clCl_Mult != NULL, shrTRUE);

        clProgram = clCreateProgramWithSource(clGPUContext, 1, (const char **)&clCl_Mult, &kernelLength, &errcode);
        //shrCheckError(errcode, CL_SUCCESS);

        errcode = clBuildProgram(clProgram, 0, NULL, NULL, NULL, NULL);
        //shrCheckError(errcode, CL_SUCCESS);

        clKernel = clCreateKernel(clProgram, "cl_mult", &errcode);
        //shrCheckError(errcode, CL_SUCCESS);

        size_t localWorkSize[2], globalWorkSize[2];

        int n = N;

        errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&n);
        errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&deviceA);
        errcode |= clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void *)&deviceX);
        errcode |= clSetKernelArg(clKernel, 3, sizeof(int), (void *)&deviceB);
        //shrCheckError(errcode, CL_SUCCESS);

        localWorkSize[0] = 16;
        localWorkSize[1] = 16;
        globalWorkSize[0] = 1024;
        globalWorkSize[1] = 1024;

        errcode = clEnqueueNdRangeKernel(clCommandQue, clKernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
        //shrCheckError(errcode, CL_SUCCESS);

        //Retrieve result from device
        errcode = clEnqueueReadBuffer(clCommandQue, deviceB, CL_TRUE, 0, memSize, &b, 0, NULL, NULL);
        //shrCheckError(errcode, CL_SUCCESS);
        //Print results


        //Free memory
        free(&a);
        free(&x);
        free(&b);
        clReleaseMemObject(deviceA);
        clReleaseMemObject(deviceB);
        clReleaseMemObject(deviceX);
        free(clDevices);
        free(clCl_Mult);
        clReleaseContext(clGPUContext);
        clReleaseKernel(clKernel);
        clReleaseProgram(clProgram);
        clReleaseCommandQueue(clCommandQue);
}

/*
 * Overwrites the first n x n entries of m with random intervals and returns m.
 * Each entry is built from two rand() % n draws; the smaller draw becomes the
 * lower bound and the larger one the upper bound.
 */
IntervalMatrix genMatrix(int n, IntervalMatrix m){
        for(int row = 0; row < n; ++row){
                for(int col = 0; col < n; ++col){
                        // Two separate statements keep the order of the rand() calls fixed.
                        const double first = rand() % n;
                        const double second = rand() % n;
                        // Put the draws in ascending order so lower <= upper always holds.
                        m[row][col] = (first <= second) ? Interval(first, second)
                                                        : Interval(second, first);
                }
        }
        return m;
}

我不确定我做错了什么。该站点没有列出任何有关用户创建的oclLoadProgSource方法或clEnqueueNdRangeKernel方法的信息。我认为这些可能是库函数。包含乘法方法的文件名是clMult.cl,如有必要,我可以发布。我该怎么办?

0 个答案:

没有答案