OpenCL中离奇(bizarre)的段错误

时间:2018-07-09 16:28:36

标签: c segmentation-fault opencl macos-sierra

我尝试运行以下代码(它是从我最初的代码精简而来的,但仍然会导致同样的错误):

#include "randNorm.h"
#include <OpenCL/opencl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu_comp.h"

cl_program compute;
void setup(void) {
  cl_int error;
  char *src_full =
    "__kernel void prods(const size_t d,\n"
    "                    const size_t n,\n"
    "                    __global const double *v,\n"
    "                    __global const double *p,\n"
    "                    __global double *o) {\n"
    "  size_t x = get_global_id(0), y = get_global_id(1), z = get_global_id(3);\
\n"
    "  o[(x * n + y) * d + z] = v[x * d + z] * p[y * d + z];\n"
    "}\n";
  size_t len = strlen(src_full);
  compute = clCreateProgramWithSource(gpu_context, 1, (const char **)&src_full,
                                      &len, &error);
  if(error != CL_SUCCESS) {
    fprintf(stderr, "Error loading OpenCL code.\n");
    exit(1);
  }
  if(clBuildProgram(compute, 0, NULL, "", NULL, NULL) != CL_SUCCESS) {
    fprintf(stderr, "Error building program.\n");
    exit(1);
  }
}

/*
 * Fill the first n * d entries of `points` with values returned by
 * rand_norm() (declared outside this block, presumably in randNorm.h).
 *
 * n, d   - dimensions whose product is the number of doubles to write
 * points - destination array; must have room for at least n * d doubles
 */
void genRand(size_t n, size_t d, double *points) {
  const size_t total = n * d;
  size_t filled = 0;
  while(filled < total) {
    points[filled] = rand_norm();
    filled++;
  }
}

int main(void) {
  size_t m = 10, n = 70, d = 80;
  double *vh = malloc(sizeof(double) * m * d);
  double *ph = malloc(sizeof(double) * n * d);
  double *oh = malloc(sizeof(double) * m * n * d);
  for(size_t x = 0; x < m; x++)
    for(size_t y = 0; y < n; y++)
      for(size_t z = 0; z < n; z++)
        oh[(x * n + y) * d + z] = vh[x * d + z] * ph[y * d + z];
  fprintf(stderr, "That worked.\n");
  gpu_init();
  setup();
  cl_command_queue q = clCreateCommandQueue(gpu_context, the_gpu, 0, NULL);
  genRand(m, d, vh);
  genRand(m, d, ph);
  cl_mem v = clCreateBuffer(gpu_context,
                            CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR |
                            CL_MEM_HOST_NO_ACCESS, sizeof(double) * m * d,
                            (void *)vh, NULL);
  cl_mem p = clCreateBuffer(gpu_context,
                            CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR |
                            CL_MEM_HOST_NO_ACCESS, sizeof(double) * n * d,
                            (void *)ph, NULL);
  cl_mem o = clCreateBuffer(gpu_context,
                            CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
                            sizeof(double) * m * n * d, NULL, NULL);
  cl_kernel prods = clCreateKernel(compute, "prods", NULL);
  clSetKernelArg(prods, 0, sizeof(d), &d);
  clSetKernelArg(prods, 1, sizeof(n), &n);
  clSetKernelArg(prods, 2, sizeof(v), &v);
  clSetKernelArg(prods, 3, sizeof(p), &p);
  clSetKernelArg(prods, 4, sizeof(o), &o);
  size_t foo[3] = {m, n, d};
  clEnqueueNDRangeKernel(q, prods, 3, NULL, foo, NULL, 0, NULL, NULL);
  fprintf(stderr, "Fine to here.\n");
  clFlush(q);
  fprintf(stderr, "And here.\n");
  clFinish(q);
  fprintf(stderr, "But segfaults before here.\n");
}

由于某些原因,它会打印出:
    That worked.
    Fine to here.
    And here.
    Segmentation fault: 11

问题是,为什么这里会发生段错误?我完全无法理解:同样的计算由CPU以普通C代码(而不是OpenCL)运行时可以正常工作;由GPU作为OpenCL运行时甚至也能正常工作(在把double替换为unsigned long、并修改GPU查找代码以确保它在GPU上运行之后);但在CPU上作为OpenCL运行时却会发生段错误。WTF?

编辑: 弄清楚了,问题在于我调用了get_global_id(3),这是错误的:三维NDRange的维度索引从0开始,只有0、1、2有效,第三个维度应该用get_global_id(2)。

0 个答案:

没有答案