OpenCL:启用寄存器溢出(register spilling)的选项

时间:2014-01-29 11:59:36

标签: java recursion opencl gpu raytracing

我把 scratchapixel.com 网站上的 C++ 光线跟踪算法改写成了 OpenCL 形式(由 Java 主机代码构建,为 GPU 编译),但它在深度值大于或等于 7 时无法工作。当我将深度设置为 7(或更高)时,编译器报告:

Error:E013:Insufficient Private Resources! 

这是因为函数是半递归的:每一层函数都会再占用一份 32 位私有寄存器(60 多个)。我需要让这些寄存器溢出(spill)到主内存,这样就可以在不把内核改写成迭代版本的情况下设置更高的深度值。寄存器溢出会不会是个坏主意?如果不是,我该如何启用它?

如果我把球体数量设置为大于 100 左右的值,编译器会报告:

Frontend phase failed compilation.
Error: Creating kernel rayTraceSphereRender failed!

但是在 sphereNumber 达到 4096 之前,缓冲区总数和缓冲区长度都是不变的,所以这里改变的只是 for 循环的上界——从 100 改为 110,编译器就给出了该错误。我怀疑编译器自动展开了循环,从而造成了大得多的寄存器压力。这导致需要更多的私有寄存器(于是又需要寄存器溢出)。

以下是主机构建内核字符串的方式:

// Appends the terminating stub of the generated trace chain to the kernel
// source: a function named trace<depth+1> with the same signature as the
// generated trace functions, whose body ignores every argument and returns
// (float4)(0,0,0,0). The deepest generated function, trace<depth>, calls
// trace<iteration+1>, so this stub is the callee that ends the chain.
kernelx+=

                 "float4 trace"+(depth+1)+"(float4 *rayorig, float4 *raydir,__global sphereObject *spheres, int sphereNum, int depth, int threadNo ){return (float4)(0,0,0,0);}";



        // Emits one OpenCL function per depth level, trace<depth> down to trace0.
        // Each generated trace<iteration> calls trace<iteration+1> for its
        // reflection and refraction rays, so recursion is emulated by a chain of
        // distinct functions. The loop counts downwards, so the text of each
        // callee is appended to kernelx before the text of its caller.
        //
        // NOTE: the Java variable `depth` is baked into the source as the literal
        // value of MAX_RAY_DEPTH, while the identifier `depth` inside the string
        // literals is the generated function's own int parameter (the current
        // level, passed on as depth+1).
        //
        // The helpers intersect, initElem, copyElem, dot3X and mixx and the
        // sphereObject type are referenced by the generated code but are not
        // defined in this fragment.
        for(int iteration=depth;iteration>=0;iteration--)
        {
        // Append the source text of trace<iteration>; this per-level duplication
        // is what makes the traceX(...) functions "semi-recursive".
        kernelx+=   

                 "float4 trace"+iteration+"(float4 *rayorig, float4 *raydir,__global sphereObject *spheres, int sphereNum, int depth, int threadNo )"
                + "{"
                + "                   int MAX_RAY_DEPTH="+depth+";"
                        // Per-call locals of the generated function, all zero-initialised.
                        + "float facingratio =0; float fresneleffect=0;"
                        + "float4 refldir=(float4)(0,0,0,0);"
                        + "float4 arg00=(float4)(0,0,0,0);  float4 reflection=(float4)(0,0,0,0);float4 refraction=(float4)(0,0,0,0);"
                        + "float4 refrdir=(float4)(0,0,0,0); float4 arg01=(float4)(0,0,0,0);  "
                        + "float4 surfaceColor=(float4)(0,0,0,0);"
                        + "float4 phit=(float4)(0,0,0,0); "
                        + "float4 nhit=(float4)(0,0,0,0);"
                        + "float bias=0.000f;     "
                        + "float4 traceTmpReturn=(float4)(0,0,0,0);"
                        + "float tnear=100000000.0f;  "
                        + "sphereObject so;"
                + "                   initElem(&so);  "
                // Nearest-hit search: test the ray against every sphere and copy
                // the sphere with the smallest hit distance (tnear) into `so`.
                + "                   for(int i=0;i<sphereNum;i++)"
                + "                   {"
                + "                           float t0=100000000.0f;float t1=100000000.0f;"
                + "                           if(intersect(rayorig[0],raydir[0],&t0,&t1,spheres[i].center,spheres[i].radius)==1)" 
                + "                           {"
                + "                                  if(t0<0){t0=t1;}"
                + "                                  if(t0<tnear)"
                + "                                  {"
                + "                                        tnear=t0;"
                //+ "                                        so=&spheres[i];"
                + "                                        copyElem(&so,spheres[i]);"
                + "                                  }"  
                + "                           }"
                + "                    }"
                + "                      "
                // Early out with a zero colour when so.radius is below -0.5f.
                // NOTE(review): presumably initElem stores a negative radius as a
                // "nothing hit" sentinel - initElem is not visible here, confirm.
                + "                    if(so.radius<-0.5f){ return (float4)(0,0,0,0);} "
                + ""
                // Hit point and unit normal; the normal is flipped (and `inside`
                // set) when it points the same way as the ray direction.
                + "                    surfaceColor=(float4)(0,0,0,0);"
                + "                    phit=rayorig[0]+raydir[0]*tnear;"
                + "                    nhit=phit-so.center;"
                + "                    nhit=normalize(nhit);"
                + "                    bias=0.001f;"
                + "                    bool inside=false;"
                + "                    if(dot3X(raydir[0],nhit)>0){nhit=-nhit; inside=true;}"
                // Transparent or reflective surface below the depth limit: trace
                // secondary rays through the next function in the chain.
                + "                    if(((so.transparency > 0) || (so.reflection > 0))&& (depth<MAX_RAY_DEPTH) )"
                + "                    {"
                + "                         facingratio = -dot3X(raydir[0],nhit);"
                + "                         fresneleffect= mixx(pow(1.0f-facingratio,3),1.0f,0.1f);"
                + "                         refldir = raydir[0] - nhit*2.0f*dot3X(raydir[0],nhit);"
                + "                         refldir=normalize(refldir);"
                + "                         arg00=phit+nhit*bias;"
                + "                         reflection=trace"+(iteration+1)+"(&arg00,&refldir,spheres,sphereNum,depth+1,threadNo);"
                + "                         refraction=(float4)(0,0,0,0);"
                // The refraction ray is traced only for transparent spheres; it
                // starts at phit-nhit*bias, the reflection ray at phit+nhit*bias.
                + "                         if(so.transparency>0)"
                + "                         {"
                + "                               float ior=1.1f; float eta=(inside)?ior:1/ior; "
                + "                               float cosi=-dot3X(nhit,raydir[0]);"
                + "                               float k=1.0f-eta*eta*(1.0f-cosi*cosi);"
                + "                               refrdir = raydir[0]*eta+nhit*(eta*cosi-sqrt(k));"
                + "                               refrdir=normalize(refrdir);"
                + "                               arg01=phit-nhit*bias;"
                + "                               refraction=trace"+(iteration+1)+"(&arg01,&refrdir, spheres,sphereNum, depth+1, threadNo);"
                + "                                "
                + "                         }"
                + "                         surfaceColor=(reflection*fresneleffect+refraction*(1.0f-fresneleffect)*so.transparency)*so.surfaceColor;"
                + ""
                + "                    }"
                + "                    else"
                + "                    {"
                // Otherwise: every sphere with emissionColor.x>0 is treated as a
                // light. A shadow ray from the biased hit point is tested against
                // all other spheres; the first intersection zeroes `transmission`
                // and stops the inner loop.
                + "                         for(int i=0;i<sphereNum;i++)"
                + "                         {"
                + "                                if(spheres[i].emissionColor.x>0)"
                + "                                {"
                + "                                      float4 transmission=(float4)(1,1,1,1);"
                + "                                      float4 lightDirection=spheres[i].center-phit;"
                + "                                      lightDirection=normalize(lightDirection);"
                + "                                      for(int j=0;j<sphereNum;j++)"
                + "                                      {"
                + "                                            if(i!=j)"
                + "                                            {"
                + "                                                float t0,t1;"
                + "                                                float4 arg02=phit+nhit*bias;"
                + "                                                if(intersect(arg02,lightDirection,&t0,&t1,spheres[j].center,spheres[j].radius)==1)"
                + "                                                {"
                + "                                                      transmission= (float4)(0,0,0,0);break;"
                + "                                                } "
                + "                                            } "
                + "                                      }"
                + "                                      surfaceColor += so.surfaceColor*transmission*max(0.0f, dot3X(nhit,lightDirection))*spheres[i].emissionColor;"

                + "                                 }"
                + "                         }"
                + "                    }"
                + ""
                // Result of this level: shaded colour plus the hit sphere's own emission.
                + "                    return surfaceColor+so.emissionColor;  "

                + "}";
        }






        // Appends the __kernel entry point rayTraceSphereRender to the kernel
        // source. The Java values raytraceSphereNumber0, n and sayi2 are spliced
        // into the text as literals, so the emitted source changes whenever they
        // change. Of the declared buffer arguments, the emitted body reads only
        // `spheres` and writes only `image`; the remaining arguments and `lid`
        // are declared but not used in this body.
        kernelx+= "__kernel void rayTraceSphereRender(__global float4 *center, __global float *radius,"
        + "                                      __global float4 *surfaceColor, __global float4 *emissionColor,"
        + "                                      __global float *transparency,"
        + "                                      __global float *reflection, __global float4 *image,"
        + "                                      __global sphereObject *spheres)"+
        "{"+
        "    int gid=get_global_id(0);" +
        "    int lid=get_local_id(0);"
        + "  "
        + "  {"
        // Image is n-by-n; the sphere count is the literal raytraceSphereNumber0.
        + "        int numSphr="+raytraceSphereNumber0+";"
        + "        int width="+n+", height="+n+";"
        + "        float invWidth = 1.0f/((float)width);"
        + "        float invHeight= 1.0f/((float)height);"
        + "        float fov = 30.0f; float aspectratio= ((float)width)/((float)height);"
        + "        float angle=tan(3.141592653589793f*0.5f*fov/(180.0f));"
        // Pixel coordinates from the global id: y = gid / n, x = gid % n.
        + "        int y=gid/"+n+";"
        + "        "
        + "        int x=gid%"+n+";"
        + "        "

        // Primary ray: direction from the pixel coordinates, origin `ref` at (0,0,0,0).
        + "                     float xx=(2.0f*((x-0.5f)*invWidth)-1.0f)*angle*aspectratio;"
        + "                     float yy=(1.0f-2.0f*((y-0.5f)*invHeight))*angle;"
        + "                     float4 raydir=(float4)(xx,yy,-1.0f,0.0f);"

        + "                     raydir=normalize(raydir);"
        + "                     float4 ref=(float4)(0,0,0,0);"
        + "                     float4 upp=(float4)(0,0,0,0);"
        // Enter the generated chain at trace0 with depth 0.
        + "                     upp=trace0(&ref,&raydir,spheres,numSphr,0,gid);"
        + "                     upp.w=1.0;"
        // The colour is stored only when x > width - width/sayi2, at index y + x*n.
        + "              if(x>(width-width/"+sayi2+"))   image[y+x*"+n+"]=upp;"

        + "                      "
        + "         "
        + "        "
        + "  }"
        + "  "
         //   


        +"}";

主机:FX8150 windows7-64bit家庭高级版,Java-64bit(Eclipse)

设备:HD7870,Catalyst 13.12 驱动

在较低的深度和球体数量下,代码可以在 GPU 上运行;在 CPU 上,无论我设置怎样的深度和球体数量都没有问题:

enter image description here

0 个答案:

没有答案