cuda编程分段故障

时间:2017-07-24 09:14:24

标签: c++ cuda

我是CUDA编程新手并编写一个简单的CUDA程序来进行简单的计算,但当我在CPU上打印特定的浮点数时,它给了我分段错误。 我被困在这请求帮助!

#include <cuda.h>
#include <cuComplex.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
#include <math.h>
#include "generate.h"   // it is used to generate vectors X1,X2,Y1,Y2
#include <bits/stdc++.h>

using namespace std;

__global__ void shdce(float *dx1, float *dx2, float *dy1, float *dy2,float *dax, float *dbx, float *day, float *dby, float *dsr, float *deta1, float *deta2)
{
    int ii = threadIdx.x;
    dax[ii] = (dx2[ii] - dx1[ii])/2.0;
    dbx[ii] = (dx2[ii] + dx1[ii])/2.0;

    day[ii] = (dy2[ii] - dy1[ii])/2.0;
    dby[ii] = (dy2[ii] - dy1[ii])/2.0;

    // element normal vector (float)
    dsr[ii] = sqrt(pow(dax[ii],2) + pow(day[ii],2));
    deta1[ii] = (dy2[ii]-dy1[ii])/(2.0*dsr[ii]);
    deta2[ii] = (dx2[ii]-dx1[ii])/(2.0*dsr[ii]);
}

int main()
{
    vector<float> X1, Y1, X2, Y2;
    int size1, size2;
    float *dx1, *dx2, *dy1, *dy2, *dax, *dbx, *day, *dby, *dsr, *deta1, *deta2;

    X1 = generate1();                                         //X1=[0:10:1]
    size1 = X1.size();

    Y1 = generate3(size1);                  //Y1=zeroes (sizeof X1)     

    X2 = generate2();                       //X2=[1:11:1]
    size2 = X2.size();

    Y2 = generate3(size2);                  //Y2=zeroes (sizeof X2)

    float eta2[size1];
    for(int i = 0; i < size1; i++)
    {
        eta2[i]=0.0;
    }
    cudaMalloc( (void**)&dx1, size1 * sizeof(float) );
    cudaMalloc( (void**)&dx2, size2 * sizeof(float) );
    cudaMalloc( (void**)&dy1, size1 * sizeof(float) );
    cudaMalloc( (void**)&dy2, size2 * sizeof(float) );
    cudaMalloc( (void**)&dax, size1 * sizeof(float) );
    cudaMalloc( (void**)&dbx, size1 * sizeof(float) );
    cudaMalloc( (void**)&day, size1 * sizeof(float) );
    cudaMalloc( (void**)&dby, size1 * sizeof(float) );
    cudaMalloc( (void**)&dsr, size1 * sizeof(float) );
    cudaMalloc( (void**)&deta1, size1 * sizeof(float) );
    cudaMalloc( (void**)&deta2, size1 * sizeof(float) );

    cudaMemcpy( dx1, &X1, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dx2, &X2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dy1, &Y1, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dy2, &Y2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( deta2, &eta2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    dim3 dimBlock( size1, 1 );
    dim3 dimGrid( 1, 1 );
    shdce <<< dimGrid, dimBlock >>> dx1,dx2,dy1,dy2,dax,dbx,day,dby,dsr,deta1,deta2);
    cudaMemcpy( eta2, deta2, size1 * sizeof(float), cudaMemcpyDeviceToHost);

    for(int i=0;i<size1;i++)
    {
        printf("%f \n",deta2[i]);
    }
    return 0;
}

1 个答案:

答案 0 :(得分:1)

cudaMemcpy来电时很可能出现问题:

cudaMemcpy( dx1, &X1, size1 * sizeof(float), cudaMemcpyHostToDevice );

更确切地说,您使用&X1(等)。这不会复制向量中的数据,而是复制向量对象本身的内容。

你应该使用例如&X1[0]获取指向矢量包装的实际数据的指针。