从设备到主机的 cudaMemcpy 崩溃

时间:2015-11-09 09:09:10

标签: c++ image-processing cuda gpgpu

我试图在 15 x 15 大小的图像块(patch)内找到 RGB 的最小值。

在 source.cpp 文件中执行到下面这一行时:
SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

程序崩溃了。以下是我的代码片段:

DarkPrior.h

#ifndef DARKPRIOR_H_INCLUDED
#define DARKPRIOR_H_INCLUDED

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "cuda.h"
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
 #include <iostream>
 #include "opencv2/opencv.hpp"

 /* Evaluates a CUDA runtime call once. If the returned status is not cudaSuccess,
    prints the file, line and error string to stderr, resets the device and
    terminates the process with EXIT_FAILURE. Wrapped in do/while(0) so that it
    behaves as a single statement. */
 #define SAFE_CALL(call)                                                         \
     do {                                                                        \
         const cudaError_t status_ = (call);                                     \
         if (status_ != cudaSuccess) {                                           \
             fprintf(stderr,                                                     \
                     "CUDA Error:\nFile = %s\nLine = %d\nReason = %s\n",         \
                     __FILE__, __LINE__, cudaGetErrorString(status_));           \
             cudaDeviceReset();                                                  \
             exit(EXIT_FAILURE);                                                 \
         }                                                                       \
     } while (0)


    // Host-side entry point implemented in minRGB.cu.
    //   image_d  : device buffer holding the input image; the implementation
    //              reinterprets it as one float3 per pixel.
    //   rgbmin_d : device buffer that receives one float per pixel.
    //   height   : number of image rows.
    //   width    : number of image columns.
    void dark_channel(float *image_d, float *rgbmin_d, int height, int width);



   #endif

Source.cpp

#include "DarkPrior.h"
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int main()
{
    //load the image
    Mat src = imread("foggy_river.jpg");

    //check whether image loaded is empty or not.
    if (src.empty())
    {
         cerr << "no image"; return -1;
    }

    //Mat rgbMin(src.size(), CV_MAKETYPE(src.depth(), 1));
   //   int step = src.step;
    float *image_h = NULL;
    float *image_d = NULL;
     float *Dark_d = NULL;
    float *Dark_h = NULL;
   //Mat rgbmin(src.size(), CV_MAKETYPE(src.depth(), 1));

   size_t size1 = src.step * src.rows * sizeof(float);
   size_t size2 = src.cols * src.rows * sizeof(float);

   image_h = (float *)malloc(size1);
   Dark_h = (float *)malloc(size1);

   SAFE_CALL(cudaMalloc((void**)&image_d, size1));
   SAFE_CALL(cudaMalloc((void**)&Dark_d, size2));

   //convert image from CV::MAT to float*.
   Mat dst;
   src.convertTo(dst, CV_32F);
   image_h = dst.ptr<float>();

   SAFE_CALL(cudaMemcpy(image_d, image_h, size1, cudaMemcpyHostToDevice));

   cout << "Calculating Minimum of RGB ..." << endl;
   dark_channel(image_d, Dark_d, src.rows, src.cols);

   SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

   Mat Dark_out(src.rows, src.cols, CV_32FC1, Dark_h);
   imwrite("MinRGB.jpg", Dark_out);

   cudaFree(image_d);
   cudaFree(Dark_d);

   //free(image_h);
   //free(rgbmin_h);

   return 0;
}

minRGB.cu

#include "DarkPrior.h"

//#define min(x,y) ((x<y)?x:y)

// Reads one element of a row-major buffer at column x, row y. Coordinates are
// first limited from below by 0 and then from above by width-1 / height-1, so
// out-of-range requests return the nearest border element.
__device__ float safe_get(float *rgbMin, int width, int height, int x, int y)
{
    // Clamp the column.
    int col = (x < 0) ? 0 : x;
    if (col > width - 1)
    {
        col = width - 1;
    }

    // Clamp the row.
    int row = (y < 0) ? 0 : y;
    if (row > height - 1)
    {
        row = height - 1;
    }

    // Flatten (row, col) into the linear offset.
    return rgbMin[row * width + col];
}

 // Returns the smaller of Minval and the minimum of rgbMin over the
 // (2*radius+1) x (2*radius+1) window centred on column x, row y. Window taps
 // that fall outside the image are clamped to the border by safe_get.
 //   rgbMin        : row-major width x height buffer to scan.
 //   width, height : image dimensions.
 //   radius        : half-size of the window.
 //   x, y          : centre of the window.
 //   Minval        : initial value of the running minimum.
 __device__ float estimate_minimum_patch(float *rgbMin, int width, int height, int radius, int x, int y, float Minval)
 {
     for (int i = -radius; i <= radius; i++)
     {
         for (int j = -radius; j <= radius; j++)
         {
             const float val = safe_get(rgbMin, width, height, x + i, y + j);

             Minval = fminf(val, Minval);
         }
     }

     // Minval is a by-value parameter, so the result has to be returned; the
     // function previously ran off its end without a return statement.
     return Minval;
 }

 // Writes to darkCh the minimum of rgbMin over the 15 x 15 patch around every
 // pixel.
 // Launch layout: a 2D grid where each thread handles one pixel
 // (x = column, y = row). Threads outside the image exit immediately, so any
 // block size works, including single-thread blocks.
 //   rgbMin : row-major width x height input buffer.
 //   darkCh : row-major width x height output buffer.
 __global__ void kernel_darkChannel(float *rgbMin, float *darkCh, int height, int width)
 {
     const int radius = 7;   // 2 * 7 + 1 = 15 pixel wide patch

     const int x = blockIdx.x * blockDim.x + threadIdx.x;   // current column
     const int y = blockIdx.y * blockDim.y + threadIdx.y;   // current row

     // Guard against threads that fall outside the image.
     if (x >= width || y >= height)
     {
         return;
     }

     // 255.0f seeds the running minimum; use the returned value (the previous
     // code discarded it and always stored 255).
     darkCh[y * width + x] = estimate_minimum_patch(rgbMin, width, height, radius, x, y, 255.0f);
 }

// Stores, for every pixel, the smallest of its three float components.
// Launch layout: a 2D grid where each thread handles one pixel
// (x = column, y = row). Threads outside the image exit immediately, so any
// block size works, including single-thread blocks.
//   image   : row-major width x height buffer of float3 pixels.
//   tmp_min : row-major width x height output buffer.
__global__ void kernel_findMinRGB(float3 *image, float *tmp_min, int height, int width)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;   // current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y;   // current row

    // x indexes columns and y indexes rows, and either one being out of range
    // makes the access invalid. The previous test compared x with height and
    // y with width, used '>' and required both to fail.
    if (x >= width || y >= height)
    {
        return;
    }

    const int i = y * width + x;
    const float3 px = image[i];   // load the pixel once

    tmp_min[i] = fminf(px.x, fminf(px.y, px.z));
}

 // Computes the dark channel of an image that already resides on the device.
 //   image_d : device buffer with height * width pixels, read as float3.
 //   Dark_d  : device buffer receiving height * width floats.
 //   height  : number of rows; width : number of columns.
 // Runs kernel_findMinRGB into a temporary buffer, then kernel_darkChannel from
 // that buffer into Dark_d. The function waits for both kernels to finish and
 // releases the temporary buffer before returning. Any CUDA failure terminates
 // the process through SAFE_CALL.
 void dark_channel(float *image_d, float *Dark_d, int height, int width)
 {
     // A grid with a zero dimension is an invalid launch configuration.
     if (height <= 0 || width <= 0)
     {
         return;
     }

     // One single-thread block per pixel: the kernels are launched so that
     // blockIdx.x is the column and blockIdx.y is the row.
     const dim3 grid(width, height);

     float *tmp_min = NULL;
     SAFE_CALL(cudaMalloc((void **)&tmp_min, sizeof(float) * height * width));

     kernel_findMinRGB<<<grid, 1>>>((float3 *)image_d, tmp_min, height, width);
     SAFE_CALL(cudaGetLastError());   // launch-configuration errors
     printf("RGB min is found\n");

     kernel_darkChannel<<<grid, 1>>>(tmp_min, Dark_d, height, width);
     SAFE_CALL(cudaGetLastError());

     // Kernel launches are asynchronous; wait here so execution errors are
     // reported at this point and the temporary buffer can be freed safely.
     SAFE_CALL(cudaDeviceSynchronize());
     printf("patch of minimum is also found\n");

     // The temporary buffer was previously never released.
     SAFE_CALL(cudaFree(tmp_min));
 }

我的代码在 source.cpp 的第 45 行崩溃,报告的错误是“unknown error”(未知错误)。

我完全没有想到是什么原因,也许你能够提供帮助。

1 个答案:

答案 0 :(得分:1)

指针 Dark_h 指向一段大小为 size1 字节的主机内存,指针 Dark_d 指向一段大小为 size2 字节的设备内存。如果 size1 < size2,那么下面这个调用:

cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost)

就会出问题,因为你会写入非法内存(即不属于 Dark_h 所指向数组的内存,可能会导致段错误 SEGFAULT)。我没有实际测试,但我敢打赌这就是崩溃的原因。