CUDA 中按线程索引访问图像时,应使用行主序还是列主序?

时间:2017-01-06 19:25:50

标签: c++ image opencv cuda

我不清楚图像在设备的全局内存中是按行主序还是按列主序存储的。按这两种顺序访问图像时,我得到了两幅不同的输出图像。按行主序访问时:

// Pixel coordinates handled by this thread, built from its block and thread indices.
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;

// Row-major linear offset: numCols elements per row, x varies fastest.
int m = numCols * y + x;

// Threads outside the numCols x numRows area exit without touching memory.
// m is computed above but is only used as an index after this guard.
if (x >= numCols || y >= numRows)
    return;

//marking column boundaries
if (x <= 2){                    // low-x band: x/y/z channels set to (255, 0, 0)
    d_Image[m].x = 255;
    d_Image[m].y = 0;
    d_Image[m].z = 0;
}
else if (x >= numCols-2){       // high-x band: x/y/z channels set to (0, 0, 255)
    d_Image[m].x = 0;
    d_Image[m].y = 0;
    d_Image[m].z = 255;
}
else{                           // everything else: copy x/y/z from the input
    d_Image[m].x = d_sample[m].x;
    d_Image[m].y = d_sample[m].y;
    d_Image[m].z = d_sample[m].z;
}
// The w channel is taken from the input in all three cases.
d_Image[m].w = d_sample[m].w;

(输出图像:使用行主序 row-major)
按列主序访问时:

// Column-major linear offset: numRows elements per column, y varies fastest.
int m = x * numRows + y;

(输出图像:使用列主序 col-major)
线程块与网格的尺寸:

// 16 x 16 threads per block.
const dim3 blockSize(16,16);
// Blocks per grid dimension: size / 16 + 1. Assuming integer operands, this
// covers the whole image and adds one spare block when the size is an exact
// multiple of 16.
const dim3 gridSize(numCols/16+1, numRows/16+1, 1);
// NOTE(review): d_Image is passed first and d_sample second here — confirm this
// matches the parameter order of the blur kernel being launched.
blur << < gridSize, blockSize >> >(d_Image, d_sample, numRows, numCols);

我使用 OpenCV 加载并保存图像。在第一个输出(行主序)中,红色和蓝色的点散布在整幅图像上。在第二个输出(列主序)中,被标记的是边界行,而我想标记的是边界列。我非常困惑。

编辑(补充完整代码):

// Launches the blur kernel on the given device buffers.
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols);

// Host-side images: `sample` is the loaded input, `Image` receives the result.
cv::Mat sample;
cv::Mat Image;

// Number of rows (height) of the loaded input image.
size_t numRows()
{
    const size_t rowCount = sample.rows;
    return rowCount;
}

// Number of columns (width) of the loaded input image.
size_t numCols()
{
    const size_t colCount = sample.cols;
    return colCount;
}

// Copies d_sample into d_Image while painting the left and right image edges.
//
// Launch layout: 2D grid of 2D blocks; the thread with global coordinates
// (x, y) handles the pixel in column x, row y. Threads that fall outside
// numCols x numRows return immediately, so the grid may overshoot the image.
// Both buffers are indexed row-major (y * numCols + x) and must each hold at
// least numRows * numCols elements.
//
//   d_sample  input pixels (read only)
//   d_Image   output pixels
//   numRows   image height in pixels
//   numCols   image width in pixels
//
// Columns with x <= 2 get (255, 0, 0) and columns with x >= numCols - 2 get
// (0, 0, 255) in their x/y/z channels; every other pixel is copied unchanged.
// The w channel always comes from d_sample.
__global__ void blur(const uchar4 *d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

  // Widen before multiplying so a large grid cannot wrap in 32-bit arithmetic.
  const size_t x = threadIdx.x + static_cast<size_t>(blockDim.x) * blockIdx.x;
  const size_t y = threadIdx.y + static_cast<size_t>(blockDim.y) * blockIdx.y;

  if (x >= numCols || y >= numRows)
        return;

  // Row-major offset, kept in size_t so it is never narrowed to int.
  const size_t m = y * numCols + x;

  // Single 4-byte load; w is carried over from the source in every branch.
  uchar4 pixel = d_sample[m];

  if (x <= 2){
      pixel.x = 255;
      pixel.y = 0;
      pixel.z = 0;
  }
  // Same test as x >= numCols - 2, written so the unsigned subtraction
  // cannot underflow for very narrow images.
  else if (x + 2 >= numCols){
      pixel.x = 0;
      pixel.y = 0;
      pixel.z = 255;
  }

  // Single 4-byte store instead of four separate byte writes.
  d_Image[m] = pixel;
}

int main(){

  uchar4  *h_sample, *d_sample, *d_Image, *h_Image;
  int filter[9];
  sample = cv::imread("sample.jpg", CV_LOAD_IMAGE_COLOR);
  if (sample.empty()){
        std::cout << "error in loading image.";
        system("pause");
  }

  cv::cvtColor(sample,sample,CV_BGR2RGBA);
  Image.create(numRows(), numCols(), CV_8UC4);

  if (!sample.isContinuous() || !Image.isContinuous()) {
      std::cerr << "Images aren't continuous!! Exiting." << std::endl;
      system("pause");
      exit(1);
  }
  cv::cvtColor(Image,Image,CV_BGR2RGBA);

  h_sample = (uchar4*)sample.data;
  h_Image = (uchar4*)Image.data;

  size_t numPixels = numRows() * numCols();

    //allocate mmeory on device
  checkCudaErrors(cudaMalloc((void**)&d_sample, sizeof(uchar4) * numPixels));
  checkCudaErrors(cudaMalloc((void**)&d_Image, sizeof(uchar4) * numPixels));

  checkCudaErrors(cudaMemset(d_sample, 0, sizeof(uchar4) * numPixels));
  checkCudaErrors(cudaMemset(d_Image, 0, sizeof(uchar4) * numPixels));

//copy to device
  checkCudaErrors(cudaMemcpy(d_sample, h_sample, sizeof(uchar4) * numPixels, cudaMemcpyHostToDevice));

  helper(d_sample, d_Image, numCols(), numRows());

//copy back to  host
  checkCudaErrors(cudaMemcpy(h_Image, d_Image, sizeof(uchar4) * numPixels, cudaMemcpyDeviceToHost));

  cv::cvtColor(Image,Image,CV_RGBA2BGR);

  cv::namedWindow("Image", CV_WINDOW_AUTOSIZE);
  cv::imshow("Image", Image);
  cv::waitKey(0);
  cv::imwrite("sample.jpg", Image);

  return 0;
}

// Launches blur over a numRows x numCols image and waits for it to finish.
//
//   d_sample  device buffer holding the input pixels
//   d_Image   device buffer that receives the output pixels
//   numRows   image height in pixels
//   numCols   image width in pixels
//
// Launch and execution errors are reported through checkCudaErrors.
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

  // An empty image needs no launch, and a zero-sized grid is an invalid
  // launch configuration.
  if (numRows == 0 || numCols == 0)
      return;

  const unsigned int tile = 16;
  const dim3 blockSize(tile, tile);
  // Ceiling division: exactly enough blocks to cover the image, with no spare
  // block when a dimension is an exact multiple of the tile size.
  const dim3 gridSize(static_cast<unsigned int>((numCols + tile - 1) / tile),
                      static_cast<unsigned int>((numRows + tile - 1) / tile),
                      1);
  blur << < gridSize, blockSize >> >(d_sample, d_Image, numRows, numCols);
  checkCudaErrors(cudaGetLastError());        // launch / configuration errors
  checkCudaErrors(cudaDeviceSynchronize());   // errors raised while the kernel ran
}

1 个答案:

答案 0 :(得分:1)

.expanded

你的 helper 函数定义为:

void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

我认为你在调用 helper 时,可能把 cols 和 rows 这两个参数的顺序弄反了……