在CUDA C ++中分配变量 - 无法分配设备变量

时间:2017-05-09 18:30:38

标签: c++ cuda gpu bmp

我正在尝试使用GPU上的过滤器处理.bmp文件。为此,我需要在设备上分配和填充变量d_bufferRGB。但是当我这样做时,我的编译器只是告诉我它在主机上定义并以错误“标识符d_bufferRGB未定义”结束。函数load_BMP和save_BMP功能齐全。我需要知道如何将bufferRGB复制到d_bufferRGB中,因为显然我犯了一些错误。 你能帮我吗?

源代码在这里>>>

  #include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "device_launch_parameters.h"
#include <iostream>
//using namespace std;

int width, heigth;
long size, *d_size;
RGBTRIPLE *bufferRGB, *new_bufferRGB;
RGBTRIPLE *d_bufferRGB, *d_new_bufferRGB;


void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
{
    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    FILE *file;

    file = fopen(file_name, "rb");

    fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file);

    fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file);

    *width = bmp_info_header.biWidth;
    *heigth = bmp_info_header.biHeight;
    size = (bmp_file_header.bfSize - bmp_file_header.bfOffBits);

    int x, y;
    *bufferRGB = (RGBTRIPLE *)malloc(*width* *heigth * 4);

    fseek(file, bmp_file_header.bfOffBits - sizeof(bmp_file_header) - sizeof(bmp_info_header), SEEK_CUR);

    for (y = 0; y < *heigth; y++)
    {
        for (x = 0; x < *width; x++)
        {
            (*bufferRGB)[(y * *width + x)].rgbtBlue = fgetc(file);
            (*bufferRGB)[(y * *width + x)].rgbtGreen = fgetc(file);
            (*bufferRGB)[(y * *width + x)].rgbtRed = fgetc(file);
        }
        for (x = 0; x < (4 - (3 * *width) % 4) % 4; x++)
            fgetc(file);
    }
    fclose(file);
}

void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
{
    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    FILE *file;

    file = fopen(old_name, "rb");

    fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file);

    fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file);
    fclose(file);

    file = fopen(new_name, "wb");

    fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file);
    fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file);
    fseek(file, bmp_file_header.bfOffBits - sizeof(bmp_file_header) - sizeof(bmp_info_header), SEEK_CUR);

    int alligment_x = (4 - (3 * width) % 4) % 4;
    unsigned char *to_save = (unsigned char *)malloc((width * 3 + alligment_x)*heigth);
    unsigned int index = 0;
    int x, y;

    for (y = 0; y < heigth; y++)
    {
        for (x = 0; x < width; x++)
        {
            to_save[index++] = bufferRGB[(y * width + x)].rgbtBlue;
            to_save[index++] = bufferRGB[(y * width + x)].rgbtGreen;
            to_save[index++] = bufferRGB[(y * width + x)].rgbtRed;
        }
        for (x = 0; x < alligment_x; x++)
            to_save[index++] = 0;
    }
    fwrite(to_save, (width * 3 + alligment_x)*heigth, 1, file);

    fclose(file);
    free(to_save);
}


__global__ void CUDA_filter_grayscale()
{
    int idx = blockIdx.x*blockDim.x + threadIdx.x;
    BYTE grayscale;

    if (idx < *d_size)
    {
        grayscale = ((d_bufferRGB[idx].rgbtRed + d_bufferRGB[idx].rgbtGreen + d_bufferRGB[idx].rgbtBlue) / 3);
        d_new_bufferRGB[idx].rgbtRed = grayscale;
        d_new_bufferRGB[idx].rgbtGreen = grayscale;
        d_new_bufferRGB[idx].rgbtBlue = grayscale;
    }
}

int main()
{

    //GPU cast ----------------------------------------------------------------------------------------------

    cudaMalloc((void**)&d_new_bufferRGB, width*heigth * 4);
    cudaMalloc((void**)&d_bufferRGB, width*heigth * 4);
    cudaMalloc((void**)&d_size, sizeof(size));

    load_bmp(&bufferRGB, &width, &heigth, "test.bmp");

    cudaMemcpy(&d_size, &size, sizeof(size), cudaMemcpyHostToDevice);
    cudaMemcpy(d_bufferRGB, bufferRGB, sizeof(bufferRGB), cudaMemcpyHostToDevice);

    CUDA_filter_grayscale << <256, 512 >> >(); //size dont bother me for now

    cudaMemcpy(d_new_bufferRGB, new_bufferRGB, sizeof(new_bufferRGB), cudaMemcpyDeviceToHost);

    save_bmp(new_bufferRGB, "filter_grayscale_GPU", "test.bmp");

}

1 个答案:

答案 0 :(得分:1)

内核无法访问主机代码变量(例如变量d_bufferRGB)。如果需要访问内核中的主机代码变量,则需要将其作为参数传递给内核:

__global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB)
{
    // Code same as before
}

// Usage in main():
CUDA_filter_grayscale << <256, 512 >> >(d_bufferRGB, d_new_bufferRGB);