cudaMemcpy - 似乎无法正常工作

时间:2017-05-10 14:45:58

标签: c++ cuda gpu nvidia

我正在写一个GPU灰度滤镜程序,需要把数据从主机复制到设备。但是这一步似乎有问题:执行之后没有任何效果。我的代码里可能有错误,但编译器没有报告任何错误。我需要把主机上的 bufferRGB 复制到GPU上的 d_bufferRGB,在设备上处理后把结果写入 d_new_bufferRGB,再复制回主机,以便交给函数 save_bmp() 保存。

编辑1:我在 main() 中加入了CUDA错误检查,它报告下面这一行出现"无效参数"(invalid argument)错误:cudaMemcpy(d_bufferRGB,bufferRGB,size,cudaMemcpyHostToDevice)

以下是代码:

     #include <stdio.h>
        #include <stdlib.h>
        #include <Windows.h>
        #include <cuda_runtime.h>
        #include <cuda.h>
        #include "device_launch_parameters.h"
        #include <iostream>


        // Wraps a CUDA runtime call and forwards its status, together with the
        // call site's file name and line number, to gpuAssert().
        #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*
 * Reports a failed CUDA runtime call.
 *
 * code  - status returned by the CUDA call.
 * file  - source file of the call site (printed in the message).
 * line  - line number of the call site (printed in the message).
 * abort - when true (the default), the process exits with `code` after the
 *         message has been printed.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    // Nothing to report when the call succeeded.
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (abort)
        exit(code);
}
        // Image dimensions in pixels; main() passes their addresses to load_bmp(),
        // and save_bmp() reads them directly.
        int width, heigth;
        // Size in bytes of the pixel-data section of the loaded BMP; written by load_bmp().
        long size;
        // Device pointer to the bound that CUDA_filter_grayscale dereferences;
        // allocated with cudaMalloc in main().
        long *d_size;
        // Host pixel buffers: bufferRGB is allocated and filled by load_bmp();
        // new_bufferRGB is the destination of the device-to-host copy in main().
        RGBTRIPLE *bufferRGB, *new_bufferRGB;
        // Device pixel buffers: kernel input and kernel output respectively.
        RGBTRIPLE *d_bufferRGB, *d_new_bufferRGB;


        /*
         * Loads an uncompressed, bottom-up 24-bit BMP file into a malloc'ed pixel array.
         *
         * Parameters:
         *   bufferRGB - out: receives the pixel array (row-major, *width x *heigth
         *               entries in file row order, row padding stripped). Set to
         *               NULL on failure. The caller owns the memory and must free() it.
         *   width     - out: image width in pixels (0 on failure).
         *   heigth    - out: image height in pixels (0 on failure).
         *   file_name - path of the BMP file to read.
         *
         * Side effects: sets the global `size` to the byte size of the pixel data
         * including row padding (0 on failure) and prints it to stdout on success.
         * Failures are reported on stderr.
         */
        void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
        {
            BITMAPFILEHEADER bmp_file_header;
            BITMAPINFOHEADER bmp_info_header;

            // Start from a well-defined failure state so callers can test *bufferRGB.
            *bufferRGB = NULL;
            *width = 0;
            *heigth = 0;
            size = 0;

            FILE *file = fopen(file_name, "rb");
            if (file == NULL)
            {
                fprintf(stderr, "load_bmp: cannot open %s\n", file_name);
                return;
            }

            if (fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) != 1 ||
                fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) != 1)
            {
                fprintf(stderr, "load_bmp: %s is too short to be a BMP file\n", file_name);
                fclose(file);
                return;
            }

            // The per-pixel reads below only match uncompressed 24-bit data with
            // positive dimensions; 0x4D42 is the "BM" signature.
            if (bmp_file_header.bfType != 0x4D42 ||
                bmp_info_header.biBitCount != 24 ||
                bmp_info_header.biCompression != BI_RGB ||
                bmp_info_header.biWidth <= 0 || bmp_info_header.biHeight <= 0)
            {
                fprintf(stderr, "load_bmp: %s is not a supported 24-bit BMP\n", file_name);
                fclose(file);
                return;
            }

            // Callers size their buffers with int arithmetic (width * heigth * 4),
            // so reject images whose byte count would not fit in a signed 32-bit int.
            const unsigned long long alloc_bytes64 =
                4ull * (unsigned long long)bmp_info_header.biWidth *
                (unsigned long long)bmp_info_header.biHeight;
            if (alloc_bytes64 > 0x7FFFFFFFull)
            {
                fprintf(stderr, "load_bmp: %s is too large\n", file_name);
                fclose(file);
                return;
            }

            const int w = (int)bmp_info_header.biWidth;
            const int h = (int)bmp_info_header.biHeight;
            // Each BMP row is padded to a multiple of 4 bytes.
            const int row_padding = (4 - (3 * w) % 4) % 4;

            // Keep 4 bytes per pixel: callers may copy `size` bytes (padded rows)
            // out of this buffer, which can exceed 3 * w * h but never 4 * w * h.
            RGBTRIPLE *pixels = (RGBTRIPLE *)malloc((size_t)alloc_bytes64);
            if (pixels == NULL)
            {
                fprintf(stderr, "load_bmp: out of memory\n");
                fclose(file);
                return;
            }

            if (fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) != 0)
            {
                fprintf(stderr, "load_bmp: cannot seek to pixel data in %s\n", file_name);
                free(pixels);
                fclose(file);
                return;
            }

            bool truncated = false;
            for (int y = 0; y < h && !truncated; y++)
            {
                RGBTRIPLE *row = pixels + (size_t)y * w;
                for (int x = 0; x < w; x++)
                {
                    // BMP stores each pixel as blue, green, red.
                    row[x].rgbtBlue = (BYTE)fgetc(file);
                    row[x].rgbtGreen = (BYTE)fgetc(file);
                    row[x].rgbtRed = (BYTE)fgetc(file);
                }
                // fgetc() returns EOF silently; check once per row, before the
                // padding, so a file that omits the final padding still loads.
                if (feof(file) || ferror(file))
                    truncated = true;
                for (int x = 0; x < row_padding && !truncated; x++)
                    fgetc(file);
            }
            fclose(file);

            if (truncated)
            {
                fprintf(stderr, "load_bmp: %s ends before all pixels were read\n", file_name);
                free(pixels);
                return;
            }

            *bufferRGB = pixels;
            *width = w;
            *heigth = h;
            // Derived from the dimensions instead of the header's bfSize, which
            // cannot be trusted to stay within the allocation above.
            size = (long)(3 * w + row_padding) * h;
            std::cout << "velkost nacitanych pixelov je " << size << '\n';
        }

        /*
         * Writes a pixel array as a 24-bit BMP, reusing the headers of an existing file.
         *
         * Parameters:
         *   bufferRGB - row-major pixels; global `width` x `heigth` entries are read.
         *   new_name  - path of the file to create or overwrite.
         *   old_name  - path of the BMP whose BITMAPFILEHEADER and BITMAPINFOHEADER
         *               are copied into the new file.
         *
         * Reads the globals `width` and `heigth`. Prints the number of bytes to be
         * written to stdout. On failure a message is printed to stderr and the
         * function returns; nothing is thrown.
         */
        void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
        {
            BITMAPFILEHEADER bmp_file_header;
            BITMAPINFOHEADER bmp_info_header;

            if (bufferRGB == NULL || width <= 0 || heigth <= 0)
            {
                fprintf(stderr, "save_bmp: no pixel data to save\n");
                return;
            }

            FILE *file = fopen(old_name, "rb");
            if (file == NULL)
            {
                fprintf(stderr, "save_bmp: cannot open %s\n", old_name);
                return;
            }
            const bool headers_ok =
                fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
                fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1;
            fclose(file);
            if (!headers_ok)
            {
                fprintf(stderr, "save_bmp: cannot read BMP headers from %s\n", old_name);
                return;
            }

            // Each BMP row is padded with zero bytes to a multiple of 4 bytes.
            const int alligment_x = (4 - (3 * width) % 4) % 4;
            const size_t row_bytes = (size_t)width * 3 + alligment_x;
            const size_t total_bytes = row_bytes * heigth;

            // Build the padded image before touching the output file, so an
            // allocation failure does not leave an empty file behind.
            unsigned char *to_save = (unsigned char *)malloc(total_bytes);
            if (to_save == NULL)
            {
                fprintf(stderr, "save_bmp: out of memory\n");
                return;
            }

            size_t index = 0;
            for (int y = 0; y < heigth; y++)
            {
                const RGBTRIPLE *row = bufferRGB + (size_t)y * width;
                for (int x = 0; x < width; x++)
                {
                    to_save[index++] = row[x].rgbtBlue;
                    to_save[index++] = row[x].rgbtGreen;
                    to_save[index++] = row[x].rgbtRed;
                }
                for (int x = 0; x < alligment_x; x++)
                    to_save[index++] = 0;
            }
            // Report the actual byte count (sizeof(&to_save) was only the pointer size).
            std::cout << "velkost na ulozenie je " << total_bytes << '\n';

            file = fopen(new_name, "wb");
            if (file == NULL)
            {
                fprintf(stderr, "save_bmp: cannot create %s\n", new_name);
                free(to_save);
                return;
            }

            // Pixel data starts at bfOffBits from the beginning of the file.
            bool ok =
                fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
                fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1 &&
                fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0 &&
                fwrite(to_save, 1, total_bytes, file) == total_bytes;
            if (fclose(file) != 0)
                ok = false;
            free(to_save);

            if (!ok)
                fprintf(stderr, "save_bmp: error while writing %s\n", new_name);
        }


        /*
         * Grayscale filter kernel: each thread converts at most one element.
         *
         * Launch layout: 1D grid of 1D blocks. The thread with global index i
         * handles element i; threads with i >= *d_size do nothing.
         *
         * d_bufferRGB     - input pixels (read only).
         * d_new_bufferRGB - output pixels; all three channels of element i receive
         *                   the integer average of the input's red, green and blue.
         * d_size          - pointer, dereferenced on the device, to the exclusive
         *                   upper bound on the element index. Both buffers must
         *                   hold at least that many RGBTRIPLE entries.
         */
        __global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
        {
            const int pixel = blockIdx.x * blockDim.x + threadIdx.x;

            // Surplus threads in the last block have no element to process.
            if (!(pixel < *d_size))
                return;

            const RGBTRIPLE *src = &d_bufferRGB[pixel];
            RGBTRIPLE *dst = &d_new_bufferRGB[pixel];

            // The BYTE operands are promoted to int, so the sum cannot overflow.
            const BYTE gray = ((src->rgbtRed + src->rgbtGreen + src->rgbtBlue) / 3);

            dst->rgbtRed = gray;
            dst->rgbtGreen = gray;
            dst->rgbtBlue = gray;
        }

        // Entry point of the program as posted in the question: allocates device
        // buffers, loads test.bmp, runs the grayscale kernel and saves the result.
        // The review notes below mark where the visible code goes wrong.
        int main()
    {

            // NOTE(review): width and heigth are globals that are only assigned
            // inside load_bmp(), which is called further down. At this point they
            // still hold their zero initial value, so both pixel buffers are
            // requested with a size of 0 bytes.
            gpuErrchk(cudaMalloc(&d_new_bufferRGB, width*heigth * 4));
            gpuErrchk(cudaMalloc(&d_bufferRGB, width*heigth * 4));
            gpuErrchk(cudaMalloc(&d_size, sizeof(size)));

            load_bmp(&bufferRGB, &width, &heigth, "test.bmp"); // the bufferRGB pointer is created and filled here

            gpuErrchk(cudaMemcpy(d_size, &size, sizeof(size), cudaMemcpyHostToDevice));
            // Copies `size` bytes into d_bufferRGB, which was allocated above
            // before the image dimensions were known.
            gpuErrchk(cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice));

            // Fixed launch of 32 blocks x 512 threads = 16384 threads; the kernel
            // handles one element per thread, so elements past that index are
            // not processed.
            CUDA_filter_grayscale << <32, 512 >> > (d_bufferRGB, d_new_bufferRGB, d_size); // launch size is a placeholder for now

            // NOTE(review): new_bufferRGB is never assigned anywhere in the visible
            // code, so it is still a null pointer when used as the destination.
            gpuErrchk(cudaMemcpy(new_bufferRGB, d_new_bufferRGB, size, cudaMemcpyDeviceToHost));

            save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");
    } 

这个问题已经困扰我好几天了,请帮帮我。

1 个答案:

答案 0 :(得分:1)

在 @Robert Crovella 的帮助下,我完成了我的代码。我还额外加了一些功能,比如根据图像大小动态计算内核的启动配置,算是送给网友的免费礼物。代码完全适用于微软的BMP第3版格式(可以用"画图"创建这样的图片)。我本想上传一些图片,但上传上限是2MB,这对真彩色图像来说不够。编译时会报告一个空指针错误,但程序仍会生成并存放在项目的Debug文件夹中。把图像放在该文件夹里再运行程序,它就能正常工作。

以上代码的问题是:1. new_bufferRGB 未初始化(没有为它分配内存);2. 加载函数被调用得太晚,在它给变量赋值之前我就已经使用了这些变量;3. cudaMemcpy 函数调用中的错误。

修正后的代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "device_launch_parameters.h"
#include <iostream>


// Image dimensions in pixels; main() passes their addresses to load_bmp(),
// and save_bmp() reads them directly.
int width, heigth;
// Size in bytes of the pixel-data section of the loaded BMP; written by load_bmp().
long size;
// Device pointer to the bound that CUDA_filter_grayscale dereferences;
// allocated with cudaMalloc in main().
long *d_size;
// Host pixel buffers: bufferRGB is allocated and filled by load_bmp();
// new_bufferRGB receives the processed image in main().
RGBTRIPLE *bufferRGB, *new_bufferRGB;
// Device pixel buffers: kernel input and kernel output respectively.
RGBTRIPLE *d_bufferRGB, *d_new_bufferRGB;

// Wraps a CUDA runtime call and forwards its status, together with the call
// site's file name and line number, to gpuAssert(). The do/while(0) form makes
// the macro behave as a single statement, e.g. inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/*
 * Reports a failed CUDA runtime call.
 *
 * code  - status returned by the CUDA call.
 * file  - source file of the call site (printed in the message).
 * line  - line number of the call site (printed in the message).
 * abort - when true (the default), the process exits with `code` after the
 *         message has been printed; pass false to log the error and continue.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // Honour the abort flag: continuing after a failed allocation or copy
        // only makes every later CUDA call fail as well and hides the cause.
        if (abort) exit(code);
    }
}

/*
 * Loads an uncompressed, bottom-up 24-bit BMP file into a malloc'ed pixel array.
 *
 * Parameters:
 *   bufferRGB - out: receives the pixel array (row-major, *width x *heigth
 *               entries in file row order, row padding stripped). Set to
 *               NULL on failure. The caller owns the memory and must free() it.
 *   width     - out: image width in pixels (0 on failure).
 *   heigth    - out: image height in pixels (0 on failure).
 *   file_name - path of the BMP file to read.
 *
 * Side effects: sets the global `size` to the byte size of the pixel data
 * including row padding (0 on failure) and prints it to stdout on success.
 * Failures are reported on stderr.
 */
void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
{
    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;

    // Start from a well-defined failure state so callers can test *bufferRGB.
    *bufferRGB = NULL;
    *width = 0;
    *heigth = 0;
    size = 0;

    FILE *file = fopen(file_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "load_bmp: cannot open %s\n", file_name);
        return;
    }

    if (fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) != 1 ||
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) != 1)
    {
        fprintf(stderr, "load_bmp: %s is too short to be a BMP file\n", file_name);
        fclose(file);
        return;
    }

    // The per-pixel reads below only match uncompressed 24-bit data with
    // positive dimensions; 0x4D42 is the "BM" signature.
    if (bmp_file_header.bfType != 0x4D42 ||
        bmp_info_header.biBitCount != 24 ||
        bmp_info_header.biCompression != BI_RGB ||
        bmp_info_header.biWidth <= 0 || bmp_info_header.biHeight <= 0)
    {
        fprintf(stderr, "load_bmp: %s is not a supported 24-bit BMP\n", file_name);
        fclose(file);
        return;
    }

    // Callers size their buffers with int arithmetic (width * heigth * 4),
    // so reject images whose byte count would not fit in a signed 32-bit int.
    const unsigned long long alloc_bytes64 =
        4ull * (unsigned long long)bmp_info_header.biWidth *
        (unsigned long long)bmp_info_header.biHeight;
    if (alloc_bytes64 > 0x7FFFFFFFull)
    {
        fprintf(stderr, "load_bmp: %s is too large\n", file_name);
        fclose(file);
        return;
    }

    const int w = (int)bmp_info_header.biWidth;
    const int h = (int)bmp_info_header.biHeight;
    // Each BMP row is padded to a multiple of 4 bytes.
    const int row_padding = (4 - (3 * w) % 4) % 4;

    // Keep 4 bytes per pixel: callers may copy `size` bytes (padded rows)
    // out of this buffer, which can exceed 3 * w * h but never 4 * w * h.
    RGBTRIPLE *pixels = (RGBTRIPLE *)malloc((size_t)alloc_bytes64);
    if (pixels == NULL)
    {
        fprintf(stderr, "load_bmp: out of memory\n");
        fclose(file);
        return;
    }

    if (fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) != 0)
    {
        fprintf(stderr, "load_bmp: cannot seek to pixel data in %s\n", file_name);
        free(pixels);
        fclose(file);
        return;
    }

    bool truncated = false;
    for (int y = 0; y < h && !truncated; y++)
    {
        RGBTRIPLE *row = pixels + (size_t)y * w;
        for (int x = 0; x < w; x++)
        {
            // BMP stores each pixel as blue, green, red.
            row[x].rgbtBlue = (BYTE)fgetc(file);
            row[x].rgbtGreen = (BYTE)fgetc(file);
            row[x].rgbtRed = (BYTE)fgetc(file);
        }
        // fgetc() returns EOF silently; check once per row, before the
        // padding, so a file that omits the final padding still loads.
        if (feof(file) || ferror(file))
            truncated = true;
        for (int x = 0; x < row_padding && !truncated; x++)
            fgetc(file);
    }
    fclose(file);

    if (truncated)
    {
        fprintf(stderr, "load_bmp: %s ends before all pixels were read\n", file_name);
        free(pixels);
        return;
    }

    *bufferRGB = pixels;
    *width = w;
    *heigth = h;
    // Derived from the dimensions instead of the header's bfSize, which
    // cannot be trusted to stay within the allocation above.
    size = (long)(3 * w + row_padding) * h;
    std::cout << "size of loaded pixels is " << size << '\n';
}

/*
 * Writes a pixel array as a 24-bit BMP, reusing the headers of an existing file.
 *
 * Parameters:
 *   bufferRGB - row-major pixels; global `width` x `heigth` entries are read.
 *   new_name  - path of the file to create or overwrite.
 *   old_name  - path of the BMP whose BITMAPFILEHEADER and BITMAPINFOHEADER
 *               are copied into the new file.
 *
 * Reads the globals `width` and `heigth`. On failure a message is printed to
 * stderr and the function returns; nothing is thrown.
 */
void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
{
    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;

    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "save_bmp: no pixel data to save\n");
        return;
    }

    FILE *file = fopen(old_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot open %s\n", old_name);
        return;
    }
    const bool headers_ok =
        fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1;
    fclose(file);
    if (!headers_ok)
    {
        fprintf(stderr, "save_bmp: cannot read BMP headers from %s\n", old_name);
        return;
    }

    // Each BMP row is padded with zero bytes to a multiple of 4 bytes.
    const int alligment_x = (4 - (3 * width) % 4) % 4;
    const size_t row_bytes = (size_t)width * 3 + alligment_x;
    const size_t total_bytes = row_bytes * heigth;

    // Build the padded image before touching the output file, so an
    // allocation failure does not leave an empty file behind.
    unsigned char *to_save = (unsigned char *)malloc(total_bytes);
    if (to_save == NULL)
    {
        fprintf(stderr, "save_bmp: out of memory\n");
        return;
    }

    size_t index = 0;
    for (int y = 0; y < heigth; y++)
    {
        const RGBTRIPLE *row = bufferRGB + (size_t)y * width;
        for (int x = 0; x < width; x++)
        {
            to_save[index++] = row[x].rgbtBlue;
            to_save[index++] = row[x].rgbtGreen;
            to_save[index++] = row[x].rgbtRed;
        }
        for (int x = 0; x < alligment_x; x++)
            to_save[index++] = 0;
    }

    file = fopen(new_name, "wb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot create %s\n", new_name);
        free(to_save);
        return;
    }

    // Pixel data starts at bfOffBits from the beginning of the file.
    bool ok =
        fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1 &&
        fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0 &&
        fwrite(to_save, 1, total_bytes, file) == total_bytes;
    if (fclose(file) != 0)
        ok = false;
    free(to_save);

    if (!ok)
        fprintf(stderr, "save_bmp: error while writing %s\n", new_name);
}


/*
 * Grayscale filter kernel: each thread converts at most one element.
 *
 * Launch layout: 1D grid of 1D blocks. The thread with global index i handles
 * element i; threads with i >= *d_size do nothing.
 *
 * d_bufferRGB     - input pixels (read only).
 * d_new_bufferRGB - output pixels; all three channels of element i receive
 *                   the integer average of the input's red, green and blue.
 * d_size          - pointer, dereferenced on the device, to the exclusive
 *                   upper bound on the element index. Both buffers must hold
 *                   at least that many RGBTRIPLE entries.
 */
__global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads in the last block have no element to process.
    if (!(pixel < *d_size))
        return;

    const RGBTRIPLE *src = &d_bufferRGB[pixel];
    RGBTRIPLE *dst = &d_new_bufferRGB[pixel];

    // The BYTE operands are promoted to int, so the sum cannot overflow.
    const BYTE gray = ((src->rgbtRed + src->rgbtGreen + src->rgbtBlue) / 3);

    dst->rgbtRed = gray;
    dst->rgbtGreen = gray;
    dst->rgbtBlue = gray;
}

/*
 * Loads test.bmp, converts it to grayscale on the GPU and writes the result
 * to filter_grayscale_GPU.bmp.
 *
 * Returns 0 on success and 1 when the image could not be loaded or a host
 * allocation failed. CUDA failures are reported through gpuErrchk.
 */
int main()
{
    // Load first: width, heigth and the host pixel buffer must be known
    // before any allocation is sized from them.
    load_bmp(&bufferRGB, &width, &heigth, "test.bmp");
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "main: could not load test.bmp\n");
        return 1;
    }

    // Guard the 32-bit arithmetic below against very large images.
    const long long pixels64 = (long long)width * heigth;
    if (pixels64 * (long long)sizeof(RGBTRIPLE) > 0x7FFFFFFFLL)
    {
        fprintf(stderr, "main: image is too large\n");
        free(bufferRGB);
        return 1;
    }

    // The kernel indexes whole pixels, so its bound must be the pixel count,
    // not the byte size of the pixel data in the file.
    long pixelCount = (long)pixels64;
    const size_t pixelBytes = (size_t)pixelCount * sizeof(RGBTRIPLE);

    // Host buffer that receives the processed image from the device.
    new_bufferRGB = (RGBTRIPLE *)malloc(pixelBytes);
    if (new_bufferRGB == NULL)
    {
        fprintf(stderr, "main: out of memory\n");
        free(bufferRGB);
        return 1;
    }

    // Device buffers, sized from the same byte count as the copies below.
    gpuErrchk(cudaMalloc(&d_new_bufferRGB, pixelBytes));
    gpuErrchk(cudaMalloc(&d_bufferRGB, pixelBytes));
    gpuErrchk(cudaMalloc(&d_size, sizeof(pixelCount)));

    // Upload the element bound and the input pixels.
    gpuErrchk(cudaMemcpy(d_size, &pixelCount, sizeof(pixelCount), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_bufferRGB, bufferRGB, pixelBytes, cudaMemcpyHostToDevice));

    // One thread per pixel; round the block count up so the tail is covered.
    const int numbThreads = 256;
    const int numbBlocks = (int)((pixelCount + numbThreads - 1) / numbThreads);

    CUDA_filter_grayscale<<<numbBlocks, numbThreads>>>(d_bufferRGB, d_new_bufferRGB, d_size);
    // A launch returns no status: fetch launch-configuration errors explicitly,
    // then synchronize so faults inside the kernel are reported here.
    gpuErrchk(cudaGetLastError());
    gpuErrchk(cudaDeviceSynchronize());

    // Copy the result from device to host.
    gpuErrchk(cudaMemcpy(new_bufferRGB, d_new_bufferRGB, pixelBytes, cudaMemcpyDeviceToHost));

    // Save the result, reusing the headers of the input file.
    save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");

    // Release device and host memory.
    gpuErrchk(cudaFree(d_size));
    gpuErrchk(cudaFree(d_bufferRGB));
    gpuErrchk(cudaFree(d_new_bufferRGB));
    free(new_bufferRGB);
    free(bufferRGB);

    return 0;
}