我正在编写一个GPU灰度滤镜程序,需要把数据从主机复制到设备。但是程序存在某种问题:执行复制时没有任何反应。我的代码中可能有错误,但编译器没有报告任何错误。我需要将变量d_bufferRGB复制到GPU中,在GPU上处理,并将结果返回到d_new_bufferRGB中,以便之后调用函数save_bmp()。
编辑1:我在main()中加入了CUDA错误检查,它报告下面这一行存在无效参数(invalid argument):cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice)
以下是代码:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "device_launch_parameters.h"
#include <iostream>
// Wraps a CUDA runtime call and forwards its status, together with the call
// site, to gpuAssert. The do/while(0) form makes the macro expand to a single
// statement, so `if (c) gpuErrchk(x); else ...` parses as intended.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA runtime call.
//   code  - status returned by the call; cudaSuccess is ignored.
//   file  - source file of the call site (normally __FILE__).
//   line  - source line of the call site (normally __LINE__).
//   abort - when true (the default) the process exits with `code` as its
//           exit status right after the message has been printed.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
// Image geometry in pixels; written by load_bmp through its out-parameters
// and read by save_bmp and main.
int width;
int heigth;
// Byte count computed by load_bmp from the BMP file header.
long size;
// Device-side copy of `size`; allocated and filled in main.
long *d_size;
// Host pixel buffers: the loaded image and the buffer handed to save_bmp.
RGBTRIPLE *bufferRGB;
RGBTRIPLE *new_bufferRGB;
// Device pixel buffers: kernel input and kernel output.
RGBTRIPLE *d_bufferRGB;
RGBTRIPLE *d_new_bufferRGB;
// Loads an uncompressed, bottom-up, 24-bit BMP file into a freshly allocated
// pixel array.
//
//   bufferRGB - out: malloc-family buffer with the pixels, row after row in
//               file order and without row padding; NULL on failure. The
//               caller releases it with free().
//   width     - out: image width in pixels; 0 on failure.
//   heigth    - out: image height in pixels; 0 on failure.
//   file_name - path of the BMP file to read.
//
// Side effect: the file-scope `size` is set to the byte count of the pixel
// area as stored in the file (rows padded to a multiple of 4 bytes), or 0 on
// failure. It is derived from the image geometry rather than from the
// bfSize header field, so it cannot be inflated by trailing bytes or a bogus
// header. Failures are reported on stderr.
void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
{
    // Publish "nothing loaded" first; the outputs are only filled in on success.
    *bufferRGB = NULL;
    *width = 0;
    *heigth = 0;
    size = 0;

    FILE *file = fopen(file_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "load_bmp: cannot open '%s'\n", file_name);
        return;
    }

    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    if (fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) != 1 ||
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) != 1)
    {
        fprintf(stderr, "load_bmp: '%s' is too short to hold the BMP headers\n", file_name);
        fclose(file);
        return;
    }

    // The reader below consumes exactly three bytes per pixel, so anything
    // other than plain 24-bit data would be decoded as garbage.
    const bool supported =
        bmp_file_header.bfType == 0x4D42 && // "BM" signature
        bmp_file_header.bfOffBits >= sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) &&
        bmp_info_header.biBitCount == 24 &&
        bmp_info_header.biCompression == BI_RGB &&
        bmp_info_header.biWidth > 0 &&
        bmp_info_header.biHeight > 0;
    if (!supported)
    {
        fprintf(stderr, "load_bmp: '%s' is not an uncompressed bottom-up 24-bit BMP\n", file_name);
        fclose(file);
        return;
    }

    // Keep every byte count within the range of a signed 32-bit value so that
    // `size` (a long) and the int indices used elsewhere cannot overflow.
    const size_t max_bytes = 0x7fffffff;
    const size_t cols = (size_t)bmp_info_header.biWidth;
    const size_t rows = (size_t)bmp_info_header.biHeight;
    if (cols > max_bytes / 4)
    {
        fprintf(stderr, "load_bmp: '%s' is too wide\n", file_name);
        fclose(file);
        return;
    }
    const size_t row_padding = (4 - (3 * cols) % 4) % 4;
    const size_t row_bytes = 3 * cols + row_padding;
    if (rows > max_bytes / row_bytes)
    {
        fprintf(stderr, "load_bmp: '%s' is too large\n", file_name);
        fclose(file);
        return;
    }

    size = (long)(row_bytes * rows);
    std::cout << "velkost nacitanych pixelov je " << size << '\n';

    // Four bytes are reserved per pixel although only three are used: callers
    // in this file copy `size` bytes (padded rows) out of this buffer, and
    // row_bytes never exceeds 4 * cols. calloc keeps that slack zeroed.
    RGBTRIPLE *pixels = (RGBTRIPLE *)calloc(cols * rows, 4);
    if (pixels == NULL)
    {
        fprintf(stderr, "load_bmp: out of memory for '%s'\n", file_name);
        fclose(file);
        size = 0;
        return;
    }

    bool complete = fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0;
    for (size_t y = 0; complete && y < rows; y++)
    {
        RGBTRIPLE *row = pixels + y * cols;
        for (size_t x = 0; x < cols; x++)
        {
            unsigned char bgr[3]; // file order is blue, green, red
            if (fread(bgr, 1, sizeof(bgr), file) != sizeof(bgr))
            {
                complete = false;
                break;
            }
            row[x].rgbtBlue = bgr[0];
            row[x].rgbtGreen = bgr[1];
            row[x].rgbtRed = bgr[2];
        }
        // Skip the bytes that pad the row to a multiple of four.
        if (complete && row_padding != 0)
            fseek(file, (long)row_padding, SEEK_CUR);
    }
    fclose(file);

    if (!complete)
    {
        fprintf(stderr, "load_bmp: pixel data of '%s' is truncated\n", file_name);
        free(pixels);
        size = 0;
        return;
    }

    *bufferRGB = pixels;
    *width = (int)cols;
    *heigth = (int)rows;
}
// Writes a pixel array as a 24-bit BMP file, reusing the headers of an
// existing BMP file.
//
//   bufferRGB - pixels to write, row after row without padding; the image
//               geometry is taken from the file-scope `width` and `heigth`.
//   new_name  - path of the file to create.
//   old_name  - path of the BMP whose file and info headers are copied.
//
// Nothing is written when the buffer is NULL, the geometry is not positive,
// or the headers cannot be read; failures are reported on stderr.
void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
{
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "save_bmp: no image data to save\n");
        return;
    }

    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    FILE *file = fopen(old_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot open '%s'\n", old_name);
        return;
    }
    const bool headers_read =
        fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1;
    fclose(file);
    // bfOffBits below the combined header size would place the pixel data
    // inside the headers (and used to underflow the seek offset).
    if (!headers_read ||
        bmp_file_header.bfOffBits < sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER))
    {
        fprintf(stderr, "save_bmp: '%s' has no usable BMP headers\n", old_name);
        return;
    }

    // Rows are padded to a multiple of four bytes in the file.
    const size_t cols = (size_t)width;
    const size_t rows = (size_t)heigth;
    const size_t alligment_x = (4 - (3 * cols) % 4) % 4;
    const size_t row_bytes = 3 * cols + alligment_x;
    const size_t total_bytes = row_bytes * rows;

    unsigned char *to_save = (unsigned char *)malloc(total_bytes);
    if (to_save == NULL)
    {
        fprintf(stderr, "save_bmp: out of memory\n");
        return;
    }

    size_t index = 0;
    for (size_t y = 0; y < rows; y++)
    {
        const RGBTRIPLE *row = bufferRGB + y * cols;
        for (size_t x = 0; x < cols; x++)
        {
            to_save[index++] = row[x].rgbtBlue;
            to_save[index++] = row[x].rgbtGreen;
            to_save[index++] = row[x].rgbtRed;
        }
        for (size_t p = 0; p < alligment_x; p++)
            to_save[index++] = 0;
    }
    // Report the number of bytes actually prepared for writing.
    std::cout << "velkost na ulozenie je " << total_bytes << '\n';

    file = fopen(new_name, "wb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot create '%s'\n", new_name);
        free(to_save);
        return;
    }

    // Headers first, then the pixel data at the offset the file header names.
    bool written =
        fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1 &&
        fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0 &&
        fwrite(to_save, 1, total_bytes, file) == total_bytes;
    if (fclose(file) != 0)
        written = false;
    free(to_save);

    if (!written)
        fprintf(stderr, "save_bmp: writing '%s' failed\n", new_name);
}
// Grayscale kernel: one thread handles one RGBTRIPLE element.
//   d_bufferRGB     - input elements (read only).
//   d_new_bufferRGB - output elements; all three channels of an element get
//                     the integer average of the input element's channels.
//   d_size          - pointer to the exclusive upper bound for the element
//                     index; threads at or beyond *d_size do nothing.
// The element index is blockIdx.x * blockDim.x + threadIdx.x, so only the x
// dimension of the launch configuration is used.
__global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
{
    const int element = blockIdx.x * blockDim.x + threadIdx.x;
    if (!(element < *d_size))
        return;

    const RGBTRIPLE source = d_bufferRGB[element];
    const BYTE average = ((source.rgbtRed + source.rgbtGreen + source.rgbtBlue) / 3);

    RGBTRIPLE *target = &d_new_bufferRGB[element];
    target->rgbtRed = average;
    target->rgbtGreen = average;
    target->rgbtBlue = average;
}
// Entry point of this listing: allocates the device buffers, loads test.bmp,
// runs the grayscale kernel and saves the result as filter_grayscale_GPU.bmp.
int main()
{
// NOTE(review): width and heigth are file-scope ints that nothing has assigned
// yet (load_bmp only runs further down), so both requests below are for
// width*heigth * 4 == 0 bytes.
gpuErrchk(cudaMalloc(&d_new_bufferRGB, width*heigth * 4));
gpuErrchk(cudaMalloc(&d_bufferRGB, width*heigth * 4));
gpuErrchk(cudaMalloc(&d_size, sizeof(size)));
load_bmp(&bufferRGB, &width, &heigth, "test.bmp"); // the bufferRGB pointer is created and filled here
gpuErrchk(cudaMemcpy(d_size, &size, sizeof(size), cudaMemcpyHostToDevice));
// NOTE(review): this is the call reported as failing with "invalid argument";
// d_bufferRGB was requested above before the image dimensions were known.
gpuErrchk(cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice));
// NOTE(review): fixed launch of 32 blocks x 512 threads regardless of the image
// size, and no error check follows the launch. The bound handed to the kernel
// (size) is a byte count while the kernel compares it to an element index.
CUDA_filter_grayscale << <32, 512 >> > (d_bufferRGB, d_new_bufferRGB, d_size); // launch configuration is a placeholder for now
// NOTE(review): new_bufferRGB is never assigned anywhere in this listing, so
// the destination of this copy (and the buffer passed to save_bmp) is still
// the zero-initialised file-scope pointer.
gpuErrchk(cudaMemcpy(new_bufferRGB, d_new_bufferRGB, size, cudaMemcpyDeviceToHost));
save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");
// NOTE(review): neither the device allocations nor bufferRGB are released.
}
这个问题已经困扰我好几天了,请帮帮我。
答案 0 :(得分:1)
在 @Robert Crovella 的帮助下,我完成了我的代码。我还额外实现了一些功能,例如根据图像大小动态计算内核的启动配置,作为送给网友的免费礼物。代码完全适用于微软的 BMP 第 3 版格式(可以用“画图”创建这样的图片)。我本想上传一些图片,但上传上限是 2MB,这对真彩色图像来说不够。编译时会出现一个空指针错误,但程序仍会生成并存放在项目的 Debug 文件夹中;把图像放在该文件夹中再运行程序,即可正常工作。
上面代码的问题是:1. new_bufferRGB 未初始化;2. 在加载函数为变量赋值之前,我就使用了这些变量;3. cudaMemcpy 函数调用中的错误。
所以,修正后的代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "device_launch_parameters.h"
#include <iostream>
// Image geometry in pixels; filled in by load_bmp through its out-parameters
// and read by save_bmp and main.
int width;
int heigth;
// Byte count computed by load_bmp from the BMP file header.
long size;
// Device-side long that main allocates and fills for the kernel's bound.
long *d_size;
// Host pixel buffers: the loaded image and the processed result.
RGBTRIPLE *bufferRGB;
RGBTRIPLE *new_bufferRGB;
// Device pixel buffers: kernel input and kernel output.
RGBTRIPLE *d_bufferRGB;
RGBTRIPLE *d_new_bufferRGB;
// Wraps a CUDA runtime call and forwards its status, together with the call
// site, to gpuAssert. The do/while(0) form makes the macro expand to a single
// statement, so it is safe inside unbraced if/else bodies.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA runtime call.
//   code  - status returned by the call; cudaSuccess is ignored.
//   file  - source file of the call site (normally __FILE__).
//   line  - source line of the call site (normally __LINE__).
//   abort - when true (the default) the process exits with `code` as its
//           exit status after the message has been printed. Pass false to
//           only log the error and keep running.
// The exit had been commented out, which left `abort` without effect and let
// the program keep working on device state that was already invalid.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
// Loads an uncompressed, bottom-up, 24-bit BMP file into a freshly allocated
// pixel array.
//
//   bufferRGB - out: malloc-family buffer with the pixels, row after row in
//               file order and without row padding; NULL on failure. The
//               caller releases it with free().
//   width     - out: image width in pixels; 0 on failure.
//   heigth    - out: image height in pixels; 0 on failure.
//   file_name - path of the BMP file to read.
//
// Side effect: the file-scope `size` is set to the byte count of the pixel
// area as stored in the file (rows padded to a multiple of 4 bytes), or 0 on
// failure. It is derived from the image geometry rather than from the
// bfSize header field, so it cannot be inflated by trailing bytes or a bogus
// header. Failures are reported on stderr.
void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
{
    // Publish "nothing loaded" first; the outputs are only filled in on success.
    *bufferRGB = NULL;
    *width = 0;
    *heigth = 0;
    size = 0;

    FILE *file = fopen(file_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "load_bmp: cannot open '%s'\n", file_name);
        return;
    }

    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    if (fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) != 1 ||
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) != 1)
    {
        fprintf(stderr, "load_bmp: '%s' is too short to hold the BMP headers\n", file_name);
        fclose(file);
        return;
    }

    // The reader below consumes exactly three bytes per pixel, so anything
    // other than plain 24-bit data would be decoded as garbage.
    const bool supported =
        bmp_file_header.bfType == 0x4D42 && // "BM" signature
        bmp_file_header.bfOffBits >= sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) &&
        bmp_info_header.biBitCount == 24 &&
        bmp_info_header.biCompression == BI_RGB &&
        bmp_info_header.biWidth > 0 &&
        bmp_info_header.biHeight > 0;
    if (!supported)
    {
        fprintf(stderr, "load_bmp: '%s' is not an uncompressed bottom-up 24-bit BMP\n", file_name);
        fclose(file);
        return;
    }

    // Keep every byte count within the range of a signed 32-bit value so that
    // `size` (a long) and the int indices used elsewhere cannot overflow.
    const size_t max_bytes = 0x7fffffff;
    const size_t cols = (size_t)bmp_info_header.biWidth;
    const size_t rows = (size_t)bmp_info_header.biHeight;
    if (cols > max_bytes / 4)
    {
        fprintf(stderr, "load_bmp: '%s' is too wide\n", file_name);
        fclose(file);
        return;
    }
    const size_t row_padding = (4 - (3 * cols) % 4) % 4;
    const size_t row_bytes = 3 * cols + row_padding;
    if (rows > max_bytes / row_bytes)
    {
        fprintf(stderr, "load_bmp: '%s' is too large\n", file_name);
        fclose(file);
        return;
    }

    size = (long)(row_bytes * rows);
    std::cout << "size of loaded pixels is " << size << '\n';

    // Four bytes are reserved per pixel although only three are used: callers
    // in this file copy `size` bytes (padded rows) out of this buffer, and
    // row_bytes never exceeds 4 * cols. calloc keeps that slack zeroed.
    RGBTRIPLE *pixels = (RGBTRIPLE *)calloc(cols * rows, 4);
    if (pixels == NULL)
    {
        fprintf(stderr, "load_bmp: out of memory for '%s'\n", file_name);
        fclose(file);
        size = 0;
        return;
    }

    bool complete = fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0;
    for (size_t y = 0; complete && y < rows; y++)
    {
        RGBTRIPLE *row = pixels + y * cols;
        for (size_t x = 0; x < cols; x++)
        {
            unsigned char bgr[3]; // file order is blue, green, red
            if (fread(bgr, 1, sizeof(bgr), file) != sizeof(bgr))
            {
                complete = false;
                break;
            }
            row[x].rgbtBlue = bgr[0];
            row[x].rgbtGreen = bgr[1];
            row[x].rgbtRed = bgr[2];
        }
        // Skip the bytes that pad the row to a multiple of four.
        if (complete && row_padding != 0)
            fseek(file, (long)row_padding, SEEK_CUR);
    }
    fclose(file);

    if (!complete)
    {
        fprintf(stderr, "load_bmp: pixel data of '%s' is truncated\n", file_name);
        free(pixels);
        size = 0;
        return;
    }

    *bufferRGB = pixels;
    *width = (int)cols;
    *heigth = (int)rows;
}
// Writes a pixel array as a 24-bit BMP file, reusing the headers of an
// existing BMP file.
//
//   bufferRGB - pixels to write, row after row without padding; the image
//               geometry is taken from the file-scope `width` and `heigth`.
//   new_name  - path of the file to create.
//   old_name  - path of the BMP whose file and info headers are copied.
//
// Nothing is written when the buffer is NULL, the geometry is not positive,
// or the headers cannot be read; failures are reported on stderr.
void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
{
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "save_bmp: no image data to save\n");
        return;
    }

    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;
    FILE *file = fopen(old_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot open '%s'\n", old_name);
        return;
    }
    const bool headers_read =
        fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1;
    fclose(file);
    // bfOffBits below the combined header size would place the pixel data
    // inside the headers (and used to underflow the seek offset).
    if (!headers_read ||
        bmp_file_header.bfOffBits < sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER))
    {
        fprintf(stderr, "save_bmp: '%s' has no usable BMP headers\n", old_name);
        return;
    }

    // Rows are padded to a multiple of four bytes in the file.
    const size_t cols = (size_t)width;
    const size_t rows = (size_t)heigth;
    const size_t alligment_x = (4 - (3 * cols) % 4) % 4;
    const size_t row_bytes = 3 * cols + alligment_x;
    const size_t total_bytes = row_bytes * rows;

    unsigned char *to_save = (unsigned char *)malloc(total_bytes);
    if (to_save == NULL)
    {
        fprintf(stderr, "save_bmp: out of memory\n");
        return;
    }

    size_t index = 0;
    for (size_t y = 0; y < rows; y++)
    {
        const RGBTRIPLE *row = bufferRGB + y * cols;
        for (size_t x = 0; x < cols; x++)
        {
            to_save[index++] = row[x].rgbtBlue;
            to_save[index++] = row[x].rgbtGreen;
            to_save[index++] = row[x].rgbtRed;
        }
        for (size_t p = 0; p < alligment_x; p++)
            to_save[index++] = 0;
    }

    file = fopen(new_name, "wb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot create '%s'\n", new_name);
        free(to_save);
        return;
    }

    // Headers first, then the pixel data at the offset the file header names.
    bool written =
        fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1 &&
        fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0 &&
        fwrite(to_save, 1, total_bytes, file) == total_bytes;
    if (fclose(file) != 0)
        written = false;
    free(to_save);

    if (!written)
        fprintf(stderr, "save_bmp: writing '%s' failed\n", new_name);
}
// Grayscale kernel; each thread converts at most one RGBTRIPLE element.
//   d_bufferRGB     - input elements (read only).
//   d_new_bufferRGB - output elements; red, green and blue of an element are
//                     all set to the integer mean of the input channels.
//   d_size          - pointer to the exclusive upper bound of the element
//                     index; threads whose index is not below *d_size return
//                     without touching memory.
// Indexing uses blockIdx.x * blockDim.x + threadIdx.x, i.e. a 1D launch.
__global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
{
    const int element = blockIdx.x * blockDim.x + threadIdx.x;
    if (element >= *d_size)
        return;

    const RGBTRIPLE in = d_bufferRGB[element];
    const int channel_sum = in.rgbtRed + in.rgbtGreen + in.rgbtBlue;
    const BYTE mean = (channel_sum / 3);

    d_new_bufferRGB[element].rgbtBlue = mean;
    d_new_bufferRGB[element].rgbtGreen = mean;
    d_new_bufferRGB[element].rgbtRed = mean;
}
// Loads test.bmp, converts it to grayscale on the GPU and writes the result
// to filter_grayscale_GPU.bmp.
//
// Returns 0 on success and 1 when the image cannot be loaded, host memory
// runs out, or any CUDA call fails. Every CUDA status is reported through
// gpuErrchk at its call site and also tracked in `err`, so later steps are
// skipped after a failure whether or not gpuAssert terminates the process.
int main()
{
    // Load first: width, heigth and bufferRGB are only valid afterwards.
    load_bmp(&bufferRGB, &width, &heigth, "test.bmp");
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "main: test.bmp could not be loaded\n");
        return 1;
    }

    // All buffers hold exactly width * heigth RGBTRIPLE elements. The file's
    // byte count (`size`) includes row padding and is not a valid copy
    // length for the unpadded pixel array.
    const size_t pixelCount = (size_t)width * (size_t)heigth;
    if (pixelCount > 2147483647u / sizeof(RGBTRIPLE))
    {
        fprintf(stderr, "main: image is too large\n");
        free(bufferRGB);
        bufferRGB = NULL;
        return 1;
    }
    const size_t imageBytes = pixelCount * sizeof(RGBTRIPLE);
    // The kernel compares this bound with an element index, so it has to be
    // a pixel count rather than a byte count.
    long devicePixelCount = (long)pixelCount;

    // Host buffer that receives the processed image.
    new_bufferRGB = (RGBTRIPLE *)malloc(imageBytes);
    if (new_bufferRGB == NULL)
    {
        fprintf(stderr, "main: out of host memory\n");
        free(bufferRGB);
        bufferRGB = NULL;
        return 1;
    }

    // Device allocations.
    cudaError_t err;
    gpuErrchk(err = cudaMalloc(&d_new_bufferRGB, imageBytes));
    if (err == cudaSuccess) { gpuErrchk(err = cudaMalloc(&d_bufferRGB, imageBytes)); }
    if (err == cudaSuccess) { gpuErrchk(err = cudaMalloc(&d_size, sizeof(long))); }

    // Host -> device copies.
    if (err == cudaSuccess) { gpuErrchk(err = cudaMemcpy(d_size, &devicePixelCount, sizeof(long), cudaMemcpyHostToDevice)); }
    if (err == cudaSuccess) { gpuErrchk(err = cudaMemcpy(d_bufferRGB, bufferRGB, imageBytes, cudaMemcpyHostToDevice)); }

    if (err == cudaSuccess)
    {
        // One thread per pixel; the block count is rounded up so the tail of
        // the image is covered, and the kernel's bound check drops the excess.
        const unsigned int numbThreads = 256;
        const unsigned int numbBlocks = (unsigned int)((pixelCount + numbThreads - 1) / numbThreads);
        CUDA_filter_grayscale <<<numbBlocks, numbThreads >>> (d_bufferRGB, d_new_bufferRGB, d_size);
        // A launch does not return a status; configuration errors surface here.
        gpuErrchk(err = cudaGetLastError());
    }

    // Device -> host copy; this blocking copy also waits for the kernel.
    if (err == cudaSuccess) { gpuErrchk(err = cudaMemcpy(new_bufferRGB, d_new_bufferRGB, imageBytes, cudaMemcpyDeviceToHost)); }

    if (err == cudaSuccess)
        save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");

    // Release everything; the pointers are file-scope, so reset them as well.
    cudaFree(d_size);
    cudaFree(d_bufferRGB);
    cudaFree(d_new_bufferRGB);
    d_size = NULL;
    d_bufferRGB = NULL;
    d_new_bufferRGB = NULL;
    free(new_bufferRGB);
    free(bufferRGB);
    new_bufferRGB = NULL;
    bufferRGB = NULL;

    return err == cudaSuccess ? 0 : 1;
}