Prevoius对此计划的疑问:
Translating four nested loops into a CUDA kernel
我正在使用Visual Studio 2012和CUDA 6 代码应该使用CUDA将模糊效果添加到BMP文件中。在转换到CUDA之前,一切都很完美。这是我与C和CUDA的第一个项目,所以我可能犯了一些愚蠢的错误。我的代码中出现了76个错误,其中大多数都是“这个声明没有存储类或类型说明符”,而且没有任何意义。我在http://computer-graphics.se/hello-world-for-cuda.html之前尝试了Hello World程序,它运行正常。有同样的错误,所以我并不真正关心它们。
但我有两个不同的错误:
Error 2 error : Unaligned memory accesses not supported C:\Users\Karpińscy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
和
错误3错误MSB3721:命令“”C:\ Program Files \ NVIDIA GPU 计算工具包\ CUDA \ v6.0 \ bin \ nvcc.exe“ -gencode = arch = compute_10,code = \“sm_10,compute_10 \” - use-local-env --cl-version 2012 -ccbin“C:\ Program Files(x86)\ Microsoft Visual Studio 11.0 \ VC \ bin” -I“C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include“-I”C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include“-G -maxrregcount = 0 --machine 32 --compile -cudart static -g -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler“/ EHsc / W3 / nologo / Od / Zi / RTC1 / MDd”-o Debug \ kernel.cu.obj“C:\ Users \Karpińscy \ documents \ visual studio 2012 \ Projects \ blur \ blur \ kernel.cu“”退出代码2. C:\ Program 文件(x86)\ MSBuild \ Microsoft.Cpp \ v4.0 \ V110 \ BuildCustomizations \ CUDA 6.0.targets 597 9模糊
我甚至在google.com的第二个网站上搜索了答案,我还没有资助那些对我有用的解决方案。请帮帮我!
程序代码:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#pragma pack(push,1)
/* Windows 3.x bitmap file header */
typedef struct {
char filetype[2]; /* magic - always 'B' 'M' */
unsigned int filesize;
short reserved1;
short reserved2;
unsigned int dataoffset; /* offset in bytes to actual bitmap data */
} file_header;
/* Windows 3.x bitmap full header, including file header */
typedef struct {
file_header fileheader;
unsigned int headersize;
int width;
int height;
short planes;
short bitsperpixel; /* we only support the value 24 here */
unsigned int compression; /* we do not support compression */
unsigned int bitmapsize;
int horizontalres;
int verticalres;
unsigned int numcolors;
unsigned int importantcolors;
} bitmap_header;
#pragma pack(pop)
__global__ void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;
xx = blockIdx.y * blockDim.y + threadIdx.y;
yy = blockIdx.x * blockDim.x + threadIdx.x;
if(xx >= hp->width || yy >= hp->height)
return;
avgB = avgG = avgR = 0;
ile = 0;
for(x = xx; x < hp->width && x < xx + blurSize; x++)
{
for(y = yy; y < hp->height && y < yy + blurSize; y++)
{
avgB += data[x*3 + y*hp->width*3 + 0];
avgG += data[x*3 + y*hp->width*3 + 1];
avgR += data[x*3 + y*hp->width*3 + 2];
ile++;
}
}
avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;
data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}
int filter(char* input, char *output)
{
FILE *fp,*out;
bitmap_header* hp;
bitmap_header* d_hp;
unsigned char *data;
unsigned char *d_data;
//Open input file:
fp = fopen(input, "r");
if(fp==NULL)
return 1;
//Read the input file headers:
hp=(bitmap_header*)malloc(sizeof(bitmap_header));
cudaMalloc( &d_hp, sizeof(bitmap_header));
if(hp==NULL)
return 1;
fread(hp, sizeof(bitmap_header), 1, fp);
cudaMemcpy(d_hp, hp, sizeof(bitmap_header), cudaMemcpyHostToDevice);
//Read the data of the image:
data = (unsigned char*)malloc(sizeof(char)*hp->bitmapsize);
cudaMalloc( &d_data, sizeof(char)*hp->bitmapsize);
fseek(fp,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fread(data,sizeof(char),hp->bitmapsize, fp);
cudaMemcpy(d_data, data, sizeof(char)*hp->bitmapsize, cudaMemcpyHostToDevice);
//Not sure if correctly calling function
dim3 block(16,16);
dim3 grid ( (hp->height + 15)/16, (hp->width + 15)/16 );
blur<<<grid,block>>>(d_hp, d_data);
cudaMemcpy(data, d_data, sizeof(char)*hp->bitmapsize, cudaMemcpyDeviceToHost);
//Open output file:
out = fopen(output, "wb");
if(out==NULL)
{
fclose(fp);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 1;
}
fwrite(hp,sizeof(char),sizeof(bitmap_header),out);
fseek(out,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fwrite(data,sizeof(char),hp->bitmapsize,out);
fclose(fp);
fclose(out);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 0;
}
int main(int argc, char* argv[])
{
char *path = "file.bmp";
filter(path,path);
return 0;
}
我被要求从What is the canonical way to check for errors using the CUDA runtime API?实施错误检查,但我不知道它是如何帮助我的。
编辑:
由于@DanielKamilKozar,我解决了这些问题。程序编译但模糊不会添加到BMP文件。是否正确调用了模糊函数的CUDA语法?
答案 0 :(得分:1)
我通过不通过函数参数发送完整的BMP头来解决它,但它是必要的内容。函数没有被调用的另一个问题,我通过更新CUDA软件修复了这个问题。
答案 1 :(得分:1)
我能够通过将arch值从sm_10更改为sm_20来解决此问题。我的应用程序在Win 8.1 x64 VS2012上运行在GT750M上。