我在编译一个使用 CUDA C 和 lodepng 库的 CUDA 项目时遇到了一些麻烦。
我的makefile看起来像这样。
# Target `gpu`: builds the super-resolution-cuda executable; depends on super-resolution.cu.
gpu: super-resolution.cu
# Compile the lodepng C source with gcc, producing lodepng.o.
gcc -g -O -c lodepng.c
# Compile the CUDA source with nvcc, producing super-resolution.o.
nvcc -c super-resolution.cu
# Link step. Only super-resolution.o is listed on this line; lodepng.o is not passed to the linker here.
nvcc -o super-resolution-cuda super-resolution.o
# Remove the intermediate object files after linking.
rm -rf super-resolution.o
rm -rf lodepng.o
有人能告诉我哪里做错了吗?构建时它报出如下错误:
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
parallel-algorithm/super-resolution.cu:238: undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'
parallel-algorithm/super-resolution.cu:259: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:269: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:282: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:292: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:301: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
...
我只需要一种方法:用 nvcc 编译我的 .cu 文件,并在构建过程中把 C 编译得到的 .o 文件一并链接进去。
编辑:我已按建议尝试,但没有成功。输出如下:
gcc -g -O -c lodepng.c
nvcc -c super-resolution.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
#import "cuda.h"
^
super-resolution.cu(106): warning: expression has no effect
super-resolution.cu(116): warning: expression has no effect
super-resolution.cu(141): warning: variable "y" was declared but never referenced
super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
#import "cuda.h"
^
super-resolution.cu(106): warning: expression has no effect
super-resolution.cu(116): warning: expression has no effect
super-resolution.cu(141): warning: variable "y" was declared but never referenced
ptxas /tmp/tmpxft_00000851_00000000-5_super-resolution.ptx, line 197; warning : Double is not supported. Demoting to float
nvcc -o super-resolution-cuda super-resolution.o lodepng.o
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
tmpxft_00000851_00000000-3_super-resolution.cudafe1.cpp:(.text+0x5d): undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'
链接器仍然无法在目标文件中找到这些函数的引用。
编辑:这是我们的 .cu 文件。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>
extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );
答案 0 :(得分:2)
不要使用 `#import`。如果您想要包含 `cuda.h`(这应该是不必要的),请使用 `#include`。不过,我会直接从您的 `super-resolution.cu` 文件中删除该行。
您之前没有展示、但现在已经很明显的一点是:您的 `super-resolution.cu` 既包含了 `lodepng.h`,又在后面为两个函数 `lodepng_decode32_file` 和 `lodepng_encode32_file` 指定了 C 链接(`extern "C"`)。当我尝试编译您的 `super-resolution.cu` 时,编译器给出了如下错误(我不知道您为什么没有看到它们):
super-resolution.cu(8): error: linkage specification is incompatible with previous "lodepng_encode32_file"
lodepng.h(184): here
super-resolution.cu(9): error: linkage specification is incompatible with previous "lodepng_decode32_file"
lodepng.h(134): here
所以归根结底,您是被 C 与 C++ 链接方式(linkage)的差异绊住了。
我认为最简单的解决方案是使用 `lodepng.cpp`(而不是 `lodepng.c`),并从 `super-resolution.cu` 中删除以下两行:
extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );
然后把所有文件都按 C++ 方式编译并链接即可:
$ g++ -c lodepng.cpp
$ nvcc -c super-resolution.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
$ nvcc -o super-resolution super-resolution.o lodepng.o
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
$
如果您确实想以 C 方式(而不是 C++ 方式)链接 `lodepng.o`,那么您需要修改 `lodepng.h`,在声明所需函数的地方加上适当的 `extern "C"` 包装。在我看来,这样做会变得比较混乱。
如果您想消除有关 `sm_10` 的警告,请为 `nvcc` 添加开关,以针对其他架构进行编译,例如:
nvcc -arch=sm_20 ...
但请确保您选择的架构与您的 GPU 兼容。
答案 1 :(得分:1)
以下是代码的简单摘要。
可以从这里获取lodepng库(http://lodev.org/lodepng/)。
将其源文件重命名为 `.c` 文件后,即可在 C 中使用。
即使是这样精简的代码,仍然存在以下编译(链接)问题:
"undefined reference to `lodepng_decode32_file'"
"undefined reference to `lodepng_encode32_file'"
文件:Makefile
# Target `all`: lists `gpu` as its prerequisite (no rule for `gpu` is shown in this snippet).
all: gpu
# Compile the lodepng C source with gcc, producing lodepng.o.
gcc -g -O -c lodepng.c
# Compile the CUDA source with nvcc, producing super-resolution.o.
nvcc -c super-resolution.cu
# Link both object files into the super-resolution-cuda executable.
nvcc -o super-resolution-cuda super-resolution.o lodepng.o
# Remove the intermediate object files after linking.
rm -rf super-resolution.o
rm -rf lodepng.o
文件:super-resolution.cu
#import "cuda.h"
#include "lodepng.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>
extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );
// Evaluates a CUDA runtime call and terminates the program with a diagnostic
// (file, line, CUDA error string) if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t cudaCheckErr_ = (call);                                 \
        if (cudaCheckErr_ != cudaSuccess) {                                 \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(cudaCheckErr_));                     \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// GPU 3x3 box blur.
//
// Both `image` and `buffer` are indexed as row-major images with 4 bytes per
// pixel (byte offset 4*width*row + 4*column + channel). Each thread handles
// one pixel: every channel of an interior pixel is replaced by the integer
// average of the 3x3 neighbourhood read from `image`, and the result is
// written to `buffer`. Pixels on the outer border are not written, so the
// caller decides what `buffer` holds there.
//
// Launch layout: 1D grid of 1D blocks providing at least width*height threads
// in total. Surplus threads exit without touching memory. A single-block
// launch still works for images small enough to fit in one block.
__global__ void gpuBlur(unsigned char* image, unsigned char* buffer, int width, int height)
{
    if (width <= 0 || height <= 0)
        return;

    // Global pixel index so the launch may span many blocks; a single block
    // cannot hold more threads than the hardware per-block limit.
    const size_t pixel = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (pixel >= (size_t)width * (size_t)height)
        return;

    const int i = (int)(pixel % (size_t)width);   // column
    const int j = (int)(pixel / (size_t)width);   // row

    // The 3x3 window would read outside the image on the border; skip it.
    if (i == 0 || j == 0 || i == width - 1 || j == height - 1)
        return;

    const size_t rowBytes = 4 * (size_t)width;
    const size_t center = rowBytes * (size_t)j + 4 * (size_t)i;

    // Exactly 4 channels per pixel. The previous bound (k <= 4) ran a fifth
    // iteration that overwrote channel 0 of the pixel to the right.
    for (int k = 0; k < 4; k++)
    {
        const unsigned int sum =
            image[center - rowBytes - 4 + k] +
            image[center - rowBytes     + k] +
            image[center - rowBytes + 4 + k] +
            image[center            - 4 + k] +
            image[center                + k] +
            image[center            + 4 + k] +
            image[center + rowBytes - 4 + k] +
            image[center + rowBytes     + k] +
            image[center + rowBytes + 4 + k];
        buffer[center + k] = (unsigned char)(sum / 9);
    }
}

// Loads the PNG named by argv[1], blurs it on the GPU with gpuBlur and writes
// the result to "GPU_OUTPUT1_Blur.png".
//
// Returns 0 on success and 1 when no input file is given, the image cannot be
// decoded, host memory cannot be allocated, or the output cannot be written.
// Any failing CUDA call terminates the program through CUDA_CHECK.
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <input.png>\n", argc > 0 ? argv[0] : "super-resolution-cuda");
        return 1;
    }

    //Load the image;
    unsigned char* image = NULL;
    unsigned int width = 0, height = 0;
    unsigned int error = lodepng_decode32_file(&image, &width, &height, argv[1]);
    printf("Loaded file: %s[%u]\n", argv[1], error);
    if (error != 0)
    {
        // A non-zero lodepng code means image/width/height are not usable.
        fprintf(stderr, "Failed to decode %s (lodepng error %u)\n", argv[1], error);
        return 1;
    }

    const size_t numPixels = (size_t)width * (size_t)height;
    const size_t numBytes = 4 * numPixels;   // 4 bytes per pixel
    if (numPixels == 0)
    {
        fprintf(stderr, "Image %s has no pixels\n", argv[1]);
        free(image);
        return 1;
    }

    unsigned char* buffer = (unsigned char*)malloc(numBytes);
    if (buffer == NULL)
    {
        fprintf(stderr, "Out of host memory (%lu bytes)\n", (unsigned long)numBytes);
        free(image);
        return 1;
    }

    //GPU Blur Section.
    unsigned char* image_gpu = NULL;
    unsigned char* blur_gpu = NULL;
    CUDA_CHECK(cudaMalloc((void**)&image_gpu, numBytes));
    CUDA_CHECK(cudaMalloc((void**)&blur_gpu, numBytes));
    CUDA_CHECK(cudaMemcpy(image_gpu, image, numBytes, cudaMemcpyHostToDevice));
    // The kernel never writes border pixels, so the output is pre-filled with
    // the source image to give the border defined contents.
    CUDA_CHECK(cudaMemcpy(blur_gpu, image, numBytes, cudaMemcpyHostToDevice));

    // One thread per pixel, split across blocks (ceil-div); launching all
    // pixels in one block fails for any image larger than the block limit.
    const unsigned int threadsPerBlock = 256;
    const unsigned int numBlocks =
        (unsigned int)((numPixels + threadsPerBlock - 1) / threadsPerBlock);
    gpuBlur<<< numBlocks, threadsPerBlock >>> (image_gpu, blur_gpu, (int)width, (int)height);
    CUDA_CHECK(cudaGetLastError());   // launch-configuration errors
    // Blocking copy: also surfaces errors raised while the kernel was running.
    CUDA_CHECK(cudaMemcpy(buffer, blur_gpu, numBytes, cudaMemcpyDeviceToHost));

    //Spit out buffer as an image.
    error = lodepng_encode32_file("GPU_OUTPUT1_Blur.png", buffer, width, height);
    if (error != 0)
        fprintf(stderr, "Failed to write GPU_OUTPUT1_Blur.png (lodepng error %u)\n", error);

    CUDA_CHECK(cudaFree(image_gpu));
    CUDA_CHECK(cudaFree(blur_gpu));
    free(buffer);
    free(image);
    return error != 0 ? 1 : 0;
}