编译cuda项目时出错

时间:2014-04-24 19:09:00

标签: c cuda nvcc

我在使用C Cuda和lodepng库编译cuda项目时遇到了一些麻烦。

我的makefile看起来像这样。

# Target `gpu`: rebuilt whenever super-resolution.cu changes.
# Steps, in order: compile lodepng.c to lodepng.o with gcc (debug info plus
# optimisation), compile the CUDA source to super-resolution.o with nvcc, link
# the executable super-resolution-cuda with nvcc, then delete both object files.
# NOTE(review): lodepng.o is compiled but is not listed on the nvcc link line
# below, so the lodepng_* symbols are not supplied to the linker at that step.
gpu:    super-resolution.cu
    gcc -g -O -c lodepng.c
    nvcc -c super-resolution.cu
    nvcc -o super-resolution-cuda super-resolution.o 
    rm -rf super-resolution.o
    rm -rf lodepng.o

有人能告诉我哪里做错了吗?编译时它报出如下错误:

nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
parallel-algorithm/super-resolution.cu:238: undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'
parallel-algorithm/super-resolution.cu:259: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:269: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:282: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:292: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:301: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
...

我只需要一种方法来编译我的.cu文件,并在编译过程中使用nvcc将C .o文件添加到其中。

编辑:尝试过建议。没有成功。

gcc -g -O -c lodepng.c
nvcc -c super-resolution.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
 #import "cuda.h"
  ^
super-resolution.cu(106): warning: expression has no effect

super-resolution.cu(116): warning: expression has no effect

super-resolution.cu(141): warning: variable "y" was declared but never referenced

super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
 #import "cuda.h"
  ^
super-resolution.cu(106): warning: expression has no effect

super-resolution.cu(116): warning: expression has no effect

super-resolution.cu(141): warning: variable "y" was declared but never referenced

ptxas /tmp/tmpxft_00000851_00000000-5_super-resolution.ptx, line 197; warning : Double is not supported. Demoting to float
nvcc -o super-resolution-cuda super-resolution.o lodepng.o

nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
tmpxft_00000851_00000000-3_super-resolution.cudafe1.cpp:(.text+0x5d): undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'

它仍然找不到目标文件(.o)中这些函数的引用。 编辑:这是我们的.cu文件。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>

extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );

2 个答案:

答案 0 :(得分:2)

  1. 不要使用#import。如果您想要包含cuda.h(这应该是不必要的),请使用#include。不过,换作是我,会直接从您的super-resolution.cu文件中删除该行。
  2. 之前没有展示、但现在已经很明显的一点是:您在super-resolution.cu中包含了lodepng.h,随后又为两个函数(lodepng_decode32_file 和 lodepng_encode32_file)指定了C链接(C-linkage)。当我尝试编译您的super-resolution.cu时,编译器给出了如下错误(我不清楚您为什么没有看到它们):

    super-resolution.cu(8): error: linkage specification is incompatible with previous "lodepng_encode32_file"
    lodepng.h(184): here
    
    super-resolution.cu(9): error: linkage specification is incompatible with previous "lodepng_decode32_file"
    lodepng.h(134): here
    

    所以基本上,你是被C与C++链接方式(linkage)的差异绊倒了。

  3. 我认为最简单的解决方案是使用lodepng.cpp(而不是lodepng.c),从super-resolution.cu删除以下行:

    extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
    extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );
    

    然后把所有内容都按C++方式编译并链接即可:

    $ g++ -c lodepng.cpp
    $ nvcc -c super-resolution.cu
    nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
    $ nvcc -o super-resolution super-resolution.o lodepng.o
    nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
    $
    
  4. 如果你确实想以C方式而不是C++方式链接lodepng.o,那么你需要修改lodepng.h,在声明所需函数的位置加上适当的extern "C"包装。在我看来,这样做会变得比较混乱。

  5. 如果您想要删除有关sm_10的警告,请添加nvcc开关以针对其他架构进行编译,例如:

    nvcc -arch=sm_20 ...
    

    但请确保您选择的架构与您的GPU兼容。

答案 1 :(得分:1)

以下是代码的简单摘要。

可以从这里获取lodepng库(http://lodev.org/lodepng/)。

将其源文件重命名为.c文件后,即可在C中使用。

即使在这种情况下,仍然存在如下编译问题:
"undefined reference to `lodepng_decode32_file'"
"undefined reference to `lodepng_encode32_file'"

文件:Makefile

# Default target `all`; it lists `gpu` as its prerequisite.
# NOTE(review): no rule for `gpu` is visible in this snippet — confirm one
# exists elsewhere, otherwise make cannot satisfy the prerequisite.
# Steps, in order: compile lodepng.c to lodepng.o with gcc, compile the CUDA
# source to super-resolution.o with nvcc, link both objects into
# super-resolution-cuda with nvcc, then delete the intermediate object files.
all:    gpu
    gcc -g -O -c lodepng.c
    nvcc -c super-resolution.cu
    nvcc -o super-resolution-cuda super-resolution.o lodepng.o
    rm -rf super-resolution.o
    rm -rf lodepng.o

文件:super-resolution.cu

#import "cuda.h"
#include "lodepng.h"

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>

extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );

//GPU 3x3 Blur.
//
// For every interior pixel of an interleaved 4-byte-per-pixel image, writes to
// `buffer` the integer mean of the 3x3 neighbourhood read from `image`, one
// channel at a time. Pixels on the one-pixel border are not written, so the
// caller decides what `buffer` holds there.
//
// Launch layout: 1D grid of 1D blocks, one thread per pixel. Any configuration
// with gridDim.x * blockDim.x >= width * height is valid; surplus threads exit
// without touching memory. A single-block launch still works for tiny images.
// Uses no shared memory.
//
// image  : device pointer to 4 * width * height input bytes.
// buffer : device pointer to 4 * width * height output bytes.
// width  : image width in pixels.
// height : image height in pixels.
__global__ void gpuBlur(unsigned char* image, unsigned char* buffer, int width, int height)
{
    const int kChannels = 4;

    // Guard the modulo/division below against a degenerate width.
    if (width <= 0)
        return;

    // Flat pixel index across the whole grid. Computed in 64 bits so that
    // blockIdx.x * blockDim.x cannot overflow for large images.
    const long long pixel = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long i = pixel % width;  // column
    const long long j = pixel / width;  // row

    // Skip border pixels (they have no complete 3x3 neighbourhood) and any
    // surplus thread whose row lies beyond the image.
    if (i == 0 || j == 0 || i >= width - 1 || j >= height - 1)
        return;

    const long long rowStride = (long long)kChannels * width;
    const long long center = rowStride * j + kChannels * i;

    // Exactly kChannels bytes belong to this pixel; touching a fifth one would
    // overwrite the first channel of the neighbouring pixel.
    for (int k = 0; k < kChannels; k++)
    {
        int sum = 0;
        for (int dj = -1; dj <= 1; dj++)
        {
            for (int di = -1; di <= 1; di++)
            {
                sum += image[center + dj * rowStride + di * kChannels + k];
            }
        }
        // The sum is at most 9 * 255, so the mean always fits in a byte.
        buffer[center + k] = (unsigned char)(sum / 9);
    }
}

// Aborts the program with a readable message when a CUDA runtime call failed.
// `what` names the operation for the diagnostic.
static void cudaCheck(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Loads the PNG named by argv[1], blurs it on the GPU and writes the result to
// GPU_OUTPUT1_Blur.png.
//
// Returns 0 on success (and, as before, when no input file is given);
// returns/exits non-zero when decoding, allocation, any CUDA operation or
// encoding fails.
int main(int argc, char *argv[])
{
    //Items for image processing;
    unsigned int error;
    unsigned char* image = NULL;
    unsigned int width = 0, height = 0;

    //Load the image;
    if (argc <= 1)
    {
        fprintf(stderr, "Usage: %s <input.png>\n", argc > 0 ? argv[0] : "super-resolution-cuda");
        return 0;
    }

    error = lodepng_decode32_file(&image, &width, &height, argv[1]);
    printf("Loaded file: %s[%u]\n", argv[1], error);
    if (error != 0 || image == NULL || width == 0 || height == 0)
    {
        // Without a decoded image every later step would read garbage.
        fprintf(stderr, "Could not decode %s (lodepng error %u)\n", argv[1], error);
        free(image);
        return EXIT_FAILURE;
    }

    const size_t pixels = (size_t)width * height;
    const size_t bytes = sizeof(unsigned char) * 4 * pixels;

    unsigned char* buffer = (unsigned char*)malloc(bytes);
    if (buffer == NULL)
    {
        fprintf(stderr, "Out of host memory (%lu bytes)\n", (unsigned long)bytes);
        free(image);
        return EXIT_FAILURE;
    }

    //GPU Blur Section.
    unsigned char* image_gpu = NULL;
    unsigned char* blur_gpu = NULL;
    cudaCheck(cudaMalloc((void**)&image_gpu, bytes), "cudaMalloc(image_gpu)");
    cudaCheck(cudaMalloc((void**)&blur_gpu, bytes), "cudaMalloc(blur_gpu)");
    cudaCheck(cudaMemcpy(image_gpu, image, bytes, cudaMemcpyHostToDevice), "upload of input image");
    // The kernel leaves border pixels untouched, so seed the output with the
    // input to give the border its original colours.
    cudaCheck(cudaMemcpy(blur_gpu, image, bytes, cudaMemcpyHostToDevice), "upload of output seed");

    // One thread per pixel, spread over as many blocks as needed: a single
    // block cannot hold width * height threads for any realistic image.
    const unsigned int threadsPerBlock = 256;
    const unsigned int blocks = (unsigned int)((pixels + threadsPerBlock - 1) / threadsPerBlock);
    gpuBlur<<< blocks, threadsPerBlock >>> (image_gpu, blur_gpu, (int)width, (int)height);
    cudaCheck(cudaGetLastError(), "gpuBlur launch");          // bad launch configuration
    cudaCheck(cudaDeviceSynchronize(), "gpuBlur execution");  // faults inside the kernel

    cudaCheck(cudaMemcpy(buffer, blur_gpu, bytes, cudaMemcpyDeviceToHost), "download of blurred image");

    //Spit out buffer as an image.
    error = lodepng_encode32_file("GPU_OUTPUT1_Blur.png", buffer, width, height);
    if (error != 0)
    {
        fprintf(stderr, "Could not write GPU_OUTPUT1_Blur.png (lodepng error %u)\n", error);
    }

    cudaCheck(cudaFree(image_gpu), "cudaFree(image_gpu)");
    cudaCheck(cudaFree(blur_gpu), "cudaFree(blur_gpu)");

    free(buffer);
    free(image);

    return error == 0 ? 0 : EXIT_FAILURE;
}