Question

我在使用C Cuda和lodepng库编译cuda项目时遇到了一些麻烦。

我的makefile看起来像这样。

# Build rule shown in the question.
# Steps: compile lodepng.c to lodepng.o with gcc, compile super-resolution.cu to
# super-resolution.o with nvcc, link with nvcc, then delete both object files.
# Note that the link line lists only super-resolution.o; lodepng.o is compiled
# but is not passed to the linker.
gpu:    super-resolution.cu
    gcc -g -O -c lodepng.c
    nvcc -c super-resolution.cu
    nvcc -o super-resolution-cuda super-resolution.o 
    rm -rf super-resolution.o
    rm -rf lodepng.o

有人能告诉我我做错了什么，因为它抱怨

nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
parallel-algorithm/super-resolution.cu:238: undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'
parallel-algorithm/super-resolution.cu:259: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:269: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:282: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:292: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
parallel-algorithm/super-resolution.cu:301: undefined reference to `lodepng_encode32_file(char const*, unsigned char const*, unsigned int, unsigned int)'
...

我只需要一种方法来编译我的.cu文件，并在编译过程中使用nvcc将C .o文件添加到其中。

编辑：尝试过建议。没有成功。

gcc -g -O -c lodepng.c
nvcc -c super-resolution.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
 #import "cuda.h"
  ^
super-resolution.cu(106): warning: expression has no effect

super-resolution.cu(116): warning: expression has no effect

super-resolution.cu(141): warning: variable "y" was declared but never referenced

super-resolution.cu:1:2: warning: #import is a deprecated GCC extension [-Wdeprecated]
 #import "cuda.h"
  ^
super-resolution.cu(106): warning: expression has no effect

super-resolution.cu(116): warning: expression has no effect

super-resolution.cu(141): warning: variable "y" was declared but never referenced

ptxas /tmp/tmpxft_00000851_00000000-5_super-resolution.ptx, line 197; warning : Double is not supported. Demoting to float
nvcc -o super-resolution-cuda super-resolution.o lodepng.o

nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
super-resolution.o: In function `main':
tmpxft_00000851_00000000-3_super-resolution.cudafe1.cpp:(.text+0x5d): undefined reference to `lodepng_decode32_file(unsigned char**, unsigned int*, unsigned int*, char const*)'

它仍然无法找到对象文件的引用。编辑：这是我们的.cu文件。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>

extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );

Answer 1

不要#import。如果您想要包含cuda.h（这应该是不必要的），请使用#include。相反，我只会从您的super-resolution.cu文件中删除该行。
之前未显示的内容，但现在很明显，您super-resolution.cu中包含的lodepng.h和后来指定了C-linkage 2个功能：lodepng_decode32_file和lodepng_encode32_file。当我尝试编译你的super-resolution.cu时，编译器给了我这样的错误（我不知道你为什么不看到它们）：
```
super-resolution.cu(8): error: linkage specification is incompatible with previous "lodepng_encode32_file"
lodepng.h(184): here

super-resolution.cu(9): error: linkage specification is incompatible with previous "lodepng_decode32_file"
lodepng.h(134): here
```
所以基本上你正在绊倒C和C ++的联系。

我认为最简单的解决方案是使用lodepng.cpp（而不是lodepng.c），从super-resolution.cu删除以下行：

extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );

只需编译所有内容并链接所有c ++样式：

$ g++ -c lodepng.cpp
$ nvcc -c super-resolution.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
$ nvcc -o super-resolution super-resolution.o lodepng.o
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
$

如果你真的想要链接lodepng.o c风格而不是c ++风格，那么你需要用适当的lodepng.h包装器修改extern "C"，其中调用必要的函数出。在我看来，这会变得混乱。
如果您想要删除有关sm_10的警告，请添加nvcc开关以针对其他架构进行编译，例如：
```
nvcc -arch=sm_20 ...
```
但请确保您选择的任何内容与您的GPU兼容。

Answer 2

以下是代码的简单摘要。

可以从这里获取lodepng库（http://lodev.org/lodepng/）。

将其重命名为C将使其在C上可用。

即使在这个级别，还有

的编译问题

"undefined reference to `lodepng_decode32_file'"
"undefined reference to `lodepng_encode32_file'"

文件：Makefile

# Default target. It names `gpu` as a prerequisite; no rule for `gpu` is shown
# in this snippet.
# The recipe compiles lodepng.c with gcc and super-resolution.cu with nvcc,
# links both object files (super-resolution.o and lodepng.o) with nvcc, and
# finally deletes the object files.
all:    gpu
    gcc -g -O -c lodepng.c
    nvcc -c super-resolution.cu
    nvcc -o super-resolution-cuda super-resolution.o lodepng.o
    rm -rf super-resolution.o
    rm -rf lodepng.o

文件：super-resolution.cu

#import "cuda.h"
#include "lodepng.h"

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdio>

extern "C" unsigned lodepng_encode32_file(const char* ,const unsigned char* , unsigned , unsigned h);
extern "C" unsigned lodepng_decode32_file(unsigned char** , unsigned* , unsigned* ,const char* );

// GPU 3x3 box blur over an image stored as 4 bytes per pixel (as produced by
// lodepng_decode32_file), rows stored contiguously.
//
// Launch layout: 1D grid of 1D blocks. Pixels are visited with a grid-stride
// loop over the flat pixel index, so any grid/block size is valid, including
// a single block. No shared memory is used.
//
// image  : device pointer to the source pixels, 4*width*height bytes (only read).
// buffer : device pointer to the destination, 4*width*height bytes. Pixels on
//          the image border are skipped, so the caller must pre-fill them.
// width, height : image dimensions in pixels; non-positive values are a no-op.
__global__ void gpuBlur(unsigned char* image, unsigned char* buffer, int width, int height)
{
    if (width <= 0 || height <= 0)
        return;

    // Work in size_t so that byte offsets of large images cannot overflow int.
    const size_t pixelCount = (size_t)width * (size_t)height;
    const size_t rowBytes = 4 * (size_t)width;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t pixel = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         pixel < pixelCount;
         pixel += stride)
    {
        const size_t i = pixel % (size_t)width;   // column
        const size_t j = pixel / (size_t)width;   // row

        // Border pixels lack a full 3x3 neighbourhood; leave them untouched.
        if (i == 0 || j == 0 || i == (size_t)width - 1 || j == (size_t)height - 1)
            continue;

        const size_t center = rowBytes * j + 4 * i;

        // Exactly 4 channels per pixel: k must stay below 4, otherwise the
        // fifth iteration would overwrite the first byte of the next pixel.
        for (int k = 0; k < 4; k++)
        {
            const size_t c = center + k;
            // unsigned char operands are promoted to int, so the sum cannot wrap.
            buffer[c] = (image[c - rowBytes - 4] +
                         image[c - rowBytes] +
                         image[c - rowBytes + 4] +
                         image[c - 4] +
                         image[c] +
                         image[c + 4] +
                         image[c + rowBytes - 4] +
                         image[c + rowBytes] +
                         image[c + rowBytes + 4]) / 9;
        }
    }
}

// Prints a diagnostic naming the failed step and terminates the process if a
// CUDA runtime call did not return cudaSuccess.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Loads the PNG named by argv[1], blurs it on the GPU and writes the result to
// GPU_OUTPUT1_Blur.png. Returns 0 on success (and when no input file is given),
// EXIT_FAILURE when decoding, allocation or encoding fails. CUDA failures
// terminate the process through checkCuda.
int main(int argc, char *argv[])
{
    //Items for image processing;
    unsigned int error;
    unsigned char* image = NULL;
    unsigned int width = 0, height = 0;

    //Load the image;
    if (argc <= 1)
    {
        fprintf(stderr, "Usage: %s <input.png>\n", argc > 0 ? argv[0] : "super-resolution-cuda");
        return 0;
    }

    error = lodepng_decode32_file(&image, &width, &height, argv[1]);
    printf("Loaded file: %s[%u]\n", argv[1], error);
    if (error != 0)
    {
        // Do not touch the image data if decoding failed.
        fprintf(stderr, "Failed to decode %s (lodepng error %u)\n", argv[1], error);
        free(image);
        return EXIT_FAILURE;
    }

    const size_t pixelCount = (size_t)width * (size_t)height;
    const size_t imageBytes = 4 * pixelCount;
    if (pixelCount == 0)
    {
        fprintf(stderr, "Image %s has no pixels\n", argv[1]);
        free(image);
        return EXIT_FAILURE;
    }

    unsigned char* buffer = (unsigned char*)malloc(imageBytes);
    if (buffer == NULL)
    {
        fprintf(stderr, "Out of host memory (%lu bytes requested)\n", (unsigned long)imageBytes);
        free(image);
        return EXIT_FAILURE;
    }

    //GPU Blur Section.
    unsigned char* image_gpu = NULL;
    unsigned char* blur_gpu = NULL;
    checkCuda(cudaMalloc((void**)&image_gpu, imageBytes), "cudaMalloc image_gpu");
    checkCuda(cudaMalloc((void**)&blur_gpu, imageBytes), "cudaMalloc blur_gpu");
    checkCuda(cudaMemcpy(image_gpu, image, imageBytes, cudaMemcpyHostToDevice), "copy image to device");
    // The output is seeded with the input so the border pixels, which the
    // kernel skips, still hold valid data.
    checkCuda(cudaMemcpy(blur_gpu, image, imageBytes, cudaMemcpyHostToDevice), "copy image to blur buffer");

    // A block cannot hold width*height threads for any realistic image, so
    // spread the pixels over many fixed-size blocks. The block count is capped
    // at 65535 (the grid x-limit of older devices); the kernel's grid-stride
    // loop covers any remaining pixels.
    const unsigned int threadsPerBlock = 256;
    const size_t maxBlocks = 65535;
    const size_t blocksNeeded = (pixelCount + threadsPerBlock - 1) / threadsPerBlock;
    const unsigned int blocks = (unsigned int)(blocksNeeded < maxBlocks ? blocksNeeded : maxBlocks);

    gpuBlur<<< blocks, threadsPerBlock >>> (image_gpu, blur_gpu, (int)width, (int)height);
    checkCuda(cudaGetLastError(), "gpuBlur launch");

    // Blocking copy: it also reports errors raised while the kernel was running.
    checkCuda(cudaMemcpy(buffer, blur_gpu, imageBytes, cudaMemcpyDeviceToHost), "copy result to host");

    //Spit out buffer as an image.
    error = lodepng_encode32_file("GPU_OUTPUT1_Blur.png", buffer, width, height);
    if (error != 0)
        fprintf(stderr, "Failed to write GPU_OUTPUT1_Blur.png (lodepng error %u)\n", error);

    checkCuda(cudaFree(image_gpu), "cudaFree image_gpu");
    checkCuda(cudaFree(blur_gpu), "cudaFree blur_gpu");

    free(buffer);
    free(image);

    return error != 0 ? EXIT_FAILURE : 0;
}

编译cuda项目时出错

2 个答案: