为什么我需要可分离的编译?

时间:2018-05-17 11:49:40

标签: cmake cuda

我有如下所示的代码。据我所知,在以下情况下必须打开可分离编译:

  1. CUDA设备代码被拆分到.h和.cu文件中
  2. 对象A的设备代码被对象B的设备代码调用

然而,在我的main函数中,上述情况都不存在。你能告诉我为什么我必须为这个示例项目设置可分离编译吗?

    BitHelper.h

    #pragma once
    #include <cuda_runtime.h>
    
    #define COMPILE_TARGET __host__ __device__
    
    // Helper class with a single static bit utility. Every member is declared
    // with COMPILE_TARGET (__host__ __device__), so it can be used from both
    // host code and device code.
    class BitHelper
    {
    public:
        // Constructor and destructor; both are defined out of line in the .cu file.
        COMPILE_TARGET BitHelper();
        COMPILE_TARGET ~BitHelper();
    
        // Overwrites the referenced value with zero.
        COMPILE_TARGET static void clear(unsigned int& val0);
    };
    

    BitHelper.cu

    #include "bithelper.h"
    
    // Constructor: the class declares no data members, so there is nothing to set up.
    BitHelper::BitHelper() {}
    
    // Destructor: the class declares no data members, so there is nothing to release.
    BitHelper::~BitHelper() {}
    
    // Resets the referenced value to zero, i.e. clears every bit.
    void BitHelper::clear(unsigned int& val0)
    {
        val0 = 0u;
    }
    

    Consume_BitHelper.h

    #pragma once
    
    // Small driver class that exercises BitHelper::clear from host code and
    // from a CUDA kernel.
    class Consume_BitHelper
    {
    public:
        // Runs test_cpu() and test_gpu(), then calls cudaDeviceSynchronize().
        void apply();
    
    private:
        // Host-side check: prints a value, clears it with BitHelper::clear, prints it again.
        bool test_cpu();
        // Device-side check: launches myKernel with one block of one thread.
        bool test_gpu();
    };
    

    Consume_BitHelper.cu

    #include "consume_bithelper.h"
    
    #include <cuda_runtime.h>
    #include <iostream>
    
    #include "bithelper.h"
    
    // Kernel that calls BitHelper::clear from device code: it prints a local
    // flag value, clears it, and prints it again.
    __global__
    void myKernel()
    {
        unsigned int FLAG_VALUE = 0x2222;
        // FLAG_VALUE is an unsigned int, so the matching conversion is %u, not %d.
        printf("GPU before: %u\n", FLAG_VALUE);
        BitHelper::clear(FLAG_VALUE);
        printf("GPU after: %u\n", FLAG_VALUE);
    }
    
    void Consume_BitHelper::apply()
    {
        test_cpu();
        test_gpu();
        cudaDeviceSynchronize();
    }
    
    // Host-side check: prints a flag value, clears it through BitHelper::clear,
    // prints it again, and always returns true.
    bool Consume_BitHelper::test_cpu()
    {
        std::cout << "TEST CPU" << std::endl;
    
        unsigned int hostFlag = 0x1111u;
        std::cout << "CPU before: " << hostFlag << std::endl;
    
        BitHelper::clear(hostFlag);
        std::cout << "CPU after : " << hostFlag << std::endl;
    
        return true;
    }
    
    bool Consume_BitHelper::test_gpu()
    {
        std::cout << "TEST GPU" << std::endl;
        myKernel << <1, 1 >> > ();
        return true;
    }
    

    main.cu

    #include "consume_bithelper.h"
    #include "bithelper.h"
    
    #include <iostream>
    
    int main(int argc, char** argv)
    {
        Consume_BitHelper cbh;
        cbh.apply();
    
        std::cout << "\nPress any key to continue...";
        std::cin.get();
    
        return 0;
    }
    

    CMakeLists.txt

    cmake_minimum_required(VERSION 3.10)
    
    # Enable both the C++ and the CUDA languages for this project.
    project(cuda_class LANGUAGES CXX CUDA)
    
    # Static library built from bithelper.cu, with separable compilation enabled.
    # Author's rationale: BitHelper's declaration is separated from its definition.
    add_library(bithelper_lib STATIC bithelper.cu)
    set_property(TARGET bithelper_lib PROPERTY CUDA_SEPARABLE_COMPILATION ON)
    
    # Static library built from consume_bithelper.cu, with separable compilation enabled.
    # Author's rationale: the kernel in consume_bithelper.cu calls BitHelper's device
    # code, which is defined in a different .cu file.
    add_library(consume_bithelper_lib STATIC consume_bithelper.cu)
    set_property(TARGET consume_bithelper_lib PROPERTY CUDA_SEPARABLE_COMPILATION ON)
    target_link_libraries(consume_bithelper_lib bithelper_lib)
    
    # Executable target; CUDA_SEPARABLE_COMPILATION is NOT set on it.
    # Author's open question: main.cu only calls CPU code, so is separable
    # compilation needed here?
    # NOTE(review): both static libraries above are built with separable
    # compilation, which requires a device-link step somewhere. Presumably CMake
    # only performs that step for this executable when CUDA_SEPARABLE_COMPILATION
    # is enabled on it (or CUDA_RESOLVE_DEVICE_SYMBOLS on a library) - confirm
    # against the CMake documentation for the CMake version in use.
    add_executable(${PROJECT_NAME} main.cu)
    target_link_libraries(${PROJECT_NAME} bithelper_lib consume_bithelper_lib)
    

    我得到的错误如下:(图片:enter image description here)

    修改

    根据 Robert Crovella 的帖子,Consume_BitHelper.cu 使用了在另一个编译单元中定义的 BitHelper::clear。

    1. 这是否意味着我必须为 BitHelper 启用可分离编译?
    2. 既然可分离编译只与“从设备代码调用设备代码”有关:

      1. 为什么在没有为 cuda_class 启用可分离编译时会出现上述错误?(cuda_class 是由CMake创建的可执行文件,它不调用任何设备代码)

1 个答案:

答案 0 :(得分:0)

可分离编译与编译器处理函数调用的方式有关。以少量开销为代价,您可以进行真正的函数调用,从而访问其他“编译单元”(即.cu源文件)中的代码。

由于GPU程序员非常在意性能(尤其是启用可分离编译后会额外占用的寄存器),Nvidia将其设计为一个可选项,而不是默认行为。

只有当某个.cu文件需要访问在其他.cu文件中定义的函数或全局变量时,才需要为它启用可分离编译。