Question

我了解 explicit template instantiation 的用途以及extern template语法。这样做的目的是确保仅在一个翻译单元中实例化某个模板，从而有可能减少编译时间。

但是我对这实际上如何工作感到困惑。extern template指示编译器不要在当前翻译单元中隐式实例化该模板，并承诺在链接时能在其他翻译单元中找到显式实例化出来的所需内容。但是编译器怎样才能真正避免实例化该模板呢？

例如，说我们有：

template <class T>
struct Foo { T value; };

extern template class Foo<int>;

现在让我们说，在相同翻译单元中，我们执行以下操作：

Foo<int> f;
// now do lots of stuff with f

在这一点上，由于有extern template class语句，编译器不允许隐式实例化Foo<int>。但是，它又如何为当前翻译单元生成代码呢？它至少需要实例化Foo<int>才能知道Foo<int>的大小，进而才能知道在栈上声明Foo<int> f时需要把栈指针移动多少。

此外，Foo<int>可能具有各种嵌套的typedef或依赖于类型T的各种成员函数，如果不隐式实例化Foo<int>，就不可能编译它们。

那么这是如何工作的？编译此翻译单元时，编译器是否只是不生成任何涉及Foo<int>的代码？然后，在链接阶段，当它在其他翻译单元中找到Foo<int>的显式实例化之后，是否再回过头来把必要的代码拼接到目标文件中？

如果是这样，这是否也意味着使用extern template的副作用可能是增加链接时间，因为很多代码生成必须在链接时而不是在编译时进行？

Answer 1

实际上非常简单。这是定义模板类foo<T>的头文件：

foo.hpp

#ifndef FOO_HPP
#define FOO_HPP

// Minimal class template used throughout the example: it wraps a single
// value of type T behind a getter and a setter. No constructors are declared
// in this version, so the special member functions are implicitly defined.
template<typename T>
struct foo
{
    // Returns a const reference to the stored value.
    T const & get() const {
        return _t;
    }
    // Copy-assigns `t` into the stored value.
    void set(T const & t) {
        _t = t;
    }

private:
    T _t;  // The wrapped value.
};  // A class definition must end with ';' (it was missing in the original listing).

#endif

这是一个源文件，用于显式实例化类foo<int>的定义：

foo_int.cpp

#include "foo.hpp"

// An explicit instantiation definition
template struct foo<int>;

当我们将foo_int.cpp编译为foo_int.o时，该目标文件将定义所有实例化foo<int>产生的符号：

$ g++ -Wall -Wextra -pedantic -c foo_int.cpp

$ nm --defined-only foo_int.o
0000000000000000 W _ZN3fooIiE3setERKi
0000000000000000 W _ZNK3fooIiE3getEv

经过符号还原（demangle）之后是：

$ nm -C --defined-only foo_int.o
0000000000000000 W foo<int>::set(int const&)
0000000000000000 W foo<int>::get() const

（请注意，这些符号是弱定义的（W），就像隐式实例化产生的符号一样。还要注意，编译器认为根本不需要为任何隐式默认的特殊成员函数生成定义。）

这是一个头文件，它声明了foo<int>的显式实例化，也就是我们刚刚在foo_int.o中定义的那个：

foo_int.hpp

#ifndef FOO_INT_HPP
#define FOO_INT_HPP

#include "foo.hpp"

// An explicit instantiation declaration
extern template struct foo<int>;

#endif

这是一个源文件，它引用了我们在foo_int.hpp中声明的foo<int>的显式实例化：

make_foo_int.cpp

#include "make_foo_int.hpp"

// Creates a default-initialized foo<int>, stores `i` in it through set(),
// and returns the object by value.
foo<int> make_foo_int(int i)
{
    foo<int> fi;
    fi.set(i);
    return fi;
}

和关联的头文件：

make_foo_int.hpp

#ifndef MAKE_FOO_INT_HPP
#define MAKE_FOO_INT_HPP
#include "foo_int.hpp"

foo<int> make_foo_int(int i = 0);

#endif

请注意，make_foo_int.cpp正是让您感到困惑的那个翻译单元。它#include了make_foo_int.hpp，后者#include了foo_int.hpp，而foo_int.hpp又#include了foo.hpp——也就是模板定义。然后它用foo<int>“做了一些事情”。

当我们将make_foo_int.cpp编译为make_foo_int.o时，对于源自foo<int>实例化的符号，该目标文件中只包含对它们的未定义引用：

$ g++ -Wall -Wextra -pedantic -c make_foo_int.cpp

$ nm -C --defined-only make_foo_int.o
0000000000000000 T make_foo_int(int)

$ nm -C --undefined-only make_foo_int.o
                 U _GLOBAL_OFFSET_TABLE_
                 U __stack_chk_fail
                 U foo<int>::set(int const&)

编译此翻译单元时，编译器是否不会简单地生成任何涉及Foo<int>的代码？

编译器会生成对未定义的外部函数foo<int>::set(int const&)的调用。这是汇编代码：

make_foo_int.s

    .file   "make_foo_int.cpp"
    .text
    .globl  _Z12make_foo_inti
    .type   _Z12make_foo_inti, @function
_Z12make_foo_inti:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movl    %edi, -20(%rbp)
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    leaq    -20(%rbp), %rdx
    leaq    -12(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZN3fooIiE3setERKi@PLT
    movl    -12(%rbp), %eax
    movq    -8(%rbp), %rcx
    xorq    %fs:40, %rcx
    je  .L3
    call    __stack_chk_fail@PLT
.L3:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   _Z12make_foo_inti, .-_Z12make_foo_inti
    .ident  "GCC: (Ubuntu 8.2.0-7ubuntu1) 8.2.0"
    .section    .note.GNU-stack,"",@progbits

其中：

call    _ZN3fooIiE3setERKi@PLT

是通过过程链接表（PLT）对foo<int>::set(int const&)的调用，就像编译器为任何必须在链接时解析的未定义外部函数所生成的调用一样。

现在这是调用make_foo_int以及foo<int>::get的程序的源文件：

main.cpp

#include "make_foo_int.hpp"
#include <iostream>

// Calls make_foo_int(42), reads the value back through foo<int>::get(),
// and prints it to standard output followed by a newline.
int main()
{
    std::cout << make_foo_int(42).get() << std::endl;
    return 0;
}

如果我们编译main.cpp，那么对于由foo<int>的实例化产生的符号，其目标文件中同样只包含对它们的未定义引用：

$ g++ -Wall -Wextra -pedantic -c main.cpp

$ nm -C --defined-only main.o | grep foo; echo Done
Done

$ nm -C --undefined-only main.o | grep foo; echo Done
                 U make_foo_int(int)
                 U foo<int>::get() const
Done

如果我们尝试仅使用main.o和make_foo_int.o链接程序：

$ g++ -o prog main.o make_foo_int.o
/usr/bin/ld: main.o: in function `main':
main.cpp:(.text+0x2c): undefined reference to `foo<int>::get() const'
/usr/bin/ld: make_foo_int.o: in function `make_foo_int(int)':
make_foo_int.cpp:(.text+0x29): undefined reference to `foo<int>::set(int const&)'
collect2: error: ld returned 1 exit status

链接会因为对foo<int>::get()和foo<int>::set(int const&)的未定义引用而失败。

如果我们通过添加必要的foo_int.o重新链接，并要求链接器报告这些符号的引用和定义：

$ g++ -o prog main.o make_foo_int.o foo_int.o -Wl,-trace-symbol=_ZN3fooIiE3setERKi,-trace-symbol=_ZNK3fooIiE3getEv
/usr/bin/ld: main.o: reference to _ZNK3fooIiE3getEv
/usr/bin/ld: make_foo_int.o: reference to _ZN3fooIiE3setERKi
/usr/bin/ld: foo_int.o: definition of _ZNK3fooIiE3getEv
/usr/bin/ld: foo_int.o: definition of _ZN3fooIiE3setERKi

链接成功了，并且可以看到链接器在main.o中找到了对foo<int>::get()的引用，在make_foo_int.o中找到了对foo<int>::set(int const&)的引用，并在foo_int.o中找到了这两个符号的定义。foo<int>只在foo_int.o中被实例化了一次。

稍后...

根据您的评论，您仍然不明白：函数make_foo_int(int)中定义了一个会占用栈空间的自动对象foo<int> fi，编译器怎么可能在不实例化foo<int>的情况下编译这个函数——哪怕实例化仅仅是为了计算该对象的大小。

为了更好地回答这个问题，我首先需要说明一点，这一点我之前可能讲得不够清楚。我之前提到过，显式实例化：

template struct foo<int>;

foo_int.cpp中的

只会为模板中定义的成员函数生成定义，如下所示：

$ nm -C --defined-only foo_int.o
0000000000000000 W foo<int>::set(int const&)
0000000000000000 W foo<int>::get() const

并且不会为该类隐式默认的特殊成员（构造函数等）生成定义。

因此，一个与您的问题非常相似的问题是：如果编译器连默认构造函数都不实例化，它如何编译函数make_foo_int(int)？该默认构造函数由下面这条语句执行：

foo<int> fi;

？答案是：它确实会像通常那样，以内联方式实例化该构造函数。（至少，如果该构造函数不是空操作，它就会这样做。）但它这样做只是因为，我们在foo_int.cpp中显式实例化的那个模板里没有定义该构造函数。

我们也稍微更改一下模板：

foo.hpp（2）

#ifndef FOO_HPP
#define FOO_HPP

// Second version of the class template: same getter and setter as before,
// but the data member now has a default member initializer. No constructor
// is declared in this version.
template<typename T>
struct foo
{
    // Returns a const reference to the stored value.
    T const & get() const {
        return _t;
    }
    // Copy-assigns `t` into the stored value.
    void set(T const & t) {
        _t = t;
    }

private:
    T _t = 257;  // <- Default initializer
};

#endif

然后重新编译make_foo_int.cpp，保存程序集：

$ g++ -Wall -Wextra -pedantic -c make_foo_int.cpp -save-temps

现在可以清楚地看到默认构造函数foo<int>() 是内联的，而foo<int>::set(T const &)是从外部调用的：

make_foo_int.s（2）

    .file   "make_foo_int.cpp"
    .text
    .globl  _Z12make_foo_inti
    .type   _Z12make_foo_inti, @function
_Z12make_foo_inti:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movl    %edi, -20(%rbp)
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    movl    $257, -12(%rbp) ; <- Default initializer
    leaq    -20(%rbp), %rdx
    leaq    -12(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZN3fooIiE3setERKi@PLT  ; <- External call
    movl    -12(%rbp), %eax
    movq    -8(%rbp), %rcx
    xorq    %fs:40, %rcx
    je  .L3
    call    __stack_chk_fail@PLT
.L3:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   _Z12make_foo_inti, .-_Z12make_foo_inti
    .ident  "GCC: (Ubuntu 8.2.0-7ubuntu1) 8.2.0"
    .section    .note.GNU-stack,"",@progbits

对于foo<int>中我们没有在模板里定义的任何特殊成员函数，编译器都可以像往常一样将其内联，因为当编译器看到以下声明时，模板定义必须对它可见：

extern template struct foo<int>;

我们可以通过将foo_int.hpp更改为以下内容来验证这一点：

foo_int.hpp（2）

#ifndef FOO_INT_HPP
#define FOO_INT_HPP

//#include "foo.hpp"  <- Hide the template definition

template <typename T> struct foo;

// An explicit instantiation declaration
extern template struct foo<int>;

#endif

并尝试：

$ g++ -Wall -Wextra -pedantic -c make_foo_int.cpp -save-temps
In file included from make_foo_int.hpp:3,
                 from make_foo_int.cpp:1:
foo_int.hpp:9:24: error: explicit instantiation of ‘struct foo<int>’ before definition of template
 extern template struct foo<int>;
                        ^~~~~~~~

因此，可以肯定地说，编译器在make_foo_int.o中“至少部分地实例化了”foo<int>。但是它只实例化了一个部分——默认构造函数——

而这一部分并不是由下面这条声明所外部引用的显式实例化提供的：

 extern template struct foo<int>;

并且之所以没有提供该默认构造函数，是因为我们没有在template struct foo<T>中定义它。

如果我们确实在模板中定义了构造函数，比如：

foo.hpp（3）

#ifndef FOO_HPP
#define FOO_HPP

// Third version of the class template: the default and copy constructors
// are now defined explicitly inside the template, alongside the getter and
// setter.
template<typename T>
struct foo
{
    // Default constructor: initializes the stored value to 257.
    foo()
    : _t{257}{}
    // Copy constructor: copies the stored value from `other`.
    foo(foo const & other)
    : _t{other._t}{}
    // Returns a const reference to the stored value.
    T const & get() const {
        return _t;
    }
    // Copy-assigns `t` into the stored value.
    void set(T const & t) {
        _t = t;
    }

private:
    T _t;  // The wrapped value; set by both constructors above.
};

#endif

然后我们将在foo_int.o中找到它们：

$ g++ -Wall -Wextra -pedantic -c foo_int.cpp
$ nm -C foo_int.o
0000000000000000 W foo<int>::set(int const&)
0000000000000000 W foo<int>::foo(foo<int> const&)
0000000000000000 W foo<int>::foo()
0000000000000000 W foo<int>::foo(foo<int> const&)
0000000000000000 W foo<int>::foo()
0000000000000000 n foo<int>::foo(foo<int> const&)
0000000000000000 n foo<int>::foo()
0000000000000000 W foo<int>::get() const

（看起来好像它们被重复定义了，但这只是一种错觉，也是一种干扰！[1]）。如果我们用foo.hpp（3）和最初的foo_int.hpp重新编译make_foo_int.cpp，并检查新的汇编代码：

$ g++ -Wall -Wextra -pedantic -O0 -c make_foo_int.cpp -save-temps
$ mv make_foo_int.s make_foo_int.s.before   # Save that for later
$ cat make_foo_int.s.before
    .file   "make_foo_int.cpp"
    .text
    .globl  _Z12make_foo_inti
    .type   _Z12make_foo_inti, @function
_Z12make_foo_inti:
.LFB4:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movq    %rdi, -24(%rbp)
    movl    %esi, -28(%rbp)
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    movq    -24(%rbp), %rax
    movq    %rax, %rdi
    call    _ZN3fooIiEC1Ev@PLT      ; <- External ctor call
    leaq    -28(%rbp), %rdx
    movq    -24(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZN3fooIiE3setERKi@PLT  ; <- External `set` call
    nop
    movq    -24(%rbp), %rax
    movq    -8(%rbp), %rcx
    xorq    %fs:40, %rcx
    je  .L3
    call    __stack_chk_fail@PLT
.L3:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE4:
    .size   _Z12make_foo_inti, .-_Z12make_foo_inti
    .ident  "GCC: (Ubuntu 8.2.0-7ubuntu1) 8.2.0"
    .section    .note.GNU-stack,"",@progbits

我们现在看到，默认构造函数_ZN3fooIiEC1Ev和set成员函数_ZN3fooIiE3setERKi一样，都是在外部被调用的。

重新链接我们的原始程序，它将运行：

$ g++ -Wall -Wextra -pedantic -O0 -o prog main.cpp make_foo_int.cpp foo_int.cpp
$ ./prog
42

这最终把我们带回到那个问题：编译器在不实例化foo<int>的情况下，如何知道对象foo<int> fi的大小，从而编译函数make_foo_int？

正如make_foo_int.s.before所表明的，编译器不需要计算任何这类对象的大小，因为在它生成的代码中根本不存在这样的对象。C++的类和类的实例在汇编代码和目标代码中是不存在的概念。在目标代码中，只有函数，以及基本整数类型或浮点类型的对象，而这些类型的大小从一开始就是已知的。函数带着0个或多个参数执行；它可能会操作位于栈、堆或静态存储区中的那些基本类型的对象，并且（通常）会把控制权返回给先前的上下文。C++语句：

foo<int> fi;

make_foo_int正文中的

并不会按字面意思被编译成“在栈上放置一个foo<int>对象fi”。它被编译成对一个函数的执行，这个函数就是foo<int>的默认构造函数——可能是内联的，也可能是在外部调用的，这无关紧要——它把一个值为257的整数放到自己的栈上，并在结束时把该整数留在栈上交给调用者。与以往一样，调用者不需要知道被调用者净消耗了多少栈空间。我们可以（相当疯狂地）重新定义template struct foo<T>，使foo<int>变大1000倍：

foo.hpp（4）

#ifndef FOO_HPP
#define FOO_HPP

// Fourth version of the class template: the single stored value is replaced
// by an array of 1000 elements, making each foo<T> 1000 times larger.
template<typename T>
struct foo
{
    // Default constructor: sets every one of the 1000 elements to 257.
    foo() {
        for (unsigned i = 0; i < 1000; ++i) {
            _t[i] = 257;
        }
    }
    // Copy constructor: copies all 1000 elements from `other`.
    foo(foo const & other) {
        for (unsigned i = 0; i < 1000; ++i) {
            _t[i] = other._t[i];
        }
    }
    // Returns a const reference to the LAST element (index 999).
    T const & get() const {
        return _t[999];
    }
    // Copy-assigns `t` into the FIRST element (index 0); since get() reads
    // index 999, a value stored by set() is not what get() returns.
    void set(T const & t) {
        _t[0] = t;
    }

private:
    T _t[1000];  // Storage: 1000 elements of type T.
};

#endif

然后重新编译make_foo_int.cpp：

$ g++ -Wall -Wextra -pedantic -O0 -c make_foo_int.cpp -save-temps
$ mv make_foo_int.s make_foo_int.s.after

这对make_foo_int.o的汇编代码完全没有影响：

$ diff make_foo_int.s.before make_foo_int.s.after; echo Done
Done

$ cat make_foo_int.s.after
    .file   "make_foo_int.cpp"
    .text
    .globl  _Z12make_foo_inti
    .type   _Z12make_foo_inti, @function
_Z12make_foo_inti:
.LFB4:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movq    %rdi, -24(%rbp)
    movl    %esi, -28(%rbp)
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    movq    -24(%rbp), %rax
    movq    %rax, %rdi
    call    _ZN3fooIiEC1Ev@PLT
    leaq    -28(%rbp), %rdx
    movq    -24(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZN3fooIiE3setERKi@PLT
    nop
    movq    -24(%rbp), %rax
    movq    -8(%rbp), %rcx
    xorq    %fs:40, %rcx
    je  .L3
    call    __stack_chk_fail@PLT
.L3:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE4:
    .size   _Z12make_foo_inti, .-_Z12make_foo_inti
    .ident  "GCC: (Ubuntu 8.2.0-7ubuntu1) 8.2.0"
    .section    .note.GNU-stack,"",@progbits

尽管它确实改变了我们程序的行为：

$ g++ -Wall -Wextra -pedantic -O0 -o prog main.cpp make_foo_int.cpp foo_int.cpp
$ ./prog
257

我很乐意收回我开头所说的“实际上非常简单”：）

[1]输出：

$ nm -C foo_int.o
0000000000000000 W foo<int>::set(int const&)
0000000000000000 W foo<int>::foo(foo<int> const&)
0000000000000000 W foo<int>::foo()
0000000000000000 W foo<int>::foo(foo<int> const&)
0000000000000000 W foo<int>::foo()
0000000000000000 n foo<int>::foo(foo<int> const&)
0000000000000000 n foo<int>::foo()
0000000000000000 W foo<int>::get() const

似乎是在说每个构造函数都有两个弱全局定义，并且还另外被定义为一个comdat符号！但是如果我们关闭符号还原（demangling），这种假象就消失了：

$ nm foo_int.o
0000000000000000 W _ZN3fooIiE3setERKi
0000000000000000 W _ZN3fooIiEC1ERKS0_
0000000000000000 W _ZN3fooIiEC1Ev
0000000000000000 W _ZN3fooIiEC2ERKS0_
0000000000000000 W _ZN3fooIiEC2Ev
0000000000000000 n _ZN3fooIiEC5ERKS0_
0000000000000000 n _ZN3fooIiEC5Ev
0000000000000000 W _ZNK3fooIiE3getEv

可以看到所有符号实际上各不相同。在ABI的名称重整规则下，以下三个符号：

_ZN3fooIiEC1ERKS0_
_ZN3fooIiEC2ERKS0_
_ZN3fooIiEC5ERKS0_

都对应foo<int>::foo(foo<int> const&)，类似地，以下三个符号：

_ZN3fooIiEC1Ev
_ZN3fooIiEC2Ev
_ZN3fooIiEC5Ev

都对应foo<int>::foo()。在GCC编译这些构造函数的做法中，包含C1和C2的符号变体实际上是等价的，只是ABI规范在逻辑上对它们加以区分；而带C5的变体只是一个节组（section group）的名称，编译器把定义该构造函数的函数节放在这个节组中。

extern模板实际如何生成代码？

1 个答案: