Question

亲爱的StackOverflowers，

我在 Microsoft Visual Studio C++ 2012 中编写了一段简单的代码：

int add(int x, int y)
{
    return x + y;
}

typedef int (*func_t)(int, int);

class A
{
public:
    const static func_t FP;
};

const func_t A::FP = &add;

int main()
{
 int x = 3;
 int y = 2;
 int z = A::FP(x, y);
 return 0;
}

编译器生成以下代码：

int main()
{
000000013FBA2430  sub         rsp,28h  
int x = 3;
int y = 2;
int z = A::FP(x, y);
000000013FBA2434  mov         edx,2  
000000013FBA2439  lea         ecx,[rdx+1]  
000000013FBA243C  call        qword ptr [A::FP (013FBA45C0h)]  
return 0;
000000013FBA2442  xor         eax,eax
}

我在编译时启用了“完全优化”（/Ox 标志），并将“内联函数扩展”设置为“任何适用项”（/Ob2 标志）。

我想知道为什么编译器不会内联这个调用，特别是因为它是const。你们中有没有人知道它为什么没有内联，是否可以让编译器内联呢？

Christian

编辑：我现在正在运行一些测试，MSVC在以下情况下也无法内联函数指针：

- 我将const指针移出类并使其成为全局。

- 我将const指针移出类，并将其置于main中。

- 我将指针改为非 const，并把它移到函数内部作为局部变量。

- 我将函数的返回类型改为 void，并去掉所有参数。

我开始相信Microsoft Visual Studio根本无法内联函数指针......

Answer 1

问题不在于内联——编译器一有机会就会进行内联。问题在于 Visual C++ 似乎没有意识到这个指针变量实际上是一个编译时常量。

测试情况：

// function_pointer_resolution.cpp : Defines the entry point for the console application.
//

extern void show_int( int );

extern "C" typedef int binary_int_func( int, int );

extern "C" binary_int_func sum;
extern "C" binary_int_func* const sum_ptr = sum;

inline int call( binary_int_func* binary, int a, int b ) { return (*binary)(a, b); }

template< binary_int_func* binary >
inline int callt( int a, int b ) { return (*binary)(a, b); }

int main( void )
{
    show_int( sum(1, 2) );
    show_int( call(&sum, 3, 4) );
    show_int( callt<&sum>(5, 6) );
    show_int( (*sum_ptr)(1, 7) );
    show_int( call(sum_ptr, 3, 8) );
//  show_int( callt<sum_ptr>(5, 9) );
    return 0;
}

// sum.cpp
extern "C" int sum( int x, int y )
{
    return x + y;
}

// show_int.cpp
#include <iostream>

void show_int( int n )
{
    std::cout << n << std::endl;
}

这些函数被分到多个编译单元中，以便更好地控制内联。具体来说，我不希望 show_int 被内联，因为那会使汇编代码变得混乱。

第一个不妙的迹象是 Visual C++ 拒绝了合法的代码（被注释掉的那一行）。G++ 可以正常编译它，但 Visual C++ 报错“预期的编译时常量表达式”。这实际上很好地预示了后面的所有行为。

在启用优化并使用常规编译语义（无跨模块内联）的情况下，编译器生成：

_main   PROC                        ; COMDAT

; 18   :    show_int( sum(1, 2) );

    push    2
    push    1
    call    _sum
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int

; 19   :    show_int( call(&sum, 3, 4) );

    push    4
    push    3
    call    _sum
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int

; 20   :    show_int( callt<&sum>(5, 6) );

    push    6
    push    5
    call    _sum
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int

; 21   :    show_int( (*sum_ptr)(1, 7) );

    push    7
    push    1
    call    DWORD PTR _sum_ptr
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int

; 22   :    show_int( call(sum_ptr, 3, 8) );

    push    8
    push    3
    call    DWORD PTR _sum_ptr
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int
    add esp, 60                 ; 0000003cH

; 23   :    //show_int( callt<sum_ptr>(5, 9) );
; 24   :    return 0;

    xor eax, eax

; 25   : }

    ret 0
_main   ENDP

使用sum_ptr和不使用sum_ptr之间存在巨大差异。使用sum_ptr的语句生成间接函数调用call DWORD PTR _sum_ptr，而所有其他语句生成直接函数调用call _sum，即使源代码使用函数指针。

如果我们现在用 /GL 编译 function_pointer_resolution.cpp 和 sum.cpp，并用 /LTCG 链接，从而启用跨模块内联，就会发现编译器内联了所有直接调用，而间接调用保持原样。

_main   PROC                        ; COMDAT

; 18   :    show_int( sum(1, 2) );

    push    3
    call    ?show_int@@YAXH@Z           ; show_int

; 19   :    show_int( call(&sum, 3, 4) );

    push    7
    call    ?show_int@@YAXH@Z           ; show_int

; 20   :    show_int( callt<&sum>(5, 6) );

    push    11                  ; 0000000bH
    call    ?show_int@@YAXH@Z           ; show_int

; 21   :    show_int( (*sum_ptr)(1, 7) );

    push    7
    push    1
    call    DWORD PTR _sum_ptr
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int

; 22   :    show_int( call(sum_ptr, 3, 8) );

    push    8
    push    3
    call    DWORD PTR _sum_ptr
    push    eax
    call    ?show_int@@YAXH@Z           ; show_int
    add esp, 36                 ; 00000024H

; 23   :    //show_int( callt<sum_ptr>(5, 9) );
; 24   :    return 0;

    xor eax, eax

; 25   : }

    ret 0
_main   ENDP

结论：是的，编译器确实会内联通过编译时常量函数指针进行的调用，只要该函数指针不是从变量中读取的。下面这种函数指针用法得到了优化：

call(&sum, 3, 4);

但这没有：

(*sum_ptr)(1, 7);

所有测试均使用 Visual C++ 2010 Service Pack 1 完成，在 x64 主机上针对 x86 目标进行编译。

用于 80x86 的 Microsoft (R) 32 位 C/C++ 优化编译器 16.00.40219.01 版

Answer 2

我认为你在这个结论中是对的：“......根本不能内联函数指针”。

下面这个非常简单的例子同样会让优化失效：

static inline
int add(int x, int y)
{
    return x + y;
}

int main()
{
    int x = 3;
    int y = 2;
    auto q = add;
    int z = q(x, y);
    return z;
}

您的示例对于编译器来说甚至更复杂，因此这并不奇怪。

Answer 3

您可以尝试 __forceinline。没有人能够准确地告诉您它为什么没有被内联。不过，常识告诉我它应该被内联。/O2 应该优先考虑代码速度而非代码大小（也就是倾向于内联）……很奇怪。

Answer 4

这不是一个真正的答案，而是一个“可能的变通办法”：微软的 STL（Stephan T. Lavavej）曾经提到，lambda 比函数指针更容易被内联，所以你可以试试 lambda。

顺便说个趣闻：Bjarne 经常提到 sort 比 qsort 更快，因为 qsort 接受的是函数指针；但正如其他人指出的，gcc 内联函数指针调用毫无问题……所以也许 Bjarne 应该试试 gcc :P

Visual C ++〜不内联简单的const函数指针调用

4 个答案: