为什么编译器测试地址中的最低有效位?

时间:2018-11-21 22:23:26

标签: c++

考虑 https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp 中的这个函数:

// Looks up the operator name `op` in the g_OpCodes table and, if found,
// invokes the matching handler on this object.
//
// op: character string; at most its first four characters are used, and
//     reading stops early at a terminating zero character.
//
// Returns TRUE when a table entry matched and its handler was called.
// Otherwise returns whether m_CompatCount is non-zero.
FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
    int i = 0;
    FX_DWORD opid = 0;
    // Pack up to four characters of `op` into opid, first character in the
    // most significant position.
    // NOTE(review): op[i] is a plain char, so a value with the high bit set
    // would be sign-extended before the addition where char is signed.
    while (i < 4 && op[i]) {
        opid = (opid << 8) + op[i];
        i ++;
    }
    // Names shorter than four characters are padded with zero bytes on the
    // low end, so every opid is a full four-byte key.
    while (i < 4) {
        opid <<= 8;
        i ++;
    };
    // Binary search over g_OpCodes; `high` starts at the index of the last
    // entry. This presumably relies on the table being sorted in ascending
    // m_OpId order -- verify against the table definition.
    int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
    while (low <= high) {
        int middle = (low + high) / 2;
        // The difference is converted to int; only its sign (or zero) is used
        // to pick the half to continue in.
        int compare = opid - g_OpCodes[middle].m_OpId;
        if (compare == 0) {
            // m_OpHandler is a pointer to member function; call it on `this`.
            (this->*g_OpCodes[middle].m_OpHandler)();
            return TRUE;
        } else if (compare < 0) {
            high = middle - 1;
        } else {
            low = middle + 1;
        }
    }
    // No table entry matched.
    return m_CompatCount != 0;
}

此函数在 FoxitReader 2.4 中被使用,其编译结果为:

Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
   0x0000000000bc71fe <+0>: xor    edx,edx
   0x0000000000bc7200 <+2>: xor    eax,eax
   0x0000000000bc7202 <+4>: movsx  r8d,BYTE PTR [rsi+rdx*1]
   0x0000000000bc7207 <+9>: mov    ecx,edx
   0x0000000000bc7209 <+11>:    test   r8b,r8b
   0x0000000000bc720c <+14>:    je     0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
   0x0000000000bc720e <+16>:    shl    eax,0x8
   0x0000000000bc7211 <+19>:    inc    rdx
   0x0000000000bc7214 <+22>:    add    eax,r8d
   0x0000000000bc7217 <+25>:    cmp    rdx,0x4
   0x0000000000bc721b <+29>:    jne    0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
   0x0000000000bc721d <+31>:    mov    ecx,0x4
   0x0000000000bc7222 <+36>:    cmp    ecx,0x4
   0x0000000000bc7225 <+39>:    je     0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
   0x0000000000bc7227 <+41>:    shl    eax,0x8
   0x0000000000bc722a <+44>:    inc    ecx
   0x0000000000bc722c <+46>:    jmp    0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
   0x0000000000bc722e <+48>:    lea    r10,[rip+0x180f43b]        # 0x23d6670 <_ZL9g_OpCodes>
   0x0000000000bc7235 <+55>:    mov    cl,0x48
   0x0000000000bc7237 <+57>:    xor    esi,esi
   0x0000000000bc7239 <+59>:    lea    edx,[rsi+rcx*1]
   0x0000000000bc723c <+62>:    sar    edx,1
   0x0000000000bc723e <+64>:    movsxd r9,edx
   0x0000000000bc7241 <+67>:    imul   r8,r9,0x18
   0x0000000000bc7245 <+71>:    add    r8,r10
   0x0000000000bc7248 <+74>:    cmp    eax,DWORD PTR [r8]
   0x0000000000bc724b <+77>:    jne    0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
   0x0000000000bc724d <+79>:    push   rcx
   0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
   0x0000000000bc7252 <+84>:    test   al,0x1
   0x0000000000bc7254 <+86>:    je     0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
   0x0000000000bc7256 <+88>:    mov    rdx,QWORD PTR [r8+0x10]
   0x0000000000bc725a <+92>:    mov    rdx,QWORD PTR [rdi+rdx*1]
   0x0000000000bc725e <+96>:    mov    rax,QWORD PTR [rdx+rax*1-0x1]
   0x0000000000bc7263 <+101>:   imul   r9,r9,0x18
   0x0000000000bc7267 <+105>:   lea    rdx,[rip+0x180f402]        # 0x23d6670 <_ZL9g_OpCodes>
   0x0000000000bc726e <+112>:   add    rdi,QWORD PTR [rdx+r9*1+0x10]
   0x0000000000bc7273 <+117>:   call   rax
   0x0000000000bc7275 <+119>:   mov    eax,0x1
   0x0000000000bc727a <+124>:   pop    rdx
   0x0000000000bc727b <+125>:   ret    
   0x0000000000bc727c <+126>:   jns    0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
   0x0000000000bc727e <+128>:   lea    ecx,[rdx-0x1]
   0x0000000000bc7281 <+131>:   jmp    0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
   0x0000000000bc7283 <+133>:   lea    esi,[rdx+0x1]
   0x0000000000bc7286 <+136>:   cmp    esi,ecx
   0x0000000000bc7288 <+138>:   jle    0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
   0x0000000000bc728a <+140>:   xor    eax,eax
   0x0000000000bc728c <+142>:   cmp    DWORD PTR [rdi+0x454],0x0
   0x0000000000bc7293 <+149>:   setne  al
   0x0000000000bc7296 <+152>:   ret    
End of assembler dump.

请注意与 (this->*g_OpCodes[middle].m_OpHandler)(); 相对应的 +84 至 +96 处的指令。寄存器 rax 中保存着指向处理函数的指针,但编译器并没有直接调用它,而是添加了这些指令来测试它的最低有效位,这是为什么呢?

我在其他函数中找到了其他实例。就像在函数void CFX_Renderer :: render(const Scanline&sl)中 https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp

 (this->*composite_span)(..args..);

对应于:

   0x0000000000c40a6c <+480>:   test   dl,0x1
   0x0000000000c40a6f <+483>:   mov    rax,rdx
   0x0000000000c40a72 <+486>:   je     0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
   0x0000000000c40a74 <+488>:   mov    rax,QWORD PTR [rbx+rdi*1]
   0x0000000000c40a78 <+492>:   mov    rax,QWORD PTR [rdx+rax*1-0x1]
   0x0000000000c40a7d <+497>:   mov    edx,DWORD PTR [r15+0x4]

2 个答案:

答案 0 :(得分:3)

我猜测编译器用奇数地址来表示一种特殊的函数指针,可能表示需要通过类的 vtable 或其他某种映射(也许是针对动态加载的代码)进行间接访问,其中 r8 是类指针?

我的最佳猜测是:这是一个成员函数指针,它既可能指向普通成员函数,也可能指向虚函数。如果指向普通成员函数,它就是原始地址;如果指向虚函数,它就是一个经过特殊编码的指针值。

如果“指针”是偶数,则直接使用它;如果是奇数,则先通过 r8(可能是对象实例)和 rdi(我不知道它是什么)进行间接寻址,然后把 rax 减 1 用作偏移量。这在很大程度上取决于平台的调用约定以及编译器所使用的技巧。

<section id="home">
  <div id="home-image">
    <img src="img/background.jpg" alt="">
  </div>
  <div id="home-menu">
    <ul>
      <li>HOME</li>
      <li>PAGES</li>
      <li>CONTACT</li>
      <li>ABOUT</li>
    </ul>
  </div>
</section>

答案 1 :(得分:2)

最低位(最后一位)用于指示该成员函数指针指向的是否为虚函数。

如果函数指针的LSB为1,则表示该函数是虚函数,因此二进制文件应使用虚函数表来调用此函数。否则,二进制文件将函数指针用作函数地址。

您可以从以下代码中看到此行为(大部分来自https://www.geeksforgeeks.org/virtual-function-cpp/)。

// CPP program to illustrate
// concept of Virtual Functions
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;

// Base class of the demo: two virtual member functions (print, then wow, in
// that declaration order) and one non-virtual member function (show).
// Each of them writes a single line to std::cout.
class base
{
public:
    // Virtual: a derived class may override it.
    virtual void print ()
    {
        std::cout << "print base class" << std::endl;
    }

    // Virtual: writes the same text as print().
    virtual void wow ()
    {
        std::cout << "print base class" << std::endl;
    }

    // Non-virtual.
    void show ()
    {
        std::cout << "show base class" << std::endl;
    }
};

// Publicly derives from base and declares its own print() and show().
// wow() is not redeclared here.
class derived : public base
{
public:
    // Same signature as the virtual base::print, so this overrides it.
    virtual void print ()
    {
        std::cout << "print derived class" << std::endl;
    }

    // base::show is non-virtual, so this declaration hides it rather than
    // overriding it.
    void show ()
    {
        std::cout << "show derived class" << std::endl;
    }
};

int main()
{
    base *bptr;
    derived d;
    bptr = &d;

    //virtual function, binded at runtime
    printf("%p\n", &derived::print);

    //virtual function, binded at runtime
    printf("%p\n", &derived::wow);

    // Non-virtual function, binded at compile time
    printf("%p\n", &derived::show);
}

如果运行此程序,您将得到

$ ./run
0x1
0x9
0x5594923dac92

这表明:虚函数的成员指针实际上是一个偏移量,并且其最低有效位(LSB)被置为 1;而非虚函数的成员指针就是函数地址本身。