用布尔逻辑替换IF语句(随机条件)后,执行时间为什么相同?

时间:2014-06-07 14:34:55

标签: c++ performance optimization branch cpu

(环境:Win 7 64位,MSVC,第3代Core i7,64位编译,启用-O2)

下面的代码有三个函数 - 其中一个包含IF语句,根据条件是否满足执行不同的代码;在另一个中,我用布尔逻辑替换了这个IF语句。然而,两者的耗时是相同的……我原本期望去掉分支(从而避免分支预测失败)会产生更快的代码:

#include <iostream>

unsigned long long iterations = 1000000000;

/// Branching variant of the benchmark: chooses the value stored to `c` with an
/// explicit if/else on a condition derived from the time-stamp counter.
///
/// Performs `iterations` (file-level global) loop passes. `c` is volatile so
/// that every store has to be emitted instead of being optimized away.
void test1(){
    volatile int c = 0;

    // The index uses the same type as `iterations`. The original `int` index
    // was compared against an unsigned 64-bit bound and would overflow
    // (undefined behaviour) for any bound above INT_MAX.
    for(unsigned long long i=0; i<iterations; i++){
        // Lowest bit of the time-stamp counter serves as the "random" condition.
        bool condition = __rdtsc() % 2 == 0;
        if(condition){
            c = 4;
        }
        else{
            c = 5;
        }
    }
}

/// Branch-free variant of the benchmark: computes the value stored to `c`
/// arithmetically from the boolean condition instead of using an if/else.
///
/// Performs `iterations` (file-level global) loop passes. `c` is volatile so
/// that every store has to be emitted instead of being optimized away.
void test2(){
    volatile int c = 0;

    // The index uses the same type as `iterations`. The original `int` index
    // was compared against an unsigned 64-bit bound and would overflow
    // (undefined behaviour) for any bound above INT_MAX.
    for(unsigned long long i=0; i<iterations; i++){
        // Lowest bit of the time-stamp counter serves as the "random" condition.
        bool condition = __rdtsc() % 2 == 0;
        // bool promotes to 0/1, so this yields 4 when condition is true, else 5.
        c = (4 * condition) + (5 * !condition);
    }
}

/// Times test1() and test2() with the time-stamp counter and prints the
/// elapsed tick count of each run as "1: <ticks>" and "2: <ticks>".
///
/// Fix: the original declared `s`, `f`, `s2`, `f2` but assigned to the
/// undeclared names `start`, `finish`, `start2`, `finish2`, and printed the
/// never-written `f - s` / `f2 - s2`. One set of names is now used throughout.
///
/// NOTE(review): `__rdtsc`/`__rdtscp` are compiler intrinsics; with MSVC they
/// are presumably expected to come from <intrin.h>, which this file does not
/// include - confirm against the build.
int main(){
    // Receive the auxiliary value __rdtscp writes through its pointer
    // argument; the values are not used afterwards.
    unsigned int x = 0;
    unsigned int y = 0;

    const unsigned long long start = __rdtscp(&x);
    test1();
    const unsigned long long finish = __rdtscp(&y);

    const unsigned long long start2 = __rdtscp(&x);
    test2();
    const unsigned long long finish2 = __rdtscp(&y);

    std::cout << "1: " << finish - start << std::endl;
    std::cout << "2: " << finish2 - start2 << std::endl;
}

更新asm:

int main(){
 push        rbp  
 push        rsi  
 push        rdi  
 push        r14  
 sub         rsp,20h  
    unsigned long long start = 0;
    unsigned long long finish = 0;
    unsigned long long start2 = 0;
    unsigned long long finish2 = 0;
    unsigned long long start3 = 0;
    unsigned long long finish3 = 0;
    unsigned int x = 0;
 xor         r8d,r8d  
 mov         dword ptr [x],r8d  
    unsigned int y = 0;
 mov         dword ptr [y],r8d  

    start = __rdtscp(&x);
 rdtscp  
 lea         r9,[x]  
 shl         rdx,20h  
 mov         dword ptr [r9],ecx  
 or          rax,rdx  
    test1();
 mov         dword ptr [rsp+60h],r8d  
 mov         ecx,r8d  

    start = __rdtscp(&x);
 mov         r10,rax  
 nop         word ptr [rax+rax]  
    test1();
 rdtsc  
 shl         rdx,20h  
 or          rax,rdx  
 xor         al,0FFh  
 and         al,1  
 neg         al  
 sbb         eax,eax  
 inc         ecx  
 add         eax,5  
 mov         dword ptr [rsp+60h],eax  
 movsxd      rax,ecx  
 cmp         rax,3E8h  
    test1();
 jb          main+40h (013FFE1280h)  
    finish = __rdtscp(&y);
 rdtscp  
 lea         r9,[y]  
 shl         rdx,20h  
 or          rax,rdx  
 mov         dword ptr [r9],ecx  
 mov         rbp,rax  

    start2 = __rdtscp(&x);
 rdtscp  
 lea         r9,[x]  
 shl         rdx,20h  
 mov         dword ptr [r9],ecx  
 or          rax,rdx  
    test2();
 mov         dword ptr [rsp+60h],r8d  
 mov         r9d,r8d  

    start2 = __rdtscp(&x);
 mov         r14,rax  
 nop         word ptr [rax+rax]  
    test2();
 rdtsc  
 shl         rdx,20h  
 inc         r9d  
 or          rax,rdx  
 xor         al,0FFh  
 and         al,1  
    test2();
 movzx       ecx,al  
 lea         eax,[rcx+rcx*8]  
 mov         dword ptr [rsp+60h],eax  
 movsxd      rax,r9d  
 cmp         rax,3E8h  
 jb          main+0A0h (013FFE12E0h)  
    finish2 = __rdtscp(&y);

1 个答案:

答案 0 :(得分:2)

生成的代码中,两个函数的循环体内部都不包含任何条件分支,这就是没有分支预测失败惩罚的原因。

在第一个函数中,编译器将布尔值转换为0或-1(见 sbb eax,eax 附近),再把它加到5上。这是处理布尔值时非常标准的优化。

在第二个函数中,它乘以九(lea eax,[rcx+rcx*8]),因为生成这段汇编的代码写的是 5 * condition,而不是 5 * !condition。