(设置:Win 7 64,MSVC,第3代Core i7,64位补码,-O2启用)
下面的代码有三个函数 - 一个有一个IF语句,它根据条件是否满足而执行不同的代码。我用一些布尔逻辑替换了这个IF语句。然而,时间是相同的....我期待缺乏分支预测来产生更快的代码:
#include <iostream>
unsigned long long iterations = 1000000000;
void test1(){
volatile int c = 0;
for(int i=0; i<iterations; i++){
bool condition = __rdtsc() % 2 == 0;
if(condition){
c = 4;
}
else{
c = 5;
}
}
}
void test2(){
volatile int c = 0;
for(int i=0; i<iterations; i++){
bool condition = __rdtsc() % 2 == 0;
c = (4 * condition) + (5 * !condition);
}
}
int main(){
unsigned long long s = 0;
unsigned long long f = 0;
unsigned long long s2 = 0;
unsigned long long f2 = 0;
unsigned int x = 0;
unsigned int y = 0;
start = __rdtscp(&x);
test1();
finish = __rdtscp(&y);
start2 = __rdtscp(&x);
test2();
finish2 = __rdtscp(&y);
std::cout << "1: " << f - s<< std::endl;
std::cout << "2: " << f2- s2<< std::endl;
}
更新asm:
int main(){
push rbp
push rsi
push rdi
push r14
sub rsp,20h
unsigned long long start = 0;
unsigned long long finish = 0;
unsigned long long start2 = 0;
unsigned long long finish2 = 0;
unsigned long long start3 = 0;
unsigned long long finish3 = 0;
unsigned int x = 0;
xor r8d,r8d
mov dword ptr [x],r8d
unsigned int y = 0;
mov dword ptr [y],r8d
start = __rdtscp(&x);
rdtscp
lea r9,[x]
shl rdx,20h
mov dword ptr [r9],ecx
or rax,rdx
test1();
mov dword ptr [rsp+60h],r8d
mov ecx,r8d
start = __rdtscp(&x);
mov r10,rax
nop word ptr [rax+rax]
test1();
rdtsc
shl rdx,20h
or rax,rdx
xor al,0FFh
and al,1
neg al
sbb eax,eax
inc ecx
add eax,5
mov dword ptr [rsp+60h],eax
movsxd rax,ecx
cmp rax,3E8h
test1();
jb main+40h (013FFE1280h)
finish = __rdtscp(&y);
rdtscp
lea r9,[y]
shl rdx,20h
or rax,rdx
mov dword ptr [r9],ecx
mov rbp,rax
start2 = __rdtscp(&x);
rdtscp
lea r9,[x]
shl rdx,20h
mov dword ptr [r9],ecx
or rax,rdx
test2();
mov dword ptr [rsp+60h],r8d
mov r9d,r8d
start2 = __rdtscp(&x);
mov r14,rax
nop word ptr [rax+rax]
test2();
rdtsc
shl rdx,20h
inc r9d
or rax,rdx
xor al,0FFh
and al,1
test2();
movzx ecx,al
lea eax,[rcx+rcx*8]
mov dword ptr [rsp+60h],eax
movsxd rax,r9d
cmp rax,3E8h
jb main+0A0h (013FFE12E0h)
finish2 = __rdtscp(&y);
答案 0 :(得分:2)
生成的代码不包含任何函数的任何内部分支,这就是没有错误预测惩罚的原因。
在第一个中,它将布尔值转换为零或-1(大约sbb eax,eax
)并将其添加到5.这是使用布尔值时非常标准的优化。
在第二个中,它乘以九(rcx+rcx*8
),因为5 * condition
不是5 * !condition
。