Clang 的优化破坏了我的代码?

时间:2015-12-24 12:30:48

标签: c optimization clang compiler-optimization

我试图准备一个最小的可运行代码示例:

#include <stdio.h>
#include <inttypes.h>
#include <string.h>

// CPU flag set stored as single-bit fields, one bit per flag.
struct FECPUFlags {
    uint16_t CF : 1; /* carry */
    uint16_t PF : 1; /* parity */
    uint16_t AF : 1; /* adjust */
    uint16_t ZF : 1; /* zero */
    uint16_t SF : 1; /* sign */
    uint16_t TF : 1; /* trap */
    uint16_t IF : 1; /* interrupt enable */
    uint16_t DF : 1; /* direction */
    uint16_t OF : 1; /* overflow */
};

typedef struct FECPUFlags FECPUFlags;

// Builds a 16-bit word from the flag bits held in cpuRegister.
// Result layout: CF -> bit 0, PF -> bit 2, AF -> bit 4, ZF -> bit 6,
// SF -> bit 7, DF -> bit 10, OF -> bit 11. TF and IF are not copied,
// and every other bit of the result is zero.
uint16_t fe_cpuflags_16(FECPUFlags cpuRegister) {
    enum {
        CF_SHIFT = 0,
        PF_SHIFT = 2,
        AF_SHIFT = 4,
        ZF_SHIFT = 6,
        SF_SHIFT = 7,
        DF_SHIFT = 10,
        OF_SHIFT = 11
    };

    // Each field is a single bit, so every term contributes 0 or one set bit.
    const unsigned packed =
          ((unsigned)cpuRegister.CF << CF_SHIFT)
        | ((unsigned)cpuRegister.PF << PF_SHIFT)
        | ((unsigned)cpuRegister.AF << AF_SHIFT)
        | ((unsigned)cpuRegister.ZF << ZF_SHIFT)
        | ((unsigned)cpuRegister.SF << SF_SHIFT)
        | ((unsigned)cpuRegister.DF << DF_SHIFT)
        | ((unsigned)cpuRegister.OF << OF_SHIFT);

    return (uint16_t)packed;
}

// Prints, in hexadecimal, the value fe_cpuflags_16 returns for the flag
// states produced by the nested ENUM loops below.
int main() {
    // Declared without an initializer: no field has a defined value yet.
    FECPUFlags flag;
    // ENUM(F) expands to a loop header that runs its body for i = 0 and i = 1.
    // The assignment flag.F=i lives in the increment clause, so it executes
    // only AFTER a pass of the body has completed -- flag.F is not assigned
    // before the first pass. After the last pass it stores i == 2 into a
    // 1-bit field.
#define ENUM(F) for(int i=0;i<=1;i++,flag.F=i)

    // Seven nested loops, one for each field that fe_cpuflags_16 reads.
    // TF and IF are never assigned anywhere in this function.
    ENUM(CF) ENUM(PF) ENUM(AF) ENUM(ZF) ENUM(SF) ENUM(DF) ENUM(OF) {
        printf("0x%X\n",fe_cpuflags_16(flag));
    }

    return 0;
}

clang版

Apple LLVM version 7.0.2 (clang-700.1.81)
Target: x86_64-apple-darwin15.0.0

应用程序的输出因不同的优化模式而异。 clang -O0 vs clang -O3。

这是优化版本的反汇编:
_fe_cpuflags_16:
0000000100000d70        pushq   %rbp
0000000100000d71        movq    %rsp, %rbp
0000000100000d74        movl    %edi, %eax
0000000100000d76        andl    $0x1, %eax
0000000100000d79        leal    (%rdi,%rdi), %ecx
0000000100000d7c        andl    $0x4, %ecx
0000000100000d7f        orl     %eax, %ecx
0000000100000d81        leal    (,%rdi,4), %eax
0000000100000d88        andl    $0x10, %eax
0000000100000d8b        orl     %ecx, %eax
0000000100000d8d        shll    $0x3, %edi
0000000100000d90        movl    %edi, %ecx
0000000100000d92        andl    $0x40, %ecx
0000000100000d95        orl     %eax, %ecx
0000000100000d97        movl    %edi, %eax
0000000100000d99        andl    $0x80, %eax
0000000100000d9e        orl     %ecx, %eax
0000000100000da0        movl    %edi, %ecx
0000000100000da2        andl    $0x400, %ecx            ## imm = 0x400
0000000100000da8        orl     %eax, %ecx
0000000100000daa        andl    $0x800, %edi            ## imm = 0x800
0000000100000db0        orl     %ecx, %edi
0000000100000db2        movzwl  %di, %eax
0000000100000db5        popq    %rbp
0000000100000db6        retq

有趣的是,如果我在枚举之前用 memset 将 flag 清零,代码就能正常工作。

是哪种优化破坏了这段代码?还是说这段代码本身就存在某种问题?

2 个答案:

答案 0 :(得分:4)

您正在使用FECPUFlags flag;而未初始化它。

这是undefined behavior。任何事情都可能发生。

答案 1 :(得分:3)

您有一个未初始化的变量,稍后会被访问,因此您有未定义的行为。

FECPUFlags flag;

(某些)标志的初始化仅在循环的第二次执行中发生:

#define ENUM(F) for(int i=0;i<=1;i++,flag.F=i)
                                     ^^^