为什么优化器会删除我的代码?

时间:2015-02-15 13:44:58

标签: c++ optimization

今天我偶然发现了一个奇怪的问题。考虑下面这个简单的程序,我在其中尝试模仿 MMX 的 PADDW 指令:

#include <cstdint>
#include <cstdio>

// Reproduction case: emulates a packed 16-bit add (four 16-bit lanes inside a
// 64-bit value) by viewing uint64_t objects through uint16_t pointers.
// NOTE: reading/writing a uint64_t object through a uint16_t lvalue violates
// the strict aliasing rule, so this program has undefined behavior. The code
// is intentionally left unchanged because it is the example under discussion.
int main()
{
    uint64_t a = 0;
    uint64_t b = 0x1234123412341234;

    uint64_t c = 0;
    // View each 64-bit object as four consecutive 16-bit words.
    uint16_t *a_words = reinterpret_cast<uint16_t*>(&a);
    uint16_t *b_words = reinterpret_cast<uint16_t*>(&b);
    uint16_t *c_words = reinterpret_cast<uint16_t*>(&c);

    // Lane-wise add. The stores go through uint16_t lvalues, so under strict
    // aliasing the optimizer may assume they never modify the uint64_t `c`.
    for (size_t i = 0; i < 4; i ++)
        c_words[i] = a_words[i] + b_words[i];

    // uint16_t arguments are promoted to int when passed to printf, matching %d.
    printf("%d %d %d %d\n", a_words[0], a_words[1], a_words[2], a_words[3]);
    printf("%d %d %d %d\n", b_words[0], b_words[1], b_words[2], b_words[3]);
    printf("%d %d %d %d\n", c_words[0], c_words[1], c_words[2], c_words[3]);
    // Reads `c` directly as a uint64_t; this is the line whose output differs
    // between the unoptimized and -O2 runs shown in the accompanying text.
    // NOTE(review): %llx expects unsigned long long; uint64_t may be unsigned
    // long on some platforms, where PRIx64 from <cinttypes> is the portable choice.
    printf("%016llx\n", c);
    return 0;
}

使用 g++ -std=c++11 test.cpp -o test && ./test 编译并运行,会产生以下结果:

0 0 0 0
4660 4660 4660 4660
4660 4660 4660 4660
1234123412341234

但是,如果我启用 -O2,最后一行就会显示错误的值(使用 -O1 时结果仍然正确):

0 0 0 0
4660 4660 4660 4660
4660 4660 4660 4660
0000000000000000

为什么?


其他观察:

  1. 如果我手动展开循环,那么使用 -O2 编译后结果是正确的(!!):

    #include <cstdint>
    #include <cstdio>
    
    // Variant of the packed 16-bit add with the four lane additions written
    // out by hand instead of in a loop.
    // NOTE: the uint64_t objects are still accessed through uint16_t lvalues,
    // so this remains a strict aliasing violation (undefined behavior)
    // regardless of what output a particular compiler happens to produce.
    int main()
    {
        uint64_t a = 0;
        uint64_t b = 0x1234123412341234;
    
        uint64_t c = 0;
        // View each 64-bit object as four consecutive 16-bit words.
        uint16_t *a_words = reinterpret_cast<uint16_t*>(&a);
        uint16_t *b_words = reinterpret_cast<uint16_t*>(&b);
        uint16_t *c_words = reinterpret_cast<uint16_t*>(&c);
    
        // Manually unrolled lane-wise add (one statement per 16-bit lane).
        c_words[0] = a_words[0] + b_words[0];
        c_words[1] = a_words[1] + b_words[1];
        c_words[2] = a_words[2] + b_words[2];
        c_words[3] = a_words[3] + b_words[3];
    
        // uint16_t arguments are promoted to int when passed to printf, matching %d.
        printf("%d %d %d %d\n", a_words[0], a_words[1], a_words[2], a_words[3]);
        printf("%d %d %d %d\n", b_words[0], b_words[1], b_words[2], b_words[3]);
        printf("%d %d %d %d\n", c_words[0], c_words[1], c_words[2], c_words[3]);
        // Reads `c` directly as a uint64_t rather than through c_words.
        // NOTE(review): %llx expects unsigned long long; uint64_t may be
        // unsigned long on some platforms (PRIx64 is the portable specifier).
        printf("%016llx\n", c);
        return 0;
    }
    
  2. 如果我处理一个非常类似的问题,只是把 64 位整数换成 32 位整数,它同样可以正常工作:

    #include <cstdint>
    #include <cstdio>
    
    // Variant that performs a packed 8-bit add: four uint8_t lanes inside a
    // uint32_t, accessed through uint8_t pointers.
    // NOTE(review): uint8_t is typically a typedef for unsigned char, a type
    // that is allowed to alias any object; that is presumably why this variant
    // behaves as expected under optimization — confirm for the target platform.
    int main()
    {
        uint32_t a = 0;
        uint32_t b = 0x12121212;
    
        uint32_t c = 0;
        // View each 32-bit object as four consecutive bytes.
        uint8_t *a_words = reinterpret_cast<uint8_t*>(&a);
        uint8_t *b_words = reinterpret_cast<uint8_t*>(&b);
        uint8_t *c_words = reinterpret_cast<uint8_t*>(&c);
    
        // Lane-wise add; the sum is computed as int and narrowed back to
        // uint8_t on assignment, so each lane wraps modulo 256.
        for (size_t i = 0; i < 4; i ++)
            c_words[i] = a_words[i] + b_words[i];
    
        // uint8_t arguments are promoted to int when passed to printf, matching %d.
        printf("%d %d %d %d\n", a_words[0], a_words[1], a_words[2], a_words[3]);
        printf("%d %d %d %d\n", b_words[0], b_words[1], b_words[2], b_words[3]);
        printf("%d %d %d %d\n", c_words[0], c_words[1], c_words[2], c_words[3]);
        // Reads `c` directly as a uint32_t rather than through c_words.
        // NOTE(review): %x expects unsigned int; this assumes uint32_t is
        // unsigned int on the target (PRIx32 is the portable specifier).
        printf("%08x\n", c);
        return 0;
    }
    
  3. 该问题在 32 位和 64 位计算机上都会出现。我在 Cygwin 上用 g++ (GCC) 4.9.2、在 GNU/Linux 上用 g++ (Debian 4.9.1-19) 4.9.1 都试过。

1 个答案:

答案 0 :(得分:4)

这违反了严格别名(strict aliasing)规则。您把类型 A 的值写入了一块存放着类型 B 对象的内存。C++ 标准规定您不能这样做(此规则的例外是 char 及其 unsigned/signed 变体)。

这样的代码是不可移植的。不过,如果您仍然想以合法的方式实现同样的效果,可以采取哪些措施?

  • 将 uint64_t 复制到一个 uint16_t 数组中(使用 memcpy 或 std::copy),修改其中的值,然后再复制回去。
  • 或者使用能够直接转换为向量化指令的编译器内建函数(intrinsics)。
  • 或者禁用严格别名优化(例如 GCC 的 -fno-strict-aliasing 选项)。