Question

这是我的示例代码：

/*
 * Packed record built from two anonymous unions, so the same storage can be
 * written either member by member or as one wide integer.
 * Offsets noted below are the ones used by the x86-64 listings for fun1
 * (0, 4, 8, 10, 11); A and B overlay the field groups they share a union with.
 */
struct AAA {
    /* First union: the pair (a, b) or the single wide value A. */
    union{
        struct{
            int a; /* offset 0 */
            int b; /* offset 4 */
        };
        long A; /* overlays a and b; written with an 8-byte store in the listings */
    };

    /* Second union: the triple (c, d, e) or the single value B. */
    union{
        struct{
            short c; /* offset 8 */
            char d; /* offset 10 */
            char e; /* offset 11 */
        };
        int B; /* overlays c, d and e; written with a 4-byte store in the listings */
    };
} __attribute__((packed));

/*
 * Fill *aaa one member at a time: a=1, b=2, c=3, d=4, e=5.
 * This is the readable, field-by-field form of the initialisation.
 */
void fun1(struct AAA *aaa){
    aaa->a = 1;
    aaa->b = 2;
    aaa->c = 3;
    aaa->d = 4;
    aaa->e = 5;
}

/*
 * Fill *aaa through the two union-wide members instead of the individual
 * fields. The constants are the field values of fun1 packed by hand:
 *   A = (2L<<32)+1          = 0x0000000200000001  (b=2 in the high half, a=1 in the low half)
 *   B = (5<<24)+(4<<16)+3   = 0x05040003          (e=5, d=4, c=3)
 * On the little-endian x86-64 target shown in the listings this leaves the
 * same bytes in memory as fun1. NOTE: 2L<<32 needs a 64-bit long.
 */
void fun2(struct AAA *aaa){
    aaa->A = (2L<<32)+1;
    aaa->B = (5 << 24) + (4<<16) + 3;
}

当我使用gcc 5.4.0将其编译为asm代码时，我得到了：

# fun1 as emitted by gcc 5.4.0 -O3 (AT&T syntax, x86-64).
# %rdi holds the struct AAA pointer argument; each C assignment is its own store.
fun1:
.LFB0:
        .cfi_startproc
        movl    $3, %eax                # value for aaa->c, stored as 16 bits below
        movl    $1, (%rdi)              # aaa->a = 1  (offset 0, 4 bytes)
        movl    $2, 4(%rdi)             # aaa->b = 2  (offset 4, 4 bytes)
        movw    %ax, 8(%rdi)            # aaa->c = 3  (offset 8, 2 bytes)
        movb    $4, 10(%rdi)            # aaa->d = 4  (offset 10, 1 byte)
        movb    $5, 11(%rdi)            # aaa->e = 5  (offset 11, 1 byte)
        ret
        .cfi_endproc
.LFE0:
        .size   fun1, .-fun1
        .section        .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        .section        .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl  fun2
        .type   fun2, @function
# fun2 as emitted by gcc 5.4.0 -O3: two stores, one per union-wide member.
fun2:
.LFB1:
        .cfi_startproc
        movabsq $8589934593, %rax       # 0x0000000200000001 = (2L<<32)+1
        movl    $84148227, 8(%rdi)      # aaa->B = 0x05040003 = (5<<24)+(4<<16)+3  (offset 8)
        movq    %rax, (%rdi)            # aaa->A  (offset 0, 8 bytes)
        ret
        .cfi_endproc

当我用gcc 7.3.0编译时，我得到了

# fun1 as emitted by gcc 7.3.0 -O3. The five member assignments of the C
# source appear here as one 8-byte store and one 4-byte store, using the same
# two constants that fun2 spells out by hand.
fun1:
.LFB0:
        .cfi_startproc
        movabsq $8589934593, %rax       # 0x0000000200000001: b=2 (high dword), a=1 (low dword)
        movl    $84148227, 8(%rdi)      # 0x05040003: e=5, d=4, c=3 written together at offset 8
        movq    %rax, (%rdi)            # a and b written together at offset 0
        ret
        .cfi_endproc
.LFE0:
        .size   fun1, .-fun1
        .p2align 4,,15
        .globl  fun2
        .type   fun2, @function
# fun2 as emitted by gcc 7.3.0 -O3: the same three instructions as the
# gcc 5.4.0 output for fun2.
fun2:
.LFB1:
        .cfi_startproc
        movabsq $8589934593, %rax       # 0x0000000200000001 = (2L<<32)+1
        movl    $84148227, 8(%rdi)      # aaa->B = 0x05040003  (offset 8)
        movq    %rax, (%rdi)            # aaa->A  (offset 0, 8 bytes)
        ret
        .cfi_endproc

两者都使用了-O3选项。区别是显而易见的：较新版本的gcc把fun1优化成了与fun2完全相同的代码。

用gcc 5.4.0生成时fun2真的比fun1快吗？

我有一些旧项目，它们使用的是更旧版本的gcc（4.x）进行编译，并且我在其中发现了许多与上面示例类似的代码。如果要进行优化，将fun1的写法改成fun2的写法是个好主意吗？我暂时无法升级gcc。

Answer 1

假设这些程序在现代CPU架构上运行，则差异将以纳秒为单位。

除非您的代码主要由这类赋值语句组成，并且您确实需要榨取最后一点性能，否则我会让它保持fun1的写法，以保证可读性和可维护性。

gcc对结构体赋值的优化

1 个答案: