我想在一个原子变量上自旋等待,直到它变为某个值,然后获取(acquire)与之配对的
memory_order_release 存储(store)操作所发布的内存。
我原以为在开启优化之后,下面这两个函数
#include <stdatomic.h>
/*
 * Busy-wait until the byte pointed to by xp is observed to equal 2.
 *
 * Every poll of *xp is performed with memory_order_acquire, so the load
 * that finally reads 2 is itself an acquire operation.
 *
 * xp: atomic flag to poll; the function does not return until a load
 *     of *xp yields 2.
 */
void wait_for_flag_to_become2(_Atomic unsigned char *xp)
{
    for (;;) {
        unsigned char seen = atomic_load_explicit(xp, memory_order_acquire);

        if (seen == 2) {
            return;
        }
    }
}
/*
 * Busy-wait until the byte pointed to by xp is observed to equal 2,
 * then issue an acquire fence.
 *
 * The polling loads use memory_order_relaxed; ordering is established
 * by the single atomic_thread_fence(memory_order_acquire) executed once
 * the loop has exited.
 *
 * xp: atomic flag to poll; the function does not return until a load
 *     of *xp yields 2.
 */
void wait_for_flag_to_become2_(_Atomic unsigned char *xp)
{
    unsigned char seen;

    do {
        seen = atomic_load_explicit(xp, memory_order_relaxed);
    } while (seen != 2);

    /* Acquire fence placed after the relaxed load that read 2. */
    atomic_thread_fence(memory_order_acquire);
}
会生成相同的代码,并且生成的代码会与后一个函数类似;但在两者输出汇编不同的平台上,情况并非如此。
以上两个函数在语义上是否等价?输出汇编的差异只是因为编译器未能完成某项简单的优化,还是两者实际上在语义上并不等价?
https://gcc.godbolt.org/z/hNcf-i
power64-at12,gcc8:
wait_for_flag_to_become2:
.quad .L.wait_for_flag_to_become2,.TOC.@tocbase,0
.L.wait_for_flag_to_become2:
.L2:
lbz 9,0(3)
cmpw 7,9,9
bne- 7,$+4
isync
rlwinm 9,9,0,0xff
cmplwi 7,9,2
bne 7,.L2
blr
.long 0
.byte 0,0,0,0,0,0,0,0
wait_for_flag_to_become2_:
.quad .L.wait_for_flag_to_become2_,.TOC.@tocbase,0
.L.wait_for_flag_to_become2_:
.L6:
lbz 9,0(3)
cmplwi 7,9,2
bne 7,.L6
lwsync
blr
.long 0
.byte 0,0,0,0,0,0,0,0
ARM64,gcc 8.2:
wait_for_flag_to_become2:
.L2:
ldarb w1, [x0]
and w1, w1, 255
cmp w1, 2
bne .L2
ret
wait_for_flag_to_become2_:
.L5:
ldrb w1, [x0]
and w1, w1, 255
cmp w1, 2
bne .L5
dmb ishld
ret