使用GCC 4.8 / 4.9 / 5.1并以-O3编译程序时,程序会出现段错误,而我无法确定其原因。对于GCC 4.9.x,我在Cygwin、Debian 8(x64)和Fedora 21(x64)上都遇到过这个问题。其他人在GCC 4.8和5.1上也遇到过。
该程序在-O2下运行正常,使用其他版本的GCC时正常,在其他编译器(如MSVC、ICC和Clang)下也正常。
以下是GDB下的崩溃信息,但我没有从中看出任何明显的问题。misc.cpp:26处的源代码如下,它只是一个简单的XOR:
((word64*)buf)[i] ^= ((word64*)mask)[i];
有问题的代码在进行类型转换之前会先检查64位字对齐。从-O3下的反汇编中,我知道它与vmovdqa指令有关:
(gdb) disass 0x0000000000539fc3
...
0x0000000000539fbc <+220>: vxorps 0x0(%r13,%r10,1),%ymm0,%ymm0
=> 0x0000000000539fc3 <+227>: vmovdqa %ymm0,0x0(%r13,%r10,1)
0x0000000000539fca <+234>: add $0x20,%r10
看来GCC在-O3下使用了SSE向量指令,而在-O2下没有使用。(感谢Alejandro提出的建议。)
我想冒昧地问一句:vmovdqa的对齐要求是否大于64位字?如果是这样,为什么GCC在字没有按128位对齐时仍然选择这条指令?
这里导致段错误的原因是什么?我该如何进一步排除故障?
另见Bug 66852 - vmovdqa instructions issued on 64-bit aligned array, causes segfault。它是针对这个问题提出的,所以目前尚未证实。
$ gdb ./cryptest.exe
GNU gdb (Debian 7.7.1+dfsg-5) 7.7.1
...
(gdb) r v
...
Testing MessageDigest algorithm SHA-3-224.
.....
Program received signal SIGSEGV, Segmentation fault.
0x0000000000539fc3 in CryptoPP::xorbuf (buf=0x98549a "efghijde",
mask=mask@entry=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>..., count=count@entry=0x5e) at misc.cpp:26
26 ((word64*)buf)[i] ^= ((word64*)mask)[i];
(gdb) where
#0 0x0000000000539fc3 in CryptoPP::xorbuf (buf=0x98549a "efghijde",
mask=mask@entry=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>..., count=count@entry=0x5e) at misc.cpp:26
#1 0x0000000000561eb0 in CryptoPP::SHA3::Update (this=0x985480,
input=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>...,
length=0x5e) at sha3.cpp:264
#2 0x00000000005bac1a in CryptoPP::HashVerificationFilter::NextPutMultiple (
this=0x7fffffffd390,
inString=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>...,
length=0x5e) at filters.cpp:786
#3 0x00000000005bd8a2 in NextPutMaybeModifiable (modifiable=<optimized out>,
length=0x5e,
inString=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>...,
this=0x7fffffffd390) at filters.h:200
#4 CryptoPP::FilterWithBufferedInput::PutMaybeModifiable (
this=0x7fffffffd390,
inString=0x7fffffffbfeb "efghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 'a' <repeats 106 times>...,
length=<optimized out>, messageEnd=0x0, blocking=<optimized out>,
...
-O3下的反汇编和寄存器值。
(gdb) disass 0x0000000000539fc3
Dump of assembler code for function CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long):
0x0000000000539ee0 <+0>: lea 0x8(%rsp),%r10
0x0000000000539ee5 <+5>: and $0xffffffffffffffe0,%rsp
0x0000000000539ee9 <+9>: mov %rdx,%rax
0x0000000000539eec <+12>: pushq -0x8(%r10)
0x0000000000539ef0 <+16>: push %rbp
0x0000000000539ef1 <+17>: shr $0x3,%rax
0x0000000000539ef5 <+21>: mov %rsp,%rbp
0x0000000000539ef8 <+24>: push %r15
0x0000000000539efa <+26>: push %r14
0x0000000000539efc <+28>: push %r13
0x0000000000539efe <+30>: push %r12
0x0000000000539f00 <+32>: push %r10
0x0000000000539f02 <+34>: push %rbx
0x0000000000539f03 <+35>: je 0x53a00a <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+298>
0x0000000000539f09 <+41>: lea 0x20(%rdi),%rcx
0x0000000000539f0d <+45>: cmp %rcx,%rsi
0x0000000000539f10 <+48>: lea 0x20(%rsi),%rcx
0x0000000000539f14 <+52>: setae %r8b
0x0000000000539f18 <+56>: cmp %rcx,%rdi
0x0000000000539f1b <+59>: setae %cl
0x0000000000539f1e <+62>: or %cl,%r8b
0x0000000000539f21 <+65>: je 0x53a300 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+1056>
0x0000000000539f27 <+71>: cmp $0x8,%rax
0x0000000000539f2b <+75>: jbe 0x53a300 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+1056>
0x0000000000539f31 <+81>: mov %rdi,%rcx
0x0000000000539f34 <+84>: and $0x1f,%ecx
0x0000000000539f37 <+87>: shr $0x3,%rcx
0x0000000000539f3b <+91>: neg %rcx
0x0000000000539f3e <+94>: and $0x3,%ecx
0x0000000000539f41 <+97>: cmp %rax,%rcx
0x0000000000539f44 <+100>: cmova %rax,%rcx
0x0000000000539f48 <+104>: xor %r8d,%r8d
0x0000000000539f4b <+107>: test %rcx,%rcx
0x0000000000539f4e <+110>: je 0x539f80 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+160>
0x0000000000539f50 <+112>: mov (%rsi),%r8
0x0000000000539f53 <+115>: xor %r8,(%rdi)
0x0000000000539f56 <+118>: cmp $0x1,%rcx
0x0000000000539f5a <+122>: je 0x53a371 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+1169>
0x0000000000539f60 <+128>: mov 0x8(%rsi),%r8
0x0000000000539f64 <+132>: xor %r8,0x8(%rdi)
0x0000000000539f68 <+136>: cmp $0x3,%rcx
0x0000000000539f6c <+140>: jne 0x53a366 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+1158>
0x0000000000539f72 <+146>: mov 0x10(%rsi),%r8
0x0000000000539f76 <+150>: xor %r8,0x10(%rdi)
0x0000000000539f7a <+154>: mov $0x3,%r8d
0x0000000000539f80 <+160>: mov %rax,%r11
0x0000000000539f83 <+163>: xor %r10d,%r10d
0x0000000000539f86 <+166>: sub %rcx,%r11
0x0000000000539f89 <+169>: shl $0x3,%rcx
0x0000000000539f8d <+173>: xor %ebx,%ebx
0x0000000000539f8f <+175>: lea -0x4(%r11),%r9
0x0000000000539f93 <+179>: lea (%rdi,%rcx,1),%r13
0x0000000000539f97 <+183>: shr $0x2,%r9
0x0000000000539f9b <+187>: add %rsi,%rcx
0x0000000000539f9e <+190>: add $0x1,%r9
0x0000000000539fa2 <+194>: lea 0x0(,%r9,4),%r12
0x0000000000539faa <+202>: add $0x1,%rbx
0x0000000000539fae <+206>: vmovdqu (%rcx,%r10,1),%xmm0
0x0000000000539fb4 <+212>: vinsertf128 $0x1,0x10(%rcx,%r10,1),%ymm0,%ymm0
0x0000000000539fbc <+220>: vxorps 0x0(%r13,%r10,1),%ymm0,%ymm0
=> 0x0000000000539fc3 <+227>: vmovdqa %ymm0,0x0(%r13,%r10,1)
0x0000000000539fca <+234>: add $0x20,%r10
0x0000000000539fce <+238>: cmp %r9,%rbx
0x0000000000539fd1 <+241>: jb 0x539faa <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+202>
0x0000000000539fd3 <+243>: lea (%r8,%r12,1),%rcx
0x0000000000539fd7 <+247>: cmp %r12,%r11
0x0000000000539fda <+250>: je 0x53a006 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+294>
0x0000000000539fdc <+252>: mov (%rsi,%rcx,8),%r8
0x0000000000539fe0 <+256>: xor %r8,(%rdi,%rcx,8)
0x0000000000539fe4 <+260>: lea 0x1(%rcx),%r8
0x0000000000539fe8 <+264>: cmp %r8,%rax
0x0000000000539feb <+267>: jbe 0x53a006 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+294>
0x0000000000539fed <+269>: add $0x2,%rcx
0x0000000000539ff1 <+273>: mov (%rsi,%r8,8),%r9
0x0000000000539ff5 <+277>: xor %r9,(%rdi,%r8,8)
0x0000000000539ff9 <+281>: cmp %rcx,%rax
0x0000000000539ffc <+284>: jbe 0x53a006 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+294>
0x0000000000539ffe <+286>: mov (%rsi,%rcx,8),%r8
0x000000000053a002 <+290>: xor %r8,(%rdi,%rcx,8)
0x000000000053a006 <+294>: shl $0x3,%rax
和
(gdb) info r ymm0 r13 r10
ymm0 {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
v4_double = {0x8000000000000000, 0x8000000000000000, 0x8000000000000000,
0x8000000000000000}, v32_int8 = {0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x66, 0x67, 0x68, 0x69, 0x6a,
0x6b, 0x6c, 0x6d, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x68,
0x69}, v16_int16 = {0x6766, 0x6968, 0x6b6a, 0x6665, 0x6867, 0x6a69,
0x6c6b, 0x6766, 0x6968, 0x6b6a, 0x6d6c, 0x6867, 0x6a69, 0x6c6b, 0x6e6d,
0x6968}, v8_int32 = {0x69686766, 0x66656b6a, 0x6a696867, 0x67666c6b,
0x6b6a6968, 0x68676d6c, 0x6c6b6a69, 0x69686e6d}, v4_int64 = {
0x66656b6a69686766, 0x67666c6b6a696867, 0x68676d6c6b6a6968,
0x69686e6d6c6b6a69}, v2_int128 = {0x67666c6b6a69686766656b6a69686766,
0x69686e6d6c6b6a6968676d6c6b6a6968}}
r13 0x9854a2 0x9854a2
r10 0x0 0x0
以下是使用-O2编译并在相关行上设置断点时的反汇编。此时((word64*)buf)[i] ^= ((word64*)mask)[i];这一行移到了第31行:
Breakpoint 1, CryptoPP::xorbuf (buf=0x985488 "",
mask=mask@entry=0x7fffffffc01d "The quick brown fox", 'a' <repeats 181 times>..., count=count@entry=0x13) at misc.cpp:31
31 ((word64*)buf)[i] ^= ((word64*)mask)[i];
(gdb) disass
Dump of assembler code for function CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long):
0x0000000000532150 <+0>: mov %rdx,%rcx
0x0000000000532153 <+3>: shr $0x3,%rcx
0x0000000000532157 <+7>: je 0x532170 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+32>
0x0000000000532159 <+9>: xor %eax,%eax
=> 0x000000000053215b <+11>: mov (%rsi,%rax,8),%r8
0x000000000053215f <+15>: xor %r8,(%rdi,%rax,8)
0x0000000000532163 <+19>: add $0x1,%rax
0x0000000000532167 <+23>: cmp %rcx,%rax
0x000000000053216a <+26>: jne 0x53215b <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+11>
0x000000000053216c <+28>: shl $0x3,%rcx
0x0000000000532170 <+32>: sub %rcx,%rdx
0x0000000000532173 <+35>: je 0x5321d0 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+128>
0x0000000000532175 <+37>: mov %rdx,%r8
0x0000000000532178 <+40>: add %rcx,%rdi
0x000000000053217b <+43>: add %rcx,%rsi
0x000000000053217e <+46>: shr $0x2,%r8
0x0000000000532182 <+50>: je 0x5321a8 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+88>
0x0000000000532184 <+52>: xor %eax,%eax
0x0000000000532186 <+54>: nopw %cs:0x0(%rax,%rax,1)
0x0000000000532190 <+64>: mov (%rsi,%rax,4),%ecx
0x0000000000532193 <+67>: xor %ecx,(%rdi,%rax,4)
0x0000000000532196 <+70>: add $0x1,%rax
0x000000000053219a <+74>: cmp %r8,%rax
0x000000000053219d <+77>: jne 0x532190 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+64>
0x000000000053219f <+79>: shl $0x2,%r8
0x00000000005321a3 <+83>: sub %r8,%rdx
0x00000000005321a6 <+86>: je 0x5321d8 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+136>
0x00000000005321a8 <+88>: lea (%rdi,%r8,1),%rcx
0x00000000005321ac <+92>: xor %eax,%eax
0x00000000005321ae <+94>: lea (%rsi,%r8,1),%rdi
0x00000000005321b2 <+98>: nopw 0x0(%rax,%rax,1)
0x00000000005321b8 <+104>: movzbl (%rdi,%rax,1),%esi
0x00000000005321bc <+108>: xor %sil,(%rcx,%rax,1)
0x00000000005321c0 <+112>: add $0x1,%rax
0x00000000005321c4 <+116>: cmp %rdx,%rax
0x00000000005321c7 <+119>: jb 0x5321b8 <CryptoPP::xorbuf(unsigned char*, unsigned char const*, unsigned long)+104>
0x00000000005321c9 <+121>: retq
0x00000000005321ca <+122>: nopw 0x0(%rax,%rax,1)
0x00000000005321d0 <+128>: retq
0x00000000005321d1 <+129>: nopl 0x0(%rax)
0x00000000005321d8 <+136>: retq
End of assembler dump.
以下代码摘自misc.cpp,其中第26行为((word64*)buf)[i] ^= ((word64*)mask)[i];。
// XORs `count` bytes of `mask` into `buf` in place (buf[i] ^= mask[i]).
//
// buf   - destination bytes; modified in place.
// mask  - source bytes; only read.
// count - number of bytes to process.
//
// The work is done in the widest unit the pointer checks allow: 64-bit
// words first, then 32-bit words, then single bytes for whatever remains.
//
// NOTE(review): the -O3 backtrace quoted in this post shows the word64 loop
// running with buf=0x98549a and mask=0x7fffffffbfeb, neither of which is a
// multiple of 8 (or of 4). The IsAligned checks evidently let unaligned
// pointers through in that build -- confirm against IsAligned's definition,
// which is not shown here.
void xorbuf(byte *buf, const byte *mask, size_t count)
{
size_t i;
// The word-wise passes are skipped entirely unless both pointers pass the
// word32 check.
if (IsAligned<word32>(buf) && IsAligned<word32>(mask))
{
// 64-bit pass: taken only when CRYPTOPP_BOOL_SLOW_WORD64 is false and both
// pointers pass the word64 check.
if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask))
{
// XOR count/8 whole 64-bit words.
// NOTE(review): this is the statement GDB reports at the fault (misc.cpp:26);
// the -O3 disassembly in this post shows an aligned vmovdqa store emitted
// for it. The casts access the byte buffers as word64 objects.
for (i=0; i<count/8; i++)
((word64*)buf)[i] ^= ((word64*)mask)[i];
// i now equals the number of words processed; drop those bytes from count.
count -= 8*i;
if (!count)
return;
// Step past the processed words so the later passes handle only the tail.
buf += 8*i;
mask += 8*i;
}
// 32-bit pass over what is left (the whole buffer when the 64-bit pass was
// skipped).
for (i=0; i<count/4; i++)
((word32*)buf)[i] ^= ((word32*)mask)[i];
count -= 4*i;
if (!count)
return;
buf += 4*i;
mask += 4*i;
}
// Byte-wise pass: handles the remaining tail, or the entire buffer when the
// word32 check above failed.
for (i=0; i<count; i++)
buf[i] ^= mask[i];
}
答案 0 :(得分:4)
您可以使用g++ -Wall -Wextra -O3 -g进行编译;您需要启用警告,因为其中一些警告可能只会在-O3所执行的优化过程中产生;您还需要(通过-g)启用调试信息以便使用gdb,但请注意,在较强的优化下,调试信息并不总是可靠的。
您可能遇到了指针别名(pointer aliasing)问题。也许可以使用(或删除)restrict关键字。
请务必避免未定义行为(undefined behavior)。您可以将-fsanitize=选项(特别是-fsanitize=address和-fsanitize=undefined ....)用于g++编译器(最好是版本5),也可以使用valgrind。
顺便说一句,您可以使用-fdump-tree-all之类的转储选项(警告,它们可能生成数百个文件!)来更好地了解g++的内部行为;您甚至可以自定义GCC编译器。
另外,如果要查看生成的汇编代码,请使用g++ -Wall -S -O3 -fverbose-asm进行编译,因为-fverbose-asm会要求GCC输出一些汇编注释来“解释”(不多,但有一点)编译后的代码。