我目前的尝试:
/**simplified from
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
**/
#include <stdio.h>
#include <stdint.h>
#if defined(_MSC_VER)
# include <intrin.h>
#endif
/**
 * Execute the CPUID instruction and store the four result registers.
 *
 * @param eax  CPUID leaf, loaded into EAX before the instruction runs.
 * @param ecx  CPUID sub-leaf, loaded into ECX before the instruction runs.
 * @param abcd Output array of at least four elements; receives
 *             EAX, EBX, ECX, EDX in that order.
 */
void get_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd){
#if defined(_MSC_VER)
    /* __cpuidex is declared with int[4]; go through a correctly typed buffer
       instead of passing a uint32_t pointer. */
    int regs[4];
    __cpuidex(regs, (int)eax, (int)ecx);
    abcd[0] = (uint32_t)regs[0];
    abcd[1] = (uint32_t)regs[1];
    abcd[2] = (uint32_t)regs[2];
    abcd[3] = (uint32_t)regs[3];
#else
    uint32_t ebx, edx;
#if defined( __i386__ ) && defined ( __PIC__ )
    /* In 32-bit PIC code EBX must not be clobbered: save it in EDI, run CPUID,
       then swap so that EDI carries CPUID's EBX result and EBX is restored. */
    __asm__ __volatile__( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D"(ebx),
#else
    /* EBX is a pure output of CPUID, so it is write-only ("=b"). */
    __asm__ __volatile__( "cpuid" : "=b"(ebx),
#endif
        "+a"(eax), "+c"(ecx), "=d"(edx));
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
/**
 * Report whether CPUID advertises RTM (Restricted Transactional Memory).
 *
 * @return 1 if CPUID.(EAX=07H,ECX=0):EBX.RTM [bit 11] is set, 0 otherwise
 *         (including when the processor does not implement leaf 7 at all).
 */
int has_RTM_support(void){
    uint32_t abcd[4];
    /* Leaf 0 returns the highest supported basic leaf in EAX. Querying a leaf
       above that maximum yields unrelated data, so bail out first. */
    get_cpuid(0x0, 0x0, abcd);
    if (abcd[0] < 0x7) {
        return 0;
    }
    /* processor supports RTM execution if CPUID.07H.EBX.RTM [bit 11] = 1 */
    get_cpuid(0x7, 0x0, abcd);
    return (abcd[1] & (UINT32_C(1) << 11)) != 0;
}
/* Entry point: prints a one-line verdict on RTM support (no trailing newline)
   and always exits with status 0. Command-line arguments are ignored. */
int main(int argc, char **argv){
    const char *verdict;

    (void)argc;
    (void)argv;

    verdict = has_RTM_support()
        ? "This CPU supports RTM."
        : "This CPU does NOT support RTM.";
    fputs(verdict, stdout);
    return 0;
}
我有一个 Intel® Core™ i7-7600U(cpuinfo 见下文);从 Intel ARK 上的规格可以看出,它应该支持 TSX-NI。
然而,上面的检查却输出:
This CPU does NOT support RTM.
tsx-tools 中的 has_tsx 实现也给出了相同的结论:
RTM: No
HLE: No
但与此同时,我可以很好地执行此代码段......
#include <stdio.h>
/*
 * Demo program: counts i up to 100000000, wrapping every increment between
 * an XBEGIN and an XEND instruction issued through inline assembly, then
 * prints the final value of i.
 *
 * NOTE(review): no CPUID check is made before the RTM instructions are
 * issued; the accompanying text expects #UD on processors without RTM.
 */
int main()
{
/* volatile so that every read and write of i is an actual memory access */
volatile int i = 0;
while (i < 100000000) {
/* XBEGIN is given the label ABORT (emitted below) as its operand. */
__asm__ ("xbegin ABORT");
i++;
__asm__ ("xend");
/*
 * Emits the assembler label ABORT directly after XEND.
 * NOTE(review): ABORT is an ordinary assembler symbol; if the compiler ever
 * duplicated this statement (inlining, unrolling) it would be defined twice.
 * NOTE(review): none of these asm statements declares clobbers; the Intel SDM
 * says an aborted transaction writes a status code to EAX -- confirm before
 * reusing this pattern in optimized code.
 */
__asm__ ("ABORT:");
}
printf("%d\n", i);
return 0;
}
我的理解是,这些 asm 指令“在不支持 RTM 的处理器上使用时会产生 #UD 异常”,至少 Intel 的文档是这样说的(第 387 页)。
我也检查了asm代码,这些说明仍然存在(请参阅下面的.s文件内容)。
因此,由于这些指令似乎已被执行,这些检查是否完全错误?
如果是这样,您将如何正确测试RTM支持?
代码段的ASM代码
.file "rtm_simple.c"
# GNU C11 (Ubuntu 6.3.0-12ubuntu2) version 6.3.0 20170406 (x86_64-linux-gnu)
# compiled by GNU C version 6.3.0 20170406, GMP version 6.1.2, MPFR version 3.1.5, MPC version 1.0.3, isl version 0.15
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -imultiarch x86_64-linux-gnu rtm_simple.c -mtune=generic
# -march=x86-64 -fverbose-asm -fstack-protector-strong -Wformat
# -Wformat-security
# options enabled: -fPIC -fPIE -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type
# -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls
# -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds
# -fchkp-use-static-bounds -fchkp-use-static-const-bounds
# -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique -fident
# -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -flifetime-dse -flto-odr-type-merging -fmath-errno
# -fmerge-debug-strings -fpeephole -fplt -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
# -fsemantic-interposition -fshow-column -fsigned-zeros
# -fsplit-ivs-in-unroller -fssa-backprop -fstack-protector-strong
# -fstdarg-opt -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time
# -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -malign-stringops
# -mavx256-split-unaligned-load -mavx256-split-unaligned-store
# -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp
# -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2
# -mstv -mtls-direct-seg-refs -mvzeroupper
.section .rodata
.LC0:
.string "%d\n"
.text
.globl main
.type main, @function
# Compiler output for main() of rtm_simple.c (unoptimized; i lives at -4(%rbp)).
main:
.LFB0:
.cfi_startproc
# Prologue: set up the frame pointer and reserve 16 bytes of stack.
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $16, %rsp #,
movl $0, -4(%rbp) #, i
# Jump straight to the loop condition at .L2.
jmp .L2 #
# Loop body: the inline-asm XBEGIN, the increment of i, then XEND.
.L3:
#APP
# 7 "rtm_simple.c" 1
xbegin ABORT
# 0 "" 2
#NO_APP
movl -4(%rbp), %eax # i, i.0_5
addl $1, %eax #, i.1_6
movl %eax, -4(%rbp) # i.1_6, i
#APP
# 9 "rtm_simple.c" 1
xend
# 0 "" 2
# 10 "rtm_simple.c" 1
ABORT:
# 0 "" 2
#NO_APP
# Loop condition: i is reloaded from the stack and the loop repeats while i <= 99999999.
.L2:
movl -4(%rbp), %eax # i, i.2_4
cmpl $99999999, %eax #, i.2_4
jle .L3 #,
# After the loop: call printf with the format string .LC0 and the value of i.
movl -4(%rbp), %eax # i, i.3_8
movl %eax, %esi # i.3_8,
leaq .LC0(%rip), %rdi #,
movl $0, %eax #,
call printf@PLT #
# Return 0 from main.
movl $0, %eax #, _10
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"
.section .note.GNU-stack,"",@progbits
cpuinfo 的输出:
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 142
model name : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping : 9
cpu MHz : 2904.004
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 0
cpu cores : 2
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 22
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs :
bogomips : 5808.00
clflush size : 64
cache_alignment : 64
address sizes : 39 bits physical, 48 bits virtual
power management:
processor : 1
vendor_id : GenuineIntel
cpu family : 6
model : 142
model name : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping : 9
cpu MHz : 2904.004
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 1
cpu cores : 2
apicid : 1
initial apicid : 1
fpu : yes
fpu_exception : yes
cpuid level : 22
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs :
bogomips : 5808.00
clflush size : 64
cache_alignment : 64
address sizes : 39 bits physical, 48 bits virtual
power management: