我有一个程序,需要把大量字符串连接在一起(更准确地说,是把整数转换成字符串后再连接)。在我的Ubuntu计算机上(g++
7.3.0),这段代码运行耗时1.5秒。但它也必须在Windows上运行(使用MinGW,g++
6.3.0),在那里却需要15秒才能完成。而且,Ubuntu系统所在的笔记本电脑要慢得多(CPU为2.30GHz的i7-4712MQ),而Windows机器使用的是4.20GHz的i7-7700K CPU。
用于复现该耗时的代码如下所示。我使用 g++ tester.cpp -O2 -o tester
(在Windows上输出文件名为 tester.exe
)编译代码:
#include <iostream>
#include <chrono>
// Benchmark entry point: times how long it takes to append n two-character
// pieces (the decimal text of `a` plus a space) to one std::string, then
// prints the elapsed time in seconds.
int main(int argc, char const *argv[]) {
// Timestamp taken before any string work starts.
auto started = std::chrono::high_resolution_clock::now();
std::string str = "";
const int n = 10000000;
// `a` is always 1, so every iteration appends exactly two characters;
// reserving 2 * n up front covers the whole result.
str.reserve(2 * n);
int a = 1;
for (int i = 0; i < n; ++i) {
// Builds a temporary from std::to_string(a), concatenates " " to it, and
// appends the result to str.
str += std::to_string(a) + " ";
}
auto done = std::chrono::high_resolution_clock::now();
// The elapsed time is truncated to whole milliseconds, then scaled to seconds.
double secs = (double) std::chrono::duration_cast<std::chrono::milliseconds>(done-started).count() / 1000;
std::cout << "Done in " << secs << "\n";
return 0;
}
有人知道是什么导致了如此巨大的性能差距吗?
反汇编如下(先是Ubuntu上g++ 7.3.0的输出,随后是Windows上MinGW g++ 6.3.0的输出):
.file "tester.cpp"
.text
.align 2
.p2align 4,,15
.type _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, @function
# Compiler-specialised (.isra) clone of
# std::__cxx11::basic_string<char>::_M_construct<const char*>(first, last, forward_iterator_tag).
# On entry: %rdi = string object, %rsi = first source byte, %rdx = one past the last source byte.
_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19:
.LFB2389:
.cfi_startproc
pushq %r12
.cfi_def_cfa_offset 16
.cfi_offset 12, -16
pushq %rbp
.cfi_def_cfa_offset 24
.cfi_offset 6, -24
movq %rsi, %r12
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset 3, -32
# %rbx = last - first, i.e. the number of bytes to copy; %rbp keeps the object pointer.
movq %rdx, %rbx
movq %rdi, %rbp
subq %rsi, %rbx
subq $16, %rsp
.cfi_def_cfa_offset 48
# Load the stack-protector canary from %fs:40 and keep a copy at 8(%rsp).
movq %fs:40, %rax
movq %rax, 8(%rsp)
xorl %eax, %eax
# Lengths above 15 take the .L12 path, which calls _M_create before copying.
cmpq $15, %rbx
movq %rbx, (%rsp)
ja .L12
movq (%rdi), %rdx
# Length exactly 1 is copied inline as a single byte; other lengths go to .L4.
cmpq $1, %rbx
movq %rdx, %rax
jne .L4
movzbl (%rsi), %eax
movb %al, (%rdx)
movq (%rdi), %rdx
# Common tail: store the length at offset 8 of the object and write a NUL
# byte at buffer[length].
.L5:
movq (%rsp), %rax
movq %rax, 8(%rbp)
movb $0, (%rdx,%rax)
# Compare the saved canary with %fs:40 before returning.
movq 8(%rsp), %rax
xorq %fs:40, %rax
jne .L13
addq $16, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 32
popq %rbx
.cfi_def_cfa_offset 24
popq %rbp
.cfi_def_cfa_offset 16
popq %r12
.cfi_def_cfa_offset 8
ret
# Long input: call _M_create with the length passed by address (%rsp), then
# store the returned pointer at offset 0 of the object and the value left in
# (%rsp) at offset 16.
.L12:
.cfi_restore_state
xorl %edx, %edx
movq %rsp, %rsi
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm@PLT
movq (%rsp), %rdx
movq %rax, 0(%rbp)
movq %rdx, 16(%rbp)
# memcpy(dest = %rax, src = %r12, n = %rbx), then rejoin the common tail.
.L3:
movq %rbx, %rdx
movq %r12, %rsi
movq %rax, %rdi
call memcpy@PLT
movq 0(%rbp), %rdx
jmp .L5
# Length 0 skips the copy; any other length reaching here uses memcpy into
# the buffer the object already points at.
.L4:
testq %rbx, %rbx
je .L5
jmp .L3
# Canary mismatch.
.L13:
call __stack_chk_fail@PLT
.cfi_endproc
.LFE2389:
.size _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, .-_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
.set _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23,_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
.section .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,"axG",@progbits,_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,comdat
.p2align 4,,15
.weak _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
.type _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, @function
# __gnu_cxx::__to_xstring<std::string, char>: variadic helper behind std::to_string.
# On entry: %rdi = result string slot, %rsi = conversion function pointer,
# %rdx = scratch buffer size, %rcx = format string; the values to format are variadic.
_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z:
.LFB1953:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
# Shuffle the arguments into place for the later indirect call:
# function pointer -> %r10, size -> %rsi, format -> %rdx.
movq %rsi, %r10
movq %rdx, %rsi
movq %rcx, %rdx
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %r12
pushq %rbx
.cfi_offset 12, -24
.cfi_offset 3, -32
movq %rdi, %r12
subq $208, %rsp
# Variadic prologue: spill %r8/%r9, and %xmm0-%xmm7 when %al is non-zero.
testb %al, %al
movq %r8, -160(%rbp)
movq %r9, -152(%rbp)
je .L15
movaps %xmm0, -144(%rbp)
movaps %xmm1, -128(%rbp)
movaps %xmm2, -112(%rbp)
movaps %xmm3, -96(%rbp)
movaps %xmm4, -80(%rbp)
movaps %xmm5, -64(%rbp)
movaps %xmm6, -48(%rbp)
movaps %xmm7, -32(%rbp)
.L15:
movq %fs:40, %rax
movq %rax, -200(%rbp)
xorl %eax, %eax
# Carve ((size + 30) & -16) bytes off the stack for the scratch buffer
# (16-byte aligned address kept in %rbx) and fill in the va_list at
# -224(%rbp), whose address is passed in %rcx.
leaq 30(%rsi), %rax
leaq -224(%rbp), %rcx
andq $-16, %rax
movl $32, -224(%rbp)
movl $48, -220(%rbp)
subq %rax, %rsp
leaq 16(%rbp), %rax
leaq 15(%rsp), %rbx
movq %rax, -216(%rbp)
leaq -192(%rbp), %rax
andq $-16, %rbx
movq %rbx, %rdi
movq %rax, -208(%rbp)
# Indirect call through the function pointer: (buffer, size, format, va_list).
call *%r10
# Point the result object at its own storage at offset 16, then construct it
# from the range [buffer, buffer + returned length).
leaq 16(%r12), %rdx
movq %r12, %rdi
movq %rbx, %rsi
movq %rdx, (%r12)
movslq %eax, %rdx
addq %rbx, %rdx
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23
# Stack-protector check; the result pointer is returned in %rax.
movq -200(%rbp), %rdi
xorq %fs:40, %rdi
movq %r12, %rax
jne .L18
leaq -16(%rbp), %rsp
popq %rbx
popq %r12
popq %rbp
.cfi_remember_state
.cfi_def_cfa 7, 8
ret
.L18:
.cfi_restore_state
call __stack_chk_fail@PLT
.cfi_endproc
.LFE1953:
.size _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, .-_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string ""
.LC1:
.string "%d"
.LC2:
.string "basic_string::append"
.LC3:
.string " "
.LC5:
.string "Done in "
.LC6:
.string "\n"
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
# x86-64 code generated for main() of tester.cpp (GCC 7.3.0 on Ubuntu, per the .ident line).
# Stack objects: str at 16(%rsp), the to_string temporary at 48(%rsp), and
# the temporary that receives the concatenation result at 80(%rsp).
main:
.LFB1871:
.cfi_startproc
.cfi_personality 0x9b,DW.ref.__gxx_personality_v0
.cfi_lsda 0x1b,.LLSDA1871
pushq %r15
.cfi_def_cfa_offset 16
.cfi_offset 15, -16
pushq %r14
.cfi_def_cfa_offset 24
.cfi_offset 14, -24
pushq %r13
.cfi_def_cfa_offset 32
.cfi_offset 13, -32
pushq %r12
.cfi_def_cfa_offset 40
.cfi_offset 12, -40
pushq %rbp
.cfi_def_cfa_offset 48
.cfi_offset 6, -48
pushq %rbx
.cfi_def_cfa_offset 56
.cfi_offset 3, -56
subq $136, %rsp
.cfi_def_cfa_offset 192
leaq 16(%rsp), %r13
movq %fs:40, %rax
movq %rax, 120(%rsp)
xorl %eax, %eax
# Start timestamp: system_clock::now(); the result is kept at (%rsp).
call _ZNSt6chrono3_V212system_clock3nowEv@PLT
# Construct str from the empty literal .LC0 (first == last).
leaq .LC0(%rip), %rdx
movq %rax, (%rsp)
leaq 16(%r13), %rax
movq %r13, %rdi
movq %rdx, %rsi
movq %rax, 16(%rsp)
.LEHB0:
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
.LEHE0:
# str.reserve(20000000)
movl $20000000, %esi
movq %r13, %rdi
.LEHB1:
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm@PLT
.LEHE1:
# Loop setup: %ebx = 10000000 iterations left, %rbp = address of the
# to_string temporary, %r12/%r15 = addresses of the two temporaries'
# in-object buffers, %r14 = the size limit checked before appending.
leaq 48(%rsp), %rbp
leaq 80(%rsp), %rax
movl $10000000, %ebx
movabsq $9223372036854775807, %r14
leaq 96(%rsp), %r12
movq %rax, 8(%rsp)
leaq 16(%rbp), %r15
jmp .L25
.p2align 4,,10
.p2align 3
# Taking over the appended-to string, case 1: its data pointer does not
# refer to its in-object buffer, so the pointer and the word at offset 16
# are copied across.
.L21:
movq %rcx, 80(%rsp)
movq 16(%rax), %rcx
movq %rcx, 96(%rsp)
# Copy the length, reset the source to an empty string that points at its
# own buffer, then append the temporary's bytes to str.
.L22:
movq 8(%rax), %rcx
movb $0, 16(%rax)
movq %r13, %rdi
movq %rcx, 88(%rsp)
movq %rdx, (%rax)
movq $0, 8(%rax)
movq 80(%rsp), %rsi
movq 88(%rsp), %rdx
.LEHB2:
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT
.LEHE2:
# Destroy both temporaries: operator delete is called only when the data
# pointer differs from the in-object buffer address.
movq 80(%rsp), %rdi
cmpq %r12, %rdi
je .L23
call _ZdlPv@PLT
.L23:
movq 48(%rsp), %rdi
cmpq %r15, %rdi
je .L24
call _ZdlPv@PLT
.L24:
subl $1, %ebx
je .L40
# Loop body entry: std::to_string(1) is a call to
# __to_xstring(result, vsnprintf, 16, "%d", 1).
.L25:
movq vsnprintf@GOTPCREL(%rip), %rsi
leaq .LC1(%rip), %rcx
movl $1, %r8d
movl $16, %edx
movq %rbp, %rdi
xorl %eax, %eax
.LEHB3:
call _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
.LEHE3:
# If the temporary's length already equals the limit in %r14, go throw
# length_error("basic_string::append").
cmpq %r14, 56(%rsp)
je .L41
# Append the one-character literal " " (.LC3) to the to_string temporary.
leaq .LC3(%rip), %rsi
movl $1, %edx
movq %rbp, %rdi
.LEHB4:
call _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT
.LEHE4:
# Taking over the returned string, case 2: its data pointer refers to its
# in-object buffer, so the 16 bytes are copied with one SSE load/store pair.
movq %r12, 80(%rsp)
movq (%rax), %rcx
leaq 16(%rax), %rdx
cmpq %rdx, %rcx
jne .L21
movdqu 16(%rax), %xmm0
movaps %xmm0, 96(%rsp)
jmp .L22
.p2align 4,,10
.p2align 3
# Loop finished: take the end timestamp and subtract the start value.
.L40:
call _ZNSt6chrono3_V212system_clock3nowEv@PLT
subq (%rsp), %rax
# Signed division of the difference by 1,000,000 via multiply-high and
# shift, then conversion to double and division by .LC4 (the double 1000.0).
movabsq $4835703278458516699, %rdx
leaq .LC5(%rip), %rsi
pxor %xmm0, %xmm0
leaq _ZSt4cout(%rip), %rdi
movq %rax, %rcx
imulq %rdx
sarq $63, %rcx
sarq $18, %rdx
subq %rcx, %rdx
cvtsi2sdq %rdx, %xmm0
movl $8, %edx
divsd .LC4(%rip), %xmm0
movsd %xmm0, (%rsp)
# std::cout << "Done in " << secs << "\n"
.LEHB5:
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
movsd (%rsp), %xmm0
leaq _ZSt4cout(%rip), %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
leaq .LC6(%rip), %rsi
movq %rax, %rdi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
.LEHE5:
# Destroy str (delete only if its data pointer is not the in-object buffer).
movq 16(%rsp), %rdi
addq $16, %r13
cmpq %r13, %rdi
je .L26
call _ZdlPv@PLT
# Return 0 after the stack-protector check.
.L26:
xorl %eax, %eax
movq 120(%rsp), %rbx
xorq %fs:40, %rbx
jne .L42
addq $136, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 56
popq %rbx
.cfi_def_cfa_offset 48
popq %rbp
.cfi_def_cfa_offset 40
popq %r12
.cfi_def_cfa_offset 32
popq %r13
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %r15
.cfi_def_cfa_offset 8
ret
.L41:
.cfi_restore_state
leaq .LC2(%rip), %rdi
.LEHB6:
call _ZSt20__throw_length_errorPKc@PLT
.LEHE6:
# Exception landing pads (.L34/.L35/.L36, referenced from the LSDA table):
# release whichever string buffers were separately allocated, then resume
# unwinding with the exception pointer saved in %rbx.
.L35:
movq %rax, %rbx
.L29:
movq 48(%rsp), %rdi
addq $16, %rbp
cmpq %rbp, %rdi
je .L31
call _ZdlPv@PLT
.L31:
movq 16(%rsp), %rdi
addq $16, %r13
cmpq %r13, %rdi
je .L32
call _ZdlPv@PLT
.L32:
movq %rbx, %rdi
.LEHB7:
call _Unwind_Resume@PLT
.LEHE7:
.L34:
movq %rax, %rbx
jmp .L31
.L36:
movq 8(%rsp), %rdx
movq 80(%rsp), %rdi
movq %rax, %rbx
addq $16, %rdx
cmpq %rdx, %rdi
je .L29
call _ZdlPv@PLT
jmp .L29
.L42:
call __stack_chk_fail@PLT
.cfi_endproc
.LFE1871:
.globl __gxx_personality_v0
.section .gcc_except_table,"a",@progbits
.LLSDA1871:
.byte 0xff
.byte 0xff
.byte 0x1
.uleb128 .LLSDACSE1871-.LLSDACSB1871
.LLSDACSB1871:
.uleb128 .LEHB0-.LFB1871
.uleb128 .LEHE0-.LEHB0
.uleb128 0
.uleb128 0
.uleb128 .LEHB1-.LFB1871
.uleb128 .LEHE1-.LEHB1
.uleb128 .L34-.LFB1871
.uleb128 0
.uleb128 .LEHB2-.LFB1871
.uleb128 .LEHE2-.LEHB2
.uleb128 .L36-.LFB1871
.uleb128 0
.uleb128 .LEHB3-.LFB1871
.uleb128 .LEHE3-.LEHB3
.uleb128 .L34-.LFB1871
.uleb128 0
.uleb128 .LEHB4-.LFB1871
.uleb128 .LEHE4-.LEHB4
.uleb128 .L35-.LFB1871
.uleb128 0
.uleb128 .LEHB5-.LFB1871
.uleb128 .LEHE5-.LEHB5
.uleb128 .L34-.LFB1871
.uleb128 0
.uleb128 .LEHB6-.LFB1871
.uleb128 .LEHE6-.LEHB6
.uleb128 .L35-.LFB1871
.uleb128 0
.uleb128 .LEHB7-.LFB1871
.uleb128 .LEHE7-.LEHB7
.uleb128 0
.uleb128 0
.LLSDACSE1871:
.section .text.startup
.size main, .-main
.p2align 4,,15
.type _GLOBAL__sub_I_main, @function
# Static-initialisation routine for this translation unit (its address is
# placed in .init_array below): constructs the std::ios_base::Init object
# _ZStL8__ioinit and registers its destructor with __cxa_atexit.
_GLOBAL__sub_I_main:
.LFB2369:
.cfi_startproc
leaq _ZStL8__ioinit(%rip), %rdi
subq $8, %rsp
.cfi_def_cfa_offset 16
# std::ios_base::Init::Init() on _ZStL8__ioinit
call _ZNSt8ios_base4InitC1Ev@PLT
# Tail call: __cxa_atexit(&std::ios_base::Init::~Init, &_ZStL8__ioinit, &__dso_handle)
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
addq $8, %rsp
.cfi_def_cfa_offset 8
jmp __cxa_atexit@PLT
.cfi_endproc
.LFE2369:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC4:
.long 0
.long 1083129856
.hidden DW.ref.__gxx_personality_v0
.weak DW.ref.__gxx_personality_v0
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
.align 8
.type DW.ref.__gxx_personality_v0, @object
.size DW.ref.__gxx_personality_v0, 8
DW.ref.__gxx_personality_v0:
.quad __gxx_personality_v0
.hidden __dso_handle
.ident "GCC: (Ubuntu 7.3.0-16ubuntu3) 7.3.0"
.section .note.GNU-stack,"",@progbits
.file "tester.cpp"
.text
.p2align 4,,15
.def ___tcf_0; .scl 3; .type 32; .endef
# Exit-time cleanup thunk (registered through _atexit by __GLOBAL__sub_I_main):
# loads the address of __ZStL8__ioinit into %ecx and tail-jumps to
# std::ios_base::Init::~Init().
___tcf_0:
LFB2556:
.cfi_startproc
movl $__ZStL8__ioinit, %ecx
jmp __ZNSt8ios_base4InitD1Ev
.cfi_endproc
LFE2556:
.section .rdata,"dr"
.align 4
LC0:
.ascii "basic_string::_M_construct null not valid\0"
.text
.align 2
.p2align 4,,15
.def __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29; .scl 3; .type 32; .endef
# 32-bit compiler-specialised (.isra) clone of
# std::__cxx11::basic_string<char>::_M_construct<char*>(first, last, forward_iterator_tag).
# On entry: %ecx = string object; first and last are on the stack and are
# loaded into %edi and %ebx. The callee pops its 8 bytes of stack arguments (ret $8).
__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29:
LFB2587:
.cfi_startproc
pushl %edi
.cfi_def_cfa_offset 8
.cfi_offset 7, -8
pushl %esi
.cfi_def_cfa_offset 12
.cfi_offset 6, -12
movl %ecx, %esi
pushl %ebx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
subl $32, %esp
.cfi_def_cfa_offset 48
movl 48(%esp), %edi
movl 52(%esp), %ebx
# A null first with a non-null last throws
# logic_error("basic_string::_M_construct null not valid").
testl %edi, %edi
jne L5
testl %ebx, %ebx
je L5
movl $LC0, (%esp)
call __ZSt19__throw_logic_errorPKc
.p2align 4,,10
# %ebx = last - first = number of bytes to copy; above 15 goes to L22.
L5:
subl %edi, %ebx
cmpl $15, %ebx
movl %ebx, 28(%esp)
ja L22
movl (%esi), %edx
# Length 1 is copied inline at L23, length 0 skips the copy, anything else
# is copied with memcpy at L6.
cmpl $1, %ebx
movl %edx, %eax
je L23
testl %ebx, %ebx
jne L6
# Common tail: store the length at offset 4 of the object and write a NUL
# byte at buffer[length].
L8:
movl 28(%esp), %eax
movl %eax, 4(%esi)
movb $0, (%edx,%eax)
addl $32, %esp
.cfi_remember_state
.cfi_def_cfa_offset 16
popl %ebx
.cfi_restore 3
.cfi_def_cfa_offset 12
popl %esi
.cfi_restore 6
.cfi_def_cfa_offset 8
popl %edi
.cfi_restore 7
.cfi_def_cfa_offset 4
ret $8
.p2align 4,,10
# Long input: call _M_create with the length passed by address, then store
# the returned pointer at offset 0 of the object and the value left in
# 28(%esp) at offset 8.
L22:
.cfi_restore_state
leal 28(%esp), %eax
movl $0, 4(%esp)
movl %esi, %ecx
movl %eax, (%esp)
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj
.cfi_def_cfa_offset 40
subl $8, %esp
.cfi_def_cfa_offset 48
movl %eax, (%esi)
movl 28(%esp), %edx
movl %edx, 8(%esi)
# memcpy(dest = %eax, src = %edi, n = %ebx), then rejoin the common tail.
L6:
movl %ebx, 8(%esp)
movl %edi, 4(%esp)
movl %eax, (%esp)
call _memcpy
movl (%esi), %edx
jmp L8
.p2align 4,,10
# Single-byte copy.
L23:
movzbl (%edi), %eax
movb %al, (%edx)
movl (%esi), %edx
jmp L8
.cfi_endproc
LFE2587:
.def __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21; .scl 3; .type 32; .endef
.set __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21,__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29
.section .text$_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z,"x"
.linkonce discard
.p2align 4,,15
.globl __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z
.def __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z; .scl 2; .type 32; .endef
# 32-bit __gnu_cxx::__to_xstring<std::string, char>: variadic helper behind std::to_string.
# Stack arguments: 8(%ebp) = result string slot, 12(%ebp) = conversion
# function pointer, 16(%ebp) = scratch buffer size, 20(%ebp) = format string,
# 24(%ebp) onwards = the variadic values.
__ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z:
LFB2177:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
pushl %esi
pushl %ebx
subl $16, %esp
.cfi_offset 6, -12
.cfi_offset 3, -16
movl 16(%ebp), %edx
movl 8(%ebp), %esi
# Carve ((size + 30) & -16) bytes off the stack for the scratch buffer,
# calling ___chkstk_ms first; the 16-byte aligned buffer address ends up in %ebx.
leal 30(%edx), %eax
andl $-16, %eax
call ___chkstk_ms
subl %eax, %esp
# Build the outgoing arguments: (buffer, size, format, pointer to the first
# variadic value at 24(%ebp)).
leal 24(%ebp), %eax
leal 31(%esp), %ebx
movl %edx, 4(%esp)
movl %eax, 12(%esp)
movl 20(%ebp), %eax
andl $-16, %ebx
movl %ebx, (%esp)
movl %eax, 8(%esp)
# Indirect call through the function pointer argument.
call *12(%ebp)
# Point the result object at its own storage at offset 8, then construct it
# from the range [buffer, buffer + returned length).
leal 8(%esi), %edx
addl %ebx, %eax
movl %esi, %ecx
movl %edx, (%esi)
movl %eax, 4(%esp)
movl %ebx, (%esp)
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29
subl $8, %esp
leal -8(%ebp), %esp
# The result pointer is returned in %eax.
movl %esi, %eax
popl %ebx
.cfi_restore 3
popl %esi
.cfi_restore 6
popl %ebp
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE2177:
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
LC1:
.ascii "\0"
LC2:
.ascii "%d\0"
LC3:
.ascii "basic_string::append\0"
LC4:
.ascii " \0"
.def ___divdi3; .scl 2; .type 32; .endef
LC6:
.ascii "Done in \0"
LC7:
.ascii "\12\0"
.section .text.startup,"x"
.p2align 4,,15
.globl _main
.def _main; .scl 2; .type 32; .endef
# 32-bit x86 code generated for main() of tester.cpp (MinGW.org GCC 6.3.0, per the .ident line).
# Stack objects: str at -96(%ebp), the to_string temporary at -72(%ebp), and
# the temporary that receives the concatenation result at -48(%ebp).
_main:
LFB2111:
.cfi_startproc
.cfi_personality 0,___gxx_personality_v0
.cfi_lsda 0,LLSDA2111
# Prologue: realign the stack to 16 bytes and save the callee-saved registers.
leal 4(%esp), %ecx
.cfi_def_cfa 1, 0
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
.cfi_escape 0x10,0x5,0x2,0x75,0
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
pushl %ecx
.cfi_escape 0xf,0x3,0x75,0x70,0x6
.cfi_escape 0x10,0x7,0x2,0x75,0x7c
.cfi_escape 0x10,0x6,0x2,0x75,0x78
.cfi_escape 0x10,0x3,0x2,0x75,0x74
subl $152, %esp
call ___main
# Start timestamp: system_clock::now() returns a 64-bit value in %edx:%eax,
# kept at -136(%ebp) (low half) and -132(%ebp) (high half).
call __ZNSt6chrono3_V212system_clock3nowEv
# Construct str from the empty literal LC1 (first == last).
leal -96(%ebp), %ecx
movl %eax, -136(%ebp)
leal -88(%ebp), %eax
movl $LC1, 4(%esp)
movl $LC1, (%esp)
movl %edx, -132(%ebp)
movl %eax, -96(%ebp)
LEHB0:
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21
LEHE0:
# str.reserve(20000000)
leal -96(%ebp), %ecx
subl $8, %esp
movl $20000000, (%esp)
LEHB1:
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj
LEHE1:
# Loop setup: %edi = 10000000 iterations left, %esi = address of the
# to_string temporary, %ebx = address of the second temporary's in-object buffer.
subl $4, %esp
movl $10000000, %edi
leal -72(%ebp), %esi
leal -40(%ebp), %ebx
jmp L32
.p2align 4,,10
# Taking over the appended-to string, case 1: its data pointer does not
# refer to its in-object buffer, so the pointer and the word at offset 8
# are copied across.
L28:
movl %ecx, -48(%ebp)
movl 8(%eax), %ecx
movl %ecx, -40(%ebp)
# Copy the length, reset the source to an empty string that points at its
# own buffer, then append the temporary's bytes to str.
L29:
movl 4(%eax), %ecx
movb $0, 8(%eax)
movl %ecx, -44(%ebp)
movl %edx, (%eax)
leal -96(%ebp), %ecx
movl $0, 4(%eax)
movl -44(%ebp), %eax
movl %eax, 4(%esp)
movl -48(%ebp), %eax
movl %eax, (%esp)
LEHB2:
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj
LEHE2:
# Destroy both temporaries: operator delete is called only when the data
# pointer differs from the in-object buffer address.
movl -48(%ebp), %eax
subl $8, %esp
cmpl %ebx, %eax
je L30
movl %eax, (%esp)
call __ZdlPv
L30:
movl -72(%ebp), %eax
leal -64(%ebp), %edx
cmpl %edx, %eax
je L31
movl %eax, (%esp)
call __ZdlPv
L31:
subl $1, %edi
je L46
# Loop body entry: std::to_string(1) is a call to
# __to_xstring(result, _vsnprintf, 16, "%d", 1).
L32:
movl $1, 16(%esp)
movl $LC2, 12(%esp)
movl $16, 8(%esp)
movl $_vsnprintf, 4(%esp)
movl %esi, (%esp)
LEHB3:
call __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z
LEHE3:
# If the temporary's length already equals 2147483647, go throw
# length_error("basic_string::append").
cmpl $2147483647, -68(%ebp)
je L47
# Append the one-character literal " " (LC4) to the to_string temporary.
movl $1, 4(%esp)
movl $LC4, (%esp)
movl %esi, %ecx
LEHB4:
call __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj
LEHE4:
# Taking over the returned string, case 2: its data pointer refers to its
# in-object buffer, so the 16 bytes are copied as four 32-bit words through
# %ecx and stack spill slots (no SSE registers are used here).
movl %ebx, -48(%ebp)
movl (%eax), %ecx
leal 8(%eax), %edx
subl $8, %esp
cmpl %edx, %ecx
jne L28
movl 12(%eax), %ecx
movl %ecx, -120(%ebp)
movl 16(%eax), %ecx
movl %ecx, -124(%ebp)
movl 20(%eax), %ecx
movl %ecx, -128(%ebp)
movl 8(%eax), %ecx
movl %ecx, -40(%ebp)
movl -120(%ebp), %ecx
movl %ecx, -36(%ebp)
movl -124(%ebp), %ecx
movl %ecx, -32(%ebp)
movl -128(%ebp), %ecx
movl %ecx, -28(%ebp)
jmp L29
.p2align 4,,10
# Loop finished: take the end timestamp, do a 64-bit subtract of the start
# value, divide by 1000000 through ___divdi3, convert to floating point on
# the x87 stack and divide by LC5 (the float 1000.0).
L46:
call __ZNSt6chrono3_V212system_clock3nowEv
subl -136(%ebp), %eax
movl $1000000, 8(%esp)
sbbl -132(%ebp), %edx
movl $0, 12(%esp)
movl %eax, (%esp)
movl %edx, 4(%esp)
call ___divdi3
movl %eax, -120(%ebp)
movl %edx, -116(%ebp)
fildq -120(%ebp)
movl $8, 8(%esp)
movl $LC6, 4(%esp)
movl $__ZSt4cout, (%esp)
fdivs LC5
fstpl -120(%ebp)
# std::cout << "Done in " << secs << "\n"
LEHB5:
call __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
fldl -120(%ebp)
movl $__ZSt4cout, %ecx
fstpl (%esp)
call __ZNSo9_M_insertIdEERSoT_
subl $8, %esp
movl $LC7, 4(%esp)
movl %eax, (%esp)
call __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
LEHE5:
# Destroy str (delete only if its data pointer is not the in-object buffer).
movl -96(%ebp), %eax
leal -88(%ebp), %edi
cmpl %edi, %eax
je L43
movl %eax, (%esp)
call __ZdlPv
# Epilogue: return 0.
L43:
leal -16(%ebp), %esp
xorl %eax, %eax
popl %ecx
.cfi_remember_state
.cfi_restore 1
.cfi_def_cfa 1, 0
popl %ebx
.cfi_restore 3
popl %esi
.cfi_restore 6
popl %edi
.cfi_restore 7
popl %ebp
.cfi_restore 5
leal -4(%ecx), %esp
.cfi_def_cfa 4, 4
ret
L47:
.cfi_restore_state
movl $LC3, (%esp)
LEHB6:
call __ZSt20__throw_length_errorPKc
LEHE6:
# Exception landing pads (L40/L41/L42, referenced from the LSDA table):
# release whichever string buffers were separately allocated, then resume
# unwinding with the saved exception pointer.
L41:
movl %eax, %ebx
L36:
movl -72(%ebp), %eax
leal -64(%ebp), %edx
cmpl %edx, %eax
je L38
movl %eax, (%esp)
call __ZdlPv
L38:
movl -96(%ebp), %eax
leal -88(%ebp), %edi
cmpl %edi, %eax
je L39
movl %eax, (%esp)
call __ZdlPv
L39:
movl %ebx, (%esp)
LEHB7:
call __Unwind_Resume
LEHE7:
L42:
movl %eax, %esi
movl -48(%ebp), %eax
cmpl %ebx, %eax
je L35
movl %eax, (%esp)
call __ZdlPv
L35:
movl %esi, %ebx
jmp L36
L40:
movl %eax, %ebx
jmp L38
.cfi_endproc
LFE2111:
.def ___gxx_personality_v0; .scl 2; .type 32; .endef
.section .gcc_except_table,"w"
LLSDA2111:
.byte 0xff
.byte 0xff
.byte 0x1
.uleb128 LLSDACSE2111-LLSDACSB2111
LLSDACSB2111:
.uleb128 LEHB0-LFB2111
.uleb128 LEHE0-LEHB0
.uleb128 0
.uleb128 0
.uleb128 LEHB1-LFB2111
.uleb128 LEHE1-LEHB1
.uleb128 L40-LFB2111
.uleb128 0
.uleb128 LEHB2-LFB2111
.uleb128 LEHE2-LEHB2
.uleb128 L42-LFB2111
.uleb128 0
.uleb128 LEHB3-LFB2111
.uleb128 LEHE3-LEHB3
.uleb128 L40-LFB2111
.uleb128 0
.uleb128 LEHB4-LFB2111
.uleb128 LEHE4-LEHB4
.uleb128 L41-LFB2111
.uleb128 0
.uleb128 LEHB5-LFB2111
.uleb128 LEHE5-LEHB5
.uleb128 L40-LFB2111
.uleb128 0
.uleb128 LEHB6-LFB2111
.uleb128 LEHE6-LEHB6
.uleb128 L41-LFB2111
.uleb128 0
.uleb128 LEHB7-LFB2111
.uleb128 LEHE7-LEHB7
.uleb128 0
.uleb128 0
LLSDACSE2111:
.section .text.startup,"x"
.p2align 4,,15
.def __GLOBAL__sub_I_main; .scl 3; .type 32; .endef
# Static-initialisation routine for this translation unit (its address is
# placed in .ctors below): constructs the std::ios_base::Init object
# __ZStL8__ioinit and registers ___tcf_0 with _atexit.
__GLOBAL__sub_I_main:
LFB2557:
.cfi_startproc
subl $28, %esp
.cfi_def_cfa_offset 32
# std::ios_base::Init::Init() with the object address in %ecx
movl $__ZStL8__ioinit, %ecx
call __ZNSt8ios_base4InitC1Ev
# atexit(___tcf_0)
movl $___tcf_0, (%esp)
call _atexit
addl $28, %esp
.cfi_def_cfa_offset 4
ret
.cfi_endproc
LFE2557:
.section .ctors,"w"
.align 4
.long __GLOBAL__sub_I_main
.lcomm __ZStL8__ioinit,1,1
.section .rdata,"dr"
.align 4
LC5:
.long 1148846080
.ident "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
.def __ZNSt8ios_base4InitD1Ev; .scl 2; .type 32; .endef
.def __ZSt19__throw_logic_errorPKc; .scl 2; .type 32; .endef
.def __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj; .scl 2; .type 32; .endef
.def _memcpy; .scl 2; .type 32; .endef
.def __ZNSt6chrono3_V212system_clock3nowEv; .scl 2; .type 32; .endef
.def __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj; .scl 2; .type 32; .endef
.def __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj; .scl 2; .type 32; .endef
.def __ZdlPv; .scl 2; .type 32; .endef
.def _vsnprintf; .scl 2; .type 32; .endef
.def __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i; .scl 2; .type 32; .endef
.def __ZNSo9_M_insertIdEERSoT_; .scl 2; .type 32; .endef
.def __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc; .scl 2; .type 32; .endef
.def __ZSt20__throw_length_errorPKc; .scl 2; .type 32; .endef
.def __Unwind_Resume; .scl 2; .type 32; .endef
.def __ZNSt8ios_base4InitC1Ev; .scl 2; .type 32; .endef
.def _atexit; .scl 2; .type 32; .endef
答案 0 :(得分:3)
快速查看反汇编可以发现:Windows版本使用movl
(长字,即32位传送),而Linux版本使用movq
(四字,即64位传送)以及SSE寄存器xmm。
我敢打赌,您在Linux上是针对x86-64编译的,而在Windows上是针对32位x86编译的。
x86-64的基础指令集包含SSE2扩展,而32位x86不包含,因此MinGW默认不使用SSE。
在这种情况下,在Windows上改用64位工具链进行构建,应该可以获得相近的性能。或者,您也可以为32位构建启用SSE(如果我没记错的话,使用-msse2
编译器选项)。
答案 1 :(得分:2)
mingw.org的实现似乎比Linux、Visual Studio或mingw-w64.org的实现效率低得多。
>g++ --version
g++ (MinGW.org GCC-6.3.0-1) 6.3.0
Done in 24.808
>g++ --version
g++ (i686-posix-dwarf-rev2, Built by MinGW-W64 project) 6.3.0
Done in 0.679
答案 2 :(得分:1)
使用MSYS2 MinGW64测试:
Access-Accept
g++ --version
g++.exe (Rev2, Built by MSYS2 project) 7.3.0
g++.exe -Wall -O3 -mtune=native -fno-exceptions -fno-rtti -c main.cpp -o main.o
g++.exe -o test.exe main.o -s
环境:Windows 10 x64;处理器:Intel Core i5-6300U,2.4GHz;内存:16GB DDR4
无论如何,MinGW使用的是msvcrt.dll而不是GNU libc(msvcrt.dll是Windows自带的那个CRT,不是通用CRT或Visual Studio的CRT),因此根据我的经验,速度差距可能来自C标准库。
P.S. 我对代码做了一些修改(使用相同的编译器标志):
Done in 0.547
#include <chrono>
#include <cstddef>
#include <iostream>
#include <string>
#ifdef _WIN32
#include <windows.h>
static std::size_t page_size() noexcept {
::SYSTEM_INFO si;
::GetSystemInfo(&si);
return si.dwPageSize;
}
#else
#include <sys/types.h>
#include <unistd.h>
/// Returns the system memory page size in bytes (POSIX build).
///
/// Queries sysconf(_SC_PAGESIZE). sysconf signals failure by returning -1;
/// casting that straight to std::size_t would produce a huge bogus page size
/// and corrupt any alignment mask derived from it, so a non-positive result
/// falls back to 4096 bytes.
static std::size_t page_size() noexcept {
    constexpr std::size_t kFallbackPageSize = 4096;
    const long reported = ::sysconf(_SC_PAGESIZE);
    if (reported <= 0) {
        return kFallbackPageSize;
    }
    return static_cast<std::size_t>(reported);
}
#endif // _WIN32
/// Benchmark driver: appends the decimal text of 1 followed by a space to a
/// pre-reserved string n times, then prints the elapsed time in seconds.
int main(int argc, char const *argv[]) {
    using clock_type = std::chrono::high_resolution_clock;
    const clock_type::time_point started = clock_type::now();

    const std::size_t n = 10000000;
    // Round the 2 * n byte estimate up to a whole number of pages.
    const std::size_t page_mask = page_size() - 1;
    const std::size_t buff_size = ((n * 2) + page_mask) & ~page_mask;

    std::string str;
    str.reserve(buff_size);

    // Convert once, outside the loop; every iteration appends the same text.
    const std::string to_append = std::to_string(1);
    std::size_t remaining = n;
    while (remaining-- > 0) {
        str += to_append;
        str += ' ';
    }

    const clock_type::time_point done = clock_type::now();
    // Truncate to whole milliseconds first, then scale to seconds.
    const auto elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(done - started).count();
    const double secs = static_cast<double>(elapsed_ms) / 1000;
    std::cout << "Done in " << secs << "\n";
    return 0;
}
main函数的汇编输出:
Done in 0.046
答案 3 :(得分:0)
(仅供比例参考)在Windows上用Visual Studio C++对比Release
目标与Debug
目标:默认情况下,Debug目标的编译命令行不做优化,而Release目标的编译命令行带有 /O2 优化、/Oi(“启用内部函数”)和 /GL(“全程序优化”)。在我的工作站上,您的代码在Debug x64与Release x64下的耗时分别为:
Debug
:70秒。
Release
:0.27秒。
您使用的是MinGW(我并不熟悉)进行构建。不过快速搜索了一下,似乎有一些关于Debug / Release模式的讨论……而且MinGW看起来也有与 /O2 优化、/Oi(“启用内部函数”)和 /Og(“启用全局优化”)等价的标志。
-
使用这3个标志(x64目标)进行编译,并与VS Release x64基准进行比较。无论如何,这是MS针对Release目标的默认编译优化。
-
测试环境: HP 8100,Windows 10 Pro 64位,CPU i7 870、16 GB DDR3 RAM,Visual Studio 2017,目标:Debug x64 / Release x64
答案 4 :(得分:-1)
我在Windows上使用MinGW 4.8.0
尝试了您的代码,耗时约20秒。当我把字符串拼接改为使用std::stringstream后,耗时降到了0.5秒:
...
// Accumulate the output in a string stream instead of concatenating strings.
std::stringstream ss;
for (int i = 0; i < n; ++i) {
// Original approach, kept commented out for comparison:
//str += std::to_string(a) + " ";
ss << a << " ";
}
// Copy the stream's accumulated contents into str.
str = ss.str();
...