Windows上的std::string连接缓慢

时间:2018-07-10 09:17:05

标签: c++ string optimization string-concatenation

我有一个程序,需要将许多字符串连接在一起(更确切地说,是把整数转换成字符串后再连接)。在我的Ubuntu计算机(运行g++ 7.3.0)上,代码运行了1.5秒。但是代码也必须在Windows上运行(使用MinGW,g++ 6.3.0),在那里需要15秒才能完成。此外,Ubuntu运行在一台慢得多的笔记本电脑上(2.30GHz i7-4712MQ CPU),而Windows机器使用的是4.20GHz i7-7700K CPU。

用于重现上述耗时的代码如下所示。我使用g++ tester.cpp -O2 -o tester(在Windows上为tester.exe)编译代码。

#include <iostream>
#include <chrono>

// Benchmark reproducer: appends the decimal text of `a` followed by a space
// to a single std::string n times, then prints the elapsed time in seconds.
int main(int argc, char const *argv[]) {

    auto started = std::chrono::high_resolution_clock::now();
    std::string str = "";
    const int n = 10000000;
    // Each iteration appends 2 characters ("1" and " "), so reserve 2 * n up front.
    str.reserve(2 * n);
    int a = 1;

    for (int i = 0; i < n; ++i) {
        // std::to_string(a) and the result of operator+ are both temporary
        // std::string objects created and destroyed on every iteration.
        str += std::to_string(a) + " ";
    }

    auto done = std::chrono::high_resolution_clock::now();
    // Elapsed time is truncated to whole milliseconds, then converted to seconds.
    double secs = (double) std::chrono::duration_cast<std::chrono::milliseconds>(done-started).count() / 1000;
    std::cout << "Done in " << secs << "\n";
    return 0;
}

有人知道是什么原因导致了如此巨大的性能差距吗?

反汇编如下:

Ubuntu:

.file   "tester.cpp"


.text
    .align 2
    .p2align 4,,15
    .type   _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, @function
_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19:
.LFB2389:
    .cfi_startproc
    pushq   %r12
    .cfi_def_cfa_offset 16
    .cfi_offset 12, -16
    pushq   %rbp
    .cfi_def_cfa_offset 24
    .cfi_offset 6, -24
    movq    %rsi, %r12
    pushq   %rbx
    .cfi_def_cfa_offset 32
    .cfi_offset 3, -32
    movq    %rdx, %rbx
    movq    %rdi, %rbp
    subq    %rsi, %rbx
    subq    $16, %rsp
    .cfi_def_cfa_offset 48
    movq    %fs:40, %rax
    movq    %rax, 8(%rsp)
    xorl    %eax, %eax
    cmpq    $15, %rbx
    movq    %rbx, (%rsp)
    ja  .L12
    movq    (%rdi), %rdx
    cmpq    $1, %rbx
    movq    %rdx, %rax
    jne .L4
    movzbl  (%rsi), %eax
    movb    %al, (%rdx)
    movq    (%rdi), %rdx
.L5:
    movq    (%rsp), %rax
    movq    %rax, 8(%rbp)
    movb    $0, (%rdx,%rax)
    movq    8(%rsp), %rax
    xorq    %fs:40, %rax
    jne .L13
    addq    $16, %rsp
    .cfi_remember_state
    .cfi_def_cfa_offset 32
    popq    %rbx
    .cfi_def_cfa_offset 24
    popq    %rbp
    .cfi_def_cfa_offset 16
    popq    %r12
    .cfi_def_cfa_offset 8
    ret
.L12:
    .cfi_restore_state
    xorl    %edx, %edx
    movq    %rsp, %rsi
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm@PLT
    movq    (%rsp), %rdx
    movq    %rax, 0(%rbp)
    movq    %rdx, 16(%rbp)
.L3:
    movq    %rbx, %rdx
    movq    %r12, %rsi
    movq    %rax, %rdi
    call    memcpy@PLT
    movq    0(%rbp), %rdx
    jmp .L5
.L4:
    testq   %rbx, %rbx
    je  .L5
    jmp .L3
.L13:
    call    __stack_chk_fail@PLT
    .cfi_endproc
.LFE2389:
    .size   _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, .-_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
    .set    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23,_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
    .section    .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,"axG",@progbits,_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,comdat
    .p2align 4,,15
    .weak   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
    .type   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, @function
_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z:
.LFB1953:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsi, %r10
    movq    %rdx, %rsi
    movq    %rcx, %rdx
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %r12
    pushq   %rbx
    .cfi_offset 12, -24
    .cfi_offset 3, -32
    movq    %rdi, %r12
    subq    $208, %rsp
    testb   %al, %al
    movq    %r8, -160(%rbp)
    movq    %r9, -152(%rbp)
    je  .L15
    movaps  %xmm0, -144(%rbp)
    movaps  %xmm1, -128(%rbp)
    movaps  %xmm2, -112(%rbp)
    movaps  %xmm3, -96(%rbp)
    movaps  %xmm4, -80(%rbp)
    movaps  %xmm5, -64(%rbp)
    movaps  %xmm6, -48(%rbp)
    movaps  %xmm7, -32(%rbp)
.L15:
    movq    %fs:40, %rax
    movq    %rax, -200(%rbp)
    xorl    %eax, %eax
    leaq    30(%rsi), %rax
    leaq    -224(%rbp), %rcx
    andq    $-16, %rax
    movl    $32, -224(%rbp)
    movl    $48, -220(%rbp)
    subq    %rax, %rsp
    leaq    16(%rbp), %rax
    leaq    15(%rsp), %rbx
    movq    %rax, -216(%rbp)
    leaq    -192(%rbp), %rax
    andq    $-16, %rbx
    movq    %rbx, %rdi
    movq    %rax, -208(%rbp)
    call    *%r10
    leaq    16(%r12), %rdx
    movq    %r12, %rdi
    movq    %rbx, %rsi
    movq    %rdx, (%r12)
    movslq  %eax, %rdx
    addq    %rbx, %rdx
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23
    movq    -200(%rbp), %rdi
    xorq    %fs:40, %rdi
    movq    %r12, %rax
    jne .L18
    leaq    -16(%rbp), %rsp
    popq    %rbx
    popq    %r12
    popq    %rbp
    .cfi_remember_state
    .cfi_def_cfa 7, 8
    ret
.L18:
    .cfi_restore_state
    call    __stack_chk_fail@PLT
    .cfi_endproc
.LFE1953:
    .size   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, .-_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string ""
.LC1:
    .string "%d"
.LC2:
    .string "basic_string::append"
.LC3:
    .string " "
.LC5:
    .string "Done in "
.LC6:
    .string "\n"
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
main:
.LFB1871:
    .cfi_startproc
    .cfi_personality 0x9b,DW.ref.__gxx_personality_v0
    .cfi_lsda 0x1b,.LLSDA1871
    pushq   %r15
    .cfi_def_cfa_offset 16
    .cfi_offset 15, -16
    pushq   %r14
    .cfi_def_cfa_offset 24
    .cfi_offset 14, -24
    pushq   %r13
    .cfi_def_cfa_offset 32
    .cfi_offset 13, -32
    pushq   %r12
    .cfi_def_cfa_offset 40
    .cfi_offset 12, -40
    pushq   %rbp
    .cfi_def_cfa_offset 48
    .cfi_offset 6, -48
    pushq   %rbx
    .cfi_def_cfa_offset 56
    .cfi_offset 3, -56
    subq    $136, %rsp
    .cfi_def_cfa_offset 192
    leaq    16(%rsp), %r13
    movq    %fs:40, %rax
    movq    %rax, 120(%rsp)
    xorl    %eax, %eax
    call    _ZNSt6chrono3_V212system_clock3nowEv@PLT
    leaq    .LC0(%rip), %rdx
    movq    %rax, (%rsp)
    leaq    16(%r13), %rax
    movq    %r13, %rdi
    movq    %rdx, %rsi
    movq    %rax, 16(%rsp)
.LEHB0:
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19
.LEHE0:
    movl    $20000000, %esi
    movq    %r13, %rdi
.LEHB1:
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm@PLT
.LEHE1:
    leaq    48(%rsp), %rbp
    leaq    80(%rsp), %rax
    movl    $10000000, %ebx
    movabsq $9223372036854775807, %r14
    leaq    96(%rsp), %r12
    movq    %rax, 8(%rsp)
    leaq    16(%rbp), %r15
    jmp .L25
    .p2align 4,,10
    .p2align 3
.L21:
    movq    %rcx, 80(%rsp)
    movq    16(%rax), %rcx
    movq    %rcx, 96(%rsp)
.L22:
    movq    8(%rax), %rcx
    movb    $0, 16(%rax)
    movq    %r13, %rdi
    movq    %rcx, 88(%rsp)
    movq    %rdx, (%rax)
    movq    $0, 8(%rax)
    movq    80(%rsp), %rsi
    movq    88(%rsp), %rdx
.LEHB2:
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT
.LEHE2:
    movq    80(%rsp), %rdi
    cmpq    %r12, %rdi
    je  .L23
    call    _ZdlPv@PLT
.L23:
    movq    48(%rsp), %rdi
    cmpq    %r15, %rdi
    je  .L24
    call    _ZdlPv@PLT
.L24:
    subl    $1, %ebx
    je  .L40
.L25:
    movq    vsnprintf@GOTPCREL(%rip), %rsi
    leaq    .LC1(%rip), %rcx
    movl    $1, %r8d
    movl    $16, %edx
    movq    %rbp, %rdi
    xorl    %eax, %eax
.LEHB3:
    call    _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
.LEHE3:
    cmpq    %r14, 56(%rsp)
    je  .L41
    leaq    .LC3(%rip), %rsi
    movl    $1, %edx
    movq    %rbp, %rdi
.LEHB4:
    call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT
.LEHE4:
    movq    %r12, 80(%rsp)
    movq    (%rax), %rcx
    leaq    16(%rax), %rdx
    cmpq    %rdx, %rcx
    jne .L21
    movdqu  16(%rax), %xmm0
    movaps  %xmm0, 96(%rsp)
    jmp .L22
    .p2align 4,,10
    .p2align 3
.L40:
    call    _ZNSt6chrono3_V212system_clock3nowEv@PLT
    subq    (%rsp), %rax
    movabsq $4835703278458516699, %rdx
    leaq    .LC5(%rip), %rsi
    pxor    %xmm0, %xmm0
    leaq    _ZSt4cout(%rip), %rdi
    movq    %rax, %rcx
    imulq   %rdx
    sarq    $63, %rcx
    sarq    $18, %rdx
    subq    %rcx, %rdx
    cvtsi2sdq   %rdx, %xmm0
    movl    $8, %edx
    divsd   .LC4(%rip), %xmm0
    movsd   %xmm0, (%rsp)
.LEHB5:
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
    movsd   (%rsp), %xmm0
    leaq    _ZSt4cout(%rip), %rdi
    call    _ZNSo9_M_insertIdEERSoT_@PLT
    leaq    .LC6(%rip), %rsi
    movq    %rax, %rdi
    call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
.LEHE5:
    movq    16(%rsp), %rdi
    addq    $16, %r13
    cmpq    %r13, %rdi
    je  .L26
    call    _ZdlPv@PLT
.L26:
    xorl    %eax, %eax
    movq    120(%rsp), %rbx
    xorq    %fs:40, %rbx
    jne .L42
    addq    $136, %rsp
    .cfi_remember_state
    .cfi_def_cfa_offset 56
    popq    %rbx
    .cfi_def_cfa_offset 48
    popq    %rbp
    .cfi_def_cfa_offset 40
    popq    %r12
    .cfi_def_cfa_offset 32
    popq    %r13
    .cfi_def_cfa_offset 24
    popq    %r14
    .cfi_def_cfa_offset 16
    popq    %r15
    .cfi_def_cfa_offset 8
    ret
.L41:
    .cfi_restore_state
    leaq    .LC2(%rip), %rdi
.LEHB6:
    call    _ZSt20__throw_length_errorPKc@PLT
.LEHE6:
.L35:
    movq    %rax, %rbx
.L29:
    movq    48(%rsp), %rdi
    addq    $16, %rbp
    cmpq    %rbp, %rdi
    je  .L31
    call    _ZdlPv@PLT
.L31:
    movq    16(%rsp), %rdi
    addq    $16, %r13
    cmpq    %r13, %rdi
    je  .L32
    call    _ZdlPv@PLT
.L32:
    movq    %rbx, %rdi
.LEHB7:
    call    _Unwind_Resume@PLT
.LEHE7:
.L34:
    movq    %rax, %rbx
    jmp .L31
.L36:
    movq    8(%rsp), %rdx
    movq    80(%rsp), %rdi
    movq    %rax, %rbx
    addq    $16, %rdx
    cmpq    %rdx, %rdi
    je  .L29
    call    _ZdlPv@PLT
    jmp .L29
.L42:
    call    __stack_chk_fail@PLT
    .cfi_endproc
.LFE1871:
    .globl  __gxx_personality_v0
    .section    .gcc_except_table,"a",@progbits
.LLSDA1871:
    .byte   0xff
    .byte   0xff
    .byte   0x1
    .uleb128 .LLSDACSE1871-.LLSDACSB1871
.LLSDACSB1871:
    .uleb128 .LEHB0-.LFB1871
    .uleb128 .LEHE0-.LEHB0
    .uleb128 0
    .uleb128 0
    .uleb128 .LEHB1-.LFB1871
    .uleb128 .LEHE1-.LEHB1
    .uleb128 .L34-.LFB1871
    .uleb128 0
    .uleb128 .LEHB2-.LFB1871
    .uleb128 .LEHE2-.LEHB2
    .uleb128 .L36-.LFB1871
    .uleb128 0
    .uleb128 .LEHB3-.LFB1871
    .uleb128 .LEHE3-.LEHB3
    .uleb128 .L34-.LFB1871
    .uleb128 0
    .uleb128 .LEHB4-.LFB1871
    .uleb128 .LEHE4-.LEHB4
    .uleb128 .L35-.LFB1871
    .uleb128 0
    .uleb128 .LEHB5-.LFB1871
    .uleb128 .LEHE5-.LEHB5
    .uleb128 .L34-.LFB1871
    .uleb128 0
    .uleb128 .LEHB6-.LFB1871
    .uleb128 .LEHE6-.LEHB6
    .uleb128 .L35-.LFB1871
    .uleb128 0
    .uleb128 .LEHB7-.LFB1871
    .uleb128 .LEHE7-.LEHB7
    .uleb128 0
    .uleb128 0
.LLSDACSE1871:
    .section    .text.startup
    .size   main, .-main
    .p2align 4,,15
    .type   _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB2369:
    .cfi_startproc
    leaq    _ZStL8__ioinit(%rip), %rdi
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    call    _ZNSt8ios_base4InitC1Ev@PLT
    movq    _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
    leaq    __dso_handle(%rip), %rdx
    leaq    _ZStL8__ioinit(%rip), %rsi
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    jmp __cxa_atexit@PLT
    .cfi_endproc
.LFE2369:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .init_array,"aw"
    .align 8
    .quad   _GLOBAL__sub_I_main
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC4:
    .long   0
    .long   1083129856
    .hidden DW.ref.__gxx_personality_v0
    .weak   DW.ref.__gxx_personality_v0
    .section    .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
    .align 8
    .type   DW.ref.__gxx_personality_v0, @object
    .size   DW.ref.__gxx_personality_v0, 8
DW.ref.__gxx_personality_v0:
    .quad   __gxx_personality_v0
    .hidden __dso_handle
    .ident  "GCC: (Ubuntu 7.3.0-16ubuntu3) 7.3.0"
    .section    .note.GNU-stack,"",@progbits

Windows:

.file   "tester.cpp"
    .text
    .p2align 4,,15
    .def    ___tcf_0;   .scl    3;  .type   32; .endef
___tcf_0:
LFB2556:
    .cfi_startproc
    movl    $__ZStL8__ioinit, %ecx
    jmp __ZNSt8ios_base4InitD1Ev
    .cfi_endproc
LFE2556:
    .section .rdata,"dr"
    .align 4
LC0:
    .ascii "basic_string::_M_construct null not valid\0"
    .text
    .align 2
    .p2align 4,,15
    .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29;    .scl    3;  .type   32; .endef
__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29:
LFB2587:
    .cfi_startproc
    pushl   %edi
    .cfi_def_cfa_offset 8
    .cfi_offset 7, -8
    pushl   %esi
    .cfi_def_cfa_offset 12
    .cfi_offset 6, -12
    movl    %ecx, %esi
    pushl   %ebx
    .cfi_def_cfa_offset 16
    .cfi_offset 3, -16
    subl    $32, %esp
    .cfi_def_cfa_offset 48
    movl    48(%esp), %edi
    movl    52(%esp), %ebx
    testl   %edi, %edi
    jne L5
    testl   %ebx, %ebx
    je  L5
    movl    $LC0, (%esp)
    call    __ZSt19__throw_logic_errorPKc
    .p2align 4,,10
L5:
    subl    %edi, %ebx
    cmpl    $15, %ebx
    movl    %ebx, 28(%esp)
    ja  L22
    movl    (%esi), %edx
    cmpl    $1, %ebx
    movl    %edx, %eax
    je  L23
    testl   %ebx, %ebx
    jne L6
L8:
    movl    28(%esp), %eax
    movl    %eax, 4(%esi)
    movb    $0, (%edx,%eax)
    addl    $32, %esp
    .cfi_remember_state
    .cfi_def_cfa_offset 16
    popl    %ebx
    .cfi_restore 3
    .cfi_def_cfa_offset 12
    popl    %esi
    .cfi_restore 6
    .cfi_def_cfa_offset 8
    popl    %edi
    .cfi_restore 7
    .cfi_def_cfa_offset 4
    ret $8
    .p2align 4,,10
L22:
    .cfi_restore_state
    leal    28(%esp), %eax
    movl    $0, 4(%esp)
    movl    %esi, %ecx
    movl    %eax, (%esp)
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj
    .cfi_def_cfa_offset 40
    subl    $8, %esp
    .cfi_def_cfa_offset 48
    movl    %eax, (%esi)
    movl    28(%esp), %edx
    movl    %edx, 8(%esi)
L6:
    movl    %ebx, 8(%esp)
    movl    %edi, 4(%esp)
    movl    %eax, (%esp)
    call    _memcpy
    movl    (%esi), %edx
    jmp L8
    .p2align 4,,10
L23:
    movzbl  (%edi), %eax
    movb    %al, (%edx)
    movl    (%esi), %edx
    jmp L8
    .cfi_endproc
LFE2587:
    .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21;   .scl    3;  .type   32; .endef
    .set    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21,__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29
    .section    .text$_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z,"x"
    .linkonce discard
    .p2align 4,,15
    .globl  __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z
    .def    __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z;    .scl    2;  .type   32; .endef
__ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z:
LFB2177:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    pushl   %esi
    pushl   %ebx
    subl    $16, %esp
    .cfi_offset 6, -12
    .cfi_offset 3, -16
    movl    16(%ebp), %edx
    movl    8(%ebp), %esi
    leal    30(%edx), %eax
    andl    $-16, %eax
    call    ___chkstk_ms
    subl    %eax, %esp
    leal    24(%ebp), %eax
    leal    31(%esp), %ebx
    movl    %edx, 4(%esp)
    movl    %eax, 12(%esp)
    movl    20(%ebp), %eax
    andl    $-16, %ebx
    movl    %ebx, (%esp)
    movl    %eax, 8(%esp)
    call    *12(%ebp)
    leal    8(%esi), %edx
    addl    %ebx, %eax
    movl    %esi, %ecx
    movl    %edx, (%esi)
    movl    %eax, 4(%esp)
    movl    %ebx, (%esp)
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29
    subl    $8, %esp
    leal    -8(%ebp), %esp
    movl    %esi, %eax
    popl    %ebx
    .cfi_restore 3
    popl    %esi
    .cfi_restore 6
    popl    %ebp
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE2177:
    .def    ___main;    .scl    2;  .type   32; .endef
    .section .rdata,"dr"
LC1:
    .ascii "\0"
LC2:
    .ascii "%d\0"
LC3:
    .ascii "basic_string::append\0"
LC4:
    .ascii " \0"
    .def    ___divdi3;  .scl    2;  .type   32; .endef
LC6:
    .ascii "Done in \0"
LC7:
    .ascii "\12\0"
    .section    .text.startup,"x"
    .p2align 4,,15
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB2111:
    .cfi_startproc
    .cfi_personality 0,___gxx_personality_v0
    .cfi_lsda 0,LLSDA2111
    leal    4(%esp), %ecx
    .cfi_def_cfa 1, 0
    andl    $-16, %esp
    pushl   -4(%ecx)
    pushl   %ebp
    .cfi_escape 0x10,0x5,0x2,0x75,0
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pushl   %ebx
    pushl   %ecx
    .cfi_escape 0xf,0x3,0x75,0x70,0x6
    .cfi_escape 0x10,0x7,0x2,0x75,0x7c
    .cfi_escape 0x10,0x6,0x2,0x75,0x78
    .cfi_escape 0x10,0x3,0x2,0x75,0x74
    subl    $152, %esp
    call    ___main
    call    __ZNSt6chrono3_V212system_clock3nowEv
    leal    -96(%ebp), %ecx
    movl    %eax, -136(%ebp)
    leal    -88(%ebp), %eax
    movl    $LC1, 4(%esp)
    movl    $LC1, (%esp)
    movl    %edx, -132(%ebp)
    movl    %eax, -96(%ebp)
LEHB0:
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21
LEHE0:
    leal    -96(%ebp), %ecx
    subl    $8, %esp
    movl    $20000000, (%esp)
LEHB1:
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj
LEHE1:
    subl    $4, %esp
    movl    $10000000, %edi
    leal    -72(%ebp), %esi
    leal    -40(%ebp), %ebx
    jmp L32
    .p2align 4,,10
L28:
    movl    %ecx, -48(%ebp)
    movl    8(%eax), %ecx
    movl    %ecx, -40(%ebp)
L29:
    movl    4(%eax), %ecx
    movb    $0, 8(%eax)
    movl    %ecx, -44(%ebp)
    movl    %edx, (%eax)
    leal    -96(%ebp), %ecx
    movl    $0, 4(%eax)
    movl    -44(%ebp), %eax
    movl    %eax, 4(%esp)
    movl    -48(%ebp), %eax
    movl    %eax, (%esp)
LEHB2:
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj
LEHE2:
    movl    -48(%ebp), %eax
    subl    $8, %esp
    cmpl    %ebx, %eax
    je  L30
    movl    %eax, (%esp)
    call    __ZdlPv
L30:
    movl    -72(%ebp), %eax
    leal    -64(%ebp), %edx
    cmpl    %edx, %eax
    je  L31
    movl    %eax, (%esp)
    call    __ZdlPv
L31:
    subl    $1, %edi
    je  L46
L32:
    movl    $1, 16(%esp)
    movl    $LC2, 12(%esp)
    movl    $16, 8(%esp)
    movl    $_vsnprintf, 4(%esp)
    movl    %esi, (%esp)
LEHB3:
    call    __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z
LEHE3:
    cmpl    $2147483647, -68(%ebp)
    je  L47
    movl    $1, 4(%esp)
    movl    $LC4, (%esp)
    movl    %esi, %ecx
LEHB4:
    call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj
LEHE4:
    movl    %ebx, -48(%ebp)
    movl    (%eax), %ecx
    leal    8(%eax), %edx
    subl    $8, %esp
    cmpl    %edx, %ecx
    jne L28
    movl    12(%eax), %ecx
    movl    %ecx, -120(%ebp)
    movl    16(%eax), %ecx
    movl    %ecx, -124(%ebp)
    movl    20(%eax), %ecx
    movl    %ecx, -128(%ebp)
    movl    8(%eax), %ecx
    movl    %ecx, -40(%ebp)
    movl    -120(%ebp), %ecx
    movl    %ecx, -36(%ebp)
    movl    -124(%ebp), %ecx
    movl    %ecx, -32(%ebp)
    movl    -128(%ebp), %ecx
    movl    %ecx, -28(%ebp)
    jmp L29
    .p2align 4,,10
L46:
    call    __ZNSt6chrono3_V212system_clock3nowEv
    subl    -136(%ebp), %eax
    movl    $1000000, 8(%esp)
    sbbl    -132(%ebp), %edx
    movl    $0, 12(%esp)
    movl    %eax, (%esp)
    movl    %edx, 4(%esp)
    call    ___divdi3
    movl    %eax, -120(%ebp)
    movl    %edx, -116(%ebp)
    fildq   -120(%ebp)
    movl    $8, 8(%esp)
    movl    $LC6, 4(%esp)
    movl    $__ZSt4cout, (%esp)
    fdivs   LC5
    fstpl   -120(%ebp)
LEHB5:
    call    __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    fldl    -120(%ebp)
    movl    $__ZSt4cout, %ecx
    fstpl   (%esp)
    call    __ZNSo9_M_insertIdEERSoT_
    subl    $8, %esp
    movl    $LC7, 4(%esp)
    movl    %eax, (%esp)
    call    __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
LEHE5:
    movl    -96(%ebp), %eax
    leal    -88(%ebp), %edi
    cmpl    %edi, %eax
    je  L43
    movl    %eax, (%esp)
    call    __ZdlPv
L43:
    leal    -16(%ebp), %esp
    xorl    %eax, %eax
    popl    %ecx
    .cfi_remember_state
    .cfi_restore 1
    .cfi_def_cfa 1, 0
    popl    %ebx
    .cfi_restore 3
    popl    %esi
    .cfi_restore 6
    popl    %edi
    .cfi_restore 7
    popl    %ebp
    .cfi_restore 5
    leal    -4(%ecx), %esp
    .cfi_def_cfa 4, 4
    ret
L47:
    .cfi_restore_state
    movl    $LC3, (%esp)
LEHB6:
    call    __ZSt20__throw_length_errorPKc
LEHE6:
L41:
    movl    %eax, %ebx
L36:
    movl    -72(%ebp), %eax
    leal    -64(%ebp), %edx
    cmpl    %edx, %eax
    je  L38
    movl    %eax, (%esp)
    call    __ZdlPv
L38:
    movl    -96(%ebp), %eax
    leal    -88(%ebp), %edi
    cmpl    %edi, %eax
    je  L39
    movl    %eax, (%esp)
    call    __ZdlPv
L39:
    movl    %ebx, (%esp)
LEHB7:
    call    __Unwind_Resume
LEHE7:
L42:
    movl    %eax, %esi
    movl    -48(%ebp), %eax
    cmpl    %ebx, %eax
    je  L35
    movl    %eax, (%esp)
    call    __ZdlPv
L35:
    movl    %esi, %ebx
    jmp L36
L40:
    movl    %eax, %ebx
    jmp L38
    .cfi_endproc
LFE2111:
    .def    ___gxx_personality_v0;  .scl    2;  .type   32; .endef
    .section    .gcc_except_table,"w"
LLSDA2111:
    .byte   0xff
    .byte   0xff
    .byte   0x1
    .uleb128 LLSDACSE2111-LLSDACSB2111
LLSDACSB2111:
    .uleb128 LEHB0-LFB2111
    .uleb128 LEHE0-LEHB0
    .uleb128 0
    .uleb128 0
    .uleb128 LEHB1-LFB2111
    .uleb128 LEHE1-LEHB1
    .uleb128 L40-LFB2111
    .uleb128 0
    .uleb128 LEHB2-LFB2111
    .uleb128 LEHE2-LEHB2
    .uleb128 L42-LFB2111
    .uleb128 0
    .uleb128 LEHB3-LFB2111
    .uleb128 LEHE3-LEHB3
    .uleb128 L40-LFB2111
    .uleb128 0
    .uleb128 LEHB4-LFB2111
    .uleb128 LEHE4-LEHB4
    .uleb128 L41-LFB2111
    .uleb128 0
    .uleb128 LEHB5-LFB2111
    .uleb128 LEHE5-LEHB5
    .uleb128 L40-LFB2111
    .uleb128 0
    .uleb128 LEHB6-LFB2111
    .uleb128 LEHE6-LEHB6
    .uleb128 L41-LFB2111
    .uleb128 0
    .uleb128 LEHB7-LFB2111
    .uleb128 LEHE7-LEHB7
    .uleb128 0
    .uleb128 0
LLSDACSE2111:
    .section    .text.startup,"x"
    .p2align 4,,15
    .def    __GLOBAL__sub_I_main;   .scl    3;  .type   32; .endef
__GLOBAL__sub_I_main:
LFB2557:
    .cfi_startproc
    subl    $28, %esp
    .cfi_def_cfa_offset 32
    movl    $__ZStL8__ioinit, %ecx
    call    __ZNSt8ios_base4InitC1Ev
    movl    $___tcf_0, (%esp)
    call    _atexit
    addl    $28, %esp
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
LFE2557:
    .section    .ctors,"w"
    .align 4
    .long   __GLOBAL__sub_I_main
.lcomm __ZStL8__ioinit,1,1
    .section .rdata,"dr"
    .align 4
LC5:
    .long   1148846080
    .ident  "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
    .def    __ZNSt8ios_base4InitD1Ev;   .scl    2;  .type   32; .endef
    .def    __ZSt19__throw_logic_errorPKc;  .scl    2;  .type   32; .endef
    .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj;   .scl    2;  .type   32; .endef
    .def    _memcpy;    .scl    2;  .type   32; .endef
    .def    __ZNSt6chrono3_V212system_clock3nowEv;  .scl    2;  .type   32; .endef
    .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj;   .scl    2;  .type   32; .endef
    .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj;  .scl    2;  .type   32; .endef
    .def    __ZdlPv;    .scl    2;  .type   32; .endef
    .def    _vsnprintf; .scl    2;  .type   32; .endef
    .def    __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i; .scl    2;  .type   32; .endef
    .def    __ZNSo9_M_insertIdEERSoT_;  .scl    2;  .type   32; .endef
    .def    __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc;   .scl    2;  .type   32; .endef
    .def    __ZSt20__throw_length_errorPKc; .scl    2;  .type   32; .endef
    .def    __Unwind_Resume;    .scl    2;  .type   32; .endef
    .def    __ZNSt8ios_base4InitC1Ev;   .scl    2;  .type   32; .endef
    .def    _atexit;    .scl    2;  .type   32; .endef

5 个答案:

答案 0 :(得分:3)

快速查看反汇编可以发现Windows版本使用movl(即长字,32位移动),Linux版本使用movq(四字,64位)和SSE寄存器xmm

我敢打赌,在Linux上,您针对x86-64进行编译,而在Windows上,则针对32位x86。

x86-64包含SSE2扩展指令集,而x86不包括,因此MinGW默认为无SSE模式。

在这种情况下,在Windows上使用64位工具链进行构建应可实现可比的性能。或者,您可以为32位版本启用SSE(如果我没记错的话,可以使用-msse2编译器标志)。

答案 1 :(得分:2)

mingw.org的实现似乎比linux,Visual Studio或mingw-w64.org效率低得多。

>g++ --version
g++ (MinGW.org GCC-6.3.0-1) 6.3.0
  

完成于24.808

enter image description here

>g++ --version
g++ (i686-posix-dwarf-rev2, Built by MinGW-W64 project) 6.3.0
  

完成于0.679

enter image description here

答案 2 :(得分:1)

使用MSYS2 MinGW64测试:

Access-Accept

g++ --version g++.exe (Rev2, Built by MSYS2 project) 7.3.0 g++.exe -Wall -O3 -mtune=native -fno-exceptions -fno-rtti -c main.cpp -o main.o g++.exe -o test.exe main.o -s

Env:Windows 10 x64 处理器:Intel Core i5-6300U,2.4GHz 内存:16GB DDR4

无论如何,MinGW使用msvcrt.dll而不是GNU libc(这是Windows自带的CRT,不是通用CRT / Visual Studio CRT等),因此根据我的经验,速度差距可能来自C标准库。

P.S。进行了一些更改(相同的编译器标志)

Done in 0.547

#include <chrono>
#include <cstddef>
#include <iostream>
#include <string>

#ifdef _WIN32
#include <windows.h>

// Returns the system page size in bytes, as reported by GetSystemInfo.
static std::size_t page_size() noexcept {
    ::SYSTEM_INFO si;
    ::GetSystemInfo(&si);
    return si.dwPageSize;
}
#else
#include <sys/types.h>
#include <unistd.h>

// Returns the system page size in bytes, as reported by sysconf(_SC_PAGESIZE).
static std::size_t page_size() noexcept {
    return static_cast<std::size_t>(::sysconf(_SC_PAGESIZE));
}
#endif // _WIN32

// Benchmark: appends a pre-converted "1" plus a space to one string n times
// and prints the elapsed time in seconds.
int main(int argc, char const *argv[]) {
    auto started = std::chrono::high_resolution_clock::now();

    const std::size_t n = 10000000;
    // Round the 2 * n byte buffer up to a multiple of the page size.
    // The mask arithmetic assumes the page size is a power of two.
    const std::size_t al = page_size() - 1;
    const std::size_t buff_size = ((n << 1) + al) & ~al;

    std::string str;
    str.reserve(buff_size);

    // Convert the integer once, outside the loop, so no temporary strings
    // are created per iteration.
    const std::string to_append(std::to_string(1));
    for (std::size_t i = 0; i < n; ++i) {
        str.append(to_append);
        str.push_back(' ');
    }

    auto done = std::chrono::high_resolution_clock::now();
    // Elapsed time is truncated to whole milliseconds, then converted to seconds.
    double secs = static_cast<double>(
        std::chrono::duration_cast<std::chrono::milliseconds>(done - started).count()) / 1000;
    std::cout << "Done in " << secs << "\n";
    return 0;
}

main函数的汇编输出:

Done in 0.046

答案 3 :(得分:0)

(仅供量级参考)在Visual Studio C++上对比Windows的Release目标与Debug目标:默认情况下,Debug目标的编译命令行不进行优化,而Release目标的编译命令行则带有 /O2 优化、/Oi(“启用内部函数”)和 /GL(“全程序优化”)。在我的工作站上,您的代码在Debug x64与Release x64下的结果为:

Debug:70秒。

Release:0.27秒。

您使用MinGW(我不熟悉)进行构建。但是从快速搜索(fast search)的结果来看,有关于Debug / Release模式……的讨论,而且MinGW似乎(it seems)具有与 /O2 优化、/Oi(“启用内部函数”)和 /Og(“启用全局优化”)等效的标志。

-

使用这3个标志(x64目标)进行编译,并与VS Release x64基准进行比较。无论如何,这是MS针对Release目标的默认编译优化。

-

测试环境: HP 8100,Windows 10 Pro 64位,CPU i7 870、16 GB DDR3 RAM,Visual Studio 2017,目标:Debug x64 / Release x64

答案 4 :(得分:-1)

我在Windows上使用MinGW 4.8.0尝试了您的代码,耗时约20秒。当我将字符串连接改为使用std::stringstream时,耗时降到了0.5秒:

...
// Accumulate the output in a single stream instead of building temporary
// std::string objects on every iteration.
std::stringstream ss;

for (int i = 0; i < n; ++i) {
    //str += std::to_string(a) + " ";
    ss << a << " ";
}
// Copy the accumulated text out of the stream into str.
str = ss.str();
...