Question

我一直在性能关键的应用程序中使用memcmp函数来比较2个整数。我之所以使用它而不是相等运算符，是因为我必须以通用的方式处理其他数据类型。但是，我怀疑memcmp在原始数据类型上的性能，于是将其改为等于运算符。结果性能提高了。

我刚做了一些简单的测试。

使用memcmp

#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <string.h>

using namespace std;

/**
 * Micro-benchmark: compares two integers taken from the command line
 * 2000000000 times with memcmp and prints the elapsed wall-clock time.
 *
 * @param argc number of command-line arguments; at least 3 is required
 * @param argv argv[1] and argv[2] are parsed with atoi as the two integers
 * @return 0 on success, 1 when the two required arguments are missing
 */
int main(int argc, char **argv)
{
        // Without this guard atoi() would be handed a null pointer when
        // fewer than two arguments are supplied.
        if (argc < 3)
        {
                std::cerr << "Usage: " << (argc > 0 ? argv[0] : "a.out") << " <int1> <int2>" << std::endl;
                return 1;
        }

        int iValue1 = atoi(argv[1]);
        int iValue2 = atoi(argv[2]);

        struct timeval start;
        gettimeofday(&start, NULL);

        for (int i = 0; i < 2000000000; i++)
        {
                // Variant under test: byte-wise comparison through memcmp.
                // The other variant of this benchmark uses: if (iValue1 == iValue2)
                if (memcmp(&iValue1, &iValue2, sizeof(int)) == 0)
                {
                        std::cout << "Hello" << std::endl;
                }
        }

        struct timeval end;
        gettimeofday(&end, NULL);

        // Subtract the seconds first and widen to long long so the
        // microsecond conversion cannot overflow a 32-bit time_t/long.
        const long long elapsedUs = (end.tv_sec - start.tv_sec) * 1000000LL + (end.tv_usec - start.tv_usec);

        std::cout << "Time taken : " << elapsedUs << " us" << std::endl;
        return 0;
}

该程序的结果如下。

sujith@linux-1xs7:~> g++ -m64 -O3 Main.cpp
sujith@linux-1xs7:~> ./a.out 3424 234
Time taken : 13539618 us
sujith@linux-1xs7:~> ./a.out 3424 234
Time taken : 13534932 us
sujith@linux-1xs7:~> ./a.out 3424 234
Time taken : 13599818 us
sujith@linux-1xs7:~> ./a.out 3424 234
Time taken : 13639394 us

使用等号运算符

#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <string.h>

using namespace std;

/**
 * Micro-benchmark: compares two integers taken from the command line
 * 2000000000 times with the == operator and prints the elapsed
 * wall-clock time.
 *
 * @param argc number of command-line arguments; at least 3 is required
 * @param argv argv[1] and argv[2] are parsed with atoi as the two integers
 * @return 0 on success, 1 when the two required arguments are missing
 */
int main(int argc, char **argv)
{
        // Without this guard atoi() would be handed a null pointer when
        // fewer than two arguments are supplied.
        if (argc < 3)
        {
                std::cerr << "Usage: " << (argc > 0 ? argv[0] : "a.out") << " <int1> <int2>" << std::endl;
                return 1;
        }

        int iValue1 = atoi(argv[1]);
        int iValue2 = atoi(argv[2]);

        struct timeval start;
        gettimeofday(&start, NULL);

        for (int i = 0; i < 2000000000; i++)
        {
                // Variant under test: direct integer comparison.
                // The other variant of this benchmark uses:
                // if (memcmp(&iValue1, &iValue2, sizeof(int)) == 0)
                if (iValue1 == iValue2)
                {
                        std::cout << "Hello" << std::endl;
                }
        }

        struct timeval end;
        gettimeofday(&end, NULL);

        // Subtract the seconds first and widen to long long so the
        // microsecond conversion cannot overflow a 32-bit time_t/long.
        const long long elapsedUs = (end.tv_sec - start.tv_sec) * 1000000LL + (end.tv_usec - start.tv_usec);

        std::cout << "Time taken : " << elapsedUs << " us" << std::endl;
        return 0;
}

该程序的结果如下。

sujith@linux-1xs7:~> g++ -m64 -O3 Main.cpp
sujith@linux-1xs7:~> ./a.out 234 23423
Time taken : 9 us
sujith@linux-1xs7:~> ./a.out 234 23423
Time taken : 13 us
sujith@linux-1xs7:~> ./a.out 234 23423
Time taken : 14 us
sujith@linux-1xs7:~> ./a.out 234 23423
Time taken : 15 us
sujith@linux-1xs7:~> ./a.out 234 23423
Time taken : 16 us

有人可以告诉我，对于原始数据类型，等于运算符是否比memcmp更快？如果是这样，其中发生了什么？等于运算符在内部不是使用memcmp吗？

Answer 1

微基准测试（microbenchmark）很难写。

第一种情况下的循环编译为（g++ -O3）：

    ; Loop counter: 2000000000 iterations remaining, kept in %ebx
    movl    $2000000000, %ebx
    jmp .L3
.L2:
    ; memcmp reported a difference: count down and leave once the counter reaches zero
    subl    $1, %ebx
    je  .L7
.L3:
    ; Every iteration rebuilds the three memcmp arguments: two stack addresses and the length 4
    leaq    12(%rsp), %rsi
    leaq    8(%rsp), %rdi
    movl    $4, %edx
    call    memcmp
    ; A nonzero return value means the operands differ, so the printing code is skipped
    testl   %eax, %eax
    jne .L2
    ; code to do the printing omitted
    subl    $1, %ebx
    jne .L3
.L7:
    ; Loop finished: release the stack space and return 0
    addq    $16, %rsp
    xorl    %eax, %eax
    popq    %rbx
    ret

第二种情况下的循环编译为

    ; The two values are compared once, in registers, before any loop is entered
    cmpl    %eax, %ebp
    je  .L7
.L2:
    ; Exit path: release the stack space, restore the saved registers and return 0
    addq    $8, %rsp
    xorl    %eax, %eax
    popq    %rbx
    popq    %rbp
    ret
.L7:
    ; Values are equal: set the counter to 2000000000 and run the print-only loop
    movl    $2000000000, %ebx
.L3:
    ; code to do the printing omitted
    subl    $1, %ebx
    jne .L3
    jmp .L2

请注意，在第一种情况下，memcmp被调用了2000000000次。在第二种情况下，优化器将比较操作提升到了循环之外，因此它只执行一次。此外，在第二种情况下，编译器将两个变量完全放在寄存器中，而在第一种情况下，它们必须放在栈上，因为你取了它们的地址。

即使只看比较本身，比较两个int也只需要一条cmpl指令。使用memcmp会产生一次函数调用，而且memcmp内部可能还需要做一些额外的检查。

在这种特殊情况下，clang++ -O3会将memcmp编译为单个cmpl指令。但是，如果您使用memcmp，它不会将该检查提升到循环之外。

对于原始数据类型，Equal运算符Vs memcmp的性能

1 个答案: