Question

在性能方面，现代C++编译器中的以下函数之间是否有任何区别？

// Returns the concatenation of a, b and c, built with a single chained
// operator+ expression.
std::string ConcatA(const std::string& a, const std::string& b, const std::string& c)
{
    // operator+ is left-associative: this is (a + b) + c, so a + b is
    // materialized as an intermediate string before c is appended.
    return a + b + c;
}

// Returns the concatenation of a, b and c, built by copying a into a local
// string and then appending b and c to that same string.
std::string ConcatB(const std::string& a, const std::string& b, const std::string& c)
{
    std::string r = a;  // start from a copy of a
    r += b;             // append in place to r
    r += c;
    return r;
}

Answer 1

ConcatB有1个临时字符串，而ConcatA有2个临时字符串，因此ConcatB快两倍。

$ cat cata.cpp

#include <string>
#include <iostream>
// Returns the concatenation of a, b and c, built with a single chained
// operator+ expression.
std::string ConcatA(const std::string& a, const std::string& b, const std::string& c)
{
    // operator+ is left-associative: this is (a + b) + c, so a + b is
    // materialized as an intermediate string before c is appended.
    return a + b + c;
}
// Benchmark driver: calls ConcatA 10,000,000 times on three 2-character
// strings and prints the sum of the result lengths.
int main(){
  std::string aa="aa";
  std::string bb="bb";
  std::string cc="cc";
  int count = 0;
  for(int ii = 0; ii < 10000000; ++ii) {
    // Each call yields a 6-character string; its size() is added to count
    // (size_t implicitly converted to int).
    count += ConcatA(aa, bb, cc).size();
  }
    std::cout<< count <<std::endl;
}

$ cat catb.cpp

#include <string>
#include <iostream>
// Returns the concatenation of a, b and c, built by copying a into a local
// string and then appending b and c to that same string.
std::string ConcatB(const std::string& a, const std::string& b, const std::string& c)
{
    std::string r = a;  // start from a copy of a
    r += b;             // append in place to r
    r += c;
    return r;
}
// Benchmark driver: calls ConcatB 10,000,000 times on three 2-character
// strings and prints the sum of the result lengths.
int main(){
  std::string aa="aa";
  std::string bb="bb";
  std::string cc="cc";
  int count = 0;
  for(int ii = 0; ii < 10000000; ++ii) {
    // Each call yields a 6-character string; its size() is added to count
    // (size_t implicitly converted to int).
    count += ConcatB(aa, bb, cc).size();
  }
    std::cout<< count <<std::endl;
}

$ clang++ -v

Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn)
Target: x86_64-apple-darwin13.1.0
Thread model: posix

$ clang++ cata.cpp
$ time ./a.out

60000000

real    0m1.122s
user    0m1.118s
sys 0m0.003s

$ clang++ catb.cpp
$ time ./a.out
60000000

real    0m0.599s
user    0m0.596s
sys 0m0.002s
$

Answer 2

我用 MinGW（TDM）4.8.1 编译了这两个函数，使用了选项 -fdump-tree-optimized，没有加 -O2。

第一个函数执行的操作大致如下：

string tmp = a+b; // that means: create new string g (a copy of a), g += b, tmp = g (+ dispose g)
tmp += c;
return tmp; // and dispose tmp

第二个函数则以另一种方式实现：

string tmp = a; // just copy a to tmp
tmp += b;
tmp += c;
return tmp; // and dispose tmp

看起来就像这样

  void * D.20477;
  struct basic_string D.20179;

  <bb 2>:
  D.20179 = std::operator+<char, std::char_traits<char>, std::allocator<char> > (a_1(D), b_2(D)); [return slot optimization]
  *_3(D) = std::operator+<char, std::char_traits<char>, std::allocator<char> > (&D.20179, c_4(D)); [return slot optimization]

  <bb 3>:

  <bb 4>:
  std::basic_string<char>::~basic_string (&D.20179);
  D.20179 ={v} {CLOBBER};

<L1>:
  return _3(D);

<L2>:
  std::basic_string<char>::~basic_string (&D.20179);
  _5 = __builtin_eh_pointer (1);
  __builtin_unwind_resume (_5);

和

  void * D.20482;
  struct string r [value-expr: *<retval>];

  <bb 2>:
  std::basic_string<char>::basic_string (r_1(D), a_2(D));
  std::basic_string<char>::operator+= (r_1(D), b_3(D));

  <bb 3>:
  std::basic_string<char>::operator+= (r_1(D), c_4(D));

  <bb 4>:

<L0>:
  return r_1(D);

<L1>:
  std::basic_string<char>::~basic_string (r_1(D));
  _5 = __builtin_eh_pointer (1);
  __builtin_unwind_resume (_5);

因此，在启用 -O2 优化之后，编译器让 ConcatB 函数保持几乎相同的形式，而对 ConcatA 则做了一些“魔法”处理：内联函数、为内存分配部分加入常量值、声明新的函数，但最关键的部分保持不变。

ConcatA：

  D.20292 = std::operator+<char, std::char_traits<char>, std::allocator<char> > (a_2(D), b_3(D)); [return slot optimization]
  *_5(D) = std::operator+<char, std::char_traits<char>, std::allocator<char> > (&D.20292, c_6(D));

ConcatB：

  std::basic_string<char>::basic_string (r_3(D), a_4(D));
  std::basic_string<char>::append (r_3(D), b_6(D));
  std::basic_string<char>::append (r_3(D), c_8(D));

因此，很明显 ConcatB 比 ConcatA 更好，因为它的内存分配操作更少；而当你尝试优化这么小的一段代码时，内存分配的开销是非常昂贵的。

std字符串连接性能

2 个答案: