我将Java GC测试程序移植到C ++(参见下面的代码)以及Python。 Java和Python的性能远远超过C ++,我认为这是因为每次都需要完成对new
的所有调用才能创建字符串。我尝试过使用Boost的fast_pool_allocator
,但这实际上使性能从700毫秒降低到1200毫秒。我使用分配器是错误的,还是我应该做的其他事情呢?
编辑:与g++ -O3 -march=native --std=c++11 garbage.cpp -lboost_system
一起编译。 g ++是4.8.1版
Python中的一次迭代大约需要300ms,Java大约需要50ms。 std::allocator
提供约700毫秒,boost::fast_pool_allocator
提供约1200毫秒。
#include <string>
#include <vector>
#include <chrono>
#include <list>
#include <iostream>
#include <boost/pool/pool_alloc.hpp>
#include <memory>
//#include <gc/gc_allocator.h>
using namespace std;
#include <sstream>
typedef boost::fast_pool_allocator<char> c_allocator;
//typedef std::allocator<char> c_allocator;
typedef basic_string<char, char_traits<char>, c_allocator> pool_string;
namespace patch {
template <typename T> pool_string to_string(const T& in) {
std::basic_stringstream<char, char_traits<char>, c_allocator> stm;
stm << in;
return stm.str();
}
}
#include "mytime.hpp"
class Garbage {
public:
vector<pool_string> outer;
vector<pool_string> old;
const int nThreads = 1;
//static auto time = chrono::high_resolution_clock();
void go() {
// outer.resize(1000000);
//old.reserve(1000000);
auto tt = mytime::msecs();
for (int i = 0; i < 10; ++i) {
if (i % 100 == 0) {
cout << "DOING AN OLD" << endl;
doOld();
tt = mytime::msecs();
}
for (int j = 0; j < 1000000/nThreads; ++j)
outer.push_back(patch::to_string(j));
outer.clear();
auto t = mytime::msecs();
cout << (t - tt) << endl;
tt = t;
}
}
void doOld() {
old.clear();
for (int i = 0; i < 1000000/nThreads; ++i)
old.push_back(patch::to_string(i));
}
};
int main() {
Garbage().go();
}
答案 0 :(得分:5)
问题是你每次都使用一个新的字符串流来转换整数。
修复它:
namespace patch {
template <typename T> pool_string to_string(const T& in) {
return boost::lexical_cast<pool_string>(in);
}
}
现在的时间是:
DOING AN OLD
0.175462
0.0670085
0.0669926
0.0687969
0.0692518
0.0669318
0.0669196
0.0669187
0.0668962
0.0669185
real 0m0.801s
user 0m0.784s
sys 0m0.016s
完整的参考代码:
#include <boost/pool/pool_alloc.hpp>
#include <chrono>
#include <iostream>
#include <list>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
//#include <gc/gc_allocator.h>
using string = std::string;
namespace patch {
template <typename T> string to_string(const T& in) {
return boost::lexical_cast<string>(in);
}
}
class Timer
{
typedef std::chrono::high_resolution_clock clock;
clock::time_point _start;
public:
Timer() { reset(); }
void reset() { _start = now(); }
double elapsed()
{
using namespace std::chrono;
auto e = now() - _start;
return duration_cast<nanoseconds>(e).count()*1.0e-9;
}
clock::time_point now()
{
return clock::now();
}
};
class Garbage {
public:
std::vector<string> outer;
std::vector<string> old;
const int nThreads = 1;
void go() {
outer.resize(1000000);
//old.reserve(1000000);
Timer timer;
for (int i = 0; i < 10; ++i) {
if (i % 100 == 0) {
std::cout << "DOING AN OLD" << std::endl;
doOld();
}
for (int j = 0; j < 1000000/nThreads; ++j)
outer.push_back(patch::to_string(j));
outer.clear();
std::cout << timer.elapsed() << std::endl;
timer.reset();
}
}
void doOld() {
old.clear();
for (int i = 0; i < 1000000/nThreads; ++i)
old.push_back(patch::to_string(i));
}
};
int main() {
Garbage().go();
}
答案 1 :(得分:2)
由于我没有在我的机器上使用boost,我将代码简化为使用标准C ++ 11 to_string
(因此意外“修复”了所发现的问题),并得到了这个:
#include <string>
#include <vector>
#include <chrono>
#include <list>
#include <iostream>
#include <memory>
//#include <gc/gc_allocator.h>
#include <sstream>
using namespace std;
class Timer
{
typedef std::chrono::high_resolution_clock clock;
clock::time_point _start;
public:
Timer() { reset(); }
void reset() { _start = now(); }
double elapsed()
{
using namespace std::chrono;
auto e = now() - _start;
return duration_cast<nanoseconds>(e).count()*1.0e-9;
}
clock::time_point now()
{
return clock::now();
}
};
class Garbage {
public:
vector<string> outer;
vector<string> old;
const int nThreads = 1;
Timer timer;
void go() {
// outer.resize(1000000);
//old.reserve(1000000);
for (int i = 0; i < 10; ++i) {
if (i % 100 == 0) {
cout << "DOING AN OLD" << endl;
doOld();
}
for (int j = 0; j < 1000000/nThreads; ++j)
outer.push_back(to_string(j));
outer.clear();
cout << timer.elapsed() << endl;
timer.reset();
}
}
void doOld() {
old.clear();
for (int i = 0; i < 1000000/nThreads; ++i)
old.push_back(to_string(i));
}
};
int main() {
Garbage().go();
}
编译:
$ g++ -O3 -std=c++11 gc.cpp
$ ./a.out
DOING AN OLD
0.414637
0.189082
0.189143
0.186336
0.184449
0.18504
0.186302
0.186055
0.183123
0.186835
2014年4月18日星期五的clang 3.5 build with source使用相同的编译器选项得到了类似的结果。
我的处理器是AMD Phenom(tm)II X4 965,运行频率为3.6GHz(如果我没记错的话)。