我编写代码来测量forward_list
的排序性能,而且直观地说,使用内存池分配器实际上会使它变慢:
// clang++ -O3 -std=c++11 -I/usr/local/include -lboost_system sort-test.cc -o sort-test
#include <boost/pool/pool_alloc.hpp>
#include <algorithm>
#include <cmath>
#include <chrono>
#include <forward_list>
#include <fstream>
#include <iostream>
#include <vector>
using namespace std;
using namespace std::chrono;
using namespace boost;
class TSampler {
public:
void AddSample(double value) {
NumSamples++;
if (NumSamples == 1) {
Mean = value;
M = value;
M2 = value * value;
return;
}
double dillutionFactor = 1.0 / NumSamples;
// variance recurrence
double cm = value - M;
M += dillutionFactor * cm;
M2 += cm * (value - M);
// mean recurrence
Mean += dillutionFactor * (value - Mean);
}
double GetMean() const {
return Mean;
}
double GetStdDev() const {
Recalc();
return StdDev;
}
double GetConfidenceRadius() const {
Recalc();
return Radius;
}
double GetConfidenceLo() const {
Recalc();
return Mean - Radius;
}
double GetConfidenceHi() const {
Recalc();
return Mean + Radius;
}
private:
void Recalc() const {
StdDev = sqrt(M2 / (NumSamples - 1.0));
Radius = 1.96 * StdDev / sqrt(NumSamples + 0.0);
}
private:
size_t NumSamples = 0;
double Mean = NAN, M = NAN, M2 = NAN;
mutable double StdDev = NAN, Radius = NAN;
};
void FillSamples(int *samples, size_t size) {
ifstream r("/dev/urandom");
r.read(reinterpret_cast<char *>(samples), size * sizeof(*samples));
}
using TList = forward_list<int>;
using TPooledList = forward_list<int, fast_pool_allocator<int>>;
using TVector = vector<int>;
void InitContainer(TList &xs, int *input, size_t size) {
for (int i = size - 1; i >= 0; --i) {
xs.push_front(input[i]);
}
}
void InitContainer(TPooledList &xs, int *input, size_t size) {
for (int i = size - 1; i >= 0; --i) {
xs.push_front(input[i]);
}
}
void InitContainer(TVector &xs, int *input, size_t size) {
xs.reserve(size);
copy(input, input + size, back_inserter(xs));
}
void Sort(TPooledList &xs) {
xs.sort();
}
void Sort(TList &xs) {
xs.sort();
}
void Sort(TVector &xs) {
sort(xs.begin(), xs.end());
}
template<typename TContainer>
void RunSort(TSampler &sampler, int *input, size_t size) {
TContainer xs;
InitContainer(xs, input, size);
steady_clock::time_point before = steady_clock::now();
Sort(xs);
steady_clock::time_point after = steady_clock::now();
auto delta = duration_cast<duration<double>>(after - before);
sampler.AddSample(delta.count());
}
int main(int argc, char **argv) {
argc--; argv++;
if (argc != 2) {
cerr << "Usage: sort-test T N\nT: number of trials\nN: size of the vector/list" << endl;
exit(1);
}
int t = atoi(argv[0]);
int n = atoi(argv[1]);
if (t <= 0 || n <= 0) {
cerr << "Invalid arguments" << endl;
exit(1);
}
TSampler listSampler;
TSampler pooledListSampler;
TSampler vectorSampler;
TVector input;
input.resize(n);
for (int trial = 0; trial < t; ++trial) {
FillSamples(&input[0], n);
RunSort<TList>(listSampler, &input[0], n);
RunSort<TPooledList>(pooledListSampler, &input[0], n);
RunSort<TVector>(vectorSampler, &input[0], n);
}
cout << "List: " << listSampler.GetMean()
<< " ± " << listSampler.GetConfidenceRadius()
<< " (95% confidence)" << endl;
cout << "Pooled list: " << pooledListSampler.GetMean()
<< " ± " << pooledListSampler.GetConfidenceRadius()
<< " (95% confidence)" << endl;
cout << "Vector: " << vectorSampler.GetMean()
<< " ± " << vectorSampler.GetConfidenceRadius()
<< " (95% confidence)" << endl;
}
在我的2013款MacBook Air上:
[03:04:59 dev]$ ./sort-test 10 10000
List: 0.00082735 ± 0.000189312 (95% confidence)
Pooled list: 0.00091835 ± 0.000201482 (95% confidence)
Vector: 0.000484268 ± 0.000107911 (95% confidence)
[03:05:03 dev]$ ./sort-test 10 10000
List: 0.000764033 ± 0.000188491 (95% confidence)
Pooled list: 0.00103925 ± 0.00038322 (95% confidence)
Vector: 0.000482046 ± 0.000111277 (95% confidence)
[03:05:08 dev]$ ./sort-test 30 10000
List: 0.000747641 ± 6.93474e-05 (95% confidence)
Pooled list: 0.000912401 ± 9.43897e-05 (95% confidence)
Vector: 0.000526488 ± 9.34585e-05 (95% confidence)
[03:05:13 dev]$ ./sort-test 30 10000
List: 0.000810124 ± 8.71386e-05 (95% confidence)
Pooled list: 0.000946477 ± 0.000115813 (95% confidence)
Vector: 0.000495523 ± 5.44617e-05 (95% confidence)
[03:05:18 dev]$ ./sort-test 30 100000
List: 0.0107134 ± 0.00110686 (95% confidence)
Pooled list: 0.018314 ± 0.00102368 (95% confidence)
Vector: 0.00583989 ± 0.000514794 (95% confidence)
为什么会这样?一个原因可能是内存区域在使用内存池时实际上更糟,因为列表节点是使用普通operator ::new
分配的,但后来我不得不问如何正确使用fast_pool_allocator
。
答案 0 :(得分:2)
使用https://github.com/rmartinho/nonius代替基准测试,我得到了以下结果:
clock resolution: mean is 75.9223 ns (5120002 iterations)
benchmarking list
collecting 100 samples, 1 iterations each, in estimated 381.414 ms
mean: 479.875 μs, lb 448.911 μs, ub 629.948 μs, ci 0.95
std dev: 300.223 μs, lb 13.0769 μs, ub 715.676 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers
benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 432.377 ms
mean: 326.049 μs, lb 311.569 μs, ub 393.567 μs, ci 0.95
std dev: 136.228 μs, lb 13.3412 μs, ub 323.482 μs, ci 0.95
found 4 outliers among 100 samples (4%)
variance is severely inflated by outliers
benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 311.1 ms
mean: 82.4482 μs, lb 81.3539 μs, ub 84.4259 μs, ci 0.95
std dev: 7.31431 μs, lb 4.91517 μs, ub 12.4147 μs, ci 0.95
found 20 outliers among 100 samples (20%)
variance is severely inflated by outliers
benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 400.889 ms
mean: 492.731 μs, lb 489.809 μs, ub 495.912 μs, ci 0.95
std dev: 15.5061 μs, lb 13.6999 μs, ub 17.8827 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is moderately inflated by outliers
clock resolution: mean is 75.8616 ns (5120002 iterations)
benchmarking list
collecting 100 samples, 1 iterations each, in estimated 390.905 ms
mean: 488.722 μs, lb 475.528 μs, ub 532.547 μs, ci 0.95
std dev: 109.262 μs, lb 33.3498 μs, ub 242.045 μs, ci 0.95
found 3 outliers among 100 samples (3%)
variance is severely inflated by outliers
benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 460.04 ms
mean: 371.775 μs, lb 359.049 μs, ub 421.965 μs, ci 0.95
std dev: 119.361 μs, lb 12.3961 μs, ub 283.336 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers
benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 312.925 ms
mean: 74.4602 μs, lb 71.1761 μs, ub 80.5051 μs, ci 0.95
std dev: 21.8959 μs, lb 14.3879 μs, ub 36.7203 μs, ci 0.95
found 18 outliers among 100 samples (18%)
variance is severely inflated by outliers
benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 412.003 ms
mean: 542.015 μs, lb 538.517 μs, ub 548.913 μs, ci 0.95
std dev: 24.0988 μs, lb 14.6781 μs, ub 46.3466 μs, ci 0.95
found 4 outliers among 100 samples (4%)
variance is moderately inflated by outliers
clock resolution: mean is 80.7181 ns (5120002 iterations)
benchmarking list
collecting 100 samples, 1 iterations each, in estimated 379.471 ms
mean: 245.714 μs, lb 234.609 μs, ub 296.895 μs, ci 0.95
std dev: 103.979 μs, lb 11.1603 μs, ub 246.781 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers
benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 403.439 ms
mean: 196.096 μs, lb 194.302 μs, ub 198.409 μs, ci 0.95
std dev: 10.3566 μs, lb 8.65354 μs, ub 14.8478 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers
benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 328.543 ms
mean: 13.8026 μs, lb 13.2373 μs, ub 15.107 μs, ci 0.95
std dev: 4.12654 μs, lb 2.22994 μs, ub 8.16186 μs, ci 0.95
found 23 outliers among 100 samples (23%)
variance is severely inflated by outliers
benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 419.92 ms
mean: 552.445 μs, lb 544.444 μs, ub 568.603 μs, ci 0.95
std dev: 55.5666 μs, lb 31.562 μs, ub 94.2535 μs, ci 0.95
found 5 outliers among 100 samples (5%)
variance is severely inflated by outliers
#include <boost/pool/pool_alloc.hpp>
#include <boost/container/slist.hpp>
#include <nonius/benchmark.h++>
#include <forward_list>
#include <iostream>
#include <fstream>
#include <vector>
void FillSamples(int *samples, size_t size) {
std::ifstream r("/dev/urandom");
r.read(reinterpret_cast<char *>(samples), size * sizeof(*samples));
}
using TList = std::forward_list<int>;
using TPooledList = std::forward_list<int, boost::fast_pool_allocator<int>>;
using TBoostList = boost::container::slist<int, boost::fast_pool_allocator<int>>;
using TVector = std::vector<int>;
void InitContainer(TVector &xs, int *input, size_t size) {
xs.reserve(size);
copy(input, input + size, back_inserter(xs));
}
template <typename SomeList>
void InitContainer(SomeList &xs, int *input, size_t size) {
for (int i = size - 1; i >= 0; --i) {
xs.push_front(input[i]);
}
}
template <typename SomeList>
void Sort(SomeList &xs) {
xs.sort();
}
void Sort(TVector &xs) {
sort(xs.begin(), xs.end());
}
#ifndef VECSIZE
#define VECSIZE 10000
#endif
template<typename TContainer>
void RunSort(nonius::chronometer &sampler, int *input, size_t size) {
TContainer xs;
InitContainer(xs, input, size);
FillSamples(&input[0], VECSIZE);
sampler.measure([&]{
Sort(xs);
});
}
NONIUS_BENCHMARK("list", [](nonius::chronometer cm) {
TVector input;
input.resize(VECSIZE);
RunSort<TList>(cm, &input[0], VECSIZE);
})
NONIUS_BENCHMARK("pooledList", [](nonius::chronometer cm) {
TVector input;
input.resize(VECSIZE);
RunSort<TPooledList>(cm, &input[0], VECSIZE);
})
NONIUS_BENCHMARK("vector", [](nonius::chronometer cm) {
TVector input;
input.resize(VECSIZE);
RunSort<TVector>(cm, &input[0], VECSIZE);
})
NONIUS_BENCHMARK("boost.slist", [](nonius::chronometer cm) {
TVector input;
input.resize(VECSIZE);
RunSort<TBoostList>(cm, &input[0], VECSIZE);
})
#include <nonius/main.h++>