同一份代码在同一台计算机上分别用 Visual C++ 和 MINGW-64 编译后,运行速度相差大约 15 到 45 倍。
MSVC可以加速吗?
最近我一直在寻找一个 C++ 随机数生成器,用来生成介于 0 和 1 之间的 double 类型数值。因为不需要高质量的伪随机数,我原本使用 stdlib 的 erand48() 函数,但它在 MSVC 中不可用。我用 MSVC 14(Visual Studio 2015 Community)编译了下面的代码,也通过 MSYS 用 MINGW-64 编译了同一份代码。MINGW 编译出的程序比 MSVC 编译出的快 15 到 45 倍。
两者都在同一台计算机上运行。
g++ 的结果(所有时间以毫秒为单位)。使用如下命令行编译:
g++ -o RandSpeedTest -std=c++11 -O3 RandSpeedTest.cpp
得到:
| ranlux64_base_01 | 169 | ms |
| linear_congruential | 184 | ms |
| minstd_rand0 | 78 | ms |
| minstd_rand | 68 | ms |
| mt19937 (default_random_engine) | 53 | ms |
MSVC 2015 Community 的结果:以 x64 为目标编译,使用 /O2 启用优化。我还尝试过 /arch:SSE、/arch:SSE2、/arch:AVX 和 /arch:AVX2,它们带来的差别都微乎其微。
| ranlux64_base_01 | 7794 | ms |
| linear_congruential | 7746 | ms |
| minstd_rand0 | 1181 | ms |
| minstd_rand | 1221 | ms |
| mt19937 (default_random_engine) | 1045 | ms |
| knuth_b | 1575 | ms |
| mt19937_64 | 1009 | ms |
| ranlux24 | 5639 | ms |
| ranlux48 | 10687 | ms |
我还没有找到把引擎类型模板化的好办法,因此代码中有大量重复,抱歉。
#ifdef __GNUC__
#include <tr1/random>
//#include <cstdlib>
#elif _MSC_VER
#include <random>
#endif //__GNUC__
#include <iostream>
#include <chrono>
#include <vector>
using namespace std;
using std::chrono::high_resolution_clock;
using std::chrono::duration_cast;
// Prints the time elapsed between two clock readings as a whole number of
// milliseconds, followed by the table-row suffix " | ms |" and a newline.
//   t1 - reading taken before the timed work
//   t2 - reading taken after the timed work
void dump_time(
    std::chrono::time_point<std::chrono::high_resolution_clock> t1,
    std::chrono::time_point<std::chrono::high_resolution_clock> t2)
{
    typedef std::chrono::milliseconds millis;
    // duration_cast truncates toward zero, so sub-millisecond remainders are dropped.
    const millis elapsed = std::chrono::duration_cast<millis>(t2 - t1);
    std::cout << elapsed.count() << " | ms |\n";
}
int main()
{
const unsigned long numToGen = 10000000;
std::tr1::uniform_real<double> dist(0,1.);
volatile double f;
std::chrono::time_point<std::chrono::high_resolution_clock> t1, t2;
cout << "| ranlux64_base_01 | ";
dist.reset(); // discard any cached values
std::tr1::ranlux64_base_01 eng0;
eng0.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng0));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
/*
// verify that it produces output
for(int i =0; i < 10; i++)
{
cout << dist(eng0) << endl;
}
*/
cout << "| linear_congruential | ";
std::tr1::ranlux64_base_01 eng1;
eng1.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng1));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| minstd_rand0 | ";
std::tr1::minstd_rand0 eng2;
eng2.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng2));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| minstd_rand |";
std::tr1::minstd_rand eng4;
eng4.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng4));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| mt19937 (default_random_engine) | ";
std::tr1::mt19937 eng5;
eng5.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng5));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
#ifdef _MSC_VER
cout << "| knuth_b | ";
std::tr1::knuth_b eng3;
eng3.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng3));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| mt19937_64 | ";
std::tr1::mt19937_64 eng6;
eng6.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng6));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| ranlux24 | ";
std::tr1::ranlux24 eng7;
eng7.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng7));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
cout << "| ranlux48 |";
std::tr1::ranlux48 eng8;
eng8.seed( (unsigned int) 357);
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=dist(eng8));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
#endif // _MSC_VER
/*
// Not available in MINGW-64, not available in MSVC
cout << "erand48" << endl;
unsigned short int seed16v[3];
t1 = high_resolution_clock::now();
for (int i = 0; i < numToGen; i++, f=erand48(seed16v));
t2 = high_resolution_clock::now();
dump_time(t1, t2);
*/
return (0);
}
答案 0 :(得分:0)
根据 @NeilButterworth 的评论,手动去掉对 dist 的调用,例如把
dist(eng6)
替换为
eng6()
得到的速度结果与 g++ 相近,如下所示。
| ranlux64_base_01 | 162 | ms |
| linear_congruential | 127 | ms |
| minstd_rand0 | 71 | ms |
| minstd_rand | 71 | ms |
| mt19937 (default_random_engine) | 69 | ms |
| knuth_b | 115 | ms |
| mt19937_64 (__int64 not divided) | 151 | ms |
| ranlux24 | 1211 | ms |
| ranlux48 (__int64 not divided) | 4046 | ms |
其中有两个引擎生成的是 __int64 值,这些值没有被换算成 0 到 1 之间的浮点数。不过,去掉对 dist 的调用之后,其余引擎的结果与 GNU 编译器的结果一致。