为了提供上下文,我正在研究Programming Praxis Bingo Challenge,并想看看我能以多快的速度运行此代码。
static void fisher_yates(T& source) {
const size_t len = source.size();
for(size_t i = 1; i < len;++i) {
std::swap(source[i],source[rand() % (i+1)]);
}
}
std::array<int,25> generate_table() {
std::array<int,25> bingo_grid;
for(int i = 0 ; i < 25;++i) {
switch(i) {
case 0: case 1: case 2: case 3: case 4:
bingo_grid[i] = rand() % 15 + 1;
break;
case 5: case 6: case 7: case 8: case 9:
bingo_grid[i] = rand() % 15 + 16;
break;
case 10: case 11: case 12: case 13: case 14:
bingo_grid[i] = rand() % 15 + 31;
break;
case 15: case 16: case 17: case 18: case 19:
bingo_grid[i] = rand() % 15 + 46;
break;
case 20: case 21: case 22: case 23: case 24:
bingo_grid[i] = rand() % 15 + 61;
break;
}
}
bingo_grid[12] = 0;
return bingo_grid;
}
bool is_bingoed(const std::array<int,25>& grid) {
// Check columns
if(grid[0] == 0) {
if(grid[1] == 0 && grid[2] == 0 && grid[3] == 0 && grid[4] == 0)
return true;
if(grid[0] == 0 && grid[6] == 0 && grid[18] == 0 && grid[24] == 0)
return true;
if(grid[5] == 0 && grid[10] == 0 && grid[15] == 0 && grid[20] == 0)
return true;
}
if(grid[1] == 0) {
if(grid[6] == 0 && grid[11] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[2] == 0) {
if(grid[7] == 0 && grid[17] == 0 && grid[22] == 0)
return true;
}
if(grid[3] == 0) {
if(grid[8] == 0 && grid[13] == 0 && grid[18] == 0 && grid[23] == 0)
return true;
}
if(grid[4] == 0) {
if(grid[9] == 0 && grid[14] == 0 && grid[19] == 0 && grid[24] == 0)
return true;
if(grid[8] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[6] == 0) {
if(grid[6] == 0 && grid[7] == 0 && grid[8] == 0 && grid[9] == 0)
return true;
}
if(grid[12] == 0) {
if(grid[10] == 0 && grid[11] == 0 && grid[13] == 0 && grid[14] == 0)
return true;
}
if(grid[18] == 0) {
if(grid[15] == 0 && grid[16] == 0 && grid[17] == 0 && grid[19] == 0)
return true;
}
return false;
}
static bool mark_card(const int card,std::array<int,25>& bingo_grid) {
for(auto &i : bingo_grid)
if(card == i) {
i = 0;
return true;
}
return false;
}
int play_game() {
// Bingo is 5 columns, each column(n) is random permutation of 1-15*n
// Fisher-Yates to generate random permutations
// Create 500 playing cards
const int max = 500;
std::vector<std::array<int,25>> bingo_cards;
bingo_cards.reserve(max);
for(int i = 0; i<max;++i) {
bingo_cards.push_back(generate_table());
//display_bingo(bingo_cards[i]);
}
// Random shuffle 75 cards
auto iter = boost::counting_range(1,76);
std::vector<int> cards(std::begin(iter),std::end(iter));
fisher_yates(cards);
bool is_finished = false;
int counter = 0;
for(auto card : cards) {
for(auto& playing_card : bingo_cards) {
if(mark_card(card,playing_card)) {
//display_bingo(playing_card);
if(is_bingoed(playing_card))
return counter;
}
}
counter++;
}
return counter;
}
int bingo() {
srand(time(NULL));
int total = 0;
for(int i = 0 ; i < 10000;i++) {
total+=play_game();
}
boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::release_memory();
return total / 10000;
}
原始版本使用boost :: multi_array来表示网格。在进行性能分析后,我将其更改为std :: array,这让我的速度显着提升。然后我转而使用fisher_yates shuffle来生成宾果卡以使用随机数生成器。 最后我改变了is_bingoed测试功能,以减少每次通话的检查次数,加快游戏结束检查。
这一切都有所帮助。现在如果我描述这段代码,generate_table函数占72%的时间,mark_card()占18%,is_bingoed()约占6%。我正在寻找提示,看看可以采取哪些措施来提高速度。
我首先想到的是is_bingoed()是使用SSE内在函数与0进行比较(也许使用XOR?)但我对generate_table()或mark_car()没有任何想法。这更像是一种有趣的自我挑战,却想知道其他人的想法?
当前时间是2Ghz Q6660需要4.6秒(原来是35秒)
答案 0 :(得分:1)
只关注最昂贵的函数generate_table
,您可以简化这部分代码并使其不那么粗糙,这可能会有所帮助:
for(int i = 0 ; i < 25;++i) {
switch(i) {
case 0: case 1: case 2: case 3: case 4:
bingo_grid[i] = rand() % 15 + 1;
break;
case 5: case 6: case 7: case 8: case 9:
bingo_grid[i] = rand() % 15 + 16;
break;
case 10: case 11: case 12: case 13: case 14:
bingo_grid[i] = rand() % 15 + 31;
break;
case 15: case 16: case 17: case 18: case 19:
bingo_grid[i] = rand() % 15 + 46;
break;
case 20: case 21: case 22: case 23: case 24:
bingo_grid[i] = rand() % 15 + 61;
break;
}
}
e.g。
for(int i = 0 ; i < 25;++i) {
int r = rand() % 15 + 1;
bingo_grid[i] = r + (i / 5) * 15;
}
除此之外,我会看一个更快的兰德(),看看你是否可以摆脱鸿沟和模数。
另外,您的算法可能存在缺陷,因为没有什么可以阻止bingo_grid中的重复数字。
答案 1 :(得分:0)
更改is_bingoed()方法以使用SSE指令(使用Agner Fog's库)和Paul R的generate_table()将时间缩短到1.05秒。使用Intel's fast_rand()函数将其降低到0.38秒。 所以我想我会为其他可能感兴趣的人粘贴代码更改。
static unsigned int g_seed;
//Used to seed the generator.
inline void fast_srand( int seed )
{
g_seed = seed;
}
//fastrand routine returns one integer, similar output value range as C lib.
inline int fastrand()
{
g_seed = (214013*g_seed+2531011);
return (g_seed>>16)&0x7FFF;
}
bool is_bingoed(const std::array<int,25>& grid) {
// Check columns
Vec8i vec(grid[0],grid[1],grid[2],grid[3],grid[4],0,0,0);
Vec4i vec2(grid[6],grid[18],grid[24],0);
Vec4i vec3(grid[5],grid[10],grid[15],20);
Vec8i vec4(grid[1],grid[6],grid[11],grid[16],grid[21],0,0,0);
Vec4i vec5(grid[2],grid[7],grid[17],grid[22]);
Vec8i vec6(grid[3],grid[8],grid[13],grid[18],grid[23],0,0,0);
Vec8i vec7(grid[4],grid[9],grid[14],grid[19],grid[24],0,0,0);
Vec4i vec8(grid[8],grid[16],grid[21],grid[4]);
Vec4i vec9(grid[6],grid[7],grid[8],grid[9]);
Vec8i vec10(grid[12],grid[10],grid[11],grid[13],grid[14],0,0,0);
Vec8i vec11(grid[18],grid[15],grid[16],grid[17],grid[19],0,0,0);
if(horizontal_and(vec) && horizontal_and(vec2) && horizontal_and(vec3) && horizontal_and(vec4) &&
horizontal_and(vec5) && horizontal_and(vec6) && horizontal_and(vec7) && horizontal_and(vec8)) {
return false;
}
if(horizontal_and(vec9) && horizontal_and(vec10) && horizontal_and(vec11)) {
return false;
}
return true;
}