我正在实现一种模式搜索算法,其中有一条至关重要的 if 语句,它的判断结果似乎无法预测。被搜索的文件内容是随机的:有时分支预测的效果尚可,但如果文件完全随机,预测效果就会很差。我的目标是消除这条 if 语句。我已经尝试过一些办法(例如预先分配向量),但结果反而更慢:可能出现的模式数量非常大,预分配会耗费大量时间。因此我改用动态向量,先把它们全部初始化为 NULL,再用 if 语句检查对应的模式是否已经存在。这条 if 似乎正是性能杀手,尤其是它对应的 cmp 汇编指令:分支预测失败会频繁清空流水线,造成严重的性能下降。如何才能消除第 17 行的 if 语句(即下面代码中检查 newPList[indexer] 是否为空的那一条)?欢迎任何想法,我已经卡住了。
// Extends every position list in prevLocalPListArray by one byte of the file.
// For each list, the positions are split into up to 256 buckets keyed by the
// byte found at that position; each bucket stores position + 1. Buckets that
// reach minOccurrence entries are handed to globalLocalPListArray (which then
// holds the pointer); smaller buckets are deleted. Each processed input list
// is deleted at the end of its iteration.
for (PListType i = 0; i < prevLocalPListArray->size(); i++)
{
    // One lazily allocated bucket per byte value; nullptr means "byte not seen yet".
    vector<vector<PListType>*> newPList(256, nullptr);
    vector<PListType>* pList = (*prevLocalPListArray)[i];
    const PListType pListLength = pList->size();
    // Reserve hint: list length divided by 256, rounded up. The previous
    // ceil(pListLength/256) rounded DOWN because the integer division
    // truncates before ceil() ever sees the value.
    const PListType earlyApproximation = pListLength / 256 + (pListLength % 256 != 0 ? 1 : 0);
    for (PListType k = 0; k < pListLength; k++)
    {
        const PListType position = (*pList)[k];
        //If pattern is past end of string stream then stop counting this pattern
        if (position < file->fileStringSize)
        {
            const uint8_t indexer = static_cast<uint8_t>(file->fileString[position]);
            // This is the branch the author flagged as the hot spot: whether the
            // bucket for this byte already exists.
            if (newPList[indexer] == nullptr)
            {
                // Reserve before the first push so the bucket is allocated once
                // instead of once for the first element and again for the reserve.
                newPList[indexer] = new vector<PListType>();
                newPList[indexer]->reserve(earlyApproximation);
            }
            newPList[indexer]->push_back(position + 1);
        }
    }
    //Deallocate or stuff patterns in global list
    for (size_t z = 0; z < newPList.size(); z++)
    {
        vector<PListType>* bucket = newPList[z];
        if (bucket == nullptr)
        {
            continue;
        }
        if (bucket->size() >= minOccurrence)
        {
            globalLocalPListArray->push_back(bucket);
        }
        else
        {
            delete bucket;
        }
    }
    // Same pointer as (*prevLocalPListArray)[i]; the slot itself is left untouched.
    delete pList;
}
下面是按照建议去掉间接寻址(不再使用指针)之后的代码:
vector<vector<PListType>> newPList(256);
for (PListType i = 0; i < prevLocalPListArray.size(); i++)
{
const vector<PListType>& pList = prevLocalPListArray[i];
PListType pListLength = prevLocalPListArray[i].size();
for (PListType k = 0; k < pListLength; k++)
{
//If pattern is past end of string stream then stop counting this pattern
if (pList[k] < file->fileStringSize)
{
uint8_t indexer = ((uint8_t)file->fileString[pList[k]]);
newPList[indexer].push_back((pList[k] + 1));
}
else
{
totalTallyRemovedPatterns++;
}
}
for (int z = 0; z < 256; z++)
{
if (newPList[z].size() >= minOccurrence/* || (Forest::outlierScans && pList->size() == 1)*/)
{
globalLocalPListArray.push_back(newPList[z]);
}
else
{
totalTallyRemovedPatterns++;
}
newPList[z].clear();
}
vector<PListType> temp;
temp.swap(prevLocalPListArray[i]);
}
这是最新版本的程序,它既不再占用 3 倍的内存,也不再需要那条 if 语句。现在唯一的瓶颈似乎是 newPList[indexIntoFile].push_back(++index); 这条语句。由于模式是随机的,在索引数组时,这个瓶颈可能是缓存一致性方面的问题。当我搜索只含 1 和 0 的二进制文件时,这条 push_back 语句在索引时没有任何延迟,因此我认为原因是缓存颠簸(cache thrashing)。你们觉得这段代码还有优化空间吗?到目前为止,各位给了我很大的帮助。@bogdan @harold
// Extends every position list in prevLocalPListArray by one byte of the file,
// using a fixed array of 256 reusable buckets instead of per-list allocation.
//   newPList[b]      positions (advanced by one) whose byte value is b
//   indexes[b]       number of entries currently in newPList[b]
//   indexesToPush[]  compact list of the bucket ids used by the current list
// Buckets reaching minOccurrence are copied into a freshly allocated vector
// appended to globalLocalPListArray; all other non-empty buckets are discarded
// and tallied. Each input list is deleted after it has been processed.
vector<PListType> newPList[256];
PListType prevPListSize = prevLocalPListArray->size();
PListType indexes[256] = {0};
PListType indexesToPush[256] = {0};
for (PListType i = 0; i < prevPListSize; i++)
{
    vector<PListType>* pList = (*prevLocalPListArray)[i];
    const PListType pListLength = pList->size();
    if (pListLength > 1)
    {
        for (PListType k = 0; k < pListLength; k++)
        {
            //If pattern is past end of string stream then stop counting this pattern
            PListType index = (*pList)[k];
            if (index < file->fileStringSize)
            {
                uint_fast8_t indexIntoFile = (uint8_t)file->fileString[index];
                newPList[indexIntoFile].push_back(++index);
                indexes[indexIntoFile]++;
            }
            else
            {
                totalTallyRemovedPatterns++;
            }
        }

        // Gather the ids of the buckets that received at least one position.
        PListType listLength = 0;
        for (PListType k = 0; k < 256; k++)
        {
            if (indexes[k])
            {
                indexesToPush[listLength++] = k;
            }
        }

        for (PListType k = 0; k < listLength; k++)
        {
            const PListType bucketId = indexesToPush[k];
            vector<PListType>& bucket = newPList[bucketId];
            if (indexes[bucketId] >= minOccurrence)
            {
                globalLocalPListArray->push_back(new vector<PListType>(bucket));
            }
            else
            {
                // NOTE(review): previously only buckets holding exactly one
                // entry were tallied here; a bucket with 1 < count <
                // minOccurrence was neither tallied nor reset. It is now
                // tallied once as a removed pattern - confirm this is the
                // intended accounting when minOccurrence > 2.
                totalTallyRemovedPatterns++;
            }
            // Always reset the bucket. Leaving a sub-threshold bucket populated
            // would carry its positions and count over into the next input list.
            indexes[bucketId] = 0;
            bucket.clear();
        }
    }
    else
    {
        totalTallyRemovedPatterns++;
    }
    // Same pointer as (*prevLocalPListArray)[i]; the slot itself is left untouched.
    delete pList;
}
以下是基准测试结果。我觉得放在评论里不便阅读,所以把它发在了回答区。左侧的百分比表示每一行代码所占用的执行时间比例。