if 语句的分支预测问题

时间:2016-07-12 10:47:41

标签: c++ if-statement branch-prediction

我正在实现一种模式搜索算法,其中有一个至关重要的 if 语句,而它的结果似乎难以预测。被搜索的是任意文件,因此分支预测有时效果尚可,而当文件内容完全随机时则会非常糟糕。我的目标是消除这个 if 语句;我尝试过一些办法(例如预先分配向量),但结果反而更慢。可能的模式数量非常大,因此预分配会耗费大量时间。所以我改用动态向量:先把它们全部初始化为 NULL,再用 if 语句检查对应的模式是否已经存在。这个 if 语句(尤其是它对应的 cmp 汇编指令)似乎正是性能瓶颈:错误的分支预测频繁清空流水线,导致速度大幅下降。如果有任何办法可以消除下面代码第 17 行的 if 语句,请不吝赐教……我已经陷入困境。

// Processes every position list in prevLocalPListArray. Each stored position
// that is still inside the file is bucketed by the byte found at that position
// in file->fileString, and the position advanced by one is appended to that
// bucket. Buckets holding at least minOccurrence entries are handed to
// globalLocalPListArray (which then holds the raw pointer); smaller buckets
// are deleted. Each processed input list is deleted at the end of its pass.
for (PListType i = 0; i < prevLocalPListArray->size(); i++)
{
    // One lazily allocated bucket per possible byte value; nullptr means that
    // byte has not been seen yet for the current list.
    vector<vector<PListType>*> newPList(256, nullptr);

    vector<PListType>* pList = (*prevLocalPListArray)[i];
    const PListType pListLength = pList->size();

    // Capacity hint: list length divided by the 256 buckets, rounded up.
    // Done in integer arithmetic because ceil(pListLength/256) truncated in
    // the division before ceil ever ran, so the hint was always rounded down
    // (and was 0 for lists shorter than 256).
    const PListType earlyApproximation =
        pListLength / 256 + ((pListLength % 256 != 0) ? 1 : 0);

    for (PListType k = 0; k < pListLength; k++)
    {
        //If pattern is past end of string stream then stop counting this pattern
        if ((*pList)[k] < file->fileStringSize)
        {
            uint8_t indexer = ((uint8_t)file->fileString[(*pList)[k]]);
            vector<PListType>*& bucket = newPList[indexer];

            // Problem if statement: this null check is the branch flagged in
            // the original code. It now guards only the one-time allocation;
            // the append below is shared by both paths.
            if (bucket == nullptr)
            {
                bucket = new vector<PListType>();
                bucket->reserve(earlyApproximation);
            }

            // The stored position is incremented in place and the incremented
            // value is what gets recorded.
            bucket->push_back(++(*pList)[k]);
        }
    }

    //Deallocate or stuff patterns in global list
    for (vector<PListType>* bucket : newPList)
    {
        if (bucket == nullptr)
        {
            continue;
        }

        if (bucket->size() >= minOccurrence)
        {
            // The pointer is stored in the global list, so it must not be
            // deleted here.
            globalLocalPListArray->push_back(bucket);
        }
        else
        {
            delete bucket;
        }
    }

    // NOTE(review): the slot in prevLocalPListArray still holds the deleted
    // pointer after this line; confirm no caller dereferences it later.
    delete (*prevLocalPListArray)[i];
}

以下是按照“去掉间接层(不使用指针)”的建议修改后的代码……

    vector<vector<PListType>> newPList(256);

    for (PListType i = 0; i < prevLocalPListArray.size(); i++)
    {
        const vector<PListType>& pList = prevLocalPListArray[i];
        PListType pListLength = prevLocalPListArray[i].size();

        for (PListType k = 0; k < pListLength; k++)
        {
            //If pattern is past end of string stream then stop counting this pattern
            if (pList[k] < file->fileStringSize)
            {
                uint8_t indexer = ((uint8_t)file->fileString[pList[k]]);

                newPList[indexer].push_back((pList[k] + 1));
            }
            else
            {
                totalTallyRemovedPatterns++;
            }
        }
        for (int z = 0; z < 256; z++)
        {

            if (newPList[z].size() >= minOccurrence/* || (Forest::outlierScans && pList->size() == 1)*/)
            {
                globalLocalPListArray.push_back(newPList[z]);
            }
            else
            {
                totalTallyRemovedPatterns++;
            }
            newPList[z].clear();
        }
        vector<PListType> temp;
        temp.swap(prevLocalPListArray[i]);
    }

这是最新版本的程序,它不再占用 3 倍的内存,也不再需要那个 if 语句。目前唯一的瓶颈似乎是 newPList[indexIntoFile].push_back(++index); 这条语句。由于模式是随机的,这个瓶颈可能是索引数组时的缓存一致性问题造成的。当我搜索只包含 1 和 0 的二进制文件时,这条 push_back 语句在索引时没有任何延迟,所以我认为这是缓存颠簸(cache thrashing)。你们觉得这段代码还有优化空间吗?到目前为止,你们给了我很大的帮助。 @bogdan @harold

// Processes every position list in prevLocalPListArray using 256 buckets that
// are reused across lists. For each list with more than one entry, every
// position still inside the file is bucketed by the byte at that position in
// file->fileString and the position advanced by one is appended to the bucket.
// Buckets that reach minOccurrence are copied into a newly allocated vector
// appended to globalLocalPListArray; all other non-empty buckets are tallied
// in totalTallyRemovedPatterns. Each input list is deleted after its pass.
vector<PListType> newPList[256];
PListType prevPListSize = prevLocalPListArray->size();
// indexes[b]: number of positions appended to bucket b for the current list.
PListType indexes[256] = {0};
// indexesToPush[0..listLength): byte values whose bucket is non-empty.
PListType indexesToPush[256] = {0};
for (PListType i = 0; i < prevPListSize; i++)
{
    vector<PListType>* pList = (*prevLocalPListArray)[i];
    PListType pListLength = pList->size();
    if (pListLength > 1)
    {
        for (PListType k = 0; k < pListLength; k++)
        {
            //If pattern is past end of string stream then stop counting this pattern
            PListType index = (*pList)[k];
            if (index < file->fileStringSize)
            {
                uint_fast8_t indexIntoFile = (uint8_t)file->fileString[index];
                newPList[indexIntoFile].push_back(++index);
                indexes[indexIntoFile]++;
            }
            else
            {
                totalTallyRemovedPatterns++;
            }
        }

        // Collect the byte values that were actually hit so the flush loop
        // below only visits non-empty buckets.
        PListType listLength = 0;
        for (PListType k = 0; k < 256; k++)
        {
            if (indexes[k])
            {
                indexesToPush[listLength++] = k;
            }
        }

        for (PListType k = 0; k < listLength; k++)
        {
            const PListType bucket = indexesToPush[k];
            // Kept as PListType: storing the count in an int would truncate
            // large counts before the comparison.
            const PListType insert = indexes[bucket];

            if (insert >= minOccurrence)
            {
                globalLocalPListArray->push_back(
                    new vector<PListType>(newPList[bucket].begin(), newPList[bucket].end()));
            }
            else
            {
                // Previously only insert == 1 was handled here. A bucket with
                // 2..minOccurrence-1 entries was left uncleared with a
                // non-zero counter, so its positions leaked into the next
                // list's results.
                totalTallyRemovedPatterns++;
            }

            // Always reset the bucket and its counter so the next list starts
            // from a clean state.
            indexes[bucket] = 0;
            newPList[bucket].clear();
        }
    }
    else
    {
        // Lists with zero or one entry are not scanned; they are only tallied.
        totalTallyRemovedPatterns++;
    }
    delete (*prevLocalPListArray)[i];
}

以下是基准测试结果。我觉得放在评论里不便阅读,所以把它放在了回答区。左侧的百分比表示每一行代码所占用的时间比例。

enter image description here

0 个答案:

没有答案