从文件中读取和写入大对象

时间:2015-10-10 22:28:12

标签: c++

我使用 OpenCV 和 SURF 描述符:对每张图像提取一个描述符矩阵,然后把它们合并成一个大矩阵(Mat),用于通过 FLANN 索引进行搜索。问题是,当这个大矩阵的大小不断增长(超过 30 GB)时,可用内存就不够用了。目前我把矩阵序列化为二进制文件,并采用了这样一个方案:限制单个矩阵的最大大小,将其拆分为多个文件;每次搜索时,我按顺序逐个读取这些文件进行搜索,然后合并结果。有没有更好的方式来读取这些数据?

//代码

   class FlannIndexModel
{
    cv::Ptr<cv::flann::Index> flannIndex;
    cv::Mat dbDescs;
    string fileName;
public:
    vector<IndecesMappingModel> imap;

    FlannIndexModel(string fileName)
    {
        this->fileName = fileName;
        flannIndex = new cv::flann::Index();
    }

    size_t Size()
    {
        size_t sizeInBytes = dbDescs.total() * dbDescs.elemSize();
        return sizeInBytes/1000000;
    }
    void Load()
    {
        FileManager::LoadMat(dbDescs, (fileName + "_desc.bin"));
        FileManager::LoadImap(imap, (fileName + "_imap.bin"));
        flannIndex->load(dbDescs, (fileName + "_flann.bin"));

        cout << " Flann Load: " << " dbDescs rows= " << dbDescs.rows << " imap= " << imap.size() << endl;
    }
    void Save()
    {
        FileManager::SaveMat(dbDescs, (fileName + "_desc.bin"));
        FileManager::SaveImap(imap, (fileName + "_imap.bin"));
        flannIndex->save((fileName + "_flann.bin"));
    }
    void Add(vector<ImageDescModel> imges)
    {
        vector<cv::Mat> descs;

        int r = dbDescs.rows;

        for (int i = 0; i < imges.size(); i++)
        {
            auto desc = imges[i].Desc;
            if (desc.empty())
                continue;
            descs.push_back(desc);
            imap.push_back(IndecesMappingModel(imges[i].FileName, r, r + desc.rows - 1));
            r += desc.rows;
        }
        if (!dbDescs.empty())
            descs.push_back(dbDescs);
        vconcat(descs, dbDescs);
    }
    void Calcul()
    {
        flannIndex->build(dbDescs, cv::flann::KDTreeIndexParams::KDTreeIndexParams(4));
    }


    vector<IndecesMappingModel> Search(cv::Mat queryDescriptors, int num)
    {
        for (auto &img : imap)
        {
            img.Similarity = 0;
        }

        cv::Mat indices(queryDescriptors.rows, 2, CV_32S);
        cv::Mat dists(queryDescriptors.rows, 2, CV_32F);
        flannIndex->knnSearch(queryDescriptors, indices, dists, 2, cv::flann::SearchParams(24));

#pragma omp for
        for (int i = 0; i < indices.rows; i++)
        {
            if (dists.at<float>(i, 0) < (0.6 * dists.at<float>(i, 1)))
            {
                for (auto &img : imap)
                {
                    if (img.IndexStart <= indices.at<int>(i, 0) && img.IndexEnd >= indices.at<int>(i, 0))
                    {
                        img.Similarity++;
                        break;
                    }
                }
            }
        }

        std::sort(imap.begin(), imap.end());

        if (imap.size() > num)
        {
            vector<IndecesMappingModel> result(imap.begin(), imap.begin() + num);
            return result;
        }
        else
        {
            return imap;
        }
    }

};

0 个答案:

没有答案