尽管已释放所有动态内存,Matlab MEX中仍然存在内存泄漏?

时间:2013-09-18 12:56:25

标签: c++ matlab memory-leaks mex

亲爱的stackoverflow社区,

我目前正在编写一个MEX函数,它本身会调用外部C++代码。我在Matlab中用双重循环调用这个MEX函数(48 x 48次)来构建相似性矩阵,其中每个相似性值都由该MEX函数计算。

/*
 * Matlab interface function (MEX entry point).
 *
 * Inputs (prhs):
 *   prhs[0], prhs[1] - the two fragment strings; each one is split on '|'
 *                      into node encodings.
 *   prhs[2..4]       - optional score vectors for the meta, middle and nodes
 *                      scores; either all three are given or none of them.
 * Output (plhs):
 *   plhs[0]          - the string written by obj.print_result() for the
 *                      result of the nodes alignment.
 *
 * Side effects: overwrites the global score arrays (meta_scores,
 * middle_scores, nodes_scores), fills the globals firstNodes / sndNodes and
 * allocates the global distanceTable. The table is freed and both vectors are
 * cleared again before the output string is created.
 *
 * NOTE(review): mexErrMsgTxt presumably does not hand control back to this
 * function, which would make the "return" statements after it unreachable -
 * confirm against the MEX API documentation.
 */
void mexFunction(int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[]) {
    if (nrhs < 2) {
        //we need at least the two input strings.
        mexErrMsgTxt("Two input strings are needed!");
        return;
    }
    if (nrhs > 5) {
        mexErrMsgTxt("Maximum number of inputs is 5!");
        return;
    }
    //3 or 4 inputs would mean that only some of the score vectors were given.
    if (nrhs == 3 || nrhs == 4) {
        mexErrMsgTxt("You are expected to give all three score vectors: for meta, middle and nodes score.");
        return;
    }
    if (nlhs != 1) {
        //and we only give one output argument
        mexErrMsgTxt("The fragment distance only provides one output argument!");
        return;
    }
    //if possible get score vectors
    if (nrhs == 5) {
        extractScoreVector(prhs, 2, META_SCORENUM, meta_scores);
        extractScoreVector(prhs, 3, MIDDLE_SCORENUM, middle_scores);
        extractScoreVector(prhs, 4, NODES_SCORENUM, nodes_scores);
    } else {
        //otherwise take default scores
        meta_scores[meta_del] = -1;
        meta_scores[meta_ins] = -1;
        meta_scores[meta_match] = 10;
        meta_scores[meta_mismatch] = -2;
        middle_scores[0] = -6;
        middle_scores[1] = -6;
        nodes_scores[nodes_del] = -18;
        nodes_scores[nodes_ins] = -18;
        nodes_scores[nodes_skipl] = 0;
        nodes_scores[nodes_skipr] = 0;
    }

    //get both string inputs.

    std::string firstSeq = getMatlabString(prhs, 0);
    std::string sndSeq = getMatlabString(prhs, 1);

    //split them into node encodings.
    firstNodes = split(firstSeq, '|');
    sndNodes = split(sndSeq, '|');
    //initialize distance table: one row per node of the first fragment, one
    //column per node of the second fragment, every entry preset to -1
    //(node_distance treats -1 as "not calculated yet").
    //NOTE(review): the table is only freed further down in this function; if
    //anything in between leaves the function early (an exception from
    //obj.cyk()/obj.run(), for instance) the malloc'ed rows are not released.
    distanceTable = (int**) malloc(sizeof (int *) * firstNodes.size());
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        distanceTable[i] = (int*) malloc(sizeof (int) * sndNodes.size());
        for (unsigned int j = 0; j < sndNodes.size(); j++) {
            distanceTable[i][j] = -1;
        }
    }

    //construct input for nodes alignment: each node is represented only by its
    //index, zero-padded to three digits (index 1 becomes 001) and followed by '|'.
    std::stringstream nodesInput;

    //first the node indices of the first fragment.
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        int magnitude = getMagnitude(i);
        //prepend as many zeros as are missing to reach three digits.
        for (int j = 0; j < 3 - magnitude; j++) {
            nodesInput << '0';
        }
        nodesInput << i << '|';
    }
    //then an @
    nodesInput << '@';
    //then the indices of the second fragment in reverse order, padded the same way.
    for (int i = sndNodes.size() - 1; i >= 0; i--) {
        int magnitude = getMagnitude(i);
        for (int j = 0; j < 3 - magnitude; j++) {
            nodesInput << '0';
        }
        nodesInput << i << '|';
    }
    //append an explicit NUL character to the stream content.
    nodesInput << '\0';

    //build the argument vector for the nodes alignment.

    char* nodes_argv[2];
    //fake program name, dummy string
    nodes_argv[0] = (char*) "nodes";
    //actual input. str() returns a temporary std::string; binding it to a
    //const reference extends the lifetime of that temporary to the end of this
    //scope, so the pointer obtained from c_str() stays valid for the calls
    //below.
    const std::string& tmp = nodesInput.str();
    nodes_argv[1] = const_cast<char*> (tmp.c_str());

    //call nodes alignment.
    //NOTE(review): std::exit ends the whole process; inside a MEX function that
    //presumably takes the hosting MATLAB session down with it - confirm that
    //this is the intended reaction to an exception.
    gapc::Opts opts;
    try {
        //parse inputs
        opts.parse(2, nodes_argv);
    } catch (std::exception &e) {
        std::cerr << "Exception: " << e.what() << '\n';
        std::exit(1);
    }
    nodes obj;

    try {
        obj.init(opts);
    } catch (std::exception &e) {
        std::cerr << "Exception: " << e.what() << '\n';
        std::exit(1);
    }

    obj.cyk();

    gapc::return_type res = obj.run();

    //free distance table memory: every row first, then the array of row pointers.
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        free(distanceTable[i]);
    }
    free(distanceTable);

    //clear the node vectors
    firstNodes.clear();
    sndNodes.clear();

    //Version for simple score return value
    //plhs[0] = mxCreateDoubleScalar(res);

    //Version for string return value: let the alignment object print the
    //result into a stream and hand the text back to Matlab.
    std::stringstream nodeOutput;
    obj.print_result(nodeOutput, res);
    const std::string& outStr = nodeOutput.str();
    plhs[0] = mxCreateString(outStr.c_str());
}

外部代码指的是gapc::Opts和nodes obj这两部分。到目前为止,外部代码没有已知的内存泄漏问题,因此我猜测问题出在我贴在这里的代码中。不幸的是,我找不到错误所在。我尝试过手动释放代码中提到的几乎每一个变量,但这总是会导致MATLAB崩溃(在我看来,Matlab会尝试自行释放这些变量,如果它们已不在内存中就会崩溃)。

这里的内存泄漏非常严重:循环大约执行7步之后就已占用约1 GB RAM,在我的测试用例中最终达到约13 GB RAM。这对该程序来说并不合理,因此很可能存在内存泄漏。

我也尝试在stackoverflow中找到一个修复程序,但这里提到的所有内容似乎都不适用于我的场景。

由于内存泄漏量非常大,最可疑的变量(因为它们包含的内容最多)是firstSeq、sndSeq、firstNodes、sndNodes、distanceTable、opts和obj。

所以我的问题是:

  1. 我忘了释放其中一个变量吗?
  2. 您是否看到其他可能导致代码内存泄漏的内容?
  3. 我该如何解决?
  4. 就我查到的资料而言,对象不必手动释放,因为它们是自动管理的。尽管如此,一定有某处的内存发生了泄漏。

    /修改

    根据要求,我还提供了辅助函数的代码。请注意,函数“nodes_score”、“meta_score”和“node_distance”是由外部函数调用的,而这些外部函数则是我在代码中通过obj.run()调用的。

    //using namespace std;
    
    /*
     * Cuts s into tokens at every occurrence of delim and appends the tokens
     * to elems (whatever elems already holds is kept). Returns elems itself.
     *
     * This solution for the split problem is taken from
     *
     * http://stackoverflow.com/questions/236129/splitting-a-string-in-c
     */
    std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
        std::stringstream tokenStream(s);
        //std::getline stops yielding tokens once the stream is exhausted.
        for (std::string token; std::getline(tokenStream, token, delim);) {
            elems.push_back(token);
        }
        return elems;
    }
    
    /*
     * Convenience overload: cuts s at every delim and returns the tokens in a
     * freshly created vector.
     */
    std::vector<std::string> split(const std::string &s, char delim) {
        std::vector<std::string> tokens;
        //delegate the actual work to the appending overload.
        split(s, delim, tokens);
        return tokens;
    }
    
    //These vectors are global and contain the string encoding of the nodes for
    //each fragment. mexFunction fills them by splitting its two input strings
    //on '|' and clears them again before it returns; node_distance reads them.
    std::vector<std::string> firstNodes;
    std::vector<std::string> sndNodes;
    //this table contains the node distances for each combination of nodes,
    //indexed as distanceTable[firstNodeIndex][sndNodeIndex]. An entry of -1
    //means the distance has not been calculated yet. The memory is allocated
    //with malloc and released with free inside mexFunction.
    int** distanceTable;
    
    /*
     * Builds the map from meta information index to meta information content
     * for one node encoding.
     *
     * nodeStr consists of entries separated by '*'; each entry has the form
     * "<index>#<content>". The entry is cut at its FIRST '#': the part in front
     * of it is parsed as the integer index, everything behind it (including any
     * further '#' characters) is stored as the content. Entries without a '#'
     * are ignored. An index that cannot be parsed as a number is stored under
     * key 0. If an index occurs more than once the last entry wins.
     *
     * Returns the resulting index -> content map (empty for an empty nodeStr).
     */
    std::map<int, std::string> constructMetaMap(std::string nodeStr) {
        //the map mapping meta information indices to the respective content.
        std::map<int, std::string> metaMap;

        //walk over the single meta information strings.
        std::istringstream metaInfoStream(nodeStr);
        std::string metaInfoStr;
        while (std::getline(metaInfoStream, metaInfoStr, '*')) {
            const std::string::size_type sepPos = metaInfoStr.find('#');
            if (sepPos == std::string::npos) {
                //no separator, so there is no complete entry to store.
                continue;
            }
            //parse the index in front of the separator; it stays 0 if there is
            //nothing numeric to read.
            int metaIdx = 0;
            std::istringstream idxStream(metaInfoStr.substr(0, sepPos));
            idxStream >> metaIdx;
            //only the first '#' separates index and content, so a '#' inside
            //the content can no longer create bogus entries.
            metaMap[metaIdx] = metaInfoStr.substr(sepPos + 1);
        }
        return metaMap;
    }
    
    //Number of entries in the middle score vector.
    const int MIDDLE_SCORENUM = 2;
    
    //node_distance adds middle_scores[0] for every keyword index that only the
    //first node has and middle_scores[1] for every keyword index that only the
    //second node has. The values are set by mexFunction.
    int middle_scores[MIDDLE_SCORENUM];
    
    /*
     * Emulates a call to meta alignment.
     * 
     * The node distance is defined as the sum over the distance between all meta
     * informations. If for a certain keyword no meta information exists in one of
     * the fragments a negative score is appended.
     *
     * firstNodeIndex - index into the global firstNodes vector.
     * sndNodeIndex   - index into the global sndNodes vector.
     *
     * Returns the score for this pair of nodes. The score is cached in the
     * global distanceTable, so a repeated call with the same pair returns the
     * stored value.
     * NOTE(review): -1 doubles as the "not calculated" marker, so a pair whose
     * real score is -1 is recalculated on every call.
     */
    int node_distance(unsigned int firstNodeIndex, unsigned int sndNodeIndex) {
    
        //check if the distance was already calculated.
        if (distanceTable[firstNodeIndex][sndNodeIndex] != -1) {
            return distanceTable[firstNodeIndex][sndNodeIndex];
        }
    
        //construct maps of keyword indices to meta information content.
        std::map<int, std::string> firstMetaMap = constructMetaMap(firstNodes[firstNodeIndex]);
        std::map<int, std::string> sndMetaMap = constructMetaMap(sndNodes[sndNodeIndex]);
    
        int node_distance_score = 0;
        //iterate over the first map.
        for (std::map<int, std::string>::const_iterator metaEntry = firstMetaMap.begin(); metaEntry != firstMetaMap.end(); ++metaEntry) {
            const int metaInfoIdx = metaEntry -> first;
            //if we don't have a value to that index in the second map, punish that.
            if (sndMetaMap.count(metaInfoIdx) == 0) {
                node_distance_score += middle_scores[0];
            } else {
                //otherwise do an alignment of the meta information.
                //and construct the input argument string array:
                //<first content>@<reversed second content> plus a NUL character.
                std::string sndMetaStr = sndMetaMap[metaInfoIdx];
                std::reverse(sndMetaStr.begin(), sndMetaStr.end());
    
                std::stringstream metaInput;
                metaInput << metaEntry -> second;
                metaInput << '@';
                metaInput << sndMetaStr;
                metaInput << '\0';
    
                char* argv[2];
                //fake program name, dummy string
                argv[0] = (char*) "meta";
                //actual input. str() returns a temporary std::string; binding it
                //to a const reference extends the lifetime of that temporary to
                //the end of this scope, so the pointer obtained from c_str()
                //stays valid for the calls below.
                const std::string& tmp = metaInput.str();
                argv[1] = const_cast<char*> (tmp.c_str());
    
                //call meta alignment.
                //NOTE(review): std::exit ends the whole process, which inside a
                //MEX function presumably includes the hosting MATLAB session -
                //confirm that this is intended.
                gapc::Opts opts;
                try {
                    opts.parse(2, argv);
                } catch (std::exception &e) {
                    std::cerr << "Exception: " << e.what() << '\n';
                    std::exit(1);
                }
                meta obj;
    
                try {
                    obj.init(opts);
                } catch (std::exception &e) {
                    std::cerr << "Exception: " << e.what() << '\n';
                    std::exit(1);
                }
                //NOTE(review): this is executed once per meta alignment; whether
                //gapc keeps the registered events around is not visible here -
                //worth checking when looking for growing memory usage.
                gapc::add_event("start");
    
                obj.cyk();
    
                int metaScore = obj.run();
                node_distance_score += metaScore;
            }
        }
        //iterate over the second map
        for (std::map<int, std::string>::const_iterator metaEntry = sndMetaMap.begin(); metaEntry != sndMetaMap.end(); ++metaEntry) {
            const int metaInfoIdx = metaEntry -> first;
            //if we don't have a value to that index in the first map, punish that.
            if (firstMetaMap.count(metaInfoIdx) == 0) {
                node_distance_score += middle_scores[1];
            }
            //otherwise do nothing: shared indices were already scored in the first loop.
        }
        //store the result in the table.
        distanceTable[firstNodeIndex][sndNodeIndex] = node_distance_score;
        //clear the maps (both are locals and are destroyed on return anyway).
        firstMetaMap.clear();
        sndMetaMap.clear();
    
        return node_distance_score;
    }
    
    //Expected lengths of the meta and nodes score vectors; mexFunction passes
    //them to extractScoreVector as the required number of entries.
    const int META_SCORENUM = 6;
    const int NODES_SCORENUM = 4;
    
    //Score tables read by meta_score and nodes_score, indexed by the integer
    //value of the respective score type.
    //NOTE(review): the default branch of mexFunction makes only four
    //assignments to meta_scores although META_SCORENUM is 6 - confirm that the
    //remaining entries are meant to keep whatever value they had before.
    int meta_scores[META_SCORENUM];
    int nodes_scores[NODES_SCORENUM];
    
    /*
     * Looks up the score stored in meta_scores for the given operation type.
     */
    int meta_score(meta_score_type type) {
        //the operation type doubles as the index into the score table.
        const int scoreIdx = static_cast<int>(type);
        return meta_scores[scoreIdx];
    }
    
    /*
     * Looks up the score stored in nodes_scores for the given operation type.
     */
    int nodes_score(nodes_score_type type) {
        //the operation type doubles as the index into the score table.
        const int scoreIdx = static_cast<int>(type);
        return nodes_scores[scoreIdx];
    }
    
    // Utility function for extracting string inputs
    
    std::string getMatlabString(const mxArray *prhs[], int strIndex) {
        const mxArray *strData = prhs[strIndex];
        int strLength = mxGetN(prhs[strIndex]) + 1;
        char *buf = mxArrayToString(strData);
        std::string s(buf);
        mxFree(buf);
        return s;
    }
    
    //Utility function for extracting the score vector.
    
    void extractScoreVector(const mxArray *prhs[], int vecIdx, int scorelength, int scoreVec[]) {
        //Declarations
        const mxArray *vecData;
        double *singleVals;
        int rowLen, colLen;
    
        //Copy input pointer
        vecData = prhs[vecIdx];
    
        //Get matrix
        singleVals = (double*) mxGetPr(vecData);
        rowLen = mxGetN(vecData);
        colLen = mxGetM(vecData);
    
        //we don't care if it is a column or row vector but it has to be a
        //SCORENUM x 1 vector.
        if ((rowLen == 1 && colLen == scorelength) || (rowLen == scorelength && colLen == 1)) {
            for (int i = 0; i < scorelength; i++) {
                scoreVec[i] = (int) singleVals[i];
            }
        } else {
            mexErrMsgTxt("The score vector has the wrong number of entries!");
        }
    }
    
    /*
     * Returns the number of decimal digits of number. Zero counts as one
     * digit; for a negative number the result is 0.
     */
    int getMagnitude(int number) {
        if (number == 0) {
            return 1;
        }
        //strip one decimal digit per round until nothing is left.
        int digits = 0;
        for (int rest = number; rest > 0; rest /= 10) {
            ++digits;
        }
        return digits;
    }
    

1 个答案:

答案 0 :(得分:2)

你在以下两条语句中泄漏了内存:

//get both string inputs.
std::string firstSeq(getMatlabString(prhs, 0));
std::string sndSeq(getMatlabString(prhs, 1));

getMatlabString为字符串分配内存,并返回指向已分配内存的指针。您复制了该指针所指向的字符串内容,但之后从未释放这块内存。这也解释了为什么clear mex_filename;能解决问题:MEX文件被卸载时,MATLAB会自动释放通过mxMalloc及其相关函数分配的内存。我会将getMatlabString函数重写为

/*
 * Copies the Matlab string stored in prhs[strIndex] into a std::string.
 *
 * The intermediate buffer of mxGetN(...) + 1 characters is obtained from
 * mxCalloc and owned by a unique_ptr whose deleter calls mxFree, so it is
 * handed back when the function is left.
 */
std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    typedef std::unique_ptr<char[], void(*)(char *)> MxCharBuffer;

    const mxArray *strData = prhs[strIndex];
    int strLength = mxGetN(strData) + 1;

    char *rawBuf = static_cast<char *>(mxCalloc(strLength, sizeof(char)));
    MxCharBuffer buf(rawBuf, [](char *p) { mxFree(p); });

    mxGetString(strData, buf.get(), strLength);
    std::string result(buf.get());
    return result;
}

如果您的编译器不支持unique_ptr和lambda表达式,则可以用vector<char>替换该部分。

另外,正如我在评论中提到的,我会将distanceTable改为vector<vector<int>> distanceTable;。调用vector::resize()把它设置为所需的大小;如果该变量必须是全局变量,则在使用完毕后将它与一个临时向量交换,以释放其内存。

std::vector<std::vector<int>>().swap(distanceTable);

使用上面提到的vector<char>代替unique_ptr的版本:

/*
 * Copies the Matlab string stored in prhs[strIndex] into a std::string.
 *
 * A std::vector<char> of mxGetN(...) + 1 zero characters serves as the
 * intermediate buffer for mxGetString, so no manual deallocation is needed.
 */
std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    const mxArray *strData = prhs[strIndex];
    const int bufLength = mxGetN(strData) + 1;

    //the buffer always holds at least one character, so front() is valid.
    std::vector<char> chars(bufLength, '\0');
    char *rawChars = &chars.front();
    mxGetString(strData, rawChars, bufLength);

    return std::string(rawChars);
}