连接trie路径的字符串记录

时间:2013-02-23 01:34:28

标签: c++ trie

我想连接结构通过trie的路径的记录 - 当使用不匹配的路径时,这是我的特里搜索功能:

void Trie::suggestCorrectionsHelper(nodeT *w, int index, string record, string final, double edits, Trie::MatchesT &matchSet, string gap_record, int level)
{   
    if (record.empty() || w == NULL ) {//tried everyChar
        matchSet.candidate = final+record;
        if ( matchSet.candidate.length() == record_delimiterFormat.size() ) { //check length and record exists
            updateSet(w, matchSet, edits,  gap_record );
        }
    } else {//look for end of node paths
        if(index == w->alpha.size() ) { return; //no more paths here
        } else {            
            //CASE A: self
            if (record[0] == w->alpha[index].token_char ) {//replace char with self so no edit distance
                suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits, matchSet, gap_record, level+1); //follow char, no edit

            //CASE B: follow new path
            } else {
                //CASE B1: wilds
                if (record[0] == '*' && (final.length() + record.length() ) != record_delimiterFormat.size() ) { 
                    suggestCorrectionsHelper(w->alpha[index].next, 0, '*'+record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1); //wilds, keep adding chars

                //CASE B2: no wilds
                } else {
                    suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1);//follow path
                }
            }
            //CASE C: try next path - place outside else to allow finding alternative char even when self path is valid
            suggestCorrectionsHelper(w, index+1, record, final, edits, matchSet, gap_record, level);//increment and check next path
        }
    }
}

路径完成后,会添加到:

//write to struct and then adds to struct vector 
void Trie::updateSet(nodeT *w, Trie::MatchesT & matchSet, double &edits, string gap_record)
{   
    CorrectionT cs; // struct with attributes of match n
    cs.suggestedRecord = matchSet.candidate; // add attributes to struct
    cs.editDistance = edits;
    cs.gap = gap_record;
    matchSet.candidateStack.push_back(cs);
}

我预计会看起来像0,1,3,5,12

但我得到0,0,0,0,0,0,2,2,2,3,3,4,12

它不能通过多个同一级别,所以通过递归我记录路径,我想单独记录每条路径;如何?

trie类看起来像这样:

void Trie::AddFirstNode(){ // run-once, initial condition of first node
    nodeT *tempNode = new nodeT;
    root = tempNode;
}

void Trie::InsertNode(nodeT *w, ParseT & packet, string codeSoFar) // add new char
{
    for (unsigned int i = 0; i <= w->alpha.size(); i++) { // loop and insert tokens in sorted vector
        if (i == w->alpha.size() || codeSoFar[0] < w->alpha[i].token_char) { //position 

            //create new TokenT
            tokenT *tempChar = new tokenT;
            tempChar->next = NULL;
            tempChar->token_char = codeSoFar[0];

            nodeT *tempLeaf = new nodeT; //create new nodeT
            tempChar->next = tempLeaf; //link TokenT with its nodeT

            AddRecord(tempLeaf, packet, codeSoFar.substr(1)); //add next char in record, if last char AddRecord will terminate

            w->alpha.insert(w->alpha.begin()+i, *tempChar);
            return; 
        }
    }
}


//function to add record to trie
void Trie::AddRecord(nodeT *w, ParseT & packet, string codeSoFar)
{
    if (codeSoFar.empty() ) {   //condition 0: record's last char
        return;

    } else { //keep parsing down record path
        for (unsigned int i = 0; i < w->alpha.size(); i++) {    
            if (codeSoFar[0] == w->alpha[i].token_char) {
                return AddRecord(w->alpha[i].next, packet, codeSoFar.substr(1)); // condition 2: char exists
            }
        }
        InsertNode(w, packet, codeSoFar); //condition 3: no existing char
    }
}

结构

struct MatchesT {
    string candidate;

    double testDistance; //try edit distance

    list<CorrectionT> candidateStack; //
};

输出

void PrintCorrections(Trie & tri, Trie::MatchesT & matchSet, ofstream& myfile)
{
while (!matchSet.candidateStack.empty() ) {
count++;
Trie::CorrectionT corr = matchSet.candidateStack.back();
matchSet.candidateStack.pop_back();

myfile << corr.suggestedRecord << ","  << fixed << setprecision(2) << corr.editDistance << "," << corr.gap << "\n";
}

1 个答案:

答案 0 :(得分:0)

我发现了一个更好的解决方案,而不是将连接添加为+ =然后传递为参数,只需将参数传递给gap_record +;保留当前节点内存,并且路径上的下一个节点具有连接。

void Trie::suggestCorrectionsHelper(nodeT *w, int index, string record, string final, double edits, Trie::MatchesT &matchSet, string gap_record, int level)
{   
    if (record.empty() || w == NULL ) {//tried everyChar
        matchSet.candidate = final+record;
        if ( matchSet.candidate.length() == record_delimiterFormat.size() ) { //check length and record exists
            updateSet(w, matchSet, edits,  gap_record );
        }
    } else {//look for end of node paths
        if(index == w->alpha.size() ) { return; //no more paths here
        } else {            
            //CASE A: self
            if (record[0] == w->alpha[index].token_char ) {//replace char with self so no edit distance
                suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits, matchSet, gap_record, level+1); //follow char, no edit

            //CASE B: follow new path
            } else {
                //CASE B1: wilds
                if (record[0] == '*' && (final.length() + record.length() ) != record_delimiterFormat.size() ) { 
                    suggestCorrectionsHelper(w->alpha[index].next, 0, '*'+record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+(IntegerToString(level)+","), level+1); //wilds, keep adding chars

                //CASE B2: no wilds
                } else {
                    suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+(IntegerToString(level)+","), level+1);//follow path
                }
            }
            //CASE C: try next path - place outside else to allow finding alternative char even when self path is valid
            suggestCorrectionsHelper(w, index+1, record, final, edits, matchSet, gap_record, level);//increment and check next path
        }
    }
}