dynamic programming for string comparisons

时间:2016-12-02 05:25:38

标签: c++ dynamic-programming

I am currently working on a problem that is making use of Levenshtein distance to calculate the optimal score between two strings.

I am successfully able to calculate the score for smaller strings. But when I try it with longer strings (exceeding length of more than 5000) i seem to be getting the wrong output. I am currently thinking if the issue has to do with memory allocation.

Below is my working

bool isVowel(char word){
    return word == 'a' || word == 'e' || word == 'i' || word == 'o' || word == 'u';
}

int calculateMismatch(char first, char second){
    int cost;

    if((first != second) && (isVowel(first))){
        cost = 3;
    }else if((first != second) && (isVowel(first) && isVowel(second))){
        cost = 1;
    }else if((first != second) && (isVowel(second))){
        cost = 3;
    }else{
        cost = 1;
    }

       return cost;
}

int getMinimum(int val1 , int val2, int val3){

if((val1 <= val2) && (val1 <= val3)){
    return val1;
}

if((val2 <= val1) && (val2 <= val3)){
    return val2;
}

if((val3 <= val1) && (val3 <= val2)){
    return val3;
}

return 0;

}

int calculatingScore(string str1, string str2){

int sizestr1 = str1.size();
int sizestr2 = str2.size();
int cost;

vector<int> initial(sizestr2 + 1);
vector<vector<int> > result(sizestr1 + 1, initial);

result[0][0] = 0;

for(int i = 1; i < sizestr1 + 1; i++){
    result[i][0] = i * 2;
}

for(int j = 1; j < sizestr2 + 1; j++){
    result[0][j] = j * 2;
}


for(int j = 1; j <= sizestr2 ; j++){
    for(int i = 1; i <= sizestr1 ; i++){
        if(str1[i - 1] == str2[j - 1]){
            cost = 0;
        }else{
            cost = calculateMismatch(str1[i - 1], str2[j - 1]);
        }

        int val1 = result[i - 1][j] + 2;
        int val2 = result[i][j-1] + 2;
        int val3 = result[i-1][j-1] + cost;


        result[i][j] = getMinimum(val1, val2, val3);
    }
}

int finalResult = result[sizestr1][sizestr2];

return finalResult;

}

1 个答案:

答案 0 :(得分:2)

我不知道Levenshtein距离但你的calculateMismatch函数似乎错了。这应该是

int calculateMismatch(char first, char second){
    int cost;

   if((first != second) && (isVowel(first) && isVowel(second)))
    {
        cost = 3;
    }else if((first != second) && (isVowel(first))){
        cost = 1;
    }else if((first != second) && (isVowel(second))){
        cost = 3;
    }else{
        cost = 1;
    }

       return cost;
}