我有以下问题要解决:有两个任意长度、任意内容的字符串。我需要找出同时出现在这两个字符串中的、长度最大的所有有序序列(即所有最长公共子序列)。
示例1: 输入:“a1b2c3”“1a2b3c” 输出:“123”“12c”“1b3”“1bc”“a23”“a2c”“ab3”“abc”
示例2: 输入:“cadb”“abcd” 输出:“ab”“ad”“cd”
我用最直接的方式实现:两层循环加递归,然后删除重复项,以及那些只是更长结果的一部分的结果(例如序列“abc”包含序列“ab”、“ac”和“bc”,所以我把这些过滤掉)。
// "match" argument here used as temporary buffer
// Enumerates common subsequences of a_str1[a_pos1..] and a_str2[a_pos2..].
//
// For every pair of equal characters at or after the given start positions,
// the character is appended to `match` and the search continues just past
// that pair in both strings. The accumulated `match` is stored in `matches`
// whenever either string has no characters left after the pair, or when a
// call finds no equal pair at all. `match` is restored to its incoming
// contents before the function returns.
void match_recursive(set<string> &matches, string &match, const string &a_str1, const string &a_str2, size_t a_pos1, size_t a_pos2)
{
    const size_t len1 = a_str1.length();
    const size_t len2 = a_str2.length();
    bool extended = false;

    for (size_t p1 = a_pos1; p1 < len1; ++p1)
    {
        const char ch = a_str1[p1];
        for (size_t p2 = a_pos2; p2 < len2; ++p2)
        {
            if (ch != a_str2[p2])
                continue;

            extended = true;
            match.push_back(ch);

            // Recurse only while both strings still have characters after this pair.
            const bool more_in_both = (p1 + 1 < len1) && (p2 + 1 < len2);
            if (more_in_both)
                match_recursive(matches, match, a_str1, a_str2, p1 + 1, p2 + 1);
            else
                matches.emplace(match);

            match.pop_back();
        }
    }

    // No equal pair from here on: the prefix built so far is itself a result.
    if (!extended)
        matches.emplace(match);
}
此函数可以解决问题,但其复杂度不可接受:对于较长的输入,它在我们的机器上耗时过长(原文此处的具体示例和耗时数据已在提取过程中损坏,无法恢复)。我认为这个问题应该有某种简单的算法,但不知为何我在网上一直找不到。
你们能指出我正确的方向吗?
答案 0 :(得分:2)
查找“最长公共子序列(LCS)”问题(例如 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem ),并了解动态规划解法是如何求出两个序列的LCS的:它从两个序列各自第一个字符的LCS开始,逐步为越来越长的前缀对构建LCS解,从而高效地得到最终解。你唯一需要做的修改是:在根据先前前缀对的LCS解推导当前前缀对的LCS时,需要保存先前前缀对的所有LCS字符串,然后把这些LCS字符串集合(可能要追加一个字符)合并为当前前缀对的完整LCS字符串集合。这样就能有效地解决你的问题。还可以做得更高效:先只求出一个LCS并得到整体LCS长度,然后找出位于得到该LCS长度的计算路径上的所有前缀对,再回头只针对这些前缀对重复动态规划迭代,这一次按我前面描述的方式记录所有可能的LCS序列。
答案 1 :(得分:0)
听起来你是想找出两个字符串之间的相似度?多年前我在网上某处找到了这段代码并稍作修改(抱歉,我已无法注明出处),之后经常使用。它运行得非常快(至少对字符串而言)。你可能需要根据自己的需要进行修改。抱歉,这是 VB 代码。
Private Shared piScore As Integer
''' <summary>
''' Scores how similar two strings are, ignoring case.
''' Both inputs are upper-cased and passed through ReplaceSpecial
''' before they are compared.
''' </summary>
''' <param name="psStr1">First string to compare</param>
''' <param name="psStr2">Second string to compare</param>
''' <returns>0-100 (0 = not at all similar, 100 = identical)</returns>
''' <remarks>
''' The score is accumulated in the shared field piScore by FindCommon.
''' Returns 0 when either argument is Nothing or is empty after
''' normalization.
''' </remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    ' A missing input cannot be compared.
    If psStr1 Is Nothing OrElse psStr2 Is Nothing Then
        Return 0
    End If

    ' Normalize both strings: upper case, then filtered by ReplaceSpecial.
    psStr1 = ReplaceSpecial(psStr1.ToUpper)
    psStr2 = ReplaceSpecial(psStr2.ToUpper)

    ' Nothing left to compare in at least one of the strings.
    If psStr1.Trim = "" OrElse psStr2.Trim = "" Then Return 0

    ' Identical after normalization.
    If psStr1 = psStr2 Then Return 100

    ' Reset the cumulative score, then let FindCommon add up the lengths
    ' of the common substrings it finds.
    piScore = 0
    FindCommon(psStr1, psStr2)

    ' Express the score as a percentage of the maximum score, which is the
    ' average length of the two strings.
    Dim iTotalLength As Integer = Len(psStr1) + Len(psStr2)
    Return piScore * 200 / iTotalLength
End Function
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Finds the longest common substring (two characters or longer) of psS1
''' and psS2, adds its length to the shared field piScore, then repeats the
''' search recursively on the portions to the left and to the right of that
''' substring in both strings.
''' </remarks>
''' <param name="psS1">String that is searched</param>
''' <param name="psS2">String whose substrings are looked up in psS1</param>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    Dim iLongest As Integer = 0
    Dim iStartPos1 As Integer = 0
    Dim iStartPos2 As Integer = 0

    ' Walk over every suffix of psS2 that is still longer than the best
    ' match found so far.
    Dim sHoldStr As String = psS2
    Do While Len(sHoldStr) > iLongest
        ' Shrink the candidate from the right until it occurs in psS1.
        ' Single characters are never counted as a match.
        Dim sTestStr As String = sHoldStr
        Do While Len(sTestStr) > 1
            Dim iJ As Integer = InStr(psS1, sTestStr)
            If iJ > 0 Then
                If Len(sTestStr) > iLongest Then
                    ' New longest match: remember its length and its
                    ' 1-based position in both strings.
                    iLongest = Len(sTestStr)
                    iStartPos1 = iJ
                    iStartPos2 = InStr(psS2, sTestStr)
                End If
                ' Shorter prefixes of this candidate cannot do better.
                Exit Do
            End If
            ' Not found: drop the final character and try again.
            sTestStr = Left(sTestStr, Len(sTestStr) - 1)
        Loop
        ' Drop the first character and repeat with the next suffix.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
    Loop

    ' Add the length of the common substring to the cumulative score.
    piScore = piScore + iLongest

    ' Nothing in common: there is no match to split the strings around.
    If iLongest = 0 Then Return

    ' Left-hand portions. A further match needs at least two characters on
    ' the left of the match in BOTH strings, i.e. a start position of 3 or
    ' more. (The previous "> 3" test skipped two-character left portions,
    ' so e.g. "AB" in "ABCDEF" / "ABXCDEF" was never counted.)
    If iStartPos1 > 2 AndAlso iStartPos2 > 2 Then
        Dim sLeftStr1 As String = Left(psS1, iStartPos1 - 1)
        Dim sLeftStr2 As String = Left(psS2, iStartPos2 - 1)
        If sLeftStr1.Trim <> "" AndAlso sLeftStr2.Trim <> "" Then
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    End If

    ' Right-hand portions: everything after the match in each string.
    Dim sRightStr1 As String = Mid(psS1, iStartPos1 + iLongest)
    Dim sRightStr2 As String = Mid(psS2, iStartPos2 + iLongest)
    If sRightStr1.Trim <> "" AndAlso sRightStr2.Trim <> "" Then
        FindCommon(sRightStr1, sRightStr2)
    End If
End Sub
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Returns a copy of sString that keeps only the characters whose Asc code
''' lies in 48-57 (digits 0-9) or 65-90 (upper-case A-Z); every other
''' character is dropped.
''' </remarks>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    Dim sKept As String = ""
    For iIndex As Integer = 0 To sString.Length - 1
        Dim iCode As Integer = Asc(sString.Chars(iIndex))
        Select Case iCode
            Case 48 To 57, 65 To 90
                sKept &= Chr(iCode)
        End Select
    Next
    Return sKept
End Function
只需调用Similar函数,就会得到0到100之间的结果。
希望这有帮助
答案 2 :(得分:0)
以下是动态规划解法的代码。我用你给出的例子测试过。我以前解决过LCS问题,但这是第一次把所有LCS都打印出来。
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <set>
#include <string>
#include <vector>
using namespace std;
#define MAX_LENGTH 100
// Computes the longest common subsequence (LCS) of two C strings with a
// bottom-up dynamic-programming table.
//
// Side effect: prints every distinct LCS string to stdout, each followed by
// a tab, then a newline. When either input is empty only the newline is
// printed.
//
// Both tables are sized from the actual input lengths, so inputs of any
// length are handled (the previous fixed-size table was indexed out of
// bounds for inputs of MAX_LENGTH characters or more).
//
// @param a  first NUL-terminated string
// @param b  second NUL-terminated string
// @return   length of the longest common subsequence of a and b
int lcs(const char* a, const char* b)
{
    typedef std::set<std::string> lcs_set;

    const std::size_t row = std::strlen(a) + 1;
    const std::size_t column = std::strlen(b) + 1;

    // matrix[i][j]: LCS length of the first i chars of a and the first j chars of b.
    std::vector<std::vector<int> > matrix(row, std::vector<int>(column, 0));
    // s_matrix[i][j]: every distinct LCS string for that same pair of prefixes.
    std::vector<std::vector<lcs_set> > s_matrix(row, std::vector<lcs_set>(column));

    // An empty prefix has exactly one LCS with anything: the empty string.
    for (std::size_t j = 0; j < column; ++j)
        s_matrix[0][j].insert("");
    for (std::size_t i = 0; i < row; ++i)
        s_matrix[i][0].insert("");

    // Bottom-up calculation.
    for (std::size_t i = 1; i < row; ++i)
    {
        for (std::size_t j = 1; j < column; ++j)
        {
            lcs_set& cell = s_matrix[i][j];
            if (a[i - 1] == b[j - 1])
            {
                // Matching characters extend every LCS of the two shorter prefixes.
                matrix[i][j] = matrix[i - 1][j - 1] + 1;
                const lcs_set& diagonal = s_matrix[i - 1][j - 1];
                for (lcs_set::const_iterator it = diagonal.begin(); it != diagonal.end(); ++it)
                    cell.insert(*it + a[i - 1]);
            }
            else
            {
                // No match: inherit from whichever neighbour is longer, or
                // from both when they tie.
                const int left = matrix[i][j - 1];
                const int up = matrix[i - 1][j];
                matrix[i][j] = (left > up) ? left : up;
                if (left >= up)
                    cell.insert(s_matrix[i][j - 1].begin(), s_matrix[i][j - 1].end());
                if (up >= left)
                    cell.insert(s_matrix[i - 1][j].begin(), s_matrix[i - 1][j].end());
            }
        }
    }

    const int lcs_length = matrix[row - 1][column - 1];

    // The bottom-right cell already holds every LCS of the full strings, so
    // no scan over the other cells is needed. Nothing is listed when either
    // input is empty, matching the output of the earlier full-table scan.
    // Comment out this printing if only the length is wanted.
    if (row > 1 && column > 1)
    {
        const lcs_set& result_set = s_matrix[row - 1][column - 1];
        for (lcs_set::const_iterator it = result_set.begin(); it != result_set.end(); ++it)
            std::printf("%s\t", it->c_str());
    }
    std::printf("\n");

    return lcs_length;
}
// Reads whitespace-separated pairs of strings from stdin until a complete
// pair can no longer be read. For each pair, lcs() prints the LCS strings
// and this loop prints the LCS length.
int main()
{
    char buf1[MAX_LENGTH], buf2[MAX_LENGTH];

    // Build "%<N>s %<N>s" with N = MAX_LENGTH - 1 so scanf can never write
    // past the end of either buffer (one byte is left for the terminator).
    char format[32];
    sprintf(format, "%%%ds %%%ds", MAX_LENGTH - 1, MAX_LENGTH - 1);

    // Require both tokens: a lone trailing token makes scanf return 1, and
    // buf2 would then hold stale or uninitialized data.
    while(scanf(format, buf1, buf2) == 2)
    {
        printf("length is: %d\n", lcs(buf1, buf2) );
    }
    return 0;
}