[注意:我事先搜查过,无法找到解决所有子序列的LCS问题的建议。]
我在为“最长公共子序列”(LCS)问题编写解法时遇到了困难:我需要返回两个输入字符串的所有最长公共子序列。我查看了 Wikipedia 页面并尝试实现其中的伪代码,但我的“backtrackAll”方法出了问题。我相信下面的 LCS 矩阵计算是正确的,但“backtrackAll”方法返回的是空集。能否提示一下我哪里做错了?
/**
 * Demo entry point: builds the LCS length table for two sample strings and
 * asks backtrackAll for the subsequences reachable from the bottom-right cell.
 */
public static void main(String[] args) {
    final String first = "AGCAT";
    final String second = "GAC";
    final int[][] lengths = computeMatrix(first, second);
    // Backtracking starts at the cell that covers both complete strings.
    HashSet<String> subsequences =
            backtrackAll(lengths, first, second, first.length(), second.length());
    //more stuff would go here...
}
/**
 * Builds the LCS length table for two strings.
 *
 * Cell [r][c] holds the length of the longest common subsequence of the first
 * r characters of s1 and the first c characters of s2. Row 0 and column 0
 * keep their default value of 0.
 *
 * @param s1 first input string
 * @param s2 second input string
 * @return a table with (s1.length() + 1) rows and (s2.length() + 1) columns
 */
private static int[][] computeMatrix(String s1, String s2) {
    final int rows = s1.length();
    final int cols = s2.length();
    int[][] table = new int[rows + 1][cols + 1];
    for (int row = 1; row <= rows; row++) {
        final char rowChar = s1.charAt(row - 1);
        for (int col = 1; col <= cols; col++) {
            // Equal characters extend the diagonal neighbour; otherwise carry
            // over the better of the left and upper neighbours.
            table[row][col] = (rowChar == s2.charAt(col - 1))
                    ? table[row - 1][col - 1] + 1
                    : Math.max(table[row][col - 1], table[row - 1][col]);
        }
    }
    return table;
}
/**
 * Walks the LCS length table backwards from cell (i, j), collecting strings.
 *
 * NOTE: this is the version the question asks about. The base case returns an
 * empty set, the match branch only extends strings that are already in the
 * recursive result, and the mismatch branch only merges recursive results.
 * No string is ever inserted, so the returned set is always empty.
 *
 * @param C  LCS length table for s1 and s2
 * @param s1 first input string
 * @param s2 second input string
 * @param i  number of leading characters of s1 under consideration
 * @param j  number of leading characters of s2 under consideration
 * @return the collected set (always empty, see note above)
 */
private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
    HashSet<String> result = new HashSet<String>();
    if (i == 0 || j == 0) {
        return result;
    }

    final char matched = s1.charAt(i - 1);
    if (matched == s2.charAt(j - 1)) {
        // Append the shared character to everything found for the shorter prefixes.
        for (String prefix : backtrackAll(C, s1, s2, i - 1, j - 1)) {
            result.add(prefix + matched);
        }
        return result;
    }

    final int left = C[i][j - 1];
    final int up = C[i - 1][j];
    // On a tie both directions are explored.
    if (left >= up) {
        result.addAll(backtrackAll(C, s1, s2, i, j - 1));
    }
    if (up >= left) {
        result.addAll(backtrackAll(C, s1, s2, i - 1, j));
    }
    return result;
}
答案 0 :(得分:2)
我稍微修改了一下,现在可以正常工作了。您还应该考虑递归返回空 HashSet 的情况:此时必须把最后一个匹配的字符加入结果集。
/**
 * Collects every longest common subsequence of the first i characters of s1
 * and the first j characters of s2 by backtracking through the LCS length
 * table.
 *
 * @param C  LCS length table for s1 and s2 (C[i][j] is the LCS length of the
 *           first i characters of s1 and the first j characters of s2)
 * @param s1 first input string
 * @param s2 second input string
 * @param i  number of leading characters of s1 under consideration
 * @param j  number of leading characters of s2 under consideration
 * @return all distinct longest common subsequences; empty when the prefixes
 *         have no character in common
 */
private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
    if (i == 0 || j == 0) {
        return new HashSet<String>();
    }
    if (s1.charAt(i - 1) == s2.charAt(j - 1)) {
        final char matched = s1.charAt(i - 1);
        HashSet<String> shorter = backtrackAll(C, s1, s2, i - 1, j - 1);
        HashSet<String> extended = new HashSet<String>();
        for (String prefix : shorter) {
            extended.add(prefix + matched);
        }
        // Seed with the matched character only when nothing precedes it.
        // Adding it unconditionally would also return subsequences shorter
        // than the LCS (e.g. "B" next to "AB").
        if (shorter.isEmpty()) {
            extended.add(String.valueOf(matched));
        }
        return extended;
    }

    HashSet<String> result = new HashSet<String>();
    // Follow whichever neighbour keeps the LCS length; on a tie follow both.
    if (C[i][j - 1] >= C[i - 1][j]) {
        result.addAll(backtrackAll(C, s1, s2, i, j - 1));
    }
    if (C[i - 1][j] >= C[i][j - 1]) {
        result.addAll(backtrackAll(C, s1, s2, i - 1, j));
    }
    return result;
}
答案 1 :(得分:1)
由于这是“家庭作业”,这里有几个提示。
确保您了解已编码的算法。这可能是找出实施问题的最重要的一步。
尝试使用调试器来弄清楚发生了什么。比较您认为应该发生的事情与实际发生的事情。
尝试在代码中添加一些 assert 语句,以检查您认为应当始终成立的前置条件、后置条件和不变量。(运行时使用 java -ea ... 以启用断言。)
坚持正常的Java命名约定。变量名以小写字母开头。变量名称中没有下划线。
答案 2 :(得分:1)
第二个答案会输出所有公共子序列,而不仅仅是最长的;我的版本是正确的。
/**
 * Collects every longest common subsequence of the first i characters of s1
 * and the first j characters of s2 by backtracking through the LCS length
 * table.
 *
 * @param C  LCS length table for s1 and s2
 * @param s1 first input string
 * @param s2 second input string
 * @param i  number of leading characters of s1 under consideration
 * @param j  number of leading characters of s2 under consideration
 * @return all distinct longest common subsequences; a set holding only the
 *         empty string when the prefixes have nothing in common
 */
private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
    HashSet<String> result = new HashSet<String>();
    if (i == 0 || j == 0) {
        // The empty string is the seed that the match branch extends.
        result.add("");
        return result;
    }

    final char tail = s1.charAt(i - 1);
    if (tail == s2.charAt(j - 1)) {
        for (String prefix : backtrackAll(C, s1, s2, i - 1, j - 1)) {
            result.add(prefix + tail);
        }
        return result;
    }

    final int left = C[i][j - 1];
    final int up = C[i - 1][j];
    // Follow whichever neighbour keeps the LCS length; on a tie follow both.
    if (left >= up) {
        result.addAll(backtrackAll(C, s1, s2, i, j - 1));
    }
    if (up >= left) {
        result.addAll(backtrackAll(C, s1, s2, i - 1, j));
    }
    return result;
}
答案 3 :(得分:0)
以下是C#中的两个版本,以获取最长的公共子序列(您可以参考:http://codingworkout.blogspot.com/2014/07/longest-common-subsequence.html)
1. 基于缓存表的回溯:缓存表中保存的是各前缀的最长公共子序列的长度。
2. 缓存表不再保存长度,而是直接保存各前缀的最长公共子序列本身。
版本1(基于lcs前缀长度的回溯):
/// <summary>
/// Backtracks through a table of LCS lengths and returns every longest common
/// subsequence of the first <paramref name="aIndex"/> characters of
/// <paramref name="A"/> and the first <paramref name="bIndex"/> characters of
/// <paramref name="B"/>.
/// </summary>
/// <param name="A">First input string.</param>
/// <param name="B">Second input string.</param>
/// <param name="aIndex">Number of leading characters of A under consideration.</param>
/// <param name="bIndex">Number of leading characters of B under consideration.</param>
/// <param name="DP_LCS_AllPrefixes_Cache">Table of LCS lengths indexed by [aIndex][bIndex].</param>
/// <returns>The distinct subsequences, or null when the table entry is 0.</returns>
string[] GetLongestCommonSubsequences(string A, string B, int aIndex, int bIndex,
    int[][] DP_LCS_AllPrefixes_Cache)
{
    // A zero entry means the two prefixes share nothing; this is reported as null.
    if (DP_LCS_AllPrefixes_Cache[aIndex][bIndex] == 0)
    {
        return null;
    }

    char lastOfA = A[aIndex - 1];
    if (lastOfA == B[bIndex - 1])
    {
        string tail = lastOfA.ToString();
        string[] shorter = this.GetLongestCommonSubsequences(
            A, B, aIndex - 1, bIndex - 1, DP_LCS_AllPrefixes_Cache);
        // With nothing before it, the shared character is the whole subsequence.
        return shorter == null
            ? new string[] { tail }
            : shorter.Select(prefix => prefix + tail).ToArray();
    }

    int above = DP_LCS_AllPrefixes_Cache[aIndex - 1][bIndex];
    int beside = DP_LCS_AllPrefixes_Cache[aIndex][bIndex - 1];

    if (above > beside)
    {
        return this.GetLongestCommonSubsequences(
            A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache);
    }

    if (above < beside)
    {
        return this.GetLongestCommonSubsequences(
            A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache);
    }

    // Tie: both directions lead to subsequences of maximal length, so merge them.
    string[] fromAbove = this.GetLongestCommonSubsequences(
        A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache);
    string[] fromBeside = this.GetLongestCommonSubsequences(
        A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache);
    return fromAbove.Union(fromBeside).ToArray();
}
**其中递归函数的调用者是**
/// <summary>
/// Returns every longest common subsequence of <paramref name="A"/> and
/// <paramref name="B"/> using a table of LCS lengths.
/// </summary>
/// <param name="A">First input string.</param>
/// <param name="B">Second input string.</param>
/// <param name="DP_LCS_AllPrefixes_Cache">Table of LCS lengths indexed by [aIndex][bIndex].</param>
/// <returns>Whatever the recursive overload returns for the full strings.</returns>
string[] GetLongestCommonSubsequences(string A, string B, int[][] DP_LCS_AllPrefixes_Cache)
{
    // Backtracking starts at the cell that covers both complete strings.
    return this.GetLongestCommonSubsequences(
        A, B, A.Length, B.Length, DP_LCS_AllPrefixes_Cache);
}
版本2 - 缓存捕获所有前缀的lcs
/// <summary>
/// One cell of the subsequence table: the LCS length of a pair of prefixes
/// together with the subsequences of that length.
/// </summary>
class LCS_Prefix
{
    /// <summary>LCS length; starts at 0.</summary>
    public int Length;

    /// <summary>Subsequences of that length; starts as null.</summary>
    public string[] Subsequences;
}
/// <summary>
/// Builds a table in which cell [i][j] holds both the LCS length and the
/// subsequences of that length for the first i characters of
/// <paramref name="A"/> and the first j characters of <paramref name="B"/>.
/// </summary>
/// <param name="A">First input string; validated by ThrowIfNullOrWhiteSpace.</param>
/// <param name="B">Second input string; validated by ThrowIfNullOrWhiteSpace.</param>
/// <returns>A table with (A.Length + 1) rows and (B.Length + 1) columns.</returns>
LCS_Prefix[][] LCS_OfAllPrefixes_Subsequences(string A, string B)
{
    A.ThrowIfNullOrWhiteSpace("a");
    B.ThrowIfNullOrWhiteSpace("b");

    int rowCount = A.Length + 1;
    int columnCount = B.Length + 1;

    // Every cell starts as a fresh LCS_Prefix (Length 0, Subsequences null).
    LCS_Prefix[][] table = new LCS_Prefix[rowCount][];
    for (int row = 0; row < rowCount; row++)
    {
        table[row] = new LCS_Prefix[columnCount];
        for (int column = 0; column < columnCount; column++)
        {
            table[row][column] = new LCS_Prefix();
        }
    }

    for (int row = 1; row < rowCount; row++)
    {
        char current = A[row - 1];
        for (int column = 1; column < columnCount; column++)
        {
            LCS_Prefix cell = table[row][column];

            if (current == B[column - 1])
            {
                // Matching characters extend every subsequence of the diagonal cell.
                LCS_Prefix diagonal = table[row - 1][column - 1];
                cell.Length = diagonal.Length + 1;
                cell.Subsequences = diagonal.Subsequences == null
                    ? new string[] { current.ToString() }
                    : diagonal.Subsequences.Select(prefix => prefix + current).ToArray();
                continue;
            }

            LCS_Prefix above = table[row - 1][column];
            LCS_Prefix beside = table[row][column - 1];

            if (above.Length > beside.Length)
            {
                cell.Length = above.Length;
                cell.Subsequences = above.Subsequences;
                continue;
            }

            if (above.Length < beside.Length)
            {
                cell.Length = beside.Length;
                cell.Subsequences = beside.Subsequences;
                continue;
            }

            // Tie: sanity-check that both neighbours agree, then merge them.
            Assert.IsTrue(above.Length == beside.Length);
            Assert.IsTrue((above.Subsequences == null && beside.Subsequences == null)
                || (above.Subsequences != null && beside.Subsequences != null));
            bool aboveHasSubsequences = above.Subsequences != null;
            if (aboveHasSubsequences)
            {
                Assert.IsTrue(above.Subsequences.All(
                    fromAbove => beside.Subsequences.Any(
                        fromBeside => (fromBeside.Length == fromAbove.Length))));
            }

            cell.Length = above.Length;
            if (aboveHasSubsequences)
            {
                cell.Subsequences = above.Subsequences
                    .Union(beside.Subsequences).ToArray();
            }
            else
            {
                Assert.IsNull(beside.Subsequences);
            }
        }
    }

    return table;
}
单元测试
/// <summary>
/// Checks the length table, the backtracking search and the subsequence table
/// against three pairs of input strings.
/// </summary>
[TestMethod]
public void LCS_Tests()
{
    // Pair 1: LCS length 2 with three distinct subsequences.
    string A = "AGCAT", B = "GAC";
    var lengthTable = this.LCS_OfAllPrefixes_Length(A, B);
    Assert.IsTrue(lengthTable[A.Length][B.Length] == 2);
    var sequences = this.GetLongestCommonSubsequences(A, B, lengthTable);
    Assert.IsNotNull(sequences);
    foreach (string expected in new[] { "AC", "GC", "GA" })
    {
        Assert.IsTrue(sequences.Any(s => expected.Equals(s)));
    }
    var prefixTable = this.LCS_OfAllPrefixes_Subsequences(A, B);
    Assert.IsTrue(prefixTable[A.Length][B.Length].Length == 2);
    foreach (string expected in new[] { "AC", "GC", "GA" })
    {
        Assert.IsTrue(prefixTable[A.Length][B.Length].Subsequences
            .Any(s => expected.Equals(s)));
    }

    // Pair 2: LCS length 3.
    A = "ABCDGH";
    B = "AEDFHR";
    lengthTable = this.LCS_OfAllPrefixes_Length(A, B);
    Assert.IsTrue(lengthTable[A.Length][B.Length] == 3);
    sequences = this.GetLongestCommonSubsequences(A, B, lengthTable);
    Assert.IsNotNull(sequences);
    Assert.IsTrue(sequences.Any(s => "ADH".Equals(s)));
    prefixTable = this.LCS_OfAllPrefixes_Subsequences(A, B);
    Assert.IsTrue(prefixTable[A.Length][B.Length].Length == 3);
    Assert.IsTrue(prefixTable[A.Length][B.Length].Subsequences
        .Any(s => "ADH".Equals(s)));

    // Pair 3: LCS length 4.
    A = "AGGTAB";
    B = "GXTXAYB";
    lengthTable = this.LCS_OfAllPrefixes_Length(A, B);
    Assert.IsTrue(lengthTable[A.Length][B.Length] == 4);
    sequences = this.GetLongestCommonSubsequences(A, B, lengthTable);
    Assert.IsNotNull(sequences);
    Assert.IsTrue(sequences.Any(s => "GTAB".Equals(s)));
    prefixTable = this.LCS_OfAllPrefixes_Subsequences(A, B);
    Assert.IsTrue(prefixTable[A.Length][B.Length].Length == 4);
    Assert.IsTrue(prefixTable[A.Length][B.Length].Subsequences
        .Any(s => "GTAB".Equals(s)));
}