我在C#
中编写了以下代码,用于获取由use给出的两个文本的最长公共子序列的长度,但它不适用于大字符串。请你帮助我好吗。我真的很困惑。
public Form1()
{
InitializeComponent();
}
public int lcs(char[] s1, char[] s2, int s1size, int s2size)
{
if (s1size == 0 || s2size == 0)
{
return 0;
}
else
{
if (s1[s1size - 1] == s2[s2size - 1])
{
return (lcs(s1, s2, s1size - 1, s2size - 1) + 1);
}
else
{
int x = lcs(s1, s2, s1size, s2size - 1);
int y = lcs(s1, s2, s1size - 1, s2size);
if (x > y)
{
return x;
}
else
return y;
}
}
}
private void button1_Click(object sender, EventArgs e)
{
string st1 = textBox2.Text.Trim(' ');
string st2 = textBox3.Text.Trim(' ');
char[] a = st1.ToCharArray();
char[] b = st2.ToCharArray();
int s1 = a.Length;
int s2 = b.Length;
textBox1.Text = lcs(a, b, s1, s2).ToString();
}
答案 0 :(得分:8)
这里使用的是递归方法。因此,如您所述,它会导致程序出现性能问题。
使用动态编程方法,而不是递归。
这是C#代码。
public static void LCS(char[] str1, char[] str2)
{
int[,] l = new int[str1.Length, str2.Length];
int lcs = -1;
string substr = string.Empty;
int end = -1;
for (int i = 0; i < str1.Length; i++)
{
for (int j = 0; j < str2.Length; j++)
{
if (str1[i] == str2[j])
{
if (i == 0 || j == 0)
{
l[i, j] = 1;
}
else
l[i, j] = l[i - 1, j - 1] + 1;
if (l[i, j] > lcs)
{
lcs = l[i, j];
end = i;
}
}
else
l[i, j] = 0;
}
}
for (int i = end - lcs + 1; i <= end; i++)
{
substr += str1[i];
}
Console.WriteLine("Longest Common SubString Length = {0}, Longest Common Substring = {1}", lcs, substr);
}
答案 1 :(得分:1)
以下是如何在C#中找到最长公共子字符串的解决方案:
public static string GetLongestCommonSubstring(params string[] strings)
{
var commonSubstrings = new HashSet<string>(strings[0].GetSubstrings());
foreach (string str in strings.Skip(1))
{
commonSubstrings.IntersectWith(str.GetSubstrings());
if (commonSubstrings.Count == 0)
return string.Empty;
}
return commonSubstrings.OrderByDescending(s => s.Length).DefaultIfEmpty(string.Empty).First();
}
private static IEnumerable<string> GetSubstrings(this string str)
{
for (int c = 0; c < str.Length - 1; c++)
{
for (int cc = 1; c + cc <= str.Length; cc++)
{
yield return str.Substring(c, cc);
}
}
}
我在这里找到了它:http://www.snippetsource.net/Snippet/75/longest-common-substring
答案 2 :(得分:1)
只是为了好玩,这是一个使用Queue<T>
的示例:
string LongestCommonSubstring(params string[] strings)
{
return LongestCommonSubstring(strings[0], new Queue<string>(strings.Skip(1)));
}
string LongestCommonSubstring(string x, Queue<string> strings)
{
if (!strings.TryDequeue(out var y))
{
return x;
}
var output = string.Empty;
for (int i = 0; i < x.Length; i++)
{
for (int j = x.Length - i; j > -1; j--)
{
string common = x.Substring(i, j);
if (y.IndexOf(common) > -1 && common.Length > output.Length) output = common;
}
}
return LongestCommonSubstring(output, strings);
}
尽管它仍在使用递归,但这是Queue<T>
的一个很好的例子。
答案 3 :(得分:0)
我从https://iq.opengenus.org/longest-common-substring-using-rolling-hash/的Ashutosh Singh重构了C ++代码,以在C#中创建滚动哈希方法-这将在O(N * log(N)^ 2)时间和O(N)空间中找到子字符串
using System;
using System.Collections.Generic;
public class RollingHash
{
private class RollingHashPowers
{
// _mod = prime modulus of polynomial hashing
// any prime number over a billion should suffice
internal const int _mod = (int)1e9 + 123;
// _hashBase = base (point of hashing)
// this should be a prime number larger than the number of characters used
// in my use case I am only interested in ASCII (256) characters
// for strings in languages using non-latin characters, this should be much larger
internal const long _hashBase = 257;
// _pow1 = powers of base modulo mod
internal readonly List<int> _pow1 = new List<int> { 1 };
// _pow2 = powers of base modulo 2^64
internal readonly List<long> _pow2 = new List<long> { 1L };
internal void EnsureLength(int length)
{
if (_pow1.Capacity < length)
{
_pow1.Capacity = _pow2.Capacity = length;
}
for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
{
_pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
_pow2.Add(_pow2[currentIndx] * _hashBase);
}
}
}
private class RollingHashedString
{
readonly RollingHashPowers _pows;
readonly int[] _pref1; // Hash on prefix modulo mod
readonly long[] _pref2; // Hash on prefix modulo 2^64
// Constructor from string:
internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
{
_pows = pows;
_pref1 = new int[s.Length + 1];
_pref2 = new long[s.Length + 1];
const long capAVal = 'A';
const long capZVal = 'Z';
const long aADif = 'a' - 'A';
unsafe
{
fixed (char* c = s)
{
// Fill arrays with polynomial hashes on prefix
for (int i = 0; i < s.Length; ++i)
{
long v = c[i];
if (caseInsensitive && capAVal <= v && v <= capZVal)
{
v += aADif;
}
_pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
_pref2[i + 1] = _pref2[i] + v * _pows._pow2[i];
}
}
}
}
// Rollingnomial hash of subsequence [pos, pos+len)
// If mxPow != 0, value automatically multiply on base in needed power.
// Finally base ^ mxPow
internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
{
int hash1 = _pref1[pos + len] - _pref1[pos];
long hash2 = _pref2[pos + len] - _pref2[pos];
if (hash1 < 0)
{
hash1 += RollingHashPowers._mod;
}
if (mxPow != 0)
{
hash1 = (int)((long)hash1 * _pows._pow1[mxPow - (pos + len - 1)] % RollingHashPowers._mod);
hash2 *= _pows._pow2[mxPow - (pos + len - 1)];
}
return Tuple.Create(hash1, hash2);
}
}
private readonly RollingHashPowers _rhp;
public RollingHash(int longestLength = 0)
{
_rhp = new RollingHashPowers();
if (longestLength > 0)
{
_rhp.EnsureLength(longestLength);
}
}
public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
{
// Calculate max neede power of base:
int mxPow = Math.Max(a.Length, b.Length);
_rhp.EnsureLength(mxPow);
// Create hashing objects from strings:
RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);
// Binary search by length of same subsequence:
int pos = -1;
int low = 0;
int minLen = Math.Min(a.Length, b.Length);
int high = minLen + 1;
var tupleCompare = Comparer<Tuple<int, long>>.Default;
while (high - low > 1)
{
int mid = (low + high) / 2;
List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
for (int i = 0; i + mid <= a.Length; ++i)
{
hashes.Add(hash_a.Apply(i, mid, mxPow));
}
hashes.Sort(tupleCompare);
int p = -1;
for (int i = 0; i + mid <= b.Length; ++i)
{
if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
{
p = i;
break;
}
}
if (p >= 0)
{
low = mid;
pos = p;
}
else
{
high = mid;
}
}
// Output answer:
return pos >= 0
? b.Substring(pos, low)
: string.Empty;
}
}