我正在构建一个在并集(union)运算下封闭的字符串数组列表,也就是说,列表中任意两个数组的并集也必须出现在该列表中。
为此,我自己编写了代码:它遍历字符串数组列表,对列表中每两个现有的字符串数组求并集,并且仅当该并集尚未出现在列表中时,才把它作为新的字符串数组添加到列表中。
/// <summary>
/// Extends <paramref name="kstructure"/> in place until it is closed under union:
/// for every two arrays in the list, an array holding the union of their elements
/// is also in the list.
/// </summary>
/// <param name="kstructure">
/// List of string arrays, each treated as a set (element order and repeated
/// elements inside one array are ignored when checking whether a union is
/// already present). Missing unions are appended to this same list.
/// </param>
/// <returns>The same list instance that was passed in, after the unions were appended.</returns>
public static List<string[]> UnionClosed(List<string[]> kstructure)
{
    // Index of every set currently in the list, so "is this union already there?"
    // is a hash lookup instead of a scan over the whole list.
    var known = new HashSet<HashSet<string>>(HashSet<string>.CreateSetComparer());
    foreach (string[] state in kstructure)
    {
        known.Add(new HashSet<string>(state));
    }

    // Both loops start at the first element (index 0) so that it takes part in
    // the unions like every other entry. Count is re-read on every iteration on
    // purpose: arrays appended below must be combined with the rest as well.
    for (int i = 0; i < kstructure.Count; i++)
    {
        for (int j = i + 1; j < kstructure.Count; j++)
        {
            string[] unionStatesResult = kstructure[i].Union(kstructure[j]).ToArray();

            // HashSet.Add returns false when an equal set is already indexed.
            if (known.Add(new HashSet<string>(unionStatesResult)))
            {
                kstructure.Add(unionStatesResult);
            }
        }
    }

    return kstructure;
}
预期结果是得到一个在并集运算下封闭的字符串数组列表。
例如,如果我传入的输入是:{{},{"i1"},{"i2"},{"i3"},{"i4"},{"i1","i2"},{"i1","i3"},{"i1","i4"},{"i1","i5"},{"i2","i3"},{"i2","i4"},{"i2","i5"},{"i1","i2","i3"},{"i1","i2","i4"},{"i1","i2","i5"},{"i1","i3","i4"},{"i1","i3","i5"},{"i2","i3","i4"},{"i2","i3","i5"},{"i1","i2","i3","i4"},{"i1","i2","i3","i5"},{"i1","i3","i4","i5"},{"i1","i2","i3","i4","i5"}}。
则预期输出应为:{{},{"i1"},{"i2"},{"i3"},{"i4"},{"i1","i2"},{"i1","i3"},{"i1","i4"},{"i1","i5"},{"i2","i3"},{"i2","i4"},{"i2","i5"},{"i3","i4"},{"i1","i2","i3"},{"i1","i2","i4"},{"i1","i2","i5"},{"i1","i3","i4"},{"i1","i3","i5"},{"i1","i4","i5"},{"i2","i3","i4"},{"i2","i3","i5"},{"i2","i4","i5"},{"i1","i2","i3","i4"},{"i1","i2","i3","i5"},{"i1","i2","i4","i5"},{"i1","i3","i4","i5"},{"i2","i3","i4","i5"},{"i1","i2","i3","i4","i5"}}。
我能得到输出,但问题是它非常慢,对于大量的字符串数组无法给出结果。我希望对大小为500的列表求并集闭包,但这段代码无法给出结果。我想知道C#中是否有现成的功能可以完成同样的操作。R的sets包中的binary_closure()提供了相同的功能,我希望在C#中也能使用类似的东西。
答案 0 :(得分:0)
使用HashSet进行集合比较,而不是手动循环比较,可以使这个过程更高效。
首先,输入取自您的一条评论:
// Sample data: 23 string arrays over the items "i1".."i5", beginning with the empty array.
string[][] input =
{
    new string[] { },
    new string[] { "i1" },
    new string[] { "i2" },
    new string[] { "i3" },
    new string[] { "i4" },
    new string[] { "i1", "i2" },
    new string[] { "i1", "i3" },
    new string[] { "i1", "i4" },
    new string[] { "i1", "i5" },
    new string[] { "i2", "i3" },
    new string[] { "i2", "i4" },
    new string[] { "i2", "i5" },
    new string[] { "i1", "i2", "i3" },
    new string[] { "i1", "i2", "i4" },
    new string[] { "i1", "i2", "i5" },
    new string[] { "i1", "i3", "i4" },
    new string[] { "i1", "i3", "i5" },
    new string[] { "i2", "i3", "i4" },
    new string[] { "i2", "i3", "i5" },
    new string[] { "i1", "i2", "i3", "i4" },
    new string[] { "i1", "i2", "i3", "i5" },
    new string[] { "i1", "i3", "i4", "i5" },
    new string[] { "i1", "i2", "i3", "i4", "i5" },
};
我们需要这样一个辅助类,用来比较两个HashSet的内容是否相等:
/// <summary>
/// Compares <see cref="HashSet{T}"/> instances by content: two sets are equal when
/// <see cref="HashSet{T}.SetEquals"/> says they hold the same elements.
/// </summary>
/// <typeparam name="T">Element type of the compared sets.</typeparam>
public class HashSetEqualityComparer<T> : IEqualityComparer<HashSet<T>>
{
    /// <summary>Shared instance; the comparer has no instance state.</summary>
    public static readonly HashSetEqualityComparer<T> Instance = new HashSetEqualityComparer<T>();

    /// <summary>Determines whether two sets contain the same elements.</summary>
    /// <param name="x">First set; may be null.</param>
    /// <param name="y">Second set; may be null.</param>
    /// <returns>
    /// True when both arguments are the same reference (including both null) or
    /// when <c>x.SetEquals(y)</c> holds; false when exactly one of them is null.
    /// </returns>
    public bool Equals(HashSet<T> x, HashSet<T> y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return x.SetEquals(y);
    }

    /// <summary>
    /// Computes a hash that does not depend on enumeration order by summing the
    /// element hashes produced by the set's own comparer.
    /// </summary>
    /// <param name="obj">Set to hash.</param>
    /// <returns>The wrapped (unchecked) sum of the element hash codes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public int GetHashCode(HashSet<T> obj)
    {
        // See http://stackoverflow.com/a/670068/1086121
        if (obj == null)
        {
            throw new ArgumentNullException(nameof(obj));
        }

        var comparer = obj.Comparer;
        int hash = 0;
        foreach (T element in obj)
        {
            // A HashSet may contain null, but some comparers (for example
            // StringComparer.Ordinal) throw when asked to hash null. A null
            // element therefore contributes 0 and the comparer is not called.
            if (element != null)
            {
                hash = unchecked(hash + comparer.GetHashCode(element));
            }
        }

        return hash;
    }
}
然后,代码:
// Collects every input array (as a set) plus the union of every pair of input
// arrays. The content-based comparer makes Add a no-op for sets already present.
// Only pairs taken from `input` are combined; sets produced here are not
// combined with each other again.
var output = new HashSet<HashSet<string>>(HashSetEqualityComparer<string>.Instance);
int total = input.Length;
for (int first = 0; first < total; first++)
{
    string[] left = input[first];

    // Every input item must itself be part of the output.
    output.Add(new HashSet<string>(left));

    int second = first + 1;
    while (second < total)
    {
        // UnionWith mutates its receiver, so each pair starts from a fresh copy
        // of the left-hand array rather than reusing the set added above.
        var merged = new HashSet<string>(left);
        merged.UnionWith(input[second]);
        output.Add(merged);
        second++;
    }
}
对于第一个和第二个字符串数组的每一种组合,我们都创建一个新的HashSet<string>,然后尝试把它添加到类型为HashSet<HashSet<string>>的输出集合中。我们的自定义IEqualityComparer会确保仅当输出中尚不存在内容相同的集合时才添加它。
我们还将输入和输出集合分开,这样就不会把输出当作输入再次处理(那会增加很多不必要的工作)。请注意,这也意味着上面的代码只计算输入数组两两之间的并集:如果某个集合只能通过"并集的并集"得到,它就不会出现在输出中。