据我了解,LINQ to Objects(针对 IEnumerable)的 GroupBy 运算符会在返回第一个分组之前枚举整个源序列。
如果我知道序列已经按键排好序,或者我并不在意各分组的键是否互不相同,那么这种做法对超大数据量并不友好,而且也没有必要。
是否有一个被我忽略的运算符,它只对键相同的连续项进行分组,并在出现新键时立即返回当前分组?
由于没有找到用现有功能实现这一点的办法,我自己编写了一个运算符,并将其命名为 GroupByUntilChanged。不过,我还是更希望使用框架自带的代码。
public static class MyEnumerable
{
    /// <summary>
    /// Splits a sequence into runs of ADJACENT elements that share the same key.
    /// A run is handed to the caller as soon as an element with a different key is
    /// read, so the source is never buffered as a whole. The same key may therefore
    /// appear in more than one returned grouping.
    /// </summary>
    /// <param name="source">The sequence to split into runs.</param>
    /// <param name="keySelector">Computes the key of each source element.</param>
    /// <param name="elementSelector">Projects each source element into the value stored in its run.</param>
    /// <param name="comparer">
    /// Decides whether two keys belong to the same run; when <c>null</c>,
    /// <see cref="EqualityComparer{T}.Default"/> is used.
    /// </param>
    /// <returns>A lazily evaluated sequence of runs, in source order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/>, <paramref name="keySelector"/> or
    /// <paramref name="elementSelector"/> is <c>null</c>.
    /// </exception>
    public static IEnumerable<IGrouping<TKey, TElement>> GroupByUntilChanged<TSource, TKey, TElement>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TElement> elementSelector,
        IEqualityComparer<TKey> comparer)
    {
        // Validation lives in this non-iterator wrapper so bad arguments fail at call
        // time rather than on the first MoveNext of the returned sequence.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (keySelector == null)
        {
            throw new ArgumentNullException(nameof(keySelector));
        }

        if (elementSelector == null)
        {
            throw new ArgumentNullException(nameof(elementSelector));
        }

        if (comparer == null)
        {
            comparer = EqualityComparer<TKey>.Default;
        }

        return GroupByUntilChangedEnumerator(source, keySelector, elementSelector, comparer);
    }

    // omitted: 7 more overloads returning GroupByUntilChangedEnumerator

    /// <summary>
    /// Iterator behind <c>GroupByUntilChanged</c>: walks the source once and yields
    /// each run of equal-keyed neighbours when the key changes or the source ends.
    /// </summary>
    private static IEnumerable<IGrouping<TKey, TElement>> GroupByUntilChangedEnumerator<TSource, TKey, TElement>(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TElement> elementSelector,
        IEqualityComparer<TKey> comparer)
    {
        // The run currently being filled; null until the first element has been read.
        Grouping<TKey, TElement> run = null;

        foreach (var item in source)
        {
            var itemKey = keySelector(item);

            if (run != null && !comparer.Equals(run.Key, itemKey))
            {
                // Key changed: publish the finished run before projecting the new element.
                yield return run;
                run = null;
            }

            if (run == null)
            {
                run = new Grouping<TKey, TElement>(itemKey, elementSelector(item));
            }
            else
            {
                run.Add(elementSelector(item));
            }
        }

        // Publish the trailing run; an empty source never creates one.
        if (run != null)
        {
            yield return run;
        }
    }

    /// <summary>
    /// List-backed <see cref="IGrouping{TKey, TElement}"/> that is filled while a run is being read.
    /// </summary>
    private sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
    {
        private readonly IList<TElement> _items = new List<TElement>();

        /// <summary>Creates a run with its key and its first element.</summary>
        public Grouping(TKey key, TElement first)
        {
            Key = key;
            _items.Add(first);
        }

        /// <summary>The key shared by the elements of this run.</summary>
        public TKey Key { get; }

        /// <summary>Appends another element to the run.</summary>
        public void Add(TElement next) => _items.Add(next);

        /// <summary>Enumerates the elements in the order they were added.</summary>
        public IEnumerator<TElement> GetEnumerator() => _items.GetEnumerator();

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }
}
答案 0 :(得分:1)
这不是标准的 System.Linq 扩展方法。不过,我确实在 MoreLinq.MoreEnumerable.GroupAdjacent 中找到了它。值得一提的是,它的源代码与你自己写的非常接近(复制粘贴于此,以备日后查阅):
/// <summary>
/// Iterator that groups adjacent elements of <paramref name="source"/> sharing the
/// same key. Each element is projected to a key/value pair first; a group is
/// yielded when the key changes and once more at the end of the sequence.
/// </summary>
/// <param name="source">Sequence to group; asserted non-null in debug builds.</param>
/// <param name="keySelector">Computes the key of each element.</param>
/// <param name="elementSelector">Projects each element into the value stored in its group.</param>
/// <param name="comparer">Decides whether two adjacent keys are equal.</param>
/// <returns>The groups of adjacent elements, in source order.</returns>
private static IEnumerable<IGrouping<TKey, TElement>> GroupAdjacentImpl<TSource, TKey, TElement>(
    this IEnumerable<TSource> source,
    Func<TSource, TKey> keySelector,
    Func<TSource, TElement> elementSelector,
    IEqualityComparer<TKey> comparer)
{
    // This is an iterator, so these assertions only run once enumeration starts.
    Debug.Assert(source != null);
    Debug.Assert(keySelector != null);
    Debug.Assert(elementSelector != null);
    Debug.Assert(comparer != null);

    // Key first, then value, for every element as it is pulled from the source.
    var projected = source.Select(
        item => new KeyValuePair<TKey, TElement>(keySelector(item), elementSelector(item)));

    var currentKey = default(TKey);
    List<TElement> bucket = null; // null until the first element has been read

    foreach (var pair in projected)
    {
        if (bucket != null)
        {
            if (comparer.Equals(currentKey, pair.Key))
            {
                bucket.Add(pair.Value);
                continue;
            }

            // Key changed: hand out the finished group before starting the next one.
            yield return CreateGroupAdjacentGrouping(currentKey, bucket);
        }

        currentKey = pair.Key;
        bucket = new List<TElement> { pair.Value };
    }

    // Flush the trailing group; nothing is yielded for an empty source.
    if (bucket != null)
    {
        yield return CreateGroupAdjacentGrouping(currentKey, bucket);
    }
}
/// <summary>
/// Wraps <paramref name="members"/> in a grouping for <paramref name="key"/>.
/// A list that is not already read-only is wrapped in a
/// <see cref="ReadOnlyCollection{T}"/> before being handed to the grouping.
/// </summary>
/// <param name="key">Key of the group.</param>
/// <param name="members">Elements of the group; asserted non-null in debug builds.</param>
/// <returns>A grouping over the given members.</returns>
private static Grouping<TKey, TElement> CreateGroupAdjacentGrouping<TKey, TElement>(TKey key, IList<TElement> members)
{
    Debug.Assert(members != null);

    IList<TElement> exposed = members;
    if (!members.IsReadOnly)
    {
        exposed = new ReadOnlyCollection<TElement>(members);
    }

    return Grouping.Create(key, exposed);
}
/// <summary>
/// Non-generic factory for <c>Grouping&lt;TKey, TElement&gt;</c>, so that callers
/// can rely on type inference instead of spelling out the type arguments.
/// </summary>
static class Grouping
{
    /// <summary>Creates a grouping with the given key over the given members.</summary>
    public static Grouping<TKey, TElement> Create<TKey, TElement>(TKey key, IEnumerable<TElement> members)
        => new Grouping<TKey, TElement>(key, members);
}
/// <summary>
/// <see cref="IGrouping{TKey, TElement}"/> implementation that pairs a key with an
/// already-built sequence of members and enumerates that sequence directly.
/// </summary>
#if !NO_SERIALIZATION_ATTRIBUTES
[Serializable]
#endif
private sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
{
    private readonly IEnumerable<TElement> _members;

    /// <summary>Creates a grouping for <paramref name="key"/> over <paramref name="members"/>.</summary>
    /// <param name="key">Key of the group.</param>
    /// <param name="members">Elements of the group; asserted non-null in debug builds.</param>
    public Grouping(TKey key, IEnumerable<TElement> members)
    {
        Debug.Assert(members != null);

        _members = members;
        Key = key;
    }

    /// <summary>The key shared by the members of this group.</summary>
    public TKey Key { get; private set; }

    /// <summary>Enumerates the members of the group.</summary>
    public IEnumerator<TElement> GetEnumerator() => _members.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => _members.GetEnumerator();
}
虽然这不是框架代码,但它出自一个编写良好且经过测试(tests)的库,而且至少有我所尊敬的 C# 开发人员审阅过这个库。