我需要一个Dictionary<string,T>
这样的数据结构,在该结构中可以进行区分大小写和不区分大小写的搜索。
目前的做法是用foreach遍历一个List<Tuple<string,T>>
,并根据需要使用区分大小写或不区分大小写的StringComparer
进行比较,这需要O(n)时间;我希望能够改进这一点。
这是为一个库准备的,我希望最终用户可以在调用 Search 方法时选择是否区分大小写。(否则,我可以在类构造函数中根据区分大小写的开/关设置创建不同的Dictionary)
有什么想法吗?
答案 0 :(得分:2)
经过进一步思考并阅读了注释,我认为最好的实现是使用新的不区分大小写的属性和方法扩展似乎区分大小写的Dictionary
。由于该实现基于不区分大小写的Dictionary
且包含区分大小写的子词典,并且C#没有私有继承,因此最好仅实现一个新的Dictionary
包装器。
/// <summary>
/// A string-keyed dictionary that supports both case-sensitive and case-insensitive lookups.
/// </summary>
/// <remarks>
/// Entries are grouped into buckets by an ordinal, case-insensitive comparison of their keys.
/// Inside a bucket the keys are told apart by the default (case-sensitive) string comparer.
/// The standard dictionary members are case-sensitive; the members suffixed with <c>CI</c>
/// operate on a whole bucket, i.e. on every entry whose key differs only by case.
/// </remarks>
/// <typeparam name="TValue">Type of the stored values.</typeparam>
public class CaseDictionary<TValue> : IDictionary<string, TValue>, IDictionary, IReadOnlyDictionary<string, TValue> {
    #region Members
    // Outer dictionary: case-insensitive key -> bucket of case-sensitive entries.
    // Invariant maintained by Remove: a bucket that becomes empty is dropped from the outer dictionary.
    readonly Dictionary<string, Dictionary<string, TValue>> CIDict;
    // Handed out through ICollection.SyncRoot so callers that lock on it do not get an exception.
    readonly object syncRoot = new object();
    #endregion

    #region Constructors
    /// <summary>Creates an empty dictionary.</summary>
    public CaseDictionary() {
        CIDict = new Dictionary<string, Dictionary<string, TValue>>(StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>Creates an empty dictionary whose outer table starts with the given capacity.</summary>
    /// <param name="init">Initial capacity passed to the outer dictionary.</param>
    public CaseDictionary(int init) {
        CIDict = new Dictionary<string, Dictionary<string, TValue>>(init, StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>Creates a dictionary populated with the entries of <paramref name="init"/>.</summary>
    /// <param name="init">Source entries; <c>null</c> is treated as an empty source.</param>
    public CaseDictionary(IDictionary<string, TValue> init)
        : this(init != null ? init.Count : 0) {
        // The capacity expression above already tolerates null, so the copy loop must too.
        if (init == null)
            return;
        foreach (var kvp in init)
            Add(kvp.Key, kvp.Value);
    }
    #endregion

    #region Properties
    /// <summary>Snapshot of all keys, in every stored casing.</summary>
    public ICollection<string> Keys => CIDict.Values.SelectMany(v => v.Keys).ToList();

    /// <summary>Snapshot of all values.</summary>
    public ICollection<TValue> Values => CIDict.Values.SelectMany(v => v.Values).ToList();

    /// <summary>Total number of entries, summed over all buckets.</summary>
    public int Count => CIDict.Values.Sum(v => v.Count);

    /// <summary>Gets or sets the value stored under exactly <paramref name="aKey"/> (case-sensitive).</summary>
    /// <exception cref="KeyNotFoundException">The getter found no entry with that exact key.</exception>
    public TValue this[string aKey]
    {
        get
        {
            if (TryGetValue(aKey, out var theValue))
                return theValue;
            throw new KeyNotFoundException();
        }
        set
        {
            if (CIDict.TryGetValue(aKey, out var possibles))
                possibles[aKey] = value; // the inner indexer adds or overwrites
            else
                CIDict.Add(aKey, new Dictionary<string, TValue>() { { aKey, value } });
        }
    }
    #endregion

    #region Methods
    /// <summary>Adds a new entry; an entry with exactly the same key must not exist yet.</summary>
    /// <exception cref="ArgumentException">An entry with exactly this key already exists.</exception>
    public void Add(string aKey, TValue aValue) {
        if (CIDict.TryGetValue(aKey, out var values))
            values.Add(aKey, aValue);
        else
            CIDict.Add(aKey, new Dictionary<string, TValue>() { { aKey, aValue } });
    }

    /// <summary>Case-sensitive key test.</summary>
    public bool ContainsKey(string aKey) {
        return CIDict.TryGetValue(aKey, out var possibles) && possibles.ContainsKey(aKey);
    }

    /// <summary>Removes the entry stored under exactly <paramref name="aKey"/>.</summary>
    /// <returns><c>true</c> when an entry was removed.</returns>
    public bool Remove(string aKey) {
        if (!CIDict.TryGetValue(aKey, out var possibles) || !possibles.Remove(aKey))
            return false;
        DropBucketIfEmpty(aKey, possibles);
        return true;
    }

    /// <summary>Case-sensitive lookup that does not throw when the key is missing.</summary>
    public bool TryGetValue(string aKey, out TValue theValue) {
        if (CIDict.TryGetValue(aKey, out var possibles))
            return possibles.TryGetValue(aKey, out theValue);
        theValue = default(TValue);
        return false;
    }

    // Keeps the outer dictionary free of empty buckets so the *CI members never report
    // a key whose entries have all been removed.
    void DropBucketIfEmpty(string aKey, Dictionary<string, TValue> bucket) {
        if (bucket.Count == 0)
            CIDict.Remove(aKey);
    }

    // Converts a value received through the non-generic IDictionary API.
    // null is accepted whenever TValue itself can hold null.
    static TValue ConvertValue(object value) {
        if (value is TValue aValue)
            return aValue;
        if (value == null && default(TValue) == null)
            return default(TValue);
        throw new ArgumentException("value argument has wrong type");
    }
    #endregion

    #region ICollection<KeyValuePair<,>> Properties and Methods
    bool ICollection<KeyValuePair<string, TValue>>.IsReadOnly => false;

    void ICollection<KeyValuePair<string, TValue>>.Add(KeyValuePair<string, TValue> item) => Add(item.Key, item.Value);

    /// <summary>Removes every entry.</summary>
    public void Clear() => CIDict.Clear();

    bool ICollection<KeyValuePair<string, TValue>>.Contains(KeyValuePair<string, TValue> item) {
        return CIDict.TryGetValue(item.Key, out var possibles)
            && ((ICollection<KeyValuePair<string, TValue>>)possibles).Contains(item);
    }

    bool ICollection<KeyValuePair<string, TValue>>.Remove(KeyValuePair<string, TValue> item) {
        if (!CIDict.TryGetValue(item.Key, out var possibles))
            return false;
        if (!((ICollection<KeyValuePair<string, TValue>>)possibles).Remove(item))
            return false;
        DropBucketIfEmpty(item.Key, possibles);
        return true;
    }

    /// <summary>Copies all entries into <paramref name="array"/>, starting at <paramref name="index"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentException">The index is out of range or the array is too small.</exception>
    public void CopyTo(KeyValuePair<string, TValue>[] array, int index) {
        if (array == null)
            throw new ArgumentNullException(nameof(array));
        if (index < 0 || index > array.Length)
            throw new ArgumentException("index must be non-negative and within array argument Length");
        if (array.Length - index < Count)
            throw new ArgumentException("array argument plus index offset is too small");
        foreach (var subd in CIDict.Values) {
            foreach (var kvp in subd)
                array[index++] = kvp;
        }
    }
    #endregion

    #region IDictionary Methods
    bool IDictionary.IsFixedSize => false;
    bool IDictionary.IsReadOnly => false;
    ICollection IDictionary.Keys => (ICollection)Keys;
    ICollection IDictionary.Values => (ICollection)Values;

    object IDictionary.this[object key]
    {
        get
        {
            if (key == null)
                throw new ArgumentNullException(nameof(key));
            // A key of the wrong type or a missing key yields null rather than an exception.
            if (key is string aKey && TryGetValue(aKey, out var theValue))
                return theValue;
            return null;
        }
        set
        {
            if (key == null)
                throw new ArgumentNullException(nameof(key));
            if (value == null && default(TValue) != null)
                throw new ArgumentNullException(nameof(value));
            if (key is string aKey)
                this[aKey] = ConvertValue(value);
            else
                throw new ArgumentException("key argument has wrong type");
        }
    }

    void IDictionary.Add(object key, object value) {
        if (key == null)
            throw new ArgumentNullException(nameof(key));
        if (value == null && default(TValue) != null)
            throw new ArgumentNullException(nameof(value));
        if (key is string aKey)
            Add(aKey, ConvertValue(value));
        else
            throw new ArgumentException("key argument has wrong type");
    }

    bool IDictionary.Contains(object key) {
        if (key == null)
            throw new ArgumentNullException(nameof(key));
        return key is string aKey && ContainsKey(aKey);
    }

    void IDictionary.Remove(object key) {
        if (key == null)
            throw new ArgumentNullException(nameof(key));
        if (key is string aKey)
            Remove(aKey);
    }
    #endregion

    #region ICollection Methods
    bool ICollection.IsSynchronized => false;
    object ICollection.SyncRoot => syncRoot;

    void ICollection.CopyTo(Array array, int index) {
        if (array == null)
            throw new ArgumentNullException(nameof(array));
        if (array.Rank != 1)
            throw new ArgumentException("array argument can not be multi-dimensional");
        if (array.GetLowerBound(0) != 0)
            throw new ArgumentException("array argument has non-zero lower bound");
        if (array is KeyValuePair<string, TValue>[] kvps) {
            // The strongly typed overload performs its own index and size validation.
            CopyTo(kvps, index);
            return;
        }
        if (index < 0 || index > array.Length)
            throw new ArgumentException("index must be non-negative and within array argument Length");
        if (array.Length - index < Count)
            throw new ArgumentException("array argument plus index offset is too small");
        if (array is DictionaryEntry[] des) {
            foreach (var subd in CIDict.Values) {
                foreach (var kvp in subd)
                    des[index++] = new DictionaryEntry(kvp.Key, kvp.Value);
            }
        }
        else if (array is object[] objects) {
            foreach (var subd in CIDict.Values) {
                foreach (var kvp in subd)
                    objects[index++] = kvp;
            }
        }
        else
            throw new ArgumentException("array argument is an invalid type");
    }
    #endregion

    #region IReadOnlyDictionary<,> Methods
    IEnumerable<string> IReadOnlyDictionary<string, TValue>.Keys => CIDict.Values.SelectMany(v => v.Keys);
    IEnumerable<TValue> IReadOnlyDictionary<string, TValue>.Values => CIDict.Values.SelectMany(v => v.Values);
    #endregion

    #region Case-Insensitive Properties and Methods
    /// <summary>
    /// One key per bucket, in the casing under which the bucket was first created.
    /// </summary>
    public ICollection<string> KeysCI => CIDict.Keys;

    /// <summary>Indexer-style access to all values whose key matches case-insensitively.</summary>
    public IndexerPropertyAtCI AtCI => new IndexerPropertyAtCI(this);

    /// <summary>Case-insensitive key test.</summary>
    public bool ContainsKeyCI(string aKey) => CIDict.ContainsKey(aKey);

    /// <summary>Gets the values of every entry whose key matches <paramref name="aKey"/> ignoring case.</summary>
    /// <param name="aKey">Key to look up, in any casing.</param>
    /// <param name="rtnValues">The matching values, or <c>null</c> when nothing matches.</param>
    /// <returns><c>true</c> when at least one bucket matches.</returns>
    public bool TryGetValueCI(string aKey, out ICollection<TValue> rtnValues) {
        if (CIDict.TryGetValue(aKey, out var theValues)) {
            rtnValues = theValues.Select(v => v.Value).ToList();
            return true;
        }
        rtnValues = default(List<TValue>);
        return false;
    }

    /// <summary>Helper type that gives <see cref="AtCI"/> its indexer syntax.</summary>
    public class IndexerPropertyAtCI {
        CaseDictionary<TValue> myDict;

        public IndexerPropertyAtCI(CaseDictionary<TValue> d) => myDict = d;

        /// <summary>All values whose key matches <paramref name="aKey"/> ignoring case.</summary>
        /// <exception cref="KeyNotFoundException">No key matches.</exception>
        public ICollection<TValue> this[string aKey] => myDict.CIDict[aKey].Select(v => v.Value).ToList();
    }
    #endregion

    #region IEnumerable Methods
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>Enumerates every entry, bucket by bucket.</summary>
    public IEnumerator<KeyValuePair<string, TValue>> GetEnumerator() {
        foreach (var subd in CIDict.Values) {
            foreach (var kvp in subd)
                yield return kvp;
        }
    }

    IDictionaryEnumerator IDictionary.GetEnumerator() => new CaseDictionaryEnumerator(GetEnumerator());

    // Adapts the generic enumerator to the non-generic IDictionaryEnumerator contract.
    struct CaseDictionaryEnumerator : IDictionaryEnumerator {
        private IEnumerator<KeyValuePair<string, TValue>> en;
        public CaseDictionaryEnumerator(IEnumerator<KeyValuePair<string, TValue>> anEn) => en = anEn;
        public DictionaryEntry Entry => new DictionaryEntry(en.Current.Key, en.Current.Value);
        public object Current => Entry;
        public bool MoveNext() => en.MoveNext();
        public void Reset() => en.Reset();
        public object Key => en.Current.Key;
        public object Value => en.Current.Value;
    }
    #endregion
}
鉴于此类,它可以用作:
// Populate the dictionary with keys that differ only by case.
var words = new CaseDictionary<int> {
    { "word", 1 },
    { "Word", 2 },
    { "WOrd", 3 },
    { "word2", 4 },
    { "worD2", 5 }
};

// Case-sensitive versus case-insensitive key tests.
Console.WriteLine(words.ContainsKey("WOrd"));
Console.WriteLine(words.ContainsKey("WOrd2"));
Console.WriteLine(words.ContainsKeyCI("WOrd2"));

// Read, overwrite and re-read a single case-sensitive entry.
Console.WriteLine(words["word2"]);
words["word2"] = 6;
Console.WriteLine(words["word2"]);
Console.WriteLine();

// List every value whose key matches "word2" regardless of case.
var matches = words.AtCI["word2"];
foreach (var match in matches) {
    Console.WriteLine(match);
}
输出为:
True
False
True
4
6
6
5
答案 1 :(得分:1)
您可以只使用普通词典,但可以定义用于执行不区分大小写的搜索的扩展方法:
/// <summary>Lookup helpers for plain string-keyed dictionaries.</summary>
static class ExtensionMethods
{
    /// <summary>
    /// Gets the value stored under <paramref name="key"/>, optionally ignoring case.
    /// </summary>
    /// <param name="source">Dictionary to search.</param>
    /// <param name="key">Key to look up.</param>
    /// <param name="caseSensitive">
    /// <c>true</c> for a normal hashed lookup; <c>false</c> for a linear scan that compares keys
    /// with <see cref="StringComparison.CurrentCultureIgnoreCase"/> and returns the first match.
    /// </param>
    /// <returns>The value of the matching entry.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">No entry matches the key.</exception>
    public static T GetValue<T>(this Dictionary<string,T> source, string key, bool caseSensitive)
    {
        if (source == null) throw new ArgumentNullException(nameof(source));

        if (caseSensitive) return source[key];

        // Scan the entries directly: the value is returned as soon as a key matches,
        // without a second hashed lookup on the key that was just found.
        foreach (var entry in source)
        {
            if (String.Compare(key, entry.Key, StringComparison.CurrentCultureIgnoreCase) == 0)
            {
                return entry.Value;
            }
        }

        throw new KeyNotFoundException();
    }
}
或者,如果您确实需要,可以将字典子类化,并使其成为适当的实例成员。
答案 2 :(得分:0)
您可以使用new Dictionary<string,(string CaseSensitiveKey,T Data)>
,其中键始终是小写字母(请参见下文),但是...
string.Contains
或Regex.IsMatch
(我稍后添加了此内容)
我认为您可能最终会使用string.Contains
(甚至可能是Regex.IsMatch
),以便您的搜索可以捕获部分匹配项。
// Interactive demo: every line read from the console is used as a regular expression
// and matched, ignoring case, against the dictionary keys.
var d = new Dictionary<string, string>() {
    { "First Last", "Some data" },
    { "Fir La", "Some data 2" } };
while (true)
{
    var term = Console.ReadLine();
    // ReadLine returns null once the input is exhausted; stop instead of handing
    // a null pattern to Regex.IsMatch, which would throw ArgumentNullException.
    if (term == null)
    {
        break;
    }

    // Case-sensitive flag would control RegexOptions
    var results = d.Where( kvp => Regex.IsMatch(kvp.Key, term, RegexOptions.IgnoreCase)).ToList();
    if (results.Count > 0)
    {
        foreach (var kvp in results)
        {
            Console.WriteLine($"\t{kvp.Key}:{kvp.Value}");
        }
    }
    else
    {
        Console.WriteLine("Not found");
    }
}
fi.*la
First Last:Some data
Fir La:Some data 2
fir.*t
First Last:Some data
// Substring search over the dictionary keys, ignoring case.
// Case-sensitive flag would control the `StringComparison` flag.
// NOTE(review): both operands are lowered with ToLower() AND compared with
// InvariantCultureIgnoreCase, so the ToLower() calls look redundant - confirm before removing.
var results = d.Where(
kvp => kvp.Key.ToLower().Contains(term.ToLower(), StringComparison.InvariantCultureIgnoreCase))
.ToList();
}
Fi
Found First Last:Some data
Found Fir La:Some data 2
First
Found First Last:Some data
Fal
Not found
您可以使用new Dictionary<string,(string CaseSensitiveKey,T Data)>
,其中键总是小写。
如果字典中可能同时存在“Gerardo Grignoli”和“gerardo grignoli”,这种方法将不起作用,但我猜您的情况并非如此,因为既然您要的是按键查找,那么您要找的就不是部分匹配。显然,这只是一个假设。
如果您想要一种针对完全匹配的快速解决方案,并且需要处理仅大小写不同的条目,请查看使用Dictionary<string, Dictionary<string, TValue>>
的其他答案。
/// <summary>
/// Looks up <paramref name="term"/> in a dictionary whose keys are lower-cased and whose
/// values carry the original, case-preserved key next to the data.
/// </summary>
/// <param name="d">Dictionary keyed by the lower-cased key.</param>
/// <param name="term">Search term in any casing.</param>
/// <param name="isCS">When <c>true</c>, the term must also equal the case-preserved key exactly.</param>
/// <returns>The stored data, or <c>default</c> when nothing matches.</returns>
public static T LowerCaseKeyWay<T>(Dictionary<string, (string CaseSensitiveKey, T Data)> d, string term, bool isCS)
{
    // The lower-cased term selects the only candidate entry.
    if (!d.TryGetValue(term.ToLower(), out var entry))
    {
        return default;
    }

    // A case-insensitive search accepts the candidate as is; a case-sensitive one
    // additionally requires the exact original spelling.
    bool accepted = !isCS || term == entry.CaseSensitiveKey;
    return accepted ? entry.Data : default;
}
使用示例。
// Sample payload type stored as the dictionary value in the usage example.
// NOTE(review): Number and Rep presumably stand for a user number and a reputation score - confirm.
class SO
{
public int Number { get; set; }
public int Rep { get; set; }
}
/// <summary>
/// Usage example: runs every search term through a case-sensitive and a
/// case-insensitive lookup and prints the Rep of the entry that was found (if any).
/// </summary>
public static void Main(string[] args)
{
    // Keys are stored lower-cased; the original spelling travels with the value.
    var d = new Dictionary<string,(string CaseSensitiveKey,SO Data)>() {
        { "Gerardo Grignoli".ToLower(), ("Gerardo Grignoli", new SO { Number=97471, Rep=7987} )},
        { "John Wu".ToLower(), ("John Wu", new SO { Number=2791540, Rep=34973})}
    };

    foreach( var searchTerm in new []{ "Gerardo Grignoli", "Gerardo Grignoli".ToLower()} )
    {
        foreach( var isSearchCaseSensitive in new[]{true,false} )
        {
            // The lookup shown with this example is named LowerCaseKeyWay; the
            // previous call to "Search" referred to a method that is not defined.
            Console.WriteLine($"{searchTerm}/case-sensitive:{isSearchCaseSensitive}: {LowerCaseKeyWay(d, searchTerm, isSearchCaseSensitive)?.Rep}");
        }
    }
}
输出
Gerardo Grignoli/case-sensitive:True: 7987
Gerardo Grignoli/case-sensitive:False: 7987
gerardo grignoli/case-sensitive:True:
gerardo grignoli/case-sensitive:False: 7987
noOfSearches: 1000
noOfItems: 100
Lowercase key way: Elapsed 4ms, count found: 1500
Linq way Elapsed 57ms, count found: 1500
noOfSearches: 1000
noOfItems: 1000
Lowercase key way: Elapsed 3ms, count found: 3000
Linq way Elapsed 454ms, count found: 3000
noOfSearches: 10000
noOfItems: 100
Lowercase key way: Elapsed 11ms, count found: 15000
Linq way Elapsed 447ms, count found: 15000
noOfSearches: 10000
noOfItems: 1000
Lowercase key way: Elapsed 10ms, count found: 15000
Linq way Elapsed 5156ms, count found: 15000
noOfSearches: 100000
noOfItems: 100
Lowercase key way: Elapsed 113ms, count found: 150000
Linq way Elapsed 5059ms, count found: 150000
noOfSearches: 100000
noOfItems: 1000
Lowercase key way: Elapsed 83ms, count found: 150000
Linq way Elapsed 48855ms, count found: 150000
noOfSearches: 1000000
noOfItems: 100
Lowercase key way: Elapsed 1279ms, count found: 1500000
Linq way Elapsed 49558ms, count found: 1500000
noOfSearches: 1000000
noOfItems: 1000
Lowercase key way: Elapsed 961ms, count found: 1500000
(...)
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace ConsoleApp4
{
    /// <summary>Payload type stored in the benchmarked dictionaries.</summary>
    class SO
    {
        public int Number { get; set; }
        public int Rep { get; set; }
    }

    /// <summary>
    /// Primitive benchmark comparing two lookup strategies that support both
    /// case-sensitive and case-insensitive searches: a dictionary keyed by the
    /// lower-cased key versus a LINQ scan over the keys of a plain dictionary.
    /// </summary>
    class Program
    {
        /// <summary>Runs both strategies for every combination of search count and item count.</summary>
        public static void Main(string[] args)
        {
            // Preload linq
            var _ = new []{"•`_´•"}.FirstOrDefault( k => k == "(O_O)" );

            foreach( int noOfSearches in new []{1000, 10000, 100000, 1000000} )
            {
                foreach( int noOfItems in new []{100, 1000} )
                {
                    // d1: plain dictionary keyed by the original spelling.
                    var d1 = new Dictionary<string, SO>();
                    for(int i = 0; i < noOfItems; i++)
                    {
                        d1.Add($"Name {i}", new SO {Number = i, Rep = i *2});
                    }

                    // d2: same entries keyed by the lower-cased key, keeping the original spelling in the value.
                    var d2 = new Dictionary<string, (string CaseSensitiveKey, SO Data)>();
                    foreach (var entry in d1)
                    {
                        d2.Add(entry.Key.ToLower(), (entry.Key, entry.Value));
                    }

                    Console.WriteLine($"noOfSearches: {noOfSearches}");
                    Console.WriteLine($" noOfItems: {noOfItems}");
                    Console.Write(" Lowercase key way:".PadRight(30));
                    PrimitiveSpeedTest( (term, isCS) => LowerCaseKeyWay(d2, term, isCS), noOfItems, noOfSearches);
                    Console.Write(" Linq way".PadRight(30));
                    PrimitiveSpeedTest( (term, isCS) => LinqWay(d1, term, isCS), noOfItems, noOfSearches);
                }
            }
        }

        /// <summary>
        /// Times <paramref name="search"/> over <paramref name="noOfSearches"/> generated terms,
        /// each tried in its original and lower-cased form and in both case modes, then prints
        /// the elapsed milliseconds and the number of successful lookups.
        /// </summary>
        private static void PrimitiveSpeedTest(Func<string, bool, SO> search, int noOfItems, int noOfSearches)
        {
            var count = 0;
            Stopwatch sw = Stopwatch.StartNew();
            for (int i = 0; i < noOfSearches; i++)
            {
                var originalTerm = $"Name {i % (noOfItems*2)}"; // Some found, some not found
                foreach (var term in new[] { originalTerm, originalTerm.ToLower() })
                {
                    foreach (var isCS in new[] { true, false })
                    {
                        var so = search(term, isCS);
                        if (so != null) count++;
                    }
                }
            }
            // Freeze the measurement before any console output happens.
            sw.Stop();
            Console.WriteLine($"Elapsed {sw.ElapsedMilliseconds}ms, count found: {count} ");
        }

        /// <summary>
        /// Lookup in a dictionary keyed by the lower-cased key. A case-sensitive search
        /// additionally requires <paramref name="term"/> to equal the stored original spelling.
        /// </summary>
        /// <returns>The matching entry's data, or <c>null</c> when nothing matches.</returns>
        public static SO LowerCaseKeyWay(Dictionary<string, (string CaseSensitiveKey, SO Data)> d, string term, bool isCS)
            => d.TryGetValue(term.ToLower(), out var item)
                ? !isCS
                    ? item.Data
                    : term == item.CaseSensitiveKey ? item.Data : null
                : null;

        /// <summary>
        /// Lookup in a plain dictionary: a hashed lookup when case-sensitive, otherwise a LINQ
        /// scan over the keys using <see cref="StringComparison.CurrentCultureIgnoreCase"/>.
        /// Returns <c>default</c> instead of throwing when the key is missing, so that the
        /// benchmark can count misses.
        /// </summary>
        public static T LinqWay<T>(Dictionary<string,T> source, string key, bool caseSensitive)
        {
            if (caseSensitive)
            {
                // One hashed lookup instead of ContainsKey followed by the indexer.
                return source.TryGetValue(key, out var exact) ? exact : default;
            }

            var matchingKey = source.Keys.FirstOrDefault( k => String.Compare(key, k, StringComparison.CurrentCultureIgnoreCase) == 0);
            if (matchingKey == null) return default;
            return source[matchingKey];
        }
    }
}
答案 3 :(得分:0)
由于字典会对键进行哈希处理,因此您应该使用Dictionary<String, Dictionary<String, T>>
。
添加键:
不区分大小写的搜索:
区分大小写的搜索
答案 4 :(得分:-1)
您肯定绕不开自己编写字典(或其派生类)。第一个值是键,因此它只适用于精确(exact)匹配,而不适用于不区分大小写的匹配。实际上,情况更糟:
我最近了解到Dictionary同时也是我们通用的哈希表。它使用Hashtable的方式(为每个键和每个输入计算哈希值,并先比较哈希值)来加快比较速度,尤其是对字符串这类数据。因此,在查找键时,它会遍历键集合,并且:
您的需求恰恰破坏了这一机制,而且是彻底破坏。实际上,由于哈希值不同,本应匹配的键最终反而匹配不上。
第一个解决方案将是停止尝试在Code中执行此操作,而转到适当的DBMS。他们倾向于支持您可能会想到的所有比较方法。有很多方法可以加快它们的速度,例如索引。应该有一个进程内数据库。但是很少有人愿意走这条路。
我能想到的第二个解决方案是,以尽可能少的工作量重写Dictionary。一些想法:
.toLower()
就可以了。您基本上在上面的清单中添加了第3步:
希望只能修改添加和查找例程。像remove这样的东西应该使用find函数首先找到元素。这有点hacky。理想情况下,您想向用户隐藏执行此操作的内部方式,因此,带大小写的键列表应该是私有的。当然,这意味着必须接触更多代码。