我想知道 Microsoft 是如何编写字符串比较算法的,
例如 string.Equals 和 string.Compare。
他们是逐个字符进行比较吗:
// Character-by-character comparison that stops at the first mismatch.
// 'matched' counts the leading characters that are equal in both strings.
// It starts at 0 so that it equals str1.Length exactly when every character
// of str1 matched (starting at 1 made the final test fail for equal strings
// and succeed when only the last character differed).
int matched = 0;
// Bound the loop by both lengths so str2[i] is never read past the end of a
// shorter str2.
for (int i = 0; i < str1.Length && i < str2.Length; i++)
{
    if (str1[i] != str2[i])
    {
        break;
    }
    matched++;
}
// Equal only when the lengths agree and every character matched.
if (str1.Length == str2.Length && matched == str1.Length) return true;
还是一次性比较所有字符:
// "All at once" variant: the first three characters are compared in a single
// short-circuiting expression, so evaluation stops at the first mismatch.
bool firstThreeEqual = str1[0] == str2[0]
    && str1[1] == str2[1]
    && str1[2] == str2[2];
if (firstThreeEqual)
{
    return true;
}
我尝试在 string.Equals 函数上按 F12,但它只跳转到了函数声明,而不是实际的实现代码。谢谢!
在 Thilo 提示可以查看源代码之后,我找到了下面这段代码……这就是微软的实现方式。
// Static equality check for two strings.
// Returns true when both arguments are the same reference (which includes
// both being null), false when exactly one is null or the lengths differ,
// and otherwise whatever EqualsHelper reports for the two strings.
public static bool Equals(String a, String b) {
    // Compare as Object so that this is a plain reference comparison.
    Object left = a;
    Object right = b;
    if (left == right) {
        return true;
    }
    if (left == null || right == null) {
        return false;
    }
    // EqualsHelper is only consulted when the lengths already agree.
    return a.Length == b.Length && EqualsHelper(a, b);
}
但这又引出了一个问题:逐个字符检查和一次性整体匹配,哪一种更快?
答案 0 :(得分:4)
请查看源代码(复制如下):
“这引出了一个问题:逐个字符检查和一次性整体匹配,哪一种更快?”
我不明白这个问题。如果不检查每个字符,就不可能完成“整体匹配”。你唯一能做的,就是在发现第一个不匹配时立即退出。这会缩短运行时间,但不会改变它是 O(n) 的事实。
// Instance equality: reports whether this string and 'value' match.
// Returns false for a null 'value', true when 'value' is this same object,
// false when the lengths differ, and otherwise the result of EqualsHelper.
[Pure]
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
public bool Equals(String value) {
    // 'this' can be null for reverse-pinvokes and other callers that do not
    // use the callvirt instruction, so it has to be guarded explicitly.
    if (this == null) {
        throw new NullReferenceException();
    }
    if (value == null) {
        return false;
    }
    // Same object: equal without inspecting the contents.
    if (Object.ReferenceEquals(this, value)) {
        return true;
    }
    // EqualsHelper is only consulted when the lengths already agree.
    return this.Length == value.Length && EqualsHelper(this, value);
}
// Compares the contents of two strings of equal length and returns true when
// every character matches, false as soon as a difference is found.
// Callers are expected to have ruled out null arguments and differing lengths
// beforehand (stated by the Contract.Requires lines below).
[System.Security.SecuritySafeCritical] // auto-generated
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
private unsafe static bool EqualsHelper(String strA, String strB)
{
// Preconditions: both strings are non-null and have the same length.
Contract.Requires(strA != null);
Contract.Requires(strB != null);
Contract.Requires(strA.Length == strB.Length);
// Number of chars still to compare; decreases as the pointers advance.
int length = strA.Length;
// Take char pointers to the first character of each string; the fixed
// statement pins both strings for the duration of the block.
fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
{
// Working cursors that walk forward through the two strings.
char* a = ap;
char* b = bp;
// unroll the loop
#if AMD64
// for AMD64 bit platform we unroll by 12 and
// check 3 qword at a time. This is less code
// than the 32 bit case and is shorter
// pathlength
while (length >= 12)
{
// a and b are char*, so +4 and +8 skip 4 and 8 chars; each long read
// compares 4 chars at once.
if (*(long*)a != *(long*)b) return false;
if (*(long*)(a+4) != *(long*)(b+4)) return false;
if (*(long*)(a+8) != *(long*)(b+8)) return false;
a += 12; b += 12; length -= 12;
}
#else
// Otherwise unroll by 10: five int reads per iteration, each comparing
// 2 chars at once.
while (length >= 10)
{
if (*(int*)a != *(int*)b) return false;
if (*(int*)(a+2) != *(int*)(b+2)) return false;
if (*(int*)(a+4) != *(int*)(b+4)) return false;
if (*(int*)(a+6) != *(int*)(b+6)) return false;
if (*(int*)(a+8) != *(int*)(b+8)) return false;
a += 10; b += 10; length -= 10;
}
#endif
// This depends on the fact that the String objects are
// always zero terminated and that the terminating zero is not included
// in the length. For odd string sizes, the last compare will include
// the zero terminator.
// Tail loop: the remaining chars are compared two at a time, so for an odd
// remainder 'length' ends at -1 rather than 0.
while (length > 0)
{
if (*(int*)a != *(int*)b) break;
a += 2; b += 2; length -= 2;
}
// A mismatch breaks out with length still > 0, giving false; the result is
// true only when the tail loop ran to completion (or had nothing to do).
return (length <= 0);
}
}