我在 C# 中有一个简单的方法:
/// <summary>
/// Returns a slice of <paramref name="ActualStr"/> that starts immediately after the first
/// occurrence of <paramref name="StrFirst"/>. Presumably meant to return only the text
/// between <paramref name="StrFirst"/> and <paramref name="StrLast"/> (verify with the author).
/// </summary>
public static string BetweenOf(string ActualStr, string StrFirst, string StrLast)
{
// NOTE(review): the length argument is the offset of StrLast measured from the start of
// StrFirst, plus StrLast.Length. That is longer than the text between the delimiters, so the
// result can include StrLast and characters after it, and Substring throws when
// start + length runs past the end of ActualStr. The inner Substring call also allocates a
// temporary string that is only used for the IndexOf lookup.
return ActualStr.Substring(ActualStr.IndexOf(StrFirst) + StrFirst.Length, (ActualStr.Substring(ActualStr.IndexOf(StrFirst))).IndexOf(StrLast) + StrLast.Length);
}
我如何优化这个?
答案 0 :(得分:4)
如果我已经明白你想做什么,我认为你的实施可能不正确。
下面是一个我认为至少在 GC 方面表现更好的实现,因为它不会多次调用 Substring——每次调用都会在堆上创建一个仅临时使用的新字符串。
/// <summary>
/// Returns the text of <paramref name="ActualStr"/> located between the first occurrence of
/// <paramref name="StrFirst"/> and the next occurrence of <paramref name="StrLast"/> after it.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter.</param>
/// <param name="StrLast">The closing delimiter; it is only searched for after the opening delimiter.</param>
/// <returns>
/// The enclosed text, or an empty string when either delimiter cannot be found.
/// </returns>
public static string BetweenOfFixed(string ActualStr, string StrFirst, string StrLast)
{
    // Ordinal search: delimiters are compared character by character, independent of the
    // current culture.
    int firstIndex = ActualStr.IndexOf(StrFirst, System.StringComparison.Ordinal);
    if (firstIndex < 0)
    {
        // Without this guard the -1 would be folded into the start index and a wrong
        // slice would be returned silently.
        return string.Empty;
    }

    int startIndex = firstIndex + StrFirst.Length;
    int endIndex = ActualStr.IndexOf(StrLast, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        // Without this guard the length below would be negative and Substring would throw.
        return string.Empty;
    }

    return ActualStr.Substring(startIndex, endIndex - startIndex);
}
把它与正则表达式方案的性能做个比较会很有趣。
答案 1 :(得分:2)
你可以构建一个正则表达式:
// Escape the delimiters so that regex metacharacters inside them are matched literally.
var regex = Regex.Escape(strFirst) + "(.*)" + Regex.Escape(strLast);
两个分隔符之间的文本就是匹配结果中的第一个(也是唯一的)捕获组。
答案 2 :(得分:2)
以下是 @Chris 的代码与几种正则表达式方案的性能对比测试:
// Benchmark driver: times the IndexOf-based implementation against three regex variants
// on the same input and writes the elapsed milliseconds of each to the debug output.
void Main()
{
    const int measuredIterations = 1000000;

    string input = "abcdefghijklmnopq";
    string first = "de";
    string last = "op";

    // Pass 1 runs every candidate once as a JIT preheat; only pass 2 is reported.
    for (int pass = 1; pass <= 2; pass++)
    {
        bool isMeasuredPass = pass == 2;
        int iterations = isMeasuredPass ? measuredIterations : 1;

        Stopwatch sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfFixed(input, first, last);
        }
        sw.Stop();
        if (isMeasuredPass)
        {
            Debug.WriteLine("IndexOf: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexAdhoc(input, first, last);
        }
        sw.Stop();
        if (isMeasuredPass)
        {
            Debug.WriteLine("Regex adhoc: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexCached(input, first, last);
        }
        sw.Stop();
        if (isMeasuredPass)
        {
            // "uncompiled" here is the cached regex built without RegexOptions.Compiled.
            Debug.WriteLine("Regex uncompiled: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexCompiled(input, first, last);
        }
        sw.Stop();
        if (isMeasuredPass)
        {
            Debug.WriteLine("Regex compiled: " +
                sw.ElapsedMilliseconds + "ms");
        }
    }
}
/// <summary>
/// Returns the text of <paramref name="ActualStr"/> located between the first occurrence of
/// <paramref name="StrFirst"/> and the next occurrence of <paramref name="StrLast"/> after it.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter.</param>
/// <param name="StrLast">The closing delimiter; it is only searched for after the opening delimiter.</param>
/// <returns>
/// The enclosed text, or an empty string when either delimiter cannot be found.
/// </returns>
public static string BetweenOfFixed(string ActualStr, string StrFirst,
    string StrLast)
{
    // Ordinal search: delimiters are compared character by character, independent of the
    // current culture.
    int firstIndex = ActualStr.IndexOf(StrFirst, System.StringComparison.Ordinal);
    if (firstIndex < 0)
    {
        // Without this guard the -1 would be folded into the start index and a wrong
        // slice would be returned silently.
        return string.Empty;
    }

    int startIndex = firstIndex + StrFirst.Length;
    int endIndex = ActualStr.IndexOf(StrLast, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        // Without this guard the length below would be negative and Substring would throw.
        return string.Empty;
    }

    return ActualStr.Substring(startIndex, endIndex - startIndex);
}
/// <summary>
/// Extracts the text between two delimiters using a <see cref="Regex"/> that is constructed
/// anew on every call.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter, matched literally.</param>
/// <param name="StrLast">The closing delimiter, matched literally.</param>
/// <returns>
/// The content of the first capture group (the match is greedy), or an empty string when
/// the pattern does not match.
/// </returns>
public static string BetweenOfRegexAdhoc(string ActualStr, string StrFirst,
    string StrLast)
{
    // Build the pattern from the arguments rather than a hard-coded "de(.*)op";
    // Regex.Escape keeps any metacharacters in the delimiters literal.
    string pattern = Regex.Escape(StrFirst) + "(.*)" + Regex.Escape(StrLast);
    Regex re = new Regex(pattern, RegexOptions.None);
    return re.Match(ActualStr).Groups[1].Value;
}
// Regex for the fixed pattern "de(.*)op", created once and reused by every call below.
private static Regex _BetweenOfRegexCached = new Regex("de(.*)op", RegexOptions.None);

/// <summary>
/// Matches <paramref name="ActualStr"/> against the shared, pre-built regex and returns
/// its first capture group.
/// </summary>
/// <remarks>
/// <paramref name="StrFirst"/> and <paramref name="StrLast"/> are not consulted; the
/// delimiters are fixed by the pattern of the shared regex.
/// </remarks>
/// <returns>The value of capture group 1 of the match.</returns>
public static string BetweenOfRegexCached(string ActualStr, string StrFirst,
    string StrLast)
{
    Match match = _BetweenOfRegexCached.Match(ActualStr);
    Group captured = match.Groups[1];
    return captured.Value;
}
// Regex for the fixed pattern "de(.*)op", created once with RegexOptions.Compiled and
// reused by every call below.
private static Regex _BetweenOfRegexCompiled = new Regex("de(.*)op", RegexOptions.Compiled);

/// <summary>
/// Matches <paramref name="ActualStr"/> against the shared, compiled regex and returns
/// its first capture group.
/// </summary>
/// <remarks>
/// <paramref name="StrFirst"/> and <paramref name="StrLast"/> are not consulted; the
/// delimiters are fixed by the pattern of the shared regex.
/// </remarks>
/// <returns>The value of capture group 1 of the match.</returns>
public static string BetweenOfRegexCompiled(string ActualStr, string StrFirst,
    string StrLast)
{
    Match match = _BetweenOfRegexCompiled.Match(ActualStr);
    Group captured = match.Groups[1];
    return captured.Value;
}
输出:
IndexOf: 1419ms
Regex adhoc: 7788ms
Regex uncompiled: 1074ms
Regex compiled: 682ms
答案 3 :(得分:0)
使用正则表达式怎么样?这可能比构建临时字符串更快。此外,这还可以轻松地处理找不到此类字符串的情况。