我编写了两个函数,将一串以空格分隔的整数转换为int数组。第一个函数使用Substring
,然后应用System.Int32.Parse
将子字符串转换为int
值:
/// Collects every maximal run of ASCII digits ('0'..'9') found in `s` and
/// returns the runs, in order, as an int array.
/// Each run is copied out with Substring and converted by System.Int32.Parse.
/// Any character that is not an ASCII digit (including '-') acts as a separator.
let intsOfString (s: string) =
    let ints = ResizeArray()
    // `inside i j`: positions i .. j-1 are the digits of the run currently being scanned.
    let rec inside i j =
        if j = s.Length then
            // The run reaches the end of the string: emit it and stop.
            ints.Add(s.Substring(i, j-i) |> System.Int32.Parse)
        else
            let c = s.[j]
            if '0' <= c && c <= '9' then
                inside i (j+1)
            else
                // The run ended at j: emit it and resume the search after the separator.
                ints.Add(s.Substring(i, j-i) |> System.Int32.Parse)
                outside (j+1)
    // `outside i`: position i is not inside a run; look for the next digit.
    and outside i =
        if i < s.Length then
            let c = s.[i]
            if '0' <= c && c <= '9' then
                inside i (i+1)
            else
                outside (i+1)
    outside 0
    ints.ToArray()
第二个函数遍历字符串的字符,在不创建临时子字符串的情况下累积整数:
/// Collects every maximal run of ASCII digits ('0'..'9') found in `s` and
/// returns the runs, in order, as an int array.
/// The value of each run is accumulated digit by digit, so no temporary
/// substring is created. Any character that is not an ASCII digit
/// (including '-') acts as a separator.
let intsOfString (s: string) =
    let ints = ResizeArray()
    // `inside n i`: n is the value of the digits read so far in the current run,
    // i is the next position to inspect.
    let rec inside n i =
        if i = s.Length then
            // The run reaches the end of the string: emit it and stop.
            ints.Add n
        else
            let c = s.[i]
            if '0' <= c && c <= '9' then
                // 48 is the character code of '0'.
                inside (10*n + int c - 48) (i+1)
            else
                // The run ended at i: emit it and resume the search after the separator.
                ints.Add n
                outside(i+1)
    // `outside i`: position i is not inside a run; look for the next digit.
    and outside i =
        if i < s.Length then
            let c = s.[i]
            if '0' <= c && c <= '9' then
                inside (int c - 48) (i+1)
            else
                outside (i+1)
    outside 0
    ints.ToArray()
对以空格分隔的整数1到1,000,000进行基准测试,第一个版本需要1.5秒,而第二个版本需要0.3秒。
解析这些值可能对性能至关重要,所以为了使用临时子字符串而白白损失5倍的性能可能是不可取的。解析整数很容易,但解析其他值(如浮点数、小数和日期)则相当困难。
那么,是否有内置函数直接从字符串中的子字符串进行解析(即使用给定的字符串的起始和长度)以避免生成临时字符串?如果没有,是否有任何库提供有效的功能来执行此操作?
答案 0 :(得分:8)
System.Int32.Parse
最慢,因为它使用CultureInfo
,FormatInfo
等;而且性能瓶颈并不在于临时字符串。
用Reflector反编译得到的代码:
// Scans a number out of the character buffer at `str` and records what it finds in `number`
// (digits, scale, sign, precision), honouring the NumberStyles flags in `options` and the
// symbols supplied by `numfmt`.
// On exit `str` is moved to the position where scanning stopped.
// Returns true when at least one digit was read and no opening parenthesis was left unclosed;
// returns false otherwise.
// NOTE(review): this is decompiler output. The recurring `(IntPtr)2 / 2` terms look like the
// decompiler's rendering of "one char" in pointer arithmetic — confirm against the reference source.
private unsafe static bool ParseNumber(ref char* str, NumberStyles options, ref Number.NumberBuffer number, NumberFormatInfo numfmt, bool parseDecimal)
{
number.scale = 0;
number.sign = false;
// text / text2: currency symbols still allowed to match (set to null once one has matched).
// str4 / str5: primary decimal and group separators; str2 / str3: alternate separators,
// only populated (and `flag` only set) when currency symbols are allowed.
string text = null;
string text2 = null;
string str2 = null;
string str3 = null;
bool flag = false;
string str4;
string str5;
if ((options & NumberStyles.AllowCurrencySymbol) != NumberStyles.None)
{
text = numfmt.CurrencySymbol;
if (numfmt.ansiCurrencySymbol != null)
{
text2 = numfmt.ansiCurrencySymbol;
}
str2 = numfmt.NumberDecimalSeparator;
str3 = numfmt.NumberGroupSeparator;
str4 = numfmt.CurrencyDecimalSeparator;
str5 = numfmt.CurrencyGroupSeparator;
flag = true;
}
else
{
str4 = numfmt.NumberDecimalSeparator;
str5 = numfmt.NumberGroupSeparator;
}
// `num` is a bit set describing what has been seen so far:
//   1  = a sign or '(' was consumed        2  = an opening '(' is pending (set together with 1)
//   4  = at least one digit was read       8  = a digit has been stored (leading zeros are not)
//   16 = the decimal separator was seen    32 = a leading currency symbol was consumed
int num = 0;
char* ptr = str;
char c = *ptr;
// Leading section: skips permitted white space and consumes an optional sign, opening
// parenthesis and currency symbol; the loop is left at the first character matching none of them.
while (true)
{
if (!Number.IsWhite(c) || (options & NumberStyles.AllowLeadingWhite) == NumberStyles.None || ((num & 1) != 0 && ((num & 1) == 0 || ((num & 32) == 0 && numfmt.numberNegativePattern != 2))))
{
bool flag2;
char* ptr2;
// flag2: a leading sign is allowed and none has been consumed yet.
if ((flag2 = (((options & NumberStyles.AllowLeadingSign) == NumberStyles.None) ? false : ((num & 1) == 0))) && (ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
num |= 1;
// Stepped back here because the bottom of the loop advances `ptr` again.
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (flag2 && (ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
num |= 1;
number.sign = true;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (c == '(' && (options & NumberStyles.AllowParentheses) != NumberStyles.None && (num & 1) == 0)
{
// '(' marks the number as negative and must be closed by ')' in the trailing section.
num |= 3;
number.sign = true;
}
else
{
if ((text == null || (ptr2 = Number.MatchChars(ptr, text)) == null) && (text2 == null || (ptr2 = Number.MatchChars(ptr, text2)) == null))
{
break;
}
// A currency symbol matched; clear both so it cannot match a second time.
num |= 32;
text = null;
text2 = null;
ptr = ptr2 - (IntPtr)2 / 2;
}
}
}
}
c = *(ptr += (IntPtr)2 / 2);
}
// Digit section. num2 counts stored digits; num3 becomes the precision, i.e. the stored
// length with trailing zeros dropped (zeros are kept when parseDecimal is true).
int num2 = 0;
int num3 = 0;
while (true)
{
if ((c >= '0' && c <= '9') || ((options & NumberStyles.AllowHexSpecifier) != NumberStyles.None && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))))
{
num |= 4;
// Leading zeros are not stored: a '0' is only kept once an earlier digit has been stored.
if (c != '0' || (num & 8) != 0)
{
// No more than 50 digits are stored; further digits still adjust the scale below.
if (num2 < 50)
{
number.digits[(IntPtr)(num2++)] = c;
if (c != '0' || parseDecimal)
{
num3 = num2;
}
}
// Every stored-position digit before the decimal separator raises the scale by one.
if ((num & 16) == 0)
{
number.scale++;
}
num |= 8;
}
else
{
// A skipped leading zero after the decimal separator lowers the scale.
if ((num & 16) != 0)
{
number.scale--;
}
}
}
else
{
char* ptr2;
// Decimal separator: accepted once, when AllowDecimalPoint is set.
if ((options & NumberStyles.AllowDecimalPoint) != NumberStyles.None && (num & 16) == 0 && ((ptr2 = Number.MatchChars(ptr, str4)) != null || (flag && (num & 32) == 0 && (ptr2 = Number.MatchChars(ptr, str2)) != null)))
{
num |= 16;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
// Group separator: accepted only after a digit and before the decimal separator;
// anything else ends the digit section.
if ((options & NumberStyles.AllowThousands) == NumberStyles.None || (num & 4) == 0 || (num & 16) != 0 || ((ptr2 = Number.MatchChars(ptr, str5)) == null && (!flag || (num & 32) != 0 || (ptr2 = Number.MatchChars(ptr, str3)) == null)))
{
break;
}
ptr = ptr2 - (IntPtr)2 / 2;
}
}
c = *(ptr += (IntPtr)2 / 2);
}
// flag3: the exponent carries a negative sign.
bool flag3 = false;
number.precision = num3;
number.digits[(IntPtr)num3] = '\0';
// Everything below runs only when at least one digit was read; otherwise the method returns false.
if ((num & 4) != 0)
{
if ((c == 'E' || c == 'e') && (options & NumberStyles.AllowExponent) != NumberStyles.None)
{
// ptr3 remembers the position of the 'E' so the scan can rewind if no exponent digits follow.
char* ptr3 = ptr;
c = *(ptr += (IntPtr)2 / 2);
char* ptr2;
if ((ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
c = *(ptr = ptr2);
}
else
{
if ((ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
c = *(ptr = ptr2);
flag3 = true;
}
}
if (c >= '0' && c <= '9')
{
int num4 = 0;
do
{
num4 = num4 * 10 + (int)(c - '0');
c = *(ptr += (IntPtr)2 / 2);
// Exponents above 1000 are pinned to 9999 and the remaining exponent digits are skipped.
if (num4 > 1000)
{
num4 = 9999;
while (c >= '0' && c <= '9')
{
c = *(ptr += (IntPtr)2 / 2);
}
}
}
while (c >= '0' && c <= '9');
if (flag3)
{
num4 = -num4;
}
number.scale += num4;
}
else
{
// No digits after the 'E': rewind so the 'E' is treated as not part of the number.
ptr = ptr3;
c = *ptr;
}
}
// Trailing section: skips permitted white space and consumes an optional trailing sign,
// closing parenthesis and currency symbol.
while (true)
{
if (!Number.IsWhite(c) || (options & NumberStyles.AllowTrailingWhite) == NumberStyles.None)
{
bool flag2;
char* ptr2;
// flag2: a trailing sign is allowed and no sign has been consumed yet.
if ((flag2 = (((options & NumberStyles.AllowTrailingSign) == NumberStyles.None) ? false : ((num & 1) == 0))) && (ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
num |= 1;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (flag2 && (ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
num |= 1;
number.sign = true;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (c == ')' && (num & 2) != 0)
{
// -3 is ~2: clears the "parenthesis pending" bit.
num &= -3;
}
else
{
if ((text == null || (ptr2 = Number.MatchChars(ptr, text)) == null) && (text2 == null || (ptr2 = Number.MatchChars(ptr, text2)) == null))
{
break;
}
text = null;
text2 = null;
ptr = ptr2 - (IntPtr)2 / 2;
}
}
}
}
c = *(ptr += (IntPtr)2 / 2);
}
// Success requires that any opening parenthesis has been closed.
if ((num & 2) == 0)
{
// No digit was stored (the input consisted of zeros only): normalise scale and sign.
if ((num & 8) == 0)
{
if (!parseDecimal)
{
number.scale = 0;
}
if ((num & 16) == 0)
{
number.sign = false;
}
}
str = ptr;
return true;
}
}
str = ptr;
return false;
}
// Converts the text in `s` to an int by delegating to Number.ParseInt32 with
// NumberStyles.Integer and the current NumberFormatInfo.
public static int Parse(string s)
{
    NumberFormatInfo currentFormat = NumberFormatInfo.CurrentInfo;
    return Number.ParseInt32(s, NumberStyles.Integer, currentFormat);
}
// Converts `s` to an int: the text is first scanned into a NumberBuffer by
// Number.StringToNumber, then the buffer is converted with the hexadecimal or the
// decimal converter depending on whether `style` contains AllowHexSpecifier.
// Throws OverflowException when the chosen converter reports failure.
internal unsafe static int ParseInt32(string s, NumberStyles style, NumberFormatInfo info)
{
    // Backing storage for the NumberBuffer is a 114-byte block on the stack.
    byte* scratch = stackalloc byte[114];
    Number.NumberBuffer parsed = new Number.NumberBuffer(scratch);
    int value = 0;
    Number.StringToNumber(s, style, ref parsed, info, false);

    bool isHex = (style & NumberStyles.AllowHexSpecifier) != NumberStyles.None;
    bool converted = isHex
        ? Number.HexNumberToInt32(ref parsed, ref value)
        : Number.NumberToInt32(ref parsed, ref value);
    if (!converted)
    {
        throw new OverflowException(Environment.GetResourceString("Overflow_Int32"));
    }
    return value;
}
// Scans the whole of `str` into `number` using Number.ParseNumber.
// Throws ArgumentNullException when `str` is null.
// Throws FormatException when ParseNumber fails, or when it stops before the end of the
// string and Number.TrailingZeros does not accept the remainder.
private unsafe static void StringToNumber(string str, NumberStyles options, ref Number.NumberBuffer number, NumberFormatInfo info, bool parseDecimal)
{
    if (str == null)
    {
        throw new ArgumentNullException("String");
    }
    fixed (char* start = str)
    {
        char* cursor = start;
        bool ok = Number.ParseNumber(ref cursor, options, ref number, info, parseDecimal);
        if (ok)
        {
            // Number of chars consumed. Subtracting two char* values already yields a count of
            // chars; the decompiled "(ptr2 - ptr / 2) / 2" was garbled and is not valid C#.
            int consumed = (int)(cursor - start);
            ok = consumed >= str.Length || Number.TrailingZeros(str, consumed);
        }
        if (!ok)
        {
            throw new FormatException(Environment.GetResourceString("Format_InvalidString"));
        }
    }
}
答案 1 :(得分:5)
我为double类型写了这样一个解析函数,它不会创建临时子字符串。它是为了在JSON解析器中使用而编写的,因此它按照http://www.json.org/的规定,只接受JSON中双精度数的表示形式。
它还不是最优的,因为它要求您知道数字的开始和结束位置(begin
和end
参数),因此您必须遍历数字的长度两次才能找到结束的地方。它仍然比double.Parse
快10-15倍,并且它可以相当容易地修改,它在函数内部找到end
,然后作为out
参数返回,以了解您需要的位置继续解析主字符串。
像这样使用:
// Example calls: each one passes the text, then 0 and the length of the text, and receives
// the result through the out argument j.
// NOTE(review): the second and third arguments presumably are the begin/end positions
// described in the surrounding text — the method itself is not shown here; confirm against the gist.
Parsers.TryParseDoubleFastStream("1", 0, 1, out j);
Parsers.TryParseDoubleFastStream("2.0", 0, 3, out j);
Parsers.TryParseDoubleFastStream("3.5", 0, 3, out j);
Parsers.TryParseDoubleFastStream("-4.5", 0, 4, out j);
Parsers.TryParseDoubleFastStream("50.06", 0, 5, out j);
Parsers.TryParseDoubleFastStream("1000.65", 0, 7, out j);
Parsers.TryParseDoubleFastStream("-10000.8600", 0, 11, out j);
代码可以在这里找到:
https://gist.github.com/3010984(在这里发帖太冗长了。)
而StandardFunctions.IgnoreChar
就我的用途而言非常简单:
// Returns true for every character whose code is at or below the space character
// (code 32), i.e. the characters this parser skips over.
public static bool IgnoreChar(char c)
{
    return c <= ' ';
}
答案 2 :(得分:4)
将所有此代码粘贴到C#中并调用Test()
。这与您可以直接在字符串数组上操作以使用C#解析数字一样接近。它专为速度而非优雅而打造。为OpenGL图形引擎创建了ParseInt
和ParseFloat
函数,以便从基于文本的3D模型中导入矢量。解析浮点数是该过程中的重要瓶颈。这是我能做到的最快速度。
using System;
using System.Diagnostics;
using System.Text;
// Benchmark driver: builds a string holding the numbers 0 .. iterations-1 separated by
// single spaces, times one StringToInts pass over it and prints the elapsed milliseconds.
private void Test()
{
    const int iterations = 1000;

    // Build the space-separated input ("0 1 2 ...").
    StringBuilder builder = new StringBuilder();
    for (int value = 0; value < iterations; value++)
    {
        if (value != 0)
        {
            builder.Append(' ');
        }
        builder.Append(value.ToString());
    }
    string numberString = builder.ToString();

    // Only the parsing itself is timed. Swap in StringToFloats to time the float parser.
    Stopwatch timer = Stopwatch.StartNew();
    StringToInts(numberString, iterations);
    timer.Stop();

    Console.WriteLine("iterations: {0} \t {1}ms", iterations, timer.ElapsedMilliseconds);
}
// Splits `s` at single spaces and parses every token with ParseInt, without creating
// temporary substrings.
//   s      - the space-separated numbers.
//   length - the number of values expected; this is the size of the returned array.
// Returns an array of `length` ints; slots beyond the tokens actually found stay 0.
// Throws ArgumentNullException when `s` is null and ArgumentException when `s` holds more
// tokens than `length` (the original wrote past the end of the array through a raw pointer).
private unsafe int[] StringToInts(string s, int length)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    int[] ints = new int[length];
    int index = 0;
    int startpos = 0;
    fixed (char* pStringBuffer = s)
    {
        for (int n = 0; n < s.Length; n++)
        {
            bool isLast = n == s.Length - 1;
            if (s[n] == ' ' || isLast)
            {
                // A token ends just before a space; at the end of the string the final
                // character belongs to the token as well.
                int end = isLast ? n + 1 : n;
                if (index >= length)
                {
                    throw new ArgumentException("The string contains more values than 'length' allows.", "length");
                }
                ints[index++] = ParseInt(pStringBuffer + startpos, end - startpos);
                startpos = end + 1;
            }
        }
    }
    return ints;
}
// Splits `s` at single spaces and parses every token with ParseFloat, without creating
// temporary substrings.
//   s      - the space-separated numbers.
//   length - the number of values expected; this is the size of the returned array.
// Returns an array of `length` floats; slots beyond the tokens actually found stay 0.
// Throws ArgumentNullException when `s` is null and ArgumentException when `s` holds more
// tokens than `length` (the original wrote past the end of the array through a raw pointer).
private unsafe float[] StringToFloats(string s, int length)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    float[] floats = new float[length];
    int index = 0;
    int startpos = 0;
    fixed (char* pStringBuffer = s)
    {
        for (int n = 0; n < s.Length; n++)
        {
            bool isLast = n == s.Length - 1;
            if (s[n] == ' ' || isLast)
            {
                // A token ends just before a space; at the end of the string the final
                // character belongs to the token as well.
                int end = isLast ? n + 1 : n;
                if (index >= length)
                {
                    throw new ArgumentException("The string contains more values than 'length' allows.", "length");
                }
                floats[index++] = ParseFloat(pStringBuffer + startpos, end - startpos);
                startpos = end + 1;
            }
        }
    }
    return floats;
}
// Parses a decimal integer from the first `len` characters at `input`.
// Leading characters that are neither a digit nor '-' are skipped, an optional '-' is
// consumed, then digits are read until the first non-digit or until `len` is reached.
// Returns 0 when no digit is found. No character at or beyond `len` is read.
// The value is accumulated in an int with no overflow check.
public static unsafe int ParseInt(char* input, int len)
{
    int pos = 0; // read position

    // Find the start of the number.
    while (pos < len && (input[pos] < '0' || input[pos] > '9') && input[pos] != '-')
    {
        pos++;
    }

    // Sign. The bounds check keeps the read inside the buffer when nothing was found.
    bool neg = false;
    if (pos < len && input[pos] == '-')
    {
        neg = true;
        pos++;
    }

    // Digits.
    int part = 0;
    while (pos < len && input[pos] >= '0' && input[pos] <= '9')
    {
        part = part * 10 + (input[pos++] - '0');
    }

    return neg ? -part : part;
}
// Parses a floating-point number from the first `len` characters at `input`.
// Accepted form: optional '-', integer digits, optional '.' with fraction digits, optional
// exponent ('e' or 'E', optional '+' or '-', digits). Leading characters that are not a
// digit, '-' or '.' are skipped. Returns 0 when no digits are found.
// Fixes over the original: an exponent without a sign ("1e5") no longer loses its first
// digit, and no character at or beyond `len` is read.
// Each part is accumulated in an int with no overflow check.
public static unsafe float ParseFloat(char* input, int len)
{
    int pos = 0; // read position

    // Find the start of the number.
    while (pos < len && (input[pos] < '0' || input[pos] > '9') && input[pos] != '-' && input[pos] != '.')
    {
        pos++;
    }

    // Sign of the mantissa.
    bool neg = false;
    if (pos < len && input[pos] == '-')
    {
        neg = true;
        pos++;
    }

    // Integer part.
    int part = 0;
    while (pos < len && input[pos] >= '0' && input[pos] <= '9')
    {
        part = part * 10 + (input[pos++] - '0');
    }
    float ret = neg ? (float)(part * -1) : (float)part;

    // Fraction part: the digits are read as an integer and divided by 10^count.
    if (pos < len && input[pos] == '.')
    {
        pos++;
        double mul = 1;
        part = 0;
        while (pos < len && input[pos] >= '0' && input[pos] <= '9')
        {
            part = part * 10 + (input[pos] - '0');
            mul *= 10;
            pos++;
        }
        float fraction = (float)part / (float)mul;
        // The sign is applied here as well, so that "-0.5" comes out negative.
        ret = neg ? ret - fraction : ret + fraction;
    }

    // Exponent part.
    if (pos < len && (input[pos] == 'e' || input[pos] == 'E'))
    {
        pos++;
        // Only step over the next character when it really is a sign.
        bool expNeg = false;
        if (pos < len && (input[pos] == '-' || input[pos] == '+'))
        {
            expNeg = input[pos] == '-';
            pos++;
        }
        part = 0;
        while (pos < len && input[pos] >= '0' && input[pos] <= '9')
        {
            part = part * 10 + (input[pos++] - '0');
        }
        float scale = (float)Math.Pow(10d, (double)part);
        ret = expNeg ? ret / scale : ret * scale;
    }

    return ret;
}
答案 3 :(得分:1)
那么,是否有内置函数直接从字符串中的子字符串进行解析(即使用给定的字符串的起始和长度)以避免生成临时字符串?如果没有,是否有任何库提供有效的功能来执行此操作?
您似乎想要使用lexing缓冲区和词法分析器,类似于OCaml可以为ocamllex
和Lexbuf
缓冲区提供的内容。 (我不能为F#提供参考。)
如果您的基准测试涉及由其他令牌分隔的大量整数是典型案例,那么它将运行良好。但在其他情况下,这可能是不切实际的。
答案 4 :(得分:0)
不确定这是否有用,但您尝试过类似的事情:
// Split the input at spaces, then parse every piece.
// The C# method is String.Split (capital S); empty entries are dropped so that repeated or
// trailing spaces do not make int.Parse throw on an empty string.
var stringValues = input.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
var intValues = Array.ConvertAll(stringValues, s => int.Parse(s));