我的解析器通过首先从中缀转换为后缀来评估PEMDAS表达式,然后使用标准的后缀评估规则。我解析表达式并将标记存储在列表中。这个预编译对我来说没问题,因为我计划缓存预编译的函数。
我正在尝试优化评估功能(请参阅代码中的Evaluate04)。在我的硬件上,我可以在不到600毫秒的时间内获得1,000,000次评估。说实话,我相信这很快。比数据库访问调用获取表达式要快得多。
我想看看你们是否可以加快速度。微优化,完全重构类。它的完成方式无关紧要。
这是我能够获得它的速度,你能改进吗?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
namespace ParseTest2
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
enum Operator {Add, Subtract, Multiply, Divide}
enum TokenType { Variable, Value, Add, Subtract, Multiply, Divide };
private class TokenFactory
{
public static Token Add
{
get { return new Token(TokenType.Add); }
}
public static Token Subtract
{
get { return new Token(TokenType.Subtract); }
}
public static Token Multiply
{
get { return new Token(TokenType.Multiply); }
}
public static Token Divide
{
get { return new Token(TokenType.Divide); }
}
public static Token Val(float value)
{
return new Token(value);
}
public static Token Var(string variable)
{
return new Token(variable);
}
}
private class Token
{
public readonly TokenType TokenType;
public float Value;
public readonly string Variable;
public Token(TokenType tokenType)
{
TokenType = tokenType;
//Value = 0;
//Variable = null;
}
public Token(float value)
{
TokenType = TokenType.Value;
Value = value;
//Variable = null;
}
public Token(string variable)
{
//TokenType = TokenType.Variable;
Variable = variable;
//Value = 0;
}
public override string ToString()
{
return
Expression.IsOperand(this) ?
string.Format("{0} {1} {2}", TokenType, Variable ?? "", Value) :
string.Format("{0}", TokenType);
}
}
class Expression
{
List<Token> _tokens;
public Action<Token> SetVariableValue;
public Expression(string expression)
{
//time to convert to postfix from infix
var infix = expression.Split(' ');
string postfix = string.Empty;
Action<string> add = s => {postfix += " " + s;};
var ops = new Stack<string>();
Func<string, int> getPrecedence = value =>
{
switch(value)
{
case "*":
case "/":
return 1;
case "+":
case "-":
return 0;
}
return -1;
};
Func<string, bool> isLowerOrEqualPrecedence = val =>
{
if (ops.Count == 0) return false;
var op = ops.Peek();
int cur = getPrecedence(val);
int top = getPrecedence(op);
return cur <= top;
};
foreach (string val in infix)
{
if ("-+*/".Contains(val))
{
if (ops.Count == 0)
ops.Push(val);
else
{
if (!isLowerOrEqualPrecedence(val))
{
ops.Push(val);
}
else
{
while (isLowerOrEqualPrecedence(val))
{
add(ops.Pop());
}
ops.Push(val);
}
}
}
else if (val == "(")
ops.Push(val);
else if (val == ")")
{
while (true)
{
var op = ops.Pop();
if (op == "(") break;
add(op);
}
}
else
{
add(val);
}
}
while (ops.Count > 0)
{
add(ops.Pop());
}
_tokens = new List<Token>();
foreach (string x in postfix.Trim().Split(' '))
{
switch(x)
{
case "*":
_tokens.Add(TokenFactory.Multiply);
break;
case "/":
_tokens.Add(TokenFactory.Divide);
break;
case "+":
_tokens.Add(TokenFactory.Add);
break;
case "-":
_tokens.Add(TokenFactory.Subtract);
break;
default:
_tokens.Add(TokenFactory.Var(x));
break;
}
}
}
public static bool IsOperand(Token tk)
{
return tk.TokenType == TokenType.Variable || tk.TokenType == TokenType.Value;
}
public float Evaluate04()
{
Token[] tokens = _tokens.ToArray();
int i;
for (i = tokens.Length - 1; i >= 0; --i)
if (tokens[i].TokenType == TokenType.Variable)
SetVariableValue(tokens[i]);
int tokenCount = tokens.Length;
while (true)
{
int i1 = 0, i2 = 0, i3 = 0;
//i1 = i2 = i3 = -1;
int j;
Token t1, t2, t3;
//looking for operator preceded by two operands
for (i = 0; i < tokens.Length - 2; ++i)
//i = 0;
//while( true )
{
t1 = null;
t2 = null;
t3 = null;
j = i;
do
{
t1 = tokens[j];
i1 = j++;
} while (t1 == null);
do
{
t2 = tokens[j];
i2 = j++;
} while (t2 == null);
do
{
t3 = tokens[j];
i3 = j++;
} while (t3 == null);
//it's a binary operation if t3 is an operator and t2, t1 are operands
//if (!IsOperand(t3) && IsOperand(t2) && IsOperand(t1))
if (
!(t3.TokenType == TokenType.Variable || t3.TokenType == TokenType.Value) &&
(t2.TokenType == TokenType.Variable || t2.TokenType == TokenType.Value) &&
(t1.TokenType == TokenType.Variable || t1.TokenType == TokenType.Value))
{
float val;
if (t3.TokenType == TokenType.Add)
val = t1.Value + t2.Value;
else if (t3.TokenType == TokenType.Divide)
val = t1.Value / t2.Value;
else if (t3.TokenType == TokenType.Subtract)
val = t1.Value - t2.Value;
else if (t3.TokenType == TokenType.Multiply)
val = t1.Value * t2.Value;
else
val = int.MinValue;
//we're done, return
if (tokenCount == 3)
return val;
tokenCount -= 2;
//ok, we are done processing these, null them
tokens[i1] = new Token(val);
tokens[i2] = null;
tokens[i3] = null;
//break, for loop and repeat until done
break;
}
}
}
}
}
private void Form1_Load(object sender, EventArgs e)
{
Action<Token> setVariableValue = token =>
{
if (token.Variable == "A")
token.Value = 4;
else if (token.Variable == "B")
token.Value = 5;
else if (token.Variable == "C")
token.Value = 7;
else if (token.Variable == "D")
token.Value = 2;
};
//spaces are required because I'm splitting on them.
//I know it's lame, it's a detail I'll address later...
var x = new Expression("( A + B ) / ( C + D )");
x.SetVariableValue = setVariableValue;
Func<float> eval = x.Evaluate04;
eval();
int count = 1000000;
float res = 0;
var sw = new System.Diagnostics.Stopwatch();
//sw.Start();
//Parallel.ForEach(Enumerable.Range(1, count), i =>
//{
// res = eval();
//});
//sw.Stop();
//Console.WriteLine("{0} takes: {1}", "parallel", sw.ElapsedMilliseconds);
sw.Reset();
sw.Start();
//for(int i=0; i<count; ++i)
foreach (int i in Enumerable.Range(1, count))
{
res = eval();
}
sw.Stop();
Console.WriteLine("{0} takes: {1}", "single thread", sw.ElapsedMilliseconds);
Console.WriteLine("{0}", res);
}
private void Form1_KeyDown(object sender, KeyEventArgs e)
{
if (e.KeyCode == Keys.Escape)
Close();
}
}
}
答案 0 :(得分:1)
如果您只执行一次并多次进行评估,则从解析树到RPN的处理不应该是性能问题。
所有这些令牌爵士乐和索引令牌数组是什么?
为什么不只是有一个堆栈,像这样:
for (i = 0; i < n; i++){
switch(op[i]){
case VARIABLE:
stk[j++] = <value_of_variable>;
break;
case PLUS:
temp = stk[--j];
stk[j-1] += temp;
break;
// and so on...
}
}
在内部循环中,您不应该进行任何内存分配。 你应该做的最昂贵的事情是查找变量的值。
The easiest way to find out what is taking the most time is this.
第二种最简单的方法是在反汇编级别单步执行。如果你发现它进入new
之类的任何系统例程,请快速停止。迭代器也是如此。