帮助优化我的RPN评估功能

时间:2009-10-19 13:50:57

标签: c# parsing optimization infix-notation postfix-notation

我的解析器通过首先从中缀转换为后缀来评估PEMDAS表达式,然后使用标准的后缀评估规则。我解析表达式并将标记存储在列表中。这个预编译对我来说没问题,因为我计划缓存预编译的函数。

我正在尝试优化评估功能(请参阅代码中的Evaluate04)。在我的硬件上,我可以在不到600毫秒的时间内获得1,000,000次评估。说实话,我相信这很快。比数据库访问调用获取表达式要快得多。

我想看看你们是否可以加快速度。微优化,完全重构类。它的完成方式无关紧要。

这是我能够获得它的速度,你能改进吗?

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;

namespace ParseTest2
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        enum Operator {Add, Subtract, Multiply, Divide}

        enum TokenType { Variable, Value, Add, Subtract, Multiply, Divide };

        private class TokenFactory
        {
            public static Token Add
            {
                get { return new Token(TokenType.Add); }
            }
            public static Token Subtract
            {
                get { return new Token(TokenType.Subtract); }
            }
            public static Token Multiply
            {
                get { return new Token(TokenType.Multiply); }
            }
            public static Token Divide
            {
                get { return new Token(TokenType.Divide); }
            }
            public static Token Val(float value)
            {
                return new Token(value);
            }
            public static Token Var(string variable)
            {
                return new Token(variable);
            }  
        }

        private class Token
        {
            public readonly TokenType TokenType;
            public float Value;
            public readonly string Variable;

            public Token(TokenType tokenType)
            {
                TokenType = tokenType;
                //Value = 0;
                //Variable = null;                
            }
            public Token(float value)
            {
                TokenType = TokenType.Value;
                Value = value;
                //Variable = null;
            }
            public Token(string variable)
            {
                //TokenType = TokenType.Variable;
                Variable = variable;
                //Value = 0;
            }
            public override string ToString()
            {
                return
                    Expression.IsOperand(this) ?
                    string.Format("{0} {1} {2}", TokenType, Variable ?? "", Value) :
                    string.Format("{0}", TokenType);
            }
        }

        class Expression
        {
            List<Token> _tokens;

            public Action<Token> SetVariableValue;

            public Expression(string expression)
            {
                //time to convert to postfix from infix
                var infix = expression.Split(' ');
                string postfix = string.Empty;

                Action<string> add = s => {postfix += " " + s;};
                var ops = new Stack<string>();

                Func<string, int> getPrecedence = value =>
                {
                    switch(value)
                    {
                        case "*":
                        case "/":
                            return 1;
                        case "+":
                        case "-":
                            return 0;

                    }
                    return -1;
                };

                Func<string, bool> isLowerOrEqualPrecedence = val => 
                {
                    if (ops.Count == 0) return false;
                    var op = ops.Peek();
                    int cur = getPrecedence(val);
                    int top = getPrecedence(op);
                    return cur <= top;
                };

                foreach (string val in infix)
                {
                    if ("-+*/".Contains(val))
                    {
                        if (ops.Count == 0)
                            ops.Push(val);
                        else
                        {
                            if (!isLowerOrEqualPrecedence(val))
                            {
                                ops.Push(val);
                            }
                            else
                            {
                                while (isLowerOrEqualPrecedence(val))
                                {
                                    add(ops.Pop());
                                }
                                ops.Push(val);
                            }
                        }
                    }
                    else if (val == "(")
                        ops.Push(val);
                    else if (val == ")")
                    {
                        while (true)
                        {
                            var op = ops.Pop();
                            if (op == "(") break;
                            add(op);
                        }
                    }
                    else
                    {
                        add(val);
                    }                    
                }

                while (ops.Count > 0)
                {
                    add(ops.Pop());
                }


                _tokens = new List<Token>();
                foreach (string x in postfix.Trim().Split(' '))
                { 
                    switch(x)
                    {
                        case "*":
                            _tokens.Add(TokenFactory.Multiply);
                            break;
                        case "/":
                            _tokens.Add(TokenFactory.Divide);
                            break;
                        case "+":
                            _tokens.Add(TokenFactory.Add);
                            break;                        
                        case "-":
                            _tokens.Add(TokenFactory.Subtract);
                            break;
                        default:
                            _tokens.Add(TokenFactory.Var(x));
                            break;
                    }
                }                             
            }

            public static bool IsOperand(Token tk)
            {
                return tk.TokenType == TokenType.Variable || tk.TokenType == TokenType.Value;
            }



            public float Evaluate04()
            {
                Token[] tokens = _tokens.ToArray();

                int i;

                for (i = tokens.Length - 1; i >= 0; --i)
                    if (tokens[i].TokenType == TokenType.Variable)
                        SetVariableValue(tokens[i]);

                int tokenCount = tokens.Length;

                while (true)
                {
                    int i1 = 0, i2 = 0, i3 = 0;
                    //i1 = i2 = i3 = -1;
                    int j;
                    Token t1, t2, t3;

                    //looking for operator preceded by two operands
                    for (i = 0; i < tokens.Length - 2; ++i)
                    //i = 0;
                    //while( true )
                    {
                        t1 = null;
                        t2 = null;
                        t3 = null;

                        j = i;
                        do
                        {
                            t1 = tokens[j];
                            i1 = j++;
                        } while (t1 == null);


                        do
                        {
                            t2 = tokens[j];
                            i2 = j++;
                        } while (t2 == null);

                        do
                        {
                            t3 = tokens[j];
                            i3 = j++;
                        } while (t3 == null);

                        //it's a binary operation if t3 is an operator and t2, t1 are operands
                        //if (!IsOperand(t3) && IsOperand(t2) && IsOperand(t1))
                        if (
                            !(t3.TokenType == TokenType.Variable || t3.TokenType == TokenType.Value) &&
                             (t2.TokenType == TokenType.Variable || t2.TokenType == TokenType.Value) &&
                             (t1.TokenType == TokenType.Variable || t1.TokenType == TokenType.Value))
                        {
                            float val;

                            if (t3.TokenType == TokenType.Add)
                                val = t1.Value + t2.Value;
                            else if (t3.TokenType == TokenType.Divide)
                                val = t1.Value / t2.Value;
                            else if (t3.TokenType == TokenType.Subtract)
                                val = t1.Value - t2.Value;
                            else if (t3.TokenType == TokenType.Multiply)
                                val = t1.Value * t2.Value;
                            else
                                val = int.MinValue;

                            //we're done, return
                            if (tokenCount == 3)
                                return val;

                            tokenCount -= 2;

                            //ok, we are done processing these, null them
                            tokens[i1] = new Token(val);
                            tokens[i2] = null;
                            tokens[i3] = null;

                            //break, for loop and repeat until done
                            break;
                        }
                    }
                }
            }
        }
        private void Form1_Load(object sender, EventArgs e)
        {

            Action<Token> setVariableValue = token =>
            {
                if (token.Variable == "A")
                    token.Value = 4;
                else if (token.Variable == "B")
                    token.Value = 5;
                else if (token.Variable == "C")
                    token.Value = 7;
                else if (token.Variable == "D")
                    token.Value = 2;
            };

            //spaces are required because I'm splitting on them.
            //I know it's lame, it's a detail I'll address later...
            var x = new Expression("( A + B ) / ( C + D )");
            x.SetVariableValue = setVariableValue;
            Func<float> eval = x.Evaluate04;
            eval();
            int count = 1000000;
            float res = 0;

            var sw = new System.Diagnostics.Stopwatch();

            //sw.Start();
            //Parallel.ForEach(Enumerable.Range(1, count), i =>
            //{
            //    res = eval();
            //});
            //sw.Stop();
            //Console.WriteLine("{0} takes: {1}", "parallel", sw.ElapsedMilliseconds);

            sw.Reset();
            sw.Start();

            //for(int i=0; i<count; ++i)
            foreach (int i in Enumerable.Range(1, count))
            {
                res = eval();
            }
            sw.Stop();
            Console.WriteLine("{0} takes: {1}", "single thread", sw.ElapsedMilliseconds);
            Console.WriteLine("{0}", res);

        }

        private void Form1_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Escape)
                Close();

        }         
    }
}

1 个答案:

答案 0 :(得分:1)

如果您只执行一次并多次进行评估,则从解析树到RPN的处理不应该是性能问题。

所有这些令牌爵士乐和索引令牌数组是什么?

为什么不只是有一个堆栈,像这样:

for (i = 0; i < n; i++){
  switch(op[i]){
    case VARIABLE:
      stk[j++] = <value_of_variable>;
      break;
    case PLUS:
      temp = stk[--j];
      stk[j-1] += temp;
      break;
    // and so on...
  }
}

在内部循环中,您不应该进行任何内存分配。 你应该做的最昂贵的事情是查找变量的值。

The easiest way to find out what is taking the most time is this.

第二种最简单的方法是在反汇编级别单步执行。如果你发现它进入new之类的任何系统例程,请快速停止。迭代器也是如此。