Question

我有一个解析器，基本上打印出堆栈机器的动作，我的运算符优先级给定了一些表达式。我的目标是尽可能优化速度。我已阅读提供an article concerning qi optimizations的this example code。我理解主要文章中描述的优化的要点，但是我不清楚如何将它集成到我的代码中。

以下是我的解析器的以下工作示例。我已经尝试使用raw[]来提供基本迭代器来进行优化。必须给凤凰动作调用提供字符串或迭代器，通过它们可以创建字符串;这些函数的真实版本并不简单，它们的功能还无法在解析时评估：

#include <iostream>
#include <vector>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_char.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using std::endl;
using std::cout;
using std::string;
using std::vector;

void fPushOp(const char* op){
  cout << "PushOp: " << op << endl;
}

void fPushInt(const boost::iterator_range<string::const_iterator>& my_str){
  cout << "PushInt: " << my_str << endl;
}

template<typename Iterator, typename Skipper = qi::space_type>
struct Calculator : public qi::grammar<Iterator, Skipper> {

  qi::rule<Iterator, Skipper>  
    expression, logical_or_expression, logical_and_expression, negate_expression, series_expression,
    single_expression, inclusive_or_expression, exclusive_or_expression, and_expression, equality_expression, 
    relational_expression, shift_expression, additive_expression, multiplicative_expression, 
    term, complement_factor, factor, result, integer, variable, variable_combo, word, prefix;

  qi::rule<Iterator> number;
  Calculator() : Calculator::base_type(result)
  {
    number = 
        qi::raw[
            ("0x" >> +qi::char_("0-9a-fA-F"))     
          | ("0b" >> +qi::char_("0-1"))
          | ("0" >>  +qi::char_("0-7"))
          | (+qi::char_("0-9"))
        ] [phx::bind(&fPushInt, qi::_1)]
        ;

    integer = 
          number
        | ('-' >> number) [phx::bind(&fPushOp, "OP_UNARY_MINUS")]
        ;

    variable =
          ((qi::alpha | qi::char_('_')) 
              >> *(qi::alnum | qi::char_('_')) 
              >> '['
              >>  (+(qi::alnum | qi::char_('_') | qi::char_(',')) 
                | ('\'' >> *~qi::char_('\'') >> '\'')) 
              >> ']')
        | ((qi::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_')))
        ;

    variable_combo =
        qi::raw [
          variable >> *(qi::char_('.') >> variable)
        ] [phx::bind(&fPushInt, qi::_1)]
        ;

    word = 
        qi::raw[
          variable
        ] [phx::bind(&fPushInt, qi::_1)]
        ;

    factor =
            ("ceil(" >> expression >> ')')                                                      [phx::bind(&fPushOp, "OP_CEIL")]
        |   ("wrap(" >> expression >> ')')                                                      [phx::bind(&fPushOp, "OP_WRAP")]
        |   ("abs(" >> expression >> ')')                                                       [phx::bind(&fPushOp, "OP_ABS")]
        |   ("count1(" >> expression >> ')')                                                    [phx::bind(&fPushOp, "OP_COUNT1")]
        |   ("pick(" >> expression >> ')')                                                      [phx::bind(&fPushOp, "OP_PICK")]
        |   ("defined(" >> expression >> ')')                                                   [phx::bind(&fPushOp, "OP_DEF")]
        |   ("string_equal(" >> word >> ',' >> word >> ')')                                     [phx::bind(&fPushOp, "OP_STREQ")]
        |   ("string_contains(" >> word >> ',' >> word >> ')')                                  [phx::bind(&fPushOp, "OP_STRCON")]
        |   ("lsl(" >> single_expression >> ',' >> single_expression >> ',' >> number >> ')')   [phx::bind(&fPushOp, "OP_LSL")]
        |   ("lsr(" >> single_expression >> ',' >> single_expression >> ')')                    [phx::bind(&fPushOp, "OP_LSR")]
        |   ("asr(" >> single_expression >> ',' >> single_expression >> ',' >> number >> ')')   [phx::bind(&fPushOp, "OP_ASR")]
        |   ("ror(" >> single_expression >> ',' >> single_expression >> ',' >> number >> ')')   [phx::bind(&fPushOp, "OP_ROR")]
        |   ("rrx(" >> single_expression >> ',' >> single_expression >> ',' >> single_expression >> ',' >> number >> ')')[phx::bind(&fPushOp, "OP_RRX")]
        |   ('(' >> expression >> ')')
        |   variable_combo
        |   integer
        ;
    complement_factor = factor
        | ('~' >> factor) [phx::bind(&fPushOp, "OP_COMPLEMENT")]
        ;
    term = complement_factor
      >> *( (".." >> complement_factor) [phx::bind(&fPushOp, "OP_LEGER")]
          | ('\\' >> complement_factor) [phx::bind(&fPushOp, "OP_MASK")]
          ); 
    multiplicative_expression = term
      >> *( ('/' >> term) [phx::bind(&fPushOp, "OP_DIV")]
          | ('%' >> term) [phx::bind(&fPushOp, "OP_MOD")]
          | ('*' >> term) [phx::bind(&fPushOp, "OP_MUL")]
          );
    additive_expression = multiplicative_expression
      >> *( ('+' >> multiplicative_expression)  [phx::bind(&fPushOp, "OP_ADD")]
          | ('-' >> multiplicative_expression)  [phx::bind(&fPushOp, "OP_SUB")]
          );
    shift_expression = additive_expression
      >> *( (">>" >> additive_expression) [phx::bind(&fPushOp, "OP_SRL")]
          | ("<<" >> additive_expression) [phx::bind(&fPushOp, "OP_SLL")]
          );
    relational_expression = shift_expression
      >> *( ('<' >> shift_expression) [phx::bind(&fPushOp, "OP_LT")]
          | ('>' >> shift_expression) [phx::bind(&fPushOp, "OP_GT")]
          | ("<=" >> shift_expression)[phx::bind(&fPushOp, "OP_LET")]
          | (">=" >> shift_expression)[phx::bind(&fPushOp, "OP_GET")]
          );
    equality_expression = relational_expression 
      >> *( ("==" >> relational_expression)[phx::bind(&fPushOp, "OP_EQ")]
          | ("!=" >> relational_expression)[phx::bind(&fPushOp, "OP_NEQ")] 
          );
    and_expression = equality_expression 
      >> *(('&' >> equality_expression)     [phx::bind(&fPushOp, "OP_AND")]); 
    exclusive_or_expression = and_expression 
      >> *(('^' >> and_expression)          [phx::bind(&fPushOp, "OP_XOR")]); 
    inclusive_or_expression = exclusive_or_expression 
      >> *(('|' >> exclusive_or_expression) [phx::bind(&fPushOp, "OP_OR")]); 
    single_expression = inclusive_or_expression;
    series_expression = inclusive_or_expression 
      >> *((',' >> inclusive_or_expression) [phx::bind(&fPushOp, "OP_SERIES")]);
    negate_expression = series_expression
        | ('!' >> series_expression)        [phx::bind(&fPushOp, "OP_NEGATE")];
    logical_and_expression = negate_expression
      >> *(("&&" >> negate_expression)      [phx::bind(&fPushOp, "OP_LOGICAL_AND")]); 
    logical_or_expression = logical_and_expression 
      >> *(("||" >> logical_and_expression) [phx::bind(&fPushOp, "OP_LOGICAL_OR")]);
    expression = logical_or_expression;

    result = expression;
  }
};

int main(){
  Calculator<string::const_iterator> calc;
  const string expr("0xff0000 >> 3 && 3 + (!9) | (0,200)");
  cout << "Expression: " << expr << endl;

  string::const_iterator it = expr.begin();
  phrase_parse(it, expr.end(), calc, qi::space);

  cout << "Remaining: " << (string(it,expr.end())) << endl;
  return 0;
}

此外，我读了the slides from this pdf concerning utree并试图弄清楚如果可能的话，如何使用utree输出而不是语义动作，因为显然这些事情是邪恶的。有人可以提供或指出一个关于如何构造utree的简单示例，然后可以将其提供给堆栈计算机以按顺序打印出操作吗？

Answer 1

优化取决于您想要实现的目标。因此，我认为你过早地进行了优化。

E.g。将variable_combo解析为raw[]输入序列如果您想稍后解释符号没有任何意义（因为您必须再次解析变量combo ，并且你甚至不得不预测那里的空格："foo . bar .tux"这里是一个有效的变量组合。）

我有很多关于优化Boost Spirit的帖子（例如，启动here）。快速观察：

考虑回溯的正确性;用你的语法解析'ceil（3.7'），你会得到：
```
Expression: ceil(3.7)
PushInt: 3
PushInt: ceil
Remaining: (3.7)
```
注意解析失败时如何发出操作码。另请注意，它会发出错误的操作码
- 推送3而不是3.7
- 它推送ceil作为PushInt？
因此，它不仅检测到解析失败太晚，它只是忽略括号，无法发现函数调用并解析错误的数字。

关于过早评估，我将指出这个流行的答案：Boost Spirit: "Semantic actions are evil"?

除此之外，我只是确认我怀疑你是在做过早的优化。考虑做
```
#define BOOST_SPIRIT_DEBUG
```
然后在语法构造函数中：
```
BOOST_SPIRIT_DEBUG_NODES(
        (expression)(logical_or_expression)(logical_and_expression)(negate_expression)(series_expression)(single_expression)
        (inclusive_or_expression)(exclusive_or_expression)(and_expression)(equality_expression)(relational_expression)
        (shift_expression)(additive_expression)(multiplicative_expression)(term)(complement_factor)(factor)(result)(integer)
        (variable)(variable_combo)(word)(prefix)
```
要真正了解解析器的行为方式。

考虑qi :: symbols例如：

qi::symbols<char,const char*> unary_function;

unary_function.add
    ("ceil",    "OP_CEIL")
    ("wrap",    "OP_WRAP")
    ("abs",     "OP_ABS")
    ("count1",  "OP_COUNT1")
    ("pick",    "OP_PICK")
    ("defined", "OP_DEF");

unary_call = (unary_function >> "(" >> expression >> ')') [phx::bind(&fPushOp, qi::_1)];

特征可能会让内部编译器在内联后更有可能进行优化（与语义操作相反，因为许多级别的模板实例化会掩盖某些情况，特别是涉及bind时）

您可能希望在此处创建运算符优先级表，正如某些精神示例所示。使用规则层次结构强制执行优先级的传统方法使语法复杂化。这有两个关键的缺点：

每条规则引入虚拟调度（Spirit X3将来可能不再具有此限制）
你的语法变得如此复杂，以至于你已经失去了概述（见第一个子弹）

优化boost :: spirit :: qi解析器

1 个答案:

推荐