我在这里(here)找到了一个解析算法,但它是用 ML 编写的,而我对 ML 不太熟悉。为了更好地理解该算法,我试图把它翻译成像 C++ 这样的命令式语言。现在有一些地方我不确定或不太懂。
这是用于解析后缀表达式的函数头(据我所知,这在技术上不是函数头,而是一个模式匹配,但我不熟悉函数式编程的术语):
parse_postfix(stack, (e, []),
ipts as RATOR (irator as (_, _, POSTFIX)) :: ipts') =
这是否意味着 ipts
是列表 ipts'
的头部,并且是一个后缀运算符?为什么内部还有另一个匹配(irator as...
)?它是把该运算符从列表中移除,还是照常向前推进?或者说,ipts
是移除运算符 irator
之后列表的其余部分?
我很难翻译这个。这是我到目前为止所编码的内容:
#include <cstdlib>
#include <iostream>
#include <map>
#include <stack>
#include <string>
#include <vector>
// Associativity of an operator. The same type is also what noparens receives
// as its `side` argument.
enum Assoc {
    Left = 0,
    Right = 1,
    Noassoc = 2
};
// Position an operator takes relative to its operand(s).
enum Fixity {
    Prefix = 0,
    Infix = 1,
    Postfix = 2
};
// Describes a single operator: its spelling, its precedence and how it
// attaches to its operands.
struct Oper {
    std::string Symbol;  // token text of the operator
    int Precedence;      // compared numerically by noparens

    // Fixity and associativity are stored as two independent fields because a
    // combined "INFIX <assoc>" value cannot be expressed this way; every
    // operator therefore carries an associativity, infix or not.
    Fixity Fix;
    Assoc Asc;

    Oper(std::string const& s, int p, Fixity f, Assoc a)
        : Symbol{s}, Precedence{p}, Fix{f}, Asc{a}
    {
    }
};
// A regular AST representation.
// Common base of every AST node. Nodes are heap-allocated and handed around as
// Expr pointers, so the destructor is virtual: that makes deleting a node
// through an Expr* well-defined and lets callers recover the concrete node
// type with dynamic_cast.
struct Expr {
    virtual ~Expr() = default;
};
// Leaf node holding an integer constant.
struct ConstExpr : public Expr {
    int Value;  // the constant itself

    ConstExpr(int i)
        : Value{i}
    {
    }
};
// Unary node: one operator applied to one sub-expression. Neither pointer is
// released by this struct.
struct UryExpr : public Expr {
    const Expr *Sub;  // the operand
    Oper *OP;         // the operator applied to Sub

    UryExpr(const Expr *s, Oper *o)
        : Sub{s}, OP{o}
    {
    }
};
// Binary node: an operator with a left and a right operand. None of the
// pointers is released by this struct.
struct BinExpr : public Expr {
    const Expr *LHS;  // left operand
    const Expr *RHS;  // right operand
    Oper *OP;         // operator joining LHS and RHS

    BinExpr(const Expr *l, const Expr *r, Oper *o)
        : LHS{l}, RHS{r}, OP{o}
    {
    }
};
// Decides whether an expression headed by `inner` may appear as an operand of
// `outer` without parentheses.
//
//   inner  operator of the operand expression
//   outer  operator of the enclosing expression
//   side   which operand position of `outer` the inner expression occupies;
//          Noassoc is what the callers pass when one of the two operators is
//          unary
//
// Returns true when no parentheses are needed.
//
// Rules:
//   * a strictly higher inner precedence never needs parentheses;
//   * a postfix operator on the left / a prefix operator on the right is
//     unambiguous whatever the precedences are;
//   * an infix operator sitting on its own associative side needs none only
//     if BOTH operators have the SAME precedence and the same associativity
//     (without the equal-precedence requirement a lower-precedence operator
//     would wrongly be allowed inside a higher-precedence one, e.g. the `+`
//     of "2 + 3" inside "... * 4");
//   * with side == Noassoc the two operators must agree in fixity (and, for
//     two infix operators, in associativity).
bool noparens(Oper *inner, Oper *outer, Assoc side) {
    const int pi = inner->Precedence;
    const int po = outer->Precedence;
    const Fixity fi = inner->Fix;
    const Fixity fo = outer->Fix;
    const Assoc ai = inner->Asc;
    const Assoc ao = outer->Asc;

    if (pi > po) return true;

    switch (side) {
    case Left:
        if (fi == Postfix) return true;
        return fi == Infix && ai == Left
            && pi == po && fo == Infix && ao == Left;
    case Right:
        // Mirror image of the (Postfix, Left) case above: it is the PREFIX
        // operator that is unambiguous on the right-hand side.
        if (fi == Prefix) return true;
        return fi == Infix && ai == Right
            && pi == po && fo == Infix && ao == Right;
    case Noassoc:
        if (fi == Infix && fo == Infix) return ai == ao;
        return fi == fo;
    }
    return false;
}
struct StackElem {
Oper *infixop;
const Expr *exp;
std::vector<Oper*> prefixes;
StackElem(Oper* i, const Expr* e, std::vector<Oper*> pref)
: infixop(i), exp(e), prefixes(pref) {}
};
// Operator table consulted by get_op: token text -> operator description.
std::map<std::string, Oper*> OperatorMap;

// Pseudo-operator whose symbol parse_postfix puts in front of the input when
// it falls through all operator cases.
Oper *juxtarator{new Oper{" <juxtarator> ", 100, Infix, Left}};

// Pseudo-operator that srator reports for an empty parser stack.
Oper *minrator{new Oper{" <minimal precedence operator> ", -1, Infix, Noassoc}};
// Returns the infix operator stored in the top stack entry, or minrator when
// the stack holds no entries.
Oper *srator(std::stack<StackElem> const& st) {
    if (st.empty()) {
        return minrator;
    }
    return st.top().infixop;
}
// Looks `s` up in OperatorMap. Returns the registered operator, or nullptr
// when no operator is registered under that text.
Oper* get_op(std::string s) {
    const auto found = OperatorMap.find(s);
    return found != OperatorMap.end() ? found->second : nullptr;
}
Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts);

// Parses the start of an operand: zero or more prefix operators followed by
// an atom, then hands over to parse_postfix.
//
//   stack     suspended infix operators with their left operands
//   prefixes  prefix operators read so far for the current operand; the most
//             recently read (innermost) operator is at index 0, which is the
//             one parse_postfix applies first
//   ipts      remaining input tokens
//
// Any token that is not found in OperatorMap is treated as an integer atom
// and converted with std::atoi.
//
// Throws std::string("Premature EOF") when the input ends where an operand is
// expected, and std::string("Lookahead is not a prefix operator") when an
// infix or postfix operator appears in operand position.
Expr* parse_prefix(const std::stack<StackElem> stack, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts) {
    if (ipts.empty()) throw std::string("Premature EOF");

    const std::string& head = ipts.front();
    const std::vector<std::string> tail(ipts.begin() + 1, ipts.end());

    Oper* op = get_op(head);
    if (!op) {
        return parse_postfix(stack, new ConstExpr(std::atoi(head.c_str())), prefixes, tail);
    }
    if (op->Fix != Prefix) throw std::string("Lookahead is not a prefix operator");

    // Record the operator and keep going. It goes to the FRONT of the list so
    // that the operator closest to the atom is the first one applied.
    // (Previously the extended list was built but the old one was passed on,
    // silently dropping every prefix operator.)
    std::vector<Oper*> newprefix;
    newprefix.reserve(prefixes.size() + 1);
    newprefix.push_back(op);
    newprefix.insert(newprefix.end(), prefixes.begin(), prefixes.end());
    return parse_prefix(stack, newprefix, tail);
}
// Pops the top entry of `stack` and resumes parse_postfix with the binary node
// built from that entry's left operand and operator and `e` as right operand.
// The prefixes saved in the entry become the pending prefixes again.
// Precondition: `stack` is not empty.
static Expr* reduce_stack_top(const std::stack<StackElem>& stack, const Expr* e, const std::vector<std::string>& ipts) {
    const StackElem el = stack.top();
    std::stack<StackElem> stack_tail = stack;
    stack_tail.pop();
    return parse_postfix(stack_tail, new BinExpr(el.exp, e, el.infixop), el.prefixes, ipts);
}

// Continues parsing after a complete operand `e` has been read.
//
//   stack     suspended infix operators with their left operands
//   e         the operand just parsed
//   prefixes  prefix operators still waiting to be applied to `e`;
//             prefixes[0] is applied first
//   ipts      remaining input tokens
//
// Returns the finished expression once the input is exhausted and nothing is
// left pending.
//
// Throws std::string with "Impossible", "Non-associative" or
// "Equal precedence!" for the corresponding conflicts; exceptions thrown by
// parse_prefix propagate unchanged.
Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts)
{
    // End of input: wrap up whatever is still pending instead of inventing a
    // juxtaposition operator (which used to end in "Premature EOF").
    if (ipts.empty()) {
        if (!prefixes.empty()) {
            const std::vector<Oper*> rest(prefixes.begin() + 1, prefixes.end());
            return parse_postfix(stack, new UryExpr(e, prefixes.front()), rest, ipts);
        }
        if (!stack.empty()) {
            return reduce_stack_top(stack, e, ipts);
        }
        // The nodes this parser builds are allocated as non-const objects;
        // they are only viewed through const pointers while parsing.
        return const_cast<Expr*>(e);
    }

    const std::string& head = ipts.front();
    const std::vector<std::string> tail(ipts.begin() + 1, ipts.end());

    // Recognise the juxtaposition symbol directly so that parsing terminates
    // even if it was never registered in OperatorMap.
    Oper* irator = (head == juxtarator->Symbol) ? juxtarator : get_op(head);
    const bool is_postfix = irator && irator->Fix == Postfix;
    const bool is_infix = irator && irator->Fix == Infix;

    if (is_postfix || is_infix) {
        if (prefixes.empty()) {
            // No pending prefixes: the incoming operator competes with the
            // operator on top of the stack.
            if (noparens(srator(stack), irator, Left)) {
                if (stack.empty()) throw std::string("Impossible");
                // Stack operator wins: reduce, then look at `irator` again.
                return reduce_stack_top(stack, e, ipts);
            }
            if (!noparens(irator, srator(stack), Right)) throw std::string("Non-associative");

            if (is_postfix) {
                return parse_postfix(stack, new UryExpr(e, irator), std::vector<Oper*>(), tail);
            }
            std::stack<StackElem> newstack = stack;
            newstack.push(StackElem(irator, e, std::vector<Oper*>()));
            return parse_prefix(newstack, std::vector<Oper*>(), tail);
        }

        // Pending prefixes: the incoming operator competes with the innermost
        // prefix operator.
        Oper* op = prefixes.front();
        if (noparens(op, irator, Noassoc)) {
            // Prefix operator wins: apply it, then look at `irator` again.
            const std::vector<Oper*> newprefixes(prefixes.begin() + 1, prefixes.end());
            return parse_postfix(stack, new UryExpr(e, op), newprefixes, ipts);
        }
        if (!noparens(irator, op, Noassoc)) throw std::string("Equal precedence!");

        if (is_postfix) {
            return parse_postfix(stack, new UryExpr(e, irator), prefixes, tail);
        }
        std::stack<StackElem> newstack = stack;
        newstack.push(StackElem(irator, e, prefixes));
        return parse_prefix(newstack, std::vector<Oper*>(), tail);
    }

    // The next token is an atom or a prefix operator, i.e. the start of a new
    // operand: treat the two adjacent operands as joined by juxtaposition.
    std::vector<std::string> nnip;
    nnip.reserve(ipts.size() + 1);
    nnip.push_back(juxtarator->Symbol);
    nnip.insert(nnip.end(), ipts.begin(), ipts.end());
    return parse_postfix(stack, e, prefixes, nnip);
}
// Entry point: parses the token sequence `input`, starting with an empty
// operator stack and no pending prefix operators.
Expr* parse(std::vector<std::string> input) {
    const std::stack<StackElem> empty_stack;
    const std::vector<Oper*> no_prefixes;
    return parse_prefix(empty_stack, no_prefixes, input);
}
// Test driver: registers the two pseudo-operators and "+", parses the token
// sequence 2 + 3 and prints any parse error to standard output.
int main(void)
{
    OperatorMap.insert(std::make_pair(minrator->Symbol, minrator));
    OperatorMap.insert(std::make_pair(juxtarator->Symbol, juxtarator));
    OperatorMap.insert(std::make_pair("+", new Oper("+", 3, Infix, Left)));
    std::vector<std::string> tokens = { "2", "+", "3" };
    try {
        Expr* e = parse(tokens);
        (void)e;  // the resulting tree is not inspected yet
    }
    // The parser throws std::string by value; catch by const reference to
    // avoid copying the message.
    catch (const std::string& err) {
        std::cout << "Error: " << err << std::endl;
    }
    system("PAUSE");
    return 0;
}
我希望这部分是带有解析前缀的核心,但我不知道如何实现parse_postfix
函数。
编辑:
现在这段代码力求成为一个完整的测试程序,但由于某些原因它失败了:输入 "2" "+" "3"(甚至只输入一个数字)都会触发异常(Premature EOF,即过早的 EOF)。
答案 0 :(得分:2)
parse_postfix(stack, (e, []), ipts as RATOR (irator as (_, _, POSTFIX)) :: ipts') = ...
这意味着
ipts
是列表ipts'
的头部并且是后缀运算符?
不完全是。as
匹配运算符的结合实际上不如 ::
之类的模式构造函数紧密;加上正确的括号后可以看出,ipts
是完整的列表,其中 RATOR ...
为头部,ipts'
(少一个元素)为尾部:
parse_postfix(stack, (e, []),
ipts as (RATOR (irator as (_, _, POSTFIX)) :: ipts')) = ...
为什么内部还有另一个匹配(
irator as...
)?
此处 as
匹配运算符用于两个不同的目的:
ipts as (... :: ipts')
和 irator as (_, _, POSTFIX)
这两个模式用于保证变量 ipts
和 irator
对应特定子结构的内容,因此在函数体中可以保证 ipts
永远不会为空,irator
始终是后缀形式的运算符(rator)(否则就不该由 parse_postfix
来处理它)。
作为一项小的性能优化。诺曼(Norman)本来也可以写成下面这样:
parse_postfix(stack, (e, []),
RATOR (text, prec, POSTFIX) :: ipts') = ...
然后,凡是引用 irator
的地方都改写为 (text, prec, POSTFIX)
,凡是引用 ipts
的地方都改写为 RATOR (text, prec, POSTFIX) :: ipts'
。但这样写更长、更难阅读,并且在引用 irator
和 ipts
时需要重新构造已经在内存中构造好的值(也就是说,使用 as 可以减少复制)。
相反,辅助函数 noparens
、值构造函数 UNARY
、异常 ParseError
等都被设计成直接处理 irator
这个三元组,正是为了这种便利。
它是把该运算符从列表中移除,还是照常向前推进?或者说,
ipts
是移除运算符 irator
之后列表的其余部分?
有时会,而且你说的差不多是对的。ipts'
是移除 irator
之后列表的其余部分,而 ipts
是未移除任何元素的完整列表。根据各个 if-then-else 分支中引用的是 ipts
还是 ipts'
,元素会被弹出或不被弹出。
我希望这部分是带有解析前缀的核心,但我不知道如何实现
parse_postfix
函数。
我现在不能说。但有一件事是肯定的:如果你坚持使用不可变数据结构,翻译这些函数会简单得多。不过,它不会跑得那么快。