我正在构建一个命令语言的解析器,我从各种样本中拼凑起来。我已经阅读了Boost Spirit Qi和Lex文档,我认为我理解基础知识,但从我读过的内容来看,我应该避免使用属性并使用utree。我在utree上找到的文档基本上都很糟糕。鉴于以下代码,我有以下问题:
如何使命令令牌不区分大小写,但不更改带引号的字符串的内容?
#include <Windows.h>
#include <conio.h>
#include <string>
#include <vector>
#include <iostream>
#define BOOST_SPIRIT_DEBUG
#include <boost\spirit\include\qi.hpp>
#include <boost\spirit\include\phoenix.hpp>
#include <boost\spirit\include\lex.hpp>
#include <boost\spirit\include\lex_lexertl.hpp>
using namespace std;
using namespace boost::spirit;
using boost::spirit::utree;
//
// Tokens used by the command grammar
//
//
// Lexer definition for the command language.
//
// Each verb/noun pattern spells out the accepted abbreviations as nested
// optional groups, e.g. "B(O(O(T)?)?)?" accepts B, BO, BOO and BOOT.
// Whitespace lives in a separate lexer state named "WS" so that it can be
// used as a skipper by the parser.
//
template <typename Lexer>
struct command_tokens : lex::lexer <Lexer>
{
    command_tokens () :
        //
        // The initialisers are listed in member declaration order, which is
        // the order in which the members are actually constructed.
        //
        boot ("B(O(O(T)?)?)?"),
        dash_help ("-H(E(L(P)?)?)?"),
        debug ("DEB(U(G)?)?"),
        drive ("DRI(V(E)?)?"),
        exit ("E(X(I(T)?)?)?"),
        help ("H(E(L(P)?)?)?"),
        load ("L(O(A(D)?)?)?"),
        off ("OFF"),
        on ("ON"),
        quit ("Q(U(I(T)?)?)?"),
        //
        // A double-quoted string: opening quote, any run of characters that
        // are not a quote, closing quote. (A pattern of three dots would
        // match any three characters instead of a quoted string.)
        //
        quoted_string ("\\\"[^\\\"]*\\\""),
        set ("SE(T)?"),
        show ("SH(O(W)?)?"),
        slash_help ("\\/H(E(L(P)?)?)?"),
        trace ("TRA(C(E)?)?")
    {
        using namespace boost::spirit::lex;
        //
        // Associate the tokens with the lexer (default state)
        //
        this->self
            = boot
            | exit
            | help
            | dash_help
            | slash_help
            | load
            | quit
            | set
            | show
            | debug
            | drive
            | trace
            | off
            | on
            | quoted_string
            ;
        //
        // Define whitespace to ignore: space, tab, newline
        //
        this->self ("WS")
            = lex::token_def <> ("[ \\t\\n]+")
            ;
    }

    //
    // Token definitions, in alphabetical order. Only `drive` and
    // `quoted_string` declare a std::string attribute.
    //
    lex::token_def <> boot;
    lex::token_def <> dash_help;
    lex::token_def <> debug;
    lex::token_def <string> drive;
    lex::token_def <> exit;
    lex::token_def <> help;
    lex::token_def <> load;
    lex::token_def <> off;
    lex::token_def <> on;
    lex::token_def <> quit;
    lex::token_def <string> quoted_string;
    lex::token_def <> set;
    lex::token_def <> show;
    lex::token_def <> slash_help;
    lex::token_def <> trace;
};
//
// Display parse error
//
struct error_handler_
{
template <typename, typename, typename>
struct result
{
typedef void type;
};
template <typename Iterator>
void operator ()
(
qi::info const& What,
Iterator Err_pos,
Iterator Last
) const
{
cout << "Error! Expecting "
<< What
<< " here: \""
<< string (Err_pos, Last)
<< "\""
<< endl;
}
};
boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
//
// Grammar describing the valid commands
//
//
// Token-based grammar for the command language. The grammar has no
// attribute; it only recognises sequences of commands.
//
template <typename Iterator, typename Lexer>
struct command_grammar : qi::grammar <Iterator>
{
    //
    // Tok - the token definitions the rules refer to.
    //
    // The constructor's template parameter has its own name: redeclaring the
    // class template parameter `Lexer` here is ill-formed.
    //
    template <typename TokenLexer>
    command_grammar (command_tokens <TokenLexer> const& Tok) :
        command_grammar::base_type (start)
    {
        using qi::on_error;
        using qi::fail;
        using qi::char_;

        // One or more commands
        start
            = +commands;
        commands
            = (
                boot_command
              | exit_command
              | help_command
              | load_command
              | set_command
              | show_command
              );

        // Verbs
        boot_command
            = Tok.boot;
        exit_command
            = Tok.exit
            | Tok.quit;
        help_command
            = Tok.help
            | Tok.dash_help
            | Tok.slash_help;
        load_command
            = Tok.load >> Tok.quoted_string;
        // NOTE(review): set_property below is defined but not referenced by
        // set_command - confirm whether `Tok.set >> set_property` was intended.
        set_command
            = Tok.set;
        show_command
            = Tok.show;

        // Properties
        set_property
            = debug_property
            | drive_property
            | trace_property;
        debug_property
            = Tok.debug >> on_off;
        // NOTE(review): char_ is applied to the token stream here, and the
        // lexer defines no token for a drive letter or ':' - confirm.
        drive_property
            = Tok.drive >> char_ ("A-Z") >> char_ (":");
        trace_property
            = Tok.trace >> on_off;
        on_off
            = Tok.on
            | Tok.off;

        // quit_command and target_property are declared but never assigned
        // a definition in this constructor.
        BOOST_SPIRIT_DEBUG_NODE (start);
        BOOST_SPIRIT_DEBUG_NODE (commands);
        BOOST_SPIRIT_DEBUG_NODE (boot_command);
        BOOST_SPIRIT_DEBUG_NODE (exit_command);
        BOOST_SPIRIT_DEBUG_NODE (help_command);
        BOOST_SPIRIT_DEBUG_NODE (load_command);
        BOOST_SPIRIT_DEBUG_NODE (quit_command);
        BOOST_SPIRIT_DEBUG_NODE (set_command);
        BOOST_SPIRIT_DEBUG_NODE (show_command);
        BOOST_SPIRIT_DEBUG_NODE (set_property);
        BOOST_SPIRIT_DEBUG_NODE (debug_property);
        BOOST_SPIRIT_DEBUG_NODE (drive_property);
        BOOST_SPIRIT_DEBUG_NODE (trace_property);
        BOOST_SPIRIT_DEBUG_NODE (target_property);

        // Report failures of the start rule through error_handler
        on_error <fail> (start, error_handler (_4, _3, _2));
    }

    qi::rule <Iterator> start;
    qi::rule <Iterator> commands;
    qi::rule <Iterator> boot_command;
    qi::rule <Iterator> exit_command;
    qi::rule <Iterator> help_command;
    qi::rule <Iterator> load_command;
    qi::rule <Iterator> quit_command;
    qi::rule <Iterator> set_command;
    qi::rule <Iterator> show_command;
    qi::rule <Iterator> set_property;
    qi::rule <Iterator> debug_property;
    qi::rule <Iterator, string ()> drive_property;
    qi::rule <Iterator> target_property;
    qi::rule <Iterator> trace_property;
    qi::rule <Iterator> on_off;
};
//
// Tokenizes and parses a fixed sample command and reports the outcome.
//
// The command-line arguments were never used, so the standard parameterless
// form of main is declared (a second parameter of type PCHAR, i.e. char*, is
// not one of the standard signatures).
//
int
main ()
{
    typedef std::string::iterator base_iterator_type;
    typedef lex::lexertl::token <base_iterator_type> token_type;
    typedef lex::lexertl::lexer <token_type> lexer_type;
    typedef command_tokens <lexer_type> command_tokens;
    typedef command_tokens::iterator_type iterator_type;
    typedef command_grammar <iterator_type, command_tokens::lexer_def> command_grammar;

    command_tokens tokens;
    command_grammar commands (tokens);

    string input = "SET DRIVE C:";
    string::iterator it = input.begin ();

    // Tokens of the lexer's "WS" state are used as the skipper.
    string ws ("WS");
    bool result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
    if (result)
    {
        cout << "Parse succeeded" << endl;
    }
    else
    {
        // `it` marks the stop position; show the rest of the input.
        string rest (it, input.end ());
        cout << "Parse failed" << endl;
        cout << "Stopped at " << rest << endl;
    }
    return 0;
} // End of main
答案 0 :(得分:2)
我将绕开你的大部分代码,原因很简单:经验告诉我,Lex 和 utree 通常不是您想要使用的。
你想要的是定义一个AST来表示你的命令语言,然后提出一个语法来构建它。
// AST for the command language: each verb is a GenericCommand specialised on
// a tag type, and Command is a variant over all of them.
namespace Ast {

    // Alternative of SetCmd::value that carries no data; all NoValue objects
    // compare equal.
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
    };

    // Tag types, one per verb. They are used only as template arguments
    // here, so declarations suffice.
    namespace tag {
        struct boot;
        struct help;
        struct load;
        struct exit;
        struct set;
        struct show;
    }

    // Primary template: a command without any fields.
    template <typename Tag>
    struct GenericCommand {};

    // `load` carries a name.
    template <>
    struct GenericCommand<tag::load> {
        std::string name;
    };

    // `set` carries a property name and a value.
    template <>
    struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    // Any single command, and a list of commands.
    using Command  = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;
}
完整代码仅添加调试输出帮助程序。这是完整的Fusion Adaption:
// Adapt the AST structs as Fusion sequences.
// The generic GenericCommand<Tag> template is adapted with an empty member list.
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
// The two specialisations that have fields list them explicitly.
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
我在这里做出一些选择:
让语法忽略空白并且不区分大小写,同时允许用换行分隔多条命令:(另见Boost spirit skipper issues)
// A list of lazy_command separated by eol, parsed with `blank` as the skipper.
start = skip(blank) [lazy_command % eol];
让我们使用Nabialek Trick将命令与前缀相关联。我使用了一段非常简单的代码片段来生成唯一的前缀:
// Prints one `("prefix", &verb_command)` line for every prefix of every verb
// that identifies exactly one verb.
std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
for (auto const& full : verbs)
    for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
        // Count the verbs that start with `partial`. In the sorted set they
        // form a contiguous run beginning at lower_bound(partial). Counting
        // only up to `full` would miss later verbs sharing the prefix
        // (e.g. "s" matches both "set" and "show").
        int n = 0;
        for (auto it = verbs.lower_bound(partial);
             it != verbs.end() && it->compare(0, partial.size(), partial) == 0;
             ++it)
            ++n;
        if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n";
    }
您可以对属性执行相同操作,但我认为当前设置更简单:
// Character-level grammar for the command language; synthesises an
// Ast::Commands list.
//
// Verbs are looked up case-insensitively in the `commands` symbol table, which
// maps every accepted abbreviation to the rule that parses the rest of that
// command (the "Nabialek trick").
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        // Commands are separated by eol; `blank` is used as the skipper.
        start = skip(blank) [lazy_command % eol];

        // nabialek trick: the symbol table yields a rule pointer, which is
        // stored in the local _a and then invoked lazily. The `>` makes the
        // command body an expectation point.
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_ = char_("A-Z") >> ':';
        set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                     | no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
                     | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                     ;

        // Dispatch rules: each exposes Ast::Command and forwards to the typed
        // rule. They are defined before the debug nodes are registered,
        // because assigning a rule definition afterwards replaces the debug
        // wrapper installed on it.
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        set_command = set_command_;
        show_command = show_command_;

        BOOST_SPIRIT_DEBUG_NODES(
            (start)(lazy_command)
            (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
            (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
            (quoted_string)(drive_)
        )

        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
    }

private:
    // Prints what was expected and the input from the error position to the end.
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };
        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };
    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;

    // Symbol tables: on/off keywords, and verb abbreviation -> dispatch rule.
    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    // Declared without a skipper.
    qi::rule<Iterator, std::string()> quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    // Typed rules, one per AST node type.
    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
**Live On Coliru**
// Runs the grammar over a fixed set of sample inputs and prints, for each,
// the parsed commands (or a failure notice) and any unparsed remainder.
int main() {
    using It = std::string::const_iterator;
    command_grammar<It> const commands;

    char const* const samples[] = {
        "help",
        "set drive C:",
        "SET DRIVE C:",
        "loAD \"XYZ\"",
        "load \"anything \nat all\"",
        // multiline
        "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
    };

    for (std::string const input : samples) {
        std::cout << "----- '" << input << "' -----\n";

        It first = input.begin();
        It const last = input.end();
        Ast::Commands parsed;

        if (parse(first, last, commands, parsed)) {
            for (auto& cmd : parsed)
                std::cout << "Parsed " << cmd << "\n";
        } else {
            std::cout << "Parse failed\n";
        }

        // `first` was advanced by parse; anything left over was not consumed.
        if (first != last)
            std::cout << "Remaining unparsed '" << std::string(first, last) << "'\n";
    }
}
打印:
----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything
at all"' -----
Parsed LOAD (anything
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()
**Live On Coliru**
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// AST for the command language, with stream output for every node type.
namespace Ast {

    // Alternative of SetCmd::value that carries no data. All NoValue objects
    // compare equal, and streaming one writes nothing.
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
        friend std::ostream& operator<<(std::ostream& out, NoValue) { return out; }
    };

    // Tag types, one per verb; streaming a tag writes the verb in upper case.
    namespace tag {
        struct boot {};
        struct help {};
        struct load {};
        struct exit {};
        struct set {};
        struct show {};

        static std::ostream& operator<<(std::ostream& out, boot) { return out << "BOOT"; }
        static std::ostream& operator<<(std::ostream& out, help) { return out << "HELP"; }
        static std::ostream& operator<<(std::ostream& out, load) { return out << "LOAD"; }
        static std::ostream& operator<<(std::ostream& out, exit) { return out << "EXIT"; }
        static std::ostream& operator<<(std::ostream& out, set ) { return out << "SET"; }
        static std::ostream& operator<<(std::ostream& out, show) { return out << "SHOW"; }
    }

    // Primary template: a command without any fields.
    template <typename Tag>
    struct GenericCommand {};

    // `load` carries a name.
    template <>
    struct GenericCommand<tag::load> {
        std::string name;
    };

    // `set` carries a property name and a value.
    template <>
    struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    // Any single command, and a list of commands.
    using Command  = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;

    // Streams a command as its tag, a space, then its fields as a Fusion vector.
    template <typename Tag>
    static inline std::ostream& operator<<(std::ostream& out, Ast::GenericCommand<Tag> const& cmd) {
        out << Tag{} << " ";
        return out << boost::fusion::as_vector(cmd);
    }
}
// Adapt the AST structs as Fusion sequences.
// The generic GenericCommand<Tag> template is adapted with an empty member list.
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
// The two specialisations that have fields list them explicitly.
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
// Character-level grammar for the command language; synthesises an
// Ast::Commands list.
//
// Verbs are looked up case-insensitively in the `commands` symbol table, which
// maps every accepted abbreviation to the rule that parses the rest of that
// command (the "Nabialek trick").
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        // Commands are separated by eol; `blank` is used as the skipper.
        start = skip(blank) [lazy_command % eol];

        // nabialek trick: the symbol table yields a rule pointer, which is
        // stored in the local _a and then invoked lazily. The `>` makes the
        // command body an expectation point.
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_ = char_("A-Z") >> ':';
        set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                     | no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
                     | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                     ;

        // Dispatch rules: each exposes Ast::Command and forwards to the typed
        // rule. They are defined before the debug nodes are registered,
        // because assigning a rule definition afterwards replaces the debug
        // wrapper installed on it.
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        set_command = set_command_;
        show_command = show_command_;

        BOOST_SPIRIT_DEBUG_NODES(
            (start)(lazy_command)
            (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
            (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
            (quoted_string)(drive_)
        )

        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
    }

private:
    // Prints what was expected and the input from the error position to the end.
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };
        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };
    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;

    // Symbol tables: on/off keywords, and verb abbreviation -> dispatch rule.
    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    // Declared without a skipper.
    qi::rule<Iterator, std::string()> quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    // Typed rules, one per AST node type.
    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
// Runs the grammar over a fixed set of sample inputs and prints, for each,
// the parsed commands (or a failure notice) and any unparsed remainder.
int main() {
    using It = std::string::const_iterator;
    command_grammar<It> const commands;

    char const* const samples[] = {
        "help",
        "set drive C:",
        "SET DRIVE C:",
        "loAD \"XYZ\"",
        "load \"anything \nat all\"",
        // multiline
        "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
    };

    for (std::string const input : samples) {
        std::cout << "----- '" << input << "' -----\n";

        It first = input.begin();
        It const last = input.end();
        Ast::Commands parsed;

        if (parse(first, last, commands, parsed)) {
            for (auto& cmd : parsed)
                std::cout << "Parsed " << cmd << "\n";
        } else {
            std::cout << "Parse failed\n";
        }

        // `first` was advanced by parse; anything left over was not consumed.
        if (first != last)
            std::cout << "Remaining unparsed '" << std::string(first, last) << "'\n";
    }
}
问。如何使用utree注释解析器以创建AST?
问。构建完成后,如何遍历 utree 以找出解析到的内容?
问。我想添加注释字符“!”。那么,我怎样才能忽略它之后的所有内容 - 除非它出现在带引号的字符串中?
只需使Skipper
类型成为解析的规则,例如:
// Skipper rule: skips a blank, or a comment that starts with '!' and runs up
// to and including the end of line (or to the end of input).
// NOTE(review): the comment alternative also consumes the eol itself; if the
// grammar separates commands with eol, confirm that this does not swallow the
// separator.
qi::rule<Iterator> my_skipper;
my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
然后用它代替skip(blank)中的blank,例如:
skip(my_skipper)
问。为什么我给出无效输入时,错误处理程序没有被调用?
因为您没有标记期望点(应使用 operator> 而不是 operator>>)。如果不这样做,子表达式匹配失败时只会回溯。
问。如何使命令令牌不区分大小写,但不更改带引号的字符串的内容?