使用Boost Spirit解析命令语言

时间:2017-05-30 07:10:11

标签: c++ parsing scripting boost-spirit boost-spirit-qi

我正在构建一个命令语言的解析器,我从各种样本中拼凑起来。我已经阅读了Boost Spirit Qi和Lex文档,我认为我理解基础知识,但从我读过的内容来看,我应该避免使用属性并使用utree。我在utree上找到的文档基本上都很糟糕。鉴于以下代码,我有以下问题:

  1. 如何使用utree注释解析器以创建AST?
  2. 如何在构建后行走,发现解析的内容?例如for token-only命令,例如SET DEBUG ON,以及带有值的命令,例如LOAD“file.ext”或SET DRIVE C:
  3. 我想添加评论字符“!”。那么,我怎么能忽略之后的所有内容 - 除非它出现在带引号的字符串中?
  4. 为什么我的错误处理程序在无效输入时被调用?
  5. 如何使命令令牌不区分大小写,但不更改带引号的字符串的内容?

    #include <Windows.h>
    #include <conio.h>
    #include <string>
    #include <vector>
    #include <iostream>
    
    #define BOOST_SPIRIT_DEBUG
    
    #include <boost\spirit\include\qi.hpp>
    #include <boost\spirit\include\phoenix.hpp>
    #include <boost\spirit\include\lex.hpp>
    #include <boost\spirit\include\lex_lexertl.hpp>
    
    using namespace std;
    using namespace boost::spirit;
    using boost::spirit::utree;
    
    //
    // Tokens used by the command grammar
    //
    
    template <typename Lexer>
    struct command_tokens : lex::lexer <Lexer>
        {
        command_tokens () :
    
            //
            // Verbs, with abbreviation (just enough characters to make each unique)
            //
    
            boot        ("B(O(O(T)?)?)?"),
            exit        ("E(X(I(T)?)?)?"),
            help        ("H(E(L(P)?)?)?"),
            dash_help   ("-H(E(L(P)?)?)?"),
            slash_help  ("\\/H(E(L(P)?)?)?"),
            load        ("L(O(A(D)?)?)?"),
            quit        ("Q(U(I(T)?)?)?"),
            set         ("SE(T)?"),
            show        ("SH(O(W)?)?"),
    
            //
            // Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness)
            //
    
            debug       ("DEB(U(G)?)?"),
            drive       ("DRI(V(E)?)?"),
            trace       ("TRA(C(E)?)?"),
    
            //
            // Qualifiers
            //
    
            on          ("ON"),
            off         ("OFF"),
    
            //
            // Tokens to pass back to the grammar
            //
    
            quoted_string   ("...")
    
            {
            using namespace boost::spirit::lex;
    
            //
            // Associate the tokens with the lexer
            //
    
            this->self 
                = boot
                | exit
                | help
                | dash_help
                | slash_help
                | load
                | quit
                | set
                | show
                | debug
                | drive
                | trace
                | off
                | on
                | quoted_string
                ;
    
            //
            // Define whitespace to ignore: space, tab, newline
            //
    
            this->self ("WS")
                = lex::token_def <> ("[ \\t\\n]+")
                ;
            }
    
        lex::token_def <>   boot;
        lex::token_def <>   dash_help;
        lex::token_def <>   debug;
        lex::token_def <string> drive;
        lex::token_def <>   exit;
        lex::token_def <>   help;
        lex::token_def <>   load;
        lex::token_def <>   off;
        lex::token_def <>   on;
        lex::token_def <>   quit;
        lex::token_def <string> quoted_string;
        lex::token_def <>   set;
        lex::token_def <>   show;
        lex::token_def <>   slash_help;
        lex::token_def <>   trace;
        };
    
    //
    // Display parse error
    //
    
    struct error_handler_
        {
        template <typename, typename, typename>
        struct result
            {
            typedef void type;
            };
    
        template <typename Iterator>
        void operator ()
            (
            qi::info const& What,
            Iterator        Err_pos,
            Iterator        Last
            ) const
    
            {
            cout << "Error! Expecting "
                << What
                << " here: \""
                << string (Err_pos, Last)
                << "\""
                << endl;
            }
        };
    
    boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
    
    //
    // Grammar describing the valid commands
    //
    
    template <typename Iterator, typename Lexer>
    struct command_grammar : qi::grammar <Iterator>
        {
        template <typename Lexer>
        command_grammar (command_tokens <Lexer> const& Tok) :
            command_grammar::base_type (start)
            {
            using qi::on_error;
            using qi::fail;
            using qi::char_;
    
            start
                = +commands;
    
            commands
                = (
                  boot_command
                | exit_command
                | help_command
                | load_command
                | set_command
                | show_command
                );
    
            boot_command
                = Tok.boot;
    
            exit_command
                = Tok.exit
                | Tok.quit;
    
            help_command
                = Tok.help
                | Tok.dash_help
                | Tok.slash_help;
    
            load_command
                = Tok.load >> Tok.quoted_string;
    
            set_command
                = Tok.set;
    
            show_command
                = Tok.show;
    
            set_property
                = debug_property
                | drive_property
                | trace_property;
    
            debug_property
                = Tok.debug >> on_off;
    
           drive_property
                = Tok.drive >> char_ ("A-Z") >> char_ (":");
    
            trace_property
                = Tok.trace >> on_off;
    
            on_off
                = Tok.on
                | Tok.off;
    
            BOOST_SPIRIT_DEBUG_NODE (start);
            BOOST_SPIRIT_DEBUG_NODE (commands);
            BOOST_SPIRIT_DEBUG_NODE (boot_command);
            BOOST_SPIRIT_DEBUG_NODE (exit_command);
            BOOST_SPIRIT_DEBUG_NODE (help_command);
            BOOST_SPIRIT_DEBUG_NODE (load_command);
            BOOST_SPIRIT_DEBUG_NODE (quit_command);
            BOOST_SPIRIT_DEBUG_NODE (set_command);
            BOOST_SPIRIT_DEBUG_NODE (show_command);
            BOOST_SPIRIT_DEBUG_NODE (set_property);
            BOOST_SPIRIT_DEBUG_NODE (debug_property);
            BOOST_SPIRIT_DEBUG_NODE (drive_property);
            BOOST_SPIRIT_DEBUG_NODE (trace_property);
            BOOST_SPIRIT_DEBUG_NODE (target_property);
            on_error <fail> (start, error_handler (_4, _3, _2));
            }
    
        qi::rule <Iterator> start;
        qi::rule <Iterator> commands;
        qi::rule <Iterator> boot_command;
        qi::rule <Iterator> exit_command;
        qi::rule <Iterator> help_command;
        qi::rule <Iterator> load_command;
        qi::rule <Iterator> quit_command;
        qi::rule <Iterator> set_command;
        qi::rule <Iterator> show_command;
        qi::rule <Iterator> set_property;
        qi::rule <Iterator> debug_property;
        qi::rule <Iterator, string ()>  drive_property;
        qi::rule <Iterator> target_property;
        qi::rule <Iterator> trace_property;
        qi::rule <Iterator> on_off;
        };
    
    int
    main
        (
        int     Argc,
        PCHAR   Argv
        )
    {
        typedef std::string::iterator                       base_iterator_type;
        typedef lex::lexertl::token <base_iterator_type>    token_type;
        typedef lex::lexertl::lexer <token_type>            lexer_type;
        typedef command_tokens <lexer_type>                 command_tokens;
        typedef command_tokens::iterator_type               iterator_type;
        typedef command_grammar <iterator_type, command_tokens::lexer_def>  command_grammar;
    
        command_tokens      tokens;
        command_grammar     commands (tokens);
        string              input = "SET DRIVE C:";
        string::iterator    it = input.begin ();
        iterator_type       iter = tokens.begin (it, input.end ());
        iterator_type       end = tokens.end ();
        string              ws ("WS");
    
        bool                result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
    
        if (result)
            {
            cout << "Parse succeeded" << endl;
            }
        else
            {
            string  rest (it, input.end ());
            cout << "Parse failed" << endl;
            cout << "Stopped at " << rest << endl;
            }
    
        return 0;
    }                           // End of main
    

1 个答案:

答案 0 :(得分:2)

我将支持大部分代码,原因很简单,经验告诉我Lexutree通常不是您想要使用的。

你想要的是定义一个AST来表示你的命令语言,然后提出一个语法来构建它。

AST

namespace Ast {
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
    };
    template <typename Tag> struct GenericCommand {};

    namespace tag {
        struct boot;
        struct help;
        struct load;
        struct exit;
        struct set;
        struct show;
    };

    template <> struct GenericCommand<tag::load> { std::string name; };

    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;
}

完整代码仅添加调试输出帮助程序。这是完整的Fusion Adaption:

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

语法

我在这里做出一些选择:

  • 让白色空间和不区分大小写,允许行分隔命令:(另见Boost spirit skipper issues

    start = skip(blank) [lazy_command % eol];
    
  • 让我们使用Nabialek Trick将命令与前缀相关联。我使用了一段非常简单的代码片段来生成唯一的前缀:

    std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
    for (auto const full : verbs)
        for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
            auto n = std::distance(verbs.lower_bound(partial), verbs.upper_bound(full));
            if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n";
        }
    
  • 您可以对属性执行相同操作,但我认为当前设置更简单:

template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        start = skip(blank) [lazy_command % eol];

        // nabialek trick
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_        = char_("A-Z") >> ':';
        set_command_  = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                | no_case[ (lit("debug")|"debu"|"deb"|"de")     >> attr("DEBUG") >> on_off ]
                | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                ;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(lazy_command)
                (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
                (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
                (quoted_string)(drive_)
            )

        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        exit_command = exit_command_;
        set_command  = set_command_;
        show_command = show_command_;
    }

  private:
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };

        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };

    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule  = qi::rule<Iterator, Ast::Command(), Skipper>;

    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(),  Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};

测试用例

<强> Live On Coliru

int main() {
    typedef std::string::const_iterator It;
    command_grammar<It> const commands;

    for (std::string const input : {
            "help",
            "set drive C:",
            "SET DRIVE C:",
            "loAD \"XYZ\"",
            "load \"anything \nat all\"",
            // multiline
            "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
            })
    {
        std::cout << "----- '" << input << "' -----\n";
        It f = input.begin(), l = input.end();

        Ast::Commands parsed;
        bool result = parse(f, l, commands, parsed);

        if (result) {
            for (auto& cmd : parsed) {
                std::cout << "Parsed " << cmd << "\n";
            }
        } else {
            std::cout << "Parse failed\n";
        }

        if (f != l) {
            std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
        }
    }
}

打印:

----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything 
at all"' -----
Parsed LOAD (anything 
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()

完整列表

<强> Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;

namespace Ast {
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
        friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
    };
    template <typename Tag> struct GenericCommand {};

    namespace tag {
        struct boot {};
        struct help {};
        struct load {};
        struct exit {};
        struct set {};
        struct show {};

        static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
        static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
        static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
        static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
        static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
        static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
    };

    template <> struct GenericCommand<tag::load> { std::string name; };

    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;

    template <typename Tag>
    static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) { 
        return os << Tag{} << " " << boost::fusion::as_vector(command);
    }
}

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        start = skip(blank) [lazy_command % eol];

        // nabialek trick
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_        = char_("A-Z") >> ':';
        set_command_  = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                | no_case[ (lit("debug")|"debu"|"deb"|"de")     >> attr("DEBUG") >> on_off ]
                | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                ;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(lazy_command)
                (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
                (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
                (quoted_string)(drive_)
            )

        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        exit_command = exit_command_;
        set_command  = set_command_;
        show_command = show_command_;
    }

  private:
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };

        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };

    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule  = qi::rule<Iterator, Ast::Command(), Skipper>;

    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(),  Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};

int main() {
    typedef std::string::const_iterator It;
    command_grammar<It> const commands;

    for (std::string const input : {
            "help",
            "set drive C:",
            "SET DRIVE C:",
            "loAD \"XYZ\"",
            "load \"anything \nat all\"",
            // multiline
            "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
            })
    {
        std::cout << "----- '" << input << "' -----\n";
        It f = input.begin(), l = input.end();

        Ast::Commands parsed;
        bool result = parse(f, l, commands, parsed);

        if (result) {
            for (auto& cmd : parsed) {
                std::cout << "Parsed " << cmd << "\n";
            }
        } else {
            std::cout << "Parse failed\n";
        }

        if (f != l) {
            std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
        }
    }
}

后脚本

  

问。如何使用utree注释解析器以创建AST?

  • 见上文
  

问。如何在构建完成后走出去,发现解析的内容?

  

问。我想添加评论字符“!”。那么,在那之后我怎么能忽略一切 - 除非它出现在一个带引号的字符串中?

  • 只需使Skipper类型成为解析的规则,例如:

    qi::rule<Iterator> my_skipper;
    my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
    

    然后使用它而不是像skip(blank)

  • 这样的skip(my_skipper)
  

问。为什么我的错误处理程序在无效输入时被调用?

  • 因为您没有标记期望点(operator>而不是operator>>)。如果不这样做,则无法匹配子表达式只是回溯。
  

问。如何使命令令牌不区分大小写,但不更改带引号的字符串的内容?

  • 见上文