解析许多其他命名集的命名集

时间:2012-10-13 17:00:02

标签: c++ parsing boost-spirit boost-spirit-qi

所以我想用 boost::spirit::qi 编写一个……嗯……不那么简单的解析器。我了解 Boost.Spirit 的基础知识,不过也是在过去几个小时里才第一次接触它。

基本上我需要解析一下:

# comment

# other comment

set "Myset A"
{
    figure "AF 1"
    {
        i 0 0 0
        i 1 2 5
        i 1 1 1
        f 3.1 45.11 5.3
        i 3 1 5
        f 1.1 2.33 5.166
    }

    figure "AF 2"
    {
        i 25 5 1
        i 3 1 3
    }
}

# comment

set "Myset B"
{
    figure "BF 1"
    {
        f 23.1 4.3 5.11
    }
}

set "Myset C"
{
    include "Myset A" # includes all figures from Myset A

    figure "CF"
    {
        i 1 1 1
        f 3.11 5.33 3
    }
}

解析成下面这样的数据结构:

// Target data model that the parsed input should be stored in.

// One "i x y z" line: a point with integer coordinates.
struct int_point { int x, y, z; };
// One "f x y z" line: a point with floating-point coordinates.
struct float_point { float x, y, z; };

// A named figure; integer and floating-point points are kept in two
// separate containers.
struct figure
{
    string name;
    vector<int_point> int_points;
    vector<float_point> float_points;
};

// A named collection of figures.
struct figure_set
{
    string name;
    vector<figure> figures; // fixed: the terminating ';' was missing here
};

vector<figure_set> figure_sets; // fill with the data of the input

显然,让别人替我把这些全部写出来要求太高了,但能否给我一些提示:应该阅读哪些资料,以及如何为这项任务组织语法和解析器?

另外……也许 boost::spirit 并不是最适合这项任务的库。如果确实如此,那应该用哪一个?

编辑: 这是我到目前为止的地方。但我还不确定如何继续:http://liveworkspace.org/code/212c31dfc0b6fbdf6c462d8d931c0e9f

我已经能够解析单个 figure,但还不知道如何解析一组 figure。

1 个答案:

答案 0 :(得分:11)

这是我对它的看法

我认为真正会卡住你的规则是下面这一条:

// Rule for one `figure "<name>" { ... }` block.
// The semantic actions address the members of the synthesized attribute
// (_val) by position: at_c<0> receives the name, at_c<1> collects every
// ipoints match and at_c<2> collects every fpoints match, so the two kinds
// of point lines end up in two separate containers.
figure  = eps >> "figure" 
    >> name         [ at_c<0>(_val) = _1 ] >> '{' >> 
    *(
            ipoints [ push_back(at_c<1>(_val), _1) ]
          | fpoints [ push_back(at_c<2>(_val), _1) ]
     ) >> '}';

这实际上是一种症状:它源于你把混排在一起的 i/f 行分别解析到了各自独立的容器中。

请参阅下面的替代方案。

以下是我的完整代码: test.cpp

//#define BOOST_SPIRIT_DEBUG // before including Spirit
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>

namespace Format
{
    /// Point with integer coordinates.
    struct int_point
    {
        int x;
        int y;
        int z;
    };

    /// Point with single-precision floating-point coordinates.
    struct float_point
    {
        float x;
        float y;
        float z;
    };

    /// A named figure holding its integer and floating-point points in two
    /// separate containers. The member order matters: the Fusion adaptation
    /// of this struct lists the members in the same order.
    struct figure
    {
        std::string              name;
        std::vector<int_point>   int_points;
        std::vector<float_point> float_points;

        /// Stream output; declared here, defined after the Fusion adaptation.
        friend std::ostream& operator<<(std::ostream& out, figure const& fig);
    };

    /// A named set of figures plus the names of the sets it includes.
    struct figure_set
    {
        std::string           name;
        std::set<std::string> includes;
        std::vector<figure>   figures;

        /// Stream output; declared here, defined after the Fusion adaptation.
        friend std::ostream& operator<<(std::ostream& out, figure_set const& fset);
    };

    /// Everything read from one input file: a sequence of figure sets.
    using file_data = std::vector<figure_set>;
}

// Adapt the Format structs as Boost.Fusion sequences. The order in which the
// members are listed defines their positional index: the parser's semantic
// actions use at_c<0>/at_c<1>/at_c<2> on Format::figure, which therefore
// refer to name / int_points / float_points respectively.
BOOST_FUSION_ADAPT_STRUCT(Format::int_point,   
        (int, x)(int, y)(int, z))
BOOST_FUSION_ADAPT_STRUCT(Format::float_point, 
        (float, x)(float, y)(float, z))
BOOST_FUSION_ADAPT_STRUCT(Format::figure,      
        (std::string, name)
        (std::vector<Format::int_point>, int_points)
        (std::vector<Format::float_point>, float_points))
BOOST_FUSION_ADAPT_STRUCT(Format::figure_set,  
        (std::string, name)
        (std::set<std::string>, includes)
        (std::vector<Format::figure>, figures))

namespace Format
{
    // Writes one figure using an inline Karma format expression, with ' '
    // passed as the delimiter to format_delimited.
    // The expression emits the quoted name, then every int point line, then
    // every float point line - so the original interleaving of i/f lines in
    // the input is not reproduced.
    std::ostream& operator<<(std::ostream& os, figure const& o)
    {
        using namespace boost::spirit::karma;
        return os << format_delimited(
                "\n    figure" << no_delimit [ '"' << string << '"' ] << "\n    {"
                << *("\n       i" << int_ << int_ << int_)
                << *("\n       f" << float_ << float_ << float_)
                << "\n    }"
                , ' ', o);
    }

    // Writes one figure set: the quoted set name, one `include "<name>"` line
    // per entry of `includes`, then every figure, all wrapped in braces.
    // NOTE(review): `*stream` presumably renders each figure through the
    // operator<< for figure defined above - confirm against the Karma docs.
    std::ostream& operator<<(std::ostream& os, figure_set const& o)
    {
        using namespace boost::spirit::karma;
        return os << format_delimited(
                "\nset" << no_delimit [ '"' << string << '"' ] << "\n{"
                << *("\n    include " << no_delimit [ '"' << string << '"' ])
                << *stream
                << "\n}"
                , ' ', o);
    }
}

// File-local Spirit.Qi grammars: the skipper and the actual file parser.
namespace /*anon*/
{
    namespace phx=boost::phoenix;
    namespace qi =boost::spirit::qi;

    // Skip grammar handed to phrase_parse: what it matches is ignored
    // between tokens. It accepts either a '#' comment or whitespace.
    template <typename Iterator> struct skipper
        : public qi::grammar<Iterator>
    {
        skipper() : skipper::base_type(start, "skipper")
        {
            using namespace qi;

            // '#' followed by everything up to the end of the line; the
            // comment may also be terminated by the end of the input.
            comment = '#' >> *(char_ - eol) >> (eol|eoi);
            start   = comment | qi::space;

            BOOST_SPIRIT_DEBUG_NODE(start);
            BOOST_SPIRIT_DEBUG_NODE(comment);
        }

      private:
        qi::rule<Iterator> start, comment;
    };

    // Grammar for a whole input file; its attribute is Format::file_data
    // (a vector of figure sets) and it skips using skipper<Iterator>.
    template <typename Iterator> struct parser
        : public qi::grammar<Iterator, Format::file_data(), skipper<Iterator> >
    {
        parser() : parser::base_type(start, "parser")
        {
            using namespace qi;
            using phx::push_back;
            using phx::at_c;

            // Double-quoted string; lexeme[] keeps the skipper from running
            // inside the quotes, so blanks and '#' within a name are kept.
            name    = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];

            // `include "<set name>"`, and the two kinds of point lines:
            // `i <int> <int> <int>` and `f <float> <float> <float>`.
            include = eps >> "include" >> name;
            ipoints = eps >> "i"       >> int_         >> int_   >> int_;
            fpoints = eps >> "f"       >> float_       >> float_ >> float_;

            // `figure "<name>" { ... }`: the semantic actions address the
            // adapted members of Format::figure by position - 0 is name,
            // 1 is int_points, 2 is float_points - so i and f lines may be
            // interleaved in the input but are stored in separate vectors.
            figure  = eps >> "figure" 
                >> name         [ at_c<0>(_val) = _1 ] >> '{' >> 
                *(
                        ipoints [ push_back(at_c<1>(_val), _1) ]
                      | fpoints [ push_back(at_c<2>(_val), _1) ]
                 ) >> '}';
            // `set "<name>" { ... }`: every include line has to come before
            // the first figure, because the grammar is *include >> *figure.
            set     = eps >> "set" >> name >> '{' >> *include >> *figure >> '}';
            // A file is any number of sets (including none).
            start   = *set;
        }

      private:
        qi::rule<Iterator, std::string()        , skipper<Iterator> > name, include;
        qi::rule<Iterator, Format::int_point()  , skipper<Iterator> > ipoints;
        qi::rule<Iterator, Format::float_point(), skipper<Iterator> > fpoints;
        qi::rule<Iterator, Format::figure()     , skipper<Iterator> > figure;
        qi::rule<Iterator, Format::figure_set() , skipper<Iterator> > set;
        qi::rule<Iterator, Format::file_data()  , skipper<Iterator> > start;
    };
}

namespace Parser {

    bool parsefile(const std::string& spec, Format::file_data& data)
    {
        std::ifstream in(spec.c_str());
        in.unsetf(std::ios::skipws);

        std::string v;
        v.reserve(4096);
        v.insert(v.end(), std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());

        if (!in) 
            return false;

        typedef char const * iterator_type;
        iterator_type first = &v[0];
        iterator_type last = first+v.size();

        try
        {
            parser<iterator_type>  p;
            skipper<iterator_type> s;
            bool r = qi::phrase_parse(first, last, p, s, data);

            r = r && (first == last);

            if (!r)
                std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
            return r;
        }
        catch (const qi::expectation_failure<char const *>& e)
        {
            std::cerr << "FIXME: expected " << e.what_ << ", got '" << std::string(e.first, e.last) << "'" << std::endl;
            return false;
        }
    }
}

int main()
{
    Format::file_data data;
    bool ok = Parser::parsefile("input.txt", data);

    std::cerr << "Parse " << (ok?"success":"failed") << std::endl;
    std::cout << "# figure sets exported automatically by karma\n\n";

    for (auto& set : data)
        std::cout << set;
}

它将解析后的数据作为验证输出: output.txt

Parse success
# figure sets exported automatically by karma


set "Myset A"
{ 
    figure "AF 1"
    { 
       i 0 0 0 
       i 1 2 5 
       i 1 1 1 
       i 3 1 5 
       f 3.1 45.11 5.3 
       f 1.1 2.33 5.166 
    }  
    figure "AF 2"
    { 
       i 25 5 1 
       i 3 1 3 
    }  
} 
set "Myset B"
{ 
    figure "BF 1"
    { 
       f 23.1 4.3 5.11 
    }  
} 
set "Myset C"
{ 
    include  "Myset A"
    figure "CF"
    { 
       i 1 1 1 
       f 3.11 5.33 3.0 
    }  
}

你会注意到:

  • 点行的顺序发生了变化(所有 int_points 都排在所有 float_points 之前)
  • 还补上了原本没有的小数位,例如最后一行输出的是 3.0 而不是 3,以表明该值的类型是 float。
  • 你在问题给出的数据结构中似乎"忘记"(?)了 include 的处理

替代

保持实际点线的原始顺序:

// A point line is either an int_point or a float_point; holding both kinds
// in one variant type lets them share a single container.
typedef boost::variant<int_point, float_point> if_point;

// Figure whose points are stored in one sequence instead of two.
struct figure
{
    std::string            name;
    std::vector<if_point>  if_points;
}; // fixed: the struct definition was missing its terminating ';'

现在规则变得简单:

// Rule set for the variant-based figure: no semantic actions are needed.

// Double-quoted string, parsed without skipping inside the quotes.
name    = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];

// `include "<set name>"` and the two kinds of point lines.
include = eps >> "include" >> name;
ipoints = eps >> "i"       >> int_         >> int_   >> int_;
fpoints = eps >> "f"       >> float_       >> float_ >> float_;

// A figure is a name plus any mix of i/f point lines; a set is a name,
// then its include lines, then its figures; a file is any number of sets.
figure  = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';
set     = eps >> "set"    >> name >> '{' >> *include >> *figure  >> '}';
start   = *set;

请注意下面这条规则的简洁优雅:
// The alternative (ipoints | fpoints) is repeated as a whole, with no semantic actions.
figure  = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';

输出保持输入的确切顺序: output.txt

再一次,完整的演示代码(仅限github): test.cpp

奖金更新

最后,我制作了我的第一个正确的Karma语法来输出结果:

// Karma (output) counterparts of the parser rules.

// Name in double quotes; no_delimit suppresses the delimiter inside the quotes.
name    = no_delimit ['"' << string << '"'];
include = "include" << name;
// Each point line starts on a new line, indented by eight spaces.
ipoints = "\n        i" << int_   << int_   << int_;
fpoints = "\n        f" << float_ << float_ << float_;

// A figure emits its name and then each point through whichever of the two
// alternatives applies to it.
figure  = "figure" << name << "\n    {" << *(ipoints | fpoints) << "\n    }";
// A set emits its name, its include lines and its figures inside braces.
set     = "set"    << name << "\n{" 
            << *("\n   " << include)
            << *("\n   " << figure)  << "\n}";

// Header comment line followed by all sets, separated by eol.
start   = "# figure sets exported automatically by karma\n\n" 
            << set % eol;

这实际上比我预期的要舒服得多。在最新版本的fully updated gist中查看: test.hpp