使用 Boost.Spirit 解析结构化文本

时间:2016-11-08 21:14:05

标签: c++ parsing boost

再次向各位求助。我想使用 Boost.Spirit 库,将下面的语法解析到结构体“unitConstruct”中。到目前为止,我的解析器总是失败,无法正确解析该语法。非常感谢您的帮助。下面附上我的代码片段。

此语法将遵循此格式,基于标准。我想将 UNIT 存储在密钥中(如 struct unitConstruct 所示),并将 TB_SENSOR_PRIMARY_VALUE_UNIT 存储在标识符中,最后我想将标记存储在向量中( variablereferences )。此模式是稍后在我的代码中唯一检索此信息所必需的。

// Syntax to be parsed
UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}

//structure  to store the syntax above 
struct unitConstruct
{
    // Leading keyword of the construct ("UNIT" in the sample above).
    std::string key;
    // Name that follows the keyword (TB_SENSOR_PRIMARY_VALUE_UNIT in the sample above).
    std::string identifier;
    // Variable names collected from inside the braces.
    std::vector<std::string> variablereferences;
};


// code snapshot 
// Result type of a whole parse: one unitConstruct per parsed UNIT block.
typedef std::vector<unitConstruct> eddlParsedData;
// Skip grammar for the question's parser: matches a single blank character
// (ascii::blank), so line breaks are NOT skipped and remain visible to the
// main grammar as qi::eol.
template <typename Iterator>
struct skipper
    : qi::grammar<Iterator>
{
    skipper()
        : skipper::base_type(start)
    {
        start = ascii::blank;
    }

private:
    qi::rule<Iterator> start;    // entry rule of the skipper
    qi::rule<Iterator> comment;  // declared but never assigned in this snapshot
};

// Grammar from the question: parses a list of UNIT blocks into eddlParsedData.
//
// Compared with the posted snapshot this version only repairs what prevents
// compilation: the `start` rule is now declared, the unused `uni` rule is
// dropped, and the struct is closed. The grammar expressions are unchanged.
template <typename Iterator>
struct eddlparser : qi::grammar<Iterator, eddlParsedData(), skipper<Iterator> >
{
    eddlparser() : eddlparser::base_type(start)
    {
        unitkey = qi::string("UNIT");

        // NOTE(review): qi::graph accepts every printable non-space character,
        // so ':', ',', '{' and '}' are not excluded from `text`.
        text = +qi::graph;

        // NOTE(review): only a single qi::eol is allowed after the `+text`
        // run, while the sample puts one reference per line, so this rule is
        // not expected to match the sample input as written.
        unit = unitkey >> text >> qi::eol
                       >> '{' >> qi::eol
                       >> +text >> qi::eol
                       >> '}' ;

        BOOST_SPIRIT_DEBUG_NODE(unit);

        // One or more units, optionally separated by line breaks.
        start = (unit) % *qi::eol;
    }

private:
    qi::rule<Iterator, std::string(), skipper<Iterator> > unitkey, text;
    qi::rule<Iterator, unitConstruct(), skipper<Iterator> > unit;
    // Entry rule; its signature must match the grammar's own signature.
    qi::rule<Iterator, eddlParsedData(), skipper<Iterator> > start;
};


// Adapt unitConstruct as a Fusion sequence so that Spirit can fill its
// members, in the order listed here, from the attributes of the `unit` rule.
BOOST_FUSION_ADAPT_STRUCT(
    unitConstruct,
    (std::string, key)
    (std::string, identifier)
    (std::vector<std::string>, variablereferences)
)

1 个答案:

答案 0 :(得分:2)

让我们从您的示例和我们在评论中讨论的细节开始。您想要解析如下所示的定义

UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
  • UNIT指定定义类型。只允许大写,并且只能使用预定义的关键字(目前只有“UNIT”)。
  • TB_SENSOR_PRIMARY_VALUE_UNIT指定定义名称。它可以包含大写字母、数字或下划线,但第一个字符不能是数字。
  • 标识符可以包含大小写字母、数字或下划线,但第一个字符不能是数字。
  • trans1_primary_value_unit指定从属项(dependent)。它是一个标识符。
  • trans1_primary_value,... func1_AI_simulate_value指定依赖项(dependencies)。每个依赖项都是一个标识符,多个依赖项之间以逗号分隔。

根据这些信息,我们需要一个如下所示的语法(在EBNF中)。

type = 'UNIT'
name = ( upper | "_" ), { upper | digit | "_" }
identifier = ( upper | lower | "_" ), { upper | lower | digit | "_" }
dependent = identifier
dependency = identifier
dependencies = dependency, { ",", dependency }
definition = type, name, "{", dependent, ":", dependencies, "}"

我们可以使用短语解析(phrase parsing)来忽略空格、制表符、换行符和回车符,因为正确解析该语法并不需要它们。

源代码

Live on Coliru

#include <cstdlib>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
// ======================================================================
// Sample definition text that main() hands to the parser.
std::string TEST_INPUT{R"eddl(UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
)eddl"};
// ======================================================================
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// ======================================================================
// Parsed form of one definition block; filled by def_parser through the
// Fusion adaptation of this struct.
struct definition
{
    // Definition keyword (currently only "UNIT" is produced by the grammar).
    std::string type;
    // Definition name that follows the keyword.
    std::string name;
    // Identifier that precedes the ':' inside the braces.
    std::string dependent;
    // Comma-separated identifiers that follow the ':'.
    std::vector<std::string> dependencies;
};
// ======================================================================
// Expose definition as a Fusion sequence; the member order given here is the
// order in which the grammar's synthesized attributes are assigned.
BOOST_FUSION_ADAPT_STRUCT(
    definition,
    (std::string, type)
    (std::string, name)
    (std::string, dependent)
    (std::vector<std::string>, dependencies)
)
// ======================================================================
// Skip grammar used for phrase parsing: matches one ASCII whitespace
// character (ascii::space), so blanks and line breaks between tokens are
// ignored by the main grammar.
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(whitespace)
    {
        whitespace %= ascii::space;
    }

private:
    // Entry rule of the skipper; carries no attribute.
    qi::rule<Iterator> whitespace;
};
// ======================================================================
// Grammar for a single definition of the form
//
//     UNIT NAME { dependent : dependency, dependency, ... }
//
// The synthesized attribute is a `definition`. Whitespace between tokens is
// consumed by skipper<Iterator>; the token rules themselves are declared
// without a skipper, so no skipping happens inside a token.
template <typename Iterator>
struct def_parser
    : qi::grammar<Iterator, definition(), skipper<Iterator> >
{
    def_parser()
        : def_parser::base_type(start)
    {
        // The keyword must end at a token boundary: without the not-predicate
        // an input such as "UNITFOO" would be split into type "UNIT" and
        // name "FOO". The predicate consumes nothing and adds no attribute.
        def_type %= qi::string("UNIT")
            >> !(ascii::alnum | ascii::char_('_'));

        // Upper-case letters, digits and '_'; must not start with a digit.
        def_name %= (ascii::upper | ascii::char_('_'))
            >> *(ascii::upper | ascii::digit | ascii::char_('_'));

        // Letters of either case, digits and '_'; must not start with a digit.
        identifier %= (ascii::upper | ascii::lower | ascii::char_('_'))
            >> *(ascii::upper | ascii::lower | ascii::digit | ascii::char_('_'));

        // Separate rules for the two roles so each shows up under its own
        // name in the debug trace.
        def_dependent %= identifier;
        def_dependency %= identifier;
        def_dependencies %= def_dependency % qi::lit(",");

        start %= def_type
            >> def_name
            >> qi::lit("{")
            >> def_dependent
            >> qi::lit(":")
            >> def_dependencies
            >> qi::lit("}")
            ;

        init_debug();
    }

    // Names every rule and switches on Spirit's debug trace for it.
    void init_debug()
    {
        def_type.name("def_type");
        def_name.name("def_name");
        identifier.name("identifier");
        def_dependent.name("def_dependent");
        def_dependency.name("def_dependency");
        def_dependencies.name("def_dependencies");
        start.name("start");

        qi::debug(def_type);
        qi::debug(def_name);
        qi::debug(identifier);
        qi::debug(def_dependent);
        qi::debug(def_dependency);
        qi::debug(def_dependencies);
        qi::debug(start);
    }

private:
    // Token-level rules: declared without a skipper type.
    qi::rule<Iterator, std::string()> def_type;
    qi::rule<Iterator, std::string()> def_name;
    qi::rule<Iterator, std::string()> identifier;
    qi::rule<Iterator, std::string()> def_dependent;
    qi::rule<Iterator, std::string()> def_dependency;

    // Phrase-level rules: whitespace between their components is skipped.
    qi::rule<Iterator, std::vector<std::string>(), skipper<Iterator>> def_dependencies;
    qi::rule<Iterator, definition(), skipper<Iterator>> start;
};
// ======================================================================
// Parses TEST_INPUT with def_parser and prints the extracted fields.
//
// Returns EXIT_SUCCESS only when the parse succeeds and the whole input is
// consumed; a failed or partial parse yields EXIT_FAILURE so callers and
// scripts can detect it. The printed text is unchanged.
int main()
{
    typedef std::string::const_iterator iterator_type;

    def_parser<iterator_type> g;
    skipper<iterator_type> s;

    definition d;

    iterator_type iter = TEST_INPUT.begin();
    iterator_type end = TEST_INPUT.end();

    bool r = qi::phrase_parse(iter, end, g, s, d);

    if (!r) {
        std::cout << "FAILED COMPLETELY\n";
        return EXIT_FAILURE;
    }

    // phrase_parse can succeed while leaving input unconsumed; that still
    // counts as a failure for this program.
    const bool consumed_all = (iter == end);

    std::cout << "Bytes left = " << std::distance(iter, end) << " -> "
        << (consumed_all ? "SUCCEEDED" : "FAILED") << "\n";
    std::cout << "Type = " << d.type << "\n";
    std::cout << "Name = " << d.name << "\n";
    std::cout << "Dependent = " << d.dependent << "\n";
    for (auto const& ref : d.dependencies) {
        std::cout << "Dependency = " << ref << "\n";
    }

    return consumed_all ? EXIT_SUCCESS : EXIT_FAILURE;
}
// ======================================================================

调试输出

<start>
  <try>UNIT TB_SENSOR_PRIMA</try>
  <def_type>
    <try>UNIT TB_SENSOR_PRIMA</try>
    <success> TB_SENSOR_PRIMARY_V</success>
    <attributes>[[U, N, I, T]]</attributes>
  </def_type>
  <def_name>
    <try> TB_SENSOR_PRIMARY_V</try>
    <success>{\ntrans1_primary_val</success>
    <attributes>[[T, B, _, S, E, N, S, O, R, _, P, R, I, M, A, R, Y, _, V, A, L, U, E, _, U, N, I, T]]</attributes>
  </def_name>
  <def_dependent>
    <try>\ntrans1_primary_valu</try>
    <identifier>
      <try>\ntrans1_primary_valu</try>
      <success>:\n    trans1_primary</success>
      <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t]]</attributes>
    </identifier>
    <success>:\n    trans1_primary</success>
    <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t]]</attributes>
  </def_dependent>
  <def_dependencies>
    <try>\n    trans1_primary_</try>
    <def_dependency>
      <try>\n    trans1_primary_</try>
      <identifier>
        <try>\n    trans1_primary_</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    func1_AI_pv_up</success>
        <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_up</success>
      <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_upp</try>
      <identifier>
        <try>\n    func1_AI_pv_upp</try>
        <success>,\n    func1_AI_pv_lo</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_lo</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_low</try>
      <identifier>
        <try>\n    func1_AI_pv_low</try>
        <success>,\n    func1_AI_simul</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_simul</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_simula</try>
      <identifier>
        <try>\n    func1_AI_simula</try>
        <success>}\n</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>}\n</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <success>}\n</success>
    <attributes>[[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]]</attributes>
  </def_dependencies>
  <success>\n</success>
  <attributes>[[[U, N, I, T], [T, B, _, S, E, N, S, O, R, _, P, R, I, M, A, R, Y, _, V, A, L, U, E, _, U, N, I, T], [t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t], [[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]]]</attributes>
</start>

控制台输出

Bytes left = 0 -> SUCCEEDED
Type = UNIT
Name = TB_SENSOR_PRIMARY_VALUE_UNIT
Dependent = trans1_primary_value_unit
Dependency = trans1_primary_value
Dependency = trans1_scale_out_lower_value
Dependency = trans1_scale_out_upper_value
Dependency = func1_AI_pv_upper_range_value
Dependency = func1_AI_pv_lower_range_value
Dependency = func1_AI_simulate_value

解析多个定义

我们已经有一个单一定义的语法。要解析多个定义,我们只需重用它。

让我们对代码进行一些小修改:

  • std::vector<definition> d;
  • bool r = qi::phrase_parse(iter, end, +g, s, d);

Live on Coliru