使用Boost Spirit Qi

时间:2017-03-19 15:07:22

标签: c++ boost boost-spirit boost-spirit-qi

我是Boost Spirit的新手,正在努力创建一个正确的表达式来解析以下输入(实际上是某个命令的stdout的结果):

^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms

我需要解析为一组字符串和整数并记录在变量中。应该将大部分行解析为适当类型的变量(string或int)。所以最后,我得到了:

string:  "^+", "line-17532.dyn.kponet.fi", "+1503us", "+9103us", "55ms"
int   :   2, 7, 377, 1 

其中这一对值

+1503us[+9103us] 

也可以带空格

+503us[ +103us] 

我需要把方括号之前的内容和方括号内的内容分别存入各自独立的字符串中。

另外,时间指定可以表示为

ns, ms, us, s

如果能给出如何处理这种输入的示例,我将不胜感激,因为现有的文档非常零散,而且缺乏连贯性。

大部分日志,以及描述各个字段的标题:

MS Name/IP address         Stratum Poll Reach LastRx Last sample               
===============================================================================
^+ ns2.sdi.fi                    2   9   377   381  -1476us[-1688us] +/-   72ms
^+ line-17532.dyn.kponet.fi      2  10   377   309   +302us[ +302us] +/-   59ms
^* heh.fi                        2  10   377   319  -1171us[-1387us] +/-   50ms
^+ stara.mulimuli.fi             3  10   377   705  -1253us[-1446us] +/-   73ms

3 个答案:

答案 0 :(得分:5)

和往常一样,我首先勾勒出一个有用的AST:

namespace AST {
    // Clock whose duration type stores the time values of a record.
    using clock = std::chrono::high_resolution_clock;

    // A time offset with its sign kept separately from its magnitude.
    struct TimeSample {
        enum Direction { up, down }; // + or -

        Direction       direction;
        clock::duration value;
    };

    // One parsed log line; the example values come from the sample input.
    struct Record {
        std::string prefix;        // "^+"
        std::string fqdn;          // "line-17532.dyn.kponet.fi"
        int a;                     // 2
        int b;                     // 7
        int c;                     // 377
        int d;                     // 1
        TimeSample primary;        // sample written before the brackets
        TimeSample braced;         // sample written inside the brackets
        clock::duration tolerance; // value following "+/-"
    };
}

既然我们知道要解析什么,我们大多只是用规则来模仿AST:

using namespace qi;

// Entry point: runs record_ with `blank` as the skipper.
start     = skip(blank) [record_];

// One log line: prefix, host name, four integers, the sample before the brackets,
// the bracketed sample, and finally the tolerance.
record_   = prefix_ >> fqdn_ >> int_ >> int_ >> int_ >> int_ >> sample_ >> '[' >> sample_ >> ']' >> tolerance_;

prefix_   = string("^+"); // or whatever you need to match here
fqdn_     = +graph; // or whatever additional constraints you have
// A sample is a sign symbol followed by a duration.
sample_   = direction_ >> duration_;
// Semantic action: the rule's value is the parsed number multiplied by the matched unit.
duration_ = (long_ >> units_) [ _val = _1 * _2 ];
// The literal "+/-" is matched but contributes no attribute; only the duration is kept.
tolerance_= "+/-" >> duration_;

当然,有趣的位是单位和方向:

// Symbol table translating the sign character into a TimeSample::Direction.
struct directions : qi::symbols<char, AST::TimeSample::Direction> {
    directions() {
        this->add
            ("+", AST::TimeSample::up)
            ("-", AST::TimeSample::down);
    }
} direction_;

// Symbol table translating a unit suffix into the duration of one such unit.
struct units : qi::symbols<char, AST::clock::duration> {
    units() {
        namespace chr = std::chrono;
        this->add
            ("s",  chr::seconds(1))
            ("ms", chr::milliseconds(1))
            ("us", chr::microseconds(1))
            ("µs", chr::microseconds(1))
            ("ns", chr::nanoseconds(1));
    }
} units_;

空白字符的处理由跳过器(skipper)负责;对于非词素(non-lexeme)规则,我选择qi::blank_type作为跳过器:

using Skipper = qi::blank_type;

// Entry rule: declared without a skipper.
qi::rule<It, AST::Record()> start;

// Rules declared with the blank skipper:
qi::rule<It, AST::Record(), Skipper>          record_;
qi::rule<It, AST::TimeSample(), Skipper>      sample_;
qi::rule<It, AST::clock::duration(), Skipper> duration_;
qi::rule<It, AST::clock::duration(), Skipper> tolerance_;

// lexemes:
qi::rule<It, std::string()> prefix_, fqdn_;

样本

全部放在一起,使用它:

int main() {
    std::istringstream iss(R"(^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms
)");

    // Handle the input one line at a time; every line is parsed as one record.
    for (std::string line; getline(iss, line);) {
        auto first = line.cbegin();
        auto const last = line.cend();

        AST::Record record;
        bool const ok = parse(first, last, parser<>{}, record);

        if (!ok)
            std::cout << "parse error\n";
        else
            std::cout << "parsed: " << boost::fusion::as_vector(record) << "\n";

        // Whatever lies between `first` and `last` after parsing was not consumed.
        if (first != last)
            std::cout << "remaining unparsed input: '" << std::string(first, last) << "'\n";
    }
}

输出如下(Live On Coliru):

parsed: (^+ line-17532.dyn.kponet.fi 2 7 377 1 +0.001503s +0.009103s 0.055s)

(下面的调试输出)

完整代码:

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>
#include <sstream>
#include <chrono>

// Debug-only stream inserter: prints a duration as its count in seconds followed by "s".
// Presumably placed in std::chrono so that argument-dependent lookup finds it when a
// duration is streamed (e.g. by the Spirit debug output) — TODO confirm.
// NOTE(review): the C++ standard does not allow programs to add declarations like this to
// namespace std, and newer standard libraries (C++20) ship their own operator<< for
// durations — verify which overload is selected before changing the language standard.
namespace std { namespace chrono {
    // for debug
    std::ostream& operator<<(std::ostream& os, duration<double> d) { return os << d.count() << "s"; }
} }

namespace AST {
    // Clock whose duration type stores the time values of a record.
    using clock = std::chrono::high_resolution_clock;

    // A time offset with its sign kept separately from its magnitude.
    struct TimeSample {
        enum Direction { up, down }; // + or -

        Direction       direction;
        clock::duration value;

        // for debug: prints the sign character of a direction
        friend std::ostream& operator<<(std::ostream& os, Direction d) {
            return os << (d == up ? "+" : "-");
        }

        // for debug: prints the sign followed by the value in seconds, e.g. "+0.001503s"
        friend std::ostream& operator<<(std::ostream& os, TimeSample const& sample) {
            std::chrono::duration<double> const seconds = sample.value;
            return os << sample.direction << seconds.count() << "s";
        }
    };

    // One parsed log line; the example values come from the sample input.
    struct Record {
        std::string prefix;        // "^+"
        std::string fqdn;          // "line-17532.dyn.kponet.fi"
        int a;                     // 2
        int b;                     // 7
        int c;                     // 377
        int d;                     // 1
        TimeSample primary;        // sample written before the brackets
        TimeSample braced;         // sample written inside the brackets
        clock::duration tolerance; // value following "+/-"
    };
}

// Adapt both AST structs as Boost.Fusion sequences, listing their members in order, so
// that the grammar can fill them as attributes and main() can print a record via
// boost::fusion::as_vector.
BOOST_FUSION_ADAPT_STRUCT(AST::Record, prefix, fqdn, a, b, c, d, primary, braced, tolerance)
BOOST_FUSION_ADAPT_STRUCT(AST::TimeSample, direction, value)

namespace qi = boost::spirit::qi;

// Grammar that turns one log line into an AST::Record.
// The grammar itself takes no skipper; `start` applies the blank skipper internally.
template <typename It = std::string::const_iterator>
struct parser : qi::grammar<It, AST::Record()> {
    parser() : parser::base_type(start) {
        // Entry point: runs record_ with `blank` as the skipper.
        start = qi::skip(qi::blank)[record_];

        // Field order mirrors the member order the Record struct is adapted with.
        record_ = prefix_ >> fqdn_
               >> qi::int_ >> qi::int_ >> qi::int_ >> qi::int_
               >> sample_ >> '[' >> sample_ >> ']'
               >> tolerance_;

        prefix_ = qi::string("^+"); // or whatever you need to match here
        fqdn_   = +qi::graph;       // or whatever additional constraints you have

        // A sample is a sign symbol followed by a duration.
        sample_ = direction_ >> duration_;

        // The rule's value is the parsed number multiplied by the matched unit.
        duration_ = (qi::long_ >> units_)[qi::_val = qi::_1 * qi::_2];

        // The "+/-" literal contributes no attribute; only the duration is kept.
        tolerance_ = "+/-" >> duration_;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(record_)
                (prefix_)(fqdn_)(sample_)(duration_)(tolerance_)
            )
    }

  private:
    // Symbol table translating the sign character into a TimeSample::Direction.
    struct directions : qi::symbols<char, AST::TimeSample::Direction> {
        directions() {
            this->add
                ("+", AST::TimeSample::up)
                ("-", AST::TimeSample::down);
        }
    } direction_;

    // Symbol table translating a unit suffix into the duration of one such unit.
    struct units : qi::symbols<char, AST::clock::duration> {
        units() {
            namespace chr = std::chrono;
            this->add
                ("s",  chr::seconds(1))
                ("ms", chr::milliseconds(1))
                ("us", chr::microseconds(1))
                ("µs", chr::microseconds(1))
                ("ns", chr::nanoseconds(1));
        }
    } units_;

    using Skipper = qi::blank_type;

    // Entry rule: declared without a skipper.
    qi::rule<It, AST::Record()> start;

    // Rules declared with the blank skipper:
    qi::rule<It, AST::Record(), Skipper>          record_;
    qi::rule<It, AST::TimeSample(), Skipper>      sample_;
    qi::rule<It, AST::clock::duration(), Skipper> duration_;
    qi::rule<It, AST::clock::duration(), Skipper> tolerance_;

    // lexemes:
    qi::rule<It, std::string()> prefix_, fqdn_;
};

int main() {
    std::istringstream iss(R"(^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms
)");

    // Handle the input one line at a time; every line is parsed as one record.
    for (std::string line; getline(iss, line);) {
        auto first = line.cbegin();
        auto const last = line.cend();

        AST::Record record;
        bool const ok = parse(first, last, parser<>{}, record);

        if (!ok)
            std::cout << "parse error\n";
        else
            std::cout << "parsed: " << boost::fusion::as_vector(record) << "\n";

        // Whatever lies between `first` and `last` after parsing was not consumed.
        if (first != last)
            std::cout << "remaining unparsed input: '" << std::string(first, last) << "'\n";
    }
}

调试输出

<start>
  <try>^+ line-17532.dyn.kp</try>
  <record_>
    <try>^+ line-17532.dyn.kp</try>
    <prefix_>
      <try>^+ line-17532.dyn.kp</try>
      <success> line-17532.dyn.kpon</success>
      <attributes>[[^, +]]</attributes>
    </prefix_>
    <fqdn_>
      <try>line-17532.dyn.kpone</try>
      <success>      2   7   377   </success>
      <attributes>[[l, i, n, e, -, 1, 7, 5, 3, 2, ., d, y, n, ., k, p, o, n, e, t, ., f, i]]</attributes>
    </fqdn_>
    <sample_>
      <try>   +1503us[+9103us] </try>
      <duration_>
        <try>1503us[+9103us] +/- </try>
        <success>[+9103us] +/-   55ms</success>
        <attributes>[0.001503s]</attributes>
      </duration_>
      <success>[+9103us] +/-   55ms</success>
      <attributes>[[+, 0.001503s]]</attributes>
    </sample_>
    <sample_>
      <try>+9103us] +/-   55ms</try>
      <duration_>
        <try>9103us] +/-   55ms</try>
        <success>] +/-   55ms</success>
        <attributes>[0.009103s]</attributes>
      </duration_>
      <success>] +/-   55ms</success>
      <attributes>[[+, 0.009103s]]</attributes>
    </sample_>
    <tolerance_>
      <try> +/-   55ms</try>
      <duration_>
        <try>   55ms</try>
        <success></success>
        <attributes>[0.055s]</attributes>
      </duration_>
      <success></success>
      <attributes>[0.055s]</attributes>
    </tolerance_>
    <success></success>
    <attributes>[[[^, +], [l, i, n, e, -, 1, 7, 5, 3, 2, ., d, y, n, ., k, p, o, n, e, t, ., f, i], 2, 7, 377, 1, [+, 0.001503s], [+, 0.009103s], 0.055s]]</attributes>
  </record_>
  <success></success>
  <attributes>[[[^, +], [l, i, n, e, -, 1, 7, 5, 3, 2, ., d, y, n, ., k, p, o, n, e, t, ., f, i], 2, 7, 377, 1, [+, 0.001503s], [+, 0.009103s], 0.055s]]</attributes>
</start>

答案 1 :(得分:3)

注意:这个答案显示了一种更简单的方法,为sehe所示的其他技术奠定了基础。

序言

让我们启用Spirit调试输出,这样我们就可以在我们开发它们时跟踪解析的进度。

#define BOOST_SPIRIT_DEBUG 1

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;

日志条目数据结构

第一步是定义一个结构来保存已解析的日志条目。

// Holds the nine fields of one parsed log line, in the order they appear on the line.
// Example values are taken from the first test input.
struct log_entry_t
{
    // element_0: line prefix ("^+"); element_1: host name ("line-17532.dyn.kponet.fi")
    std::string element_0, element_1;

    // The four unsigned numeric columns (2, 7, 377, 1).
    uint32_t element_2, element_3, element_4, element_5;

    // element_6: value before the brackets ("+1503us"); element_7: value inside the
    // brackets ("+9103us"); element_8: value after "+/-" ("55ms").
    std::string element_6, element_7, element_8;
};

调整数据结构

为了能够将结构用作Spirit语法的属性,我们需要将其调整为融合元组。 (更多信息位于one of Spirit tutorials)这是使用BOOST_FUSION_ADAPT_STRUCT实现的。

// Adapt log_entry_t as a Boost.Fusion sequence so it can serve as the attribute of the
// grammar. Each entry names a member together with its type; the entries are listed in
// the same order as the elements of the log_line rule.
BOOST_FUSION_ADAPT_STRUCT(
    log_entry_t
    , (std::string, element_0)
    , (std::string, element_1)
    , (uint32_t, element_2)
    , (uint32_t, element_3)
    , (uint32_t, element_4)
    , (uint32_t, element_5)
    , (std::string, element_6)
    , (std::string, element_7)
    , (std::string, element_8)
)

Log Line Grammar

接下来,我们定义日志条目的语法。由于各个条目之间可能由空白分隔,我们希望使用短语解析(phrase parsing),因此需要指定一个跳过解析器(skipper)。qi::blank_type是一个合适的跳过器,因为它只匹配空格和制表符。

不过,所有元素都应被视为词素(lexeme),因此我们不为它们的规则指定任何跳过器。

// Grammar for one log line. The top-level rule skips blanks between elements; every
// element rule is declared without a skipper, so no blanks are allowed inside an element.
template <typename Iterator>
struct log_line_parser
    : qi::grammar<Iterator, log_entry_t(), qi::blank_type>
{
    typedef qi::blank_type skipper_t;

    log_line_parser()
        : log_line_parser::base_type(log_line)
    {
        // Fixed line prefix, captured as a string.
        element_0 %= qi::string("^+");
        // Host name: dot-separated labels made of letters, digits and '-'; raw[] captures
        // the whole matched text, dots included.
        element_1 %= qi::raw[(+qi::char_("-a-zA-Z0-9") % qi::char_('.'))];
        element_2 %= qi::uint_;
        element_3 %= qi::uint_;
        element_4 %= qi::uint_;
        element_5 %= qi::uint_;
        // Signed offsets: the sample log contains negative values as well
        // (e.g. "-1476us[-1688us]"), so either sign is accepted.
        element_6 %= qi::raw[qi::char_("+-") >> qi::uint_ >> time_unit];
        element_7 %= qi::raw[qi::char_("+-") >> qi::uint_ >> time_unit];
        // Unsigned value that follows the "+/-" literal.
        element_8 %= qi::raw[qi::uint_ >> time_unit];

        // "s", optionally preceded by one of 'n', 'm', 'u'.
        time_unit %= -qi::char_("nmu") >> qi::char_('s');

        log_line
            %=  element_0
            >>  element_1
            >>  element_2
            >>  element_3
            >>  element_4
            >>  element_5
            >>  element_6
            >>  qi::lit('[') >> element_7 >> qi::lit(']')
            >>  qi::lit("+/-")
            >>  element_8
            ;

        init_debug();
    }

    // Registers every rule for Spirit's debug tracing.
    void init_debug()
    {
        BOOST_SPIRIT_DEBUG_NODE(element_0);
        BOOST_SPIRIT_DEBUG_NODE(element_1);
        BOOST_SPIRIT_DEBUG_NODE(element_2);
        BOOST_SPIRIT_DEBUG_NODE(element_3);
        BOOST_SPIRIT_DEBUG_NODE(element_4);
        BOOST_SPIRIT_DEBUG_NODE(element_5);
        BOOST_SPIRIT_DEBUG_NODE(element_6);
        BOOST_SPIRIT_DEBUG_NODE(element_7);
        BOOST_SPIRIT_DEBUG_NODE(element_8);

        BOOST_SPIRIT_DEBUG_NODE(time_unit);

        BOOST_SPIRIT_DEBUG_NODE(log_line);
    }

private:
    // Element rules: no skipper, so each one behaves as a lexeme.
    qi::rule<Iterator, std::string()> element_0;
    qi::rule<Iterator, std::string()> element_1;
    qi::rule<Iterator, uint32_t()> element_2;
    qi::rule<Iterator, uint32_t()> element_3;
    qi::rule<Iterator, uint32_t()> element_4;
    qi::rule<Iterator, uint32_t()> element_5;
    qi::rule<Iterator, std::string()> element_6;
    qi::rule<Iterator, std::string()> element_7;
    qi::rule<Iterator, std::string()> element_8;

    qi::rule<Iterator, std::string()> time_unit;

    // Top-level rule: skips blanks between the elements.
    qi::rule<Iterator, log_entry_t(), skipper_t> log_line;
};

让我们逐一看看其中的几条规则:

  • 元素0 - 这是一个我们需要匹配的简单字符串。由于我们也希望捕获它,我们需要使用string parser

  • 元素1 - 我们可以使用char_ parser来匹配单个字符或字符集。 + parser operator表示重复,而% (list) parser operator让我们解析由分隔符分隔的多次重复(在我们的示例中为点)。

  • 元素2 - 要解析数字,我们可以使用现有的numeric parsers

  • 元素6 - 由于我们想要捕获字符串中的整个序列,我们使用raw parser directive

为了在使用解析器运算符时确定结果属性类型,请参阅compound attribute rules的引用。

测试功能

// Parses one log line with log_line_parser (blanks skipped between elements) and prints
// either the parsed fields or the start of the unparsed remainder.
// Returns true only when the parser matched AND consumed the whole line, so the returned
// value always agrees with the printed verdict.
bool test(std::string const& log)
{
    std::cout << "Parsing: " << log << "\n\n";

    std::string::const_iterator iter(log.begin());
    std::string::const_iterator end(log.end());

    log_line_parser<std::string::const_iterator> g;

    log_entry_t entry;

    // phrase_parse advances `iter`; a match that leaves input behind counts as a failure.
    bool const r(qi::phrase_parse(iter, end, g, qi::blank, entry) && (iter == end));

    std::cout << "-------------------------\n";

    if (r) {
        std::cout << "Parsing succeeded\n";
        std::cout << entry.element_0 << "\n"
            << entry.element_1 << "\n"
            << entry.element_2 << "\n"
            << entry.element_3 << "\n"
            << entry.element_4 << "\n"
            << entry.element_5 << "\n"
            << entry.element_6 << "\n"
            << entry.element_7 << "\n"
            << entry.element_8 << "\n";
    } else {
        // Show at most 30 characters of the remainder. The bound is checked BEFORE
        // advancing, because forming an iterator past end() is undefined behavior.
        std::string::const_iterator some = (end - iter > 30) ? iter + 30 : end;
        std::string context(iter, some);
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \": " << context << "...\"\n";
    }

    return r;
}

主要功能

最后,让我们对我们的解析器进行一些正面和负面的测试。

// Runs the positive and negative parser checks, prints the combined verdict and reports
// it through the process exit status (0 when every check behaved as expected, 1 otherwise).
int main()
{
    bool result(true);

    // Lines that must parse.
    result &= test("^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms");
    result &= test("^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[ +9103us] +/-   55ms");
    result &= test("^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503ms[+9103ns] +/-   55s");

    // Lines that must be rejected.
    result &= !test("^- line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms");
    result &= !test("^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55 ms");
    result &= !test("^+ line-17532.dyn.kponet.fi      2   7   377     1   + 1503us[+9103us] +/-   55ms");
    result &= !test("^+ line-17532.dyn.kponet.fi      2   7   +377     1   +1503us[+9103us] +/-   55ms");
    result &= !test("^+ line-17532.dyn.kponet.fi      2   7   3 77     1   +1503us[+9103us] +/-   55ms");
    result &= !test("^+ line-17532.dyn.kponet.fi      2   7   -377     1   +1503us[+9103us] +/-   55ms");


    std::cout << "Test result = " << result << "\n";

    // A failed check must be visible to whoever runs the program, not only in the output.
    return result ? 0 : 1;
}

程序会先输出大量调试信息(以第一个测试为例):

Parsing: ^+ line-17532.dyn.kponet.fi      2   7   377     1   +1503us[+9103us] +/-   55ms

<log_line>
  <try>^+ line-17532.dyn.kp</try>
  <element_0>
    <try>^+ line-17532.dyn.kp</try>
    <success> line-17532.dyn.kpon</success>
    <attributes>[[^, +]]</attributes>
  </element_0>
  <element_1>
    <try>line-17532.dyn.kpone</try>
    <success>      2   7   377   </success>
    <attributes>[[l, i, n, e, -, 1, 7, 5, 3, 2, ., d, y, n, ., k, p, o, n, e, t, ., f, i]]</attributes>
  </element_1>
  <element_2>
    <try>2   7   377     1   </try>
    <success>   7   377     1   +</success>
    <attributes>[2]</attributes>
  </element_2>
  <element_3>
    <try>7   377     1   +150</try>
    <success>   377     1   +1503</success>
    <attributes>[7]</attributes>
  </element_3>
  <element_4>
    <try>377     1   +1503us[</try>
    <success>     1   +1503us[+91</success>
    <attributes>[377]</attributes>
  </element_4>
  <element_5>
    <try>1   +1503us[+9103us]</try>
    <success>   +1503us[+9103us] </success>
    <attributes>[1]</attributes>
  </element_5>
  <element_6>
    <try>+1503us[+9103us] +/-</try>
    <time_unit>
      <try>us[+9103us] +/-   55</try>
      <success>[+9103us] +/-   55ms</success>
      <attributes>[[u, s]]</attributes>
    </time_unit>
    <success>[+9103us] +/-   55ms</success>
    <attributes>[[+, 1, 5, 0, 3, u, s]]</attributes>
  </element_6>
  <element_7>
    <try>+9103us] +/-   55ms</try>
    <time_unit>
      <try>us] +/-   55ms</try>
      <success>] +/-   55ms</success>
      <attributes>[[u, s]]</attributes>
    </time_unit>
    <success>] +/-   55ms</success>
    <attributes>[[+, 9, 1, 0, 3, u, s]]</attributes>
  </element_7>
  <element_8>
    <try>55ms</try>
    <time_unit>
      <try>ms</try>
      <success></success>
      <attributes>[[m, s]]</attributes>
    </time_unit>
    <success></success>
    <attributes>[[5, 5, m, s]]</attributes>
  </element_8>
  <success></success>
  <attributes>[[[^, +], [l, i, n, e, -, 1, 7, 5, 3, 2, ., d, y, n, ., k, p, o, n, e, t, ., f, i], 2, 7, 377, 1, [+, 1, 5, 0, 3, u, s], [+, 9, 1, 0, 3, u, s], [5, 5, m, s]]]</attributes>
</log_line>
-------------------------
Parsing succeeded
^+
line-17532.dyn.kponet.fi
2
7
377
1
+1503us
+9103us
55ms

程序打印以下行:

Test result = 1

Live sample on Coliru

答案 2 :(得分:2)

对于那些声称C++只会增加复杂性、而C其实更好的人,我几乎有些感同身受。这样做确实会失去类型安全等一些东西,但不妨考虑用C的sscanf来读取这些内容:

/* Destination for the fields read from one log line by sscanf.
   Every text field is a fixed 256-byte buffer. */
struct record {
    char prefix[256];   /* first whitespace-delimited token */
    char url[256];      /* second whitespace-delimited token */
    int a;              /* the four integer columns */
    int b;
    int c;
    int d;
    char time1[256];    /* text before '[' */
    char time2[256];    /* text between '[' and ']' */
    char time3[256];    /* token after "+/-" */
};

/* Reads the nine fields of one line into r:
     %255s %255s   two whitespace-delimited tokens (at most 255 chars each)
     %d x4         four integers
     %255[^[]      everything up to, but not including, '['
     [             the literal '[', then optional whitespace
     %255[^]]      everything up to, but not including, ']'
     ] +/-         the literal ']', optional whitespace, then the literal "+/-"
     %255s         the final token
   NOTE(review): sscanf returns the number of fields it assigned; the result is ignored
   here, so a malformed line goes unnoticed unless the caller compares it against 9. */
sscanf(input, 
       "%255s %255s %d %d %d %d %255[^[][ %255[^]]] +/- %255s",
       r.prefix, r.url, &r.a, &r.b, &r.c, &r.d, r.time1, r.time2, r.time3);

这种做法当然有一些潜在的缺点:

  1. 它读入char数组而不是std::string s。
  2. scanf及其同类函数不是类型安全的。
  3. 它没有尝试验证时间上的后缀。
  4. 基于Spirit的解析器很可能更快。
  5. 如果其中任何一点对您的用途来说确实是严重问题,您可能确实需要换一种方法。不过,就这段代码可能的用途来看,其中是否有哪一点真的会造成问题,并不是那么显而易见。