可选表达式和解析错误位置的问题

时间:2014-07-11 12:35:26

标签: c++ boost boost-spirit boost-spirit-qi boost-fusion

我试图为特定的消息传递格式编写我的第一个boost灵魂解析器,但是我遇到了一些问题。使用的boost库版本是1.49.0!

#include <iostream>
#include <sstream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>

namespace qi = boost::spirit::qi;

struct message
{
  std::string title;
  std::string sender;
  std::string receiver;
  unsigned int seqNo;
  std::string senderRef;
  std::string receiverRef;
  unsigned int seqNoRef;
  std::string id;
};

BOOST_FUSION_ADAPT_STRUCT(
    message,
    (std::string, title)
    (std::string, sender)
    (std::string, receiver)
    (unsigned int, seqNo)
    (std::string, senderRef)
    (std::string, receiverRef)
    (unsigned int, seqNoRef)
    (std::string, id)
)

template<typename Iterator>
struct MyQiGrammar : qi::grammar<Iterator, message(), qi::space_type>
{
  MyQiGrammar() : MyQiGrammar::base_type(start) {
    qi::uint_parser<unsigned int, 10, 3, 3> uint_3p;

    delim     = qi::char_("-/");            // some values are delimited by '-' or '/'

    title    %= qi::repeat(3)[qi::upper];   // exactly 3 upper case letters
    sender   %= +qi::upper;                 // at least one upper case letter
    receiver %= +qi::upper;                 // at least one upper case letter
    seqNo    %= uint_3p;                    // exactly 3 digits (e.g. 001)
    id       %= qi::repeat(1,7)[qi::alnum]; // at least 1 upper case letter and maximum 7

    start    %=
        '('
        >> title
        >> sender >> delim >> receiver >> seqNo
        >> -(sender >> delim >> receiver >> seqNo)
        >> delim >> id
        >>
        ')';
  }

  qi::rule<Iterator> delim;
  qi::rule<Iterator, std::string(), qi::space_type> title;
  qi::rule<Iterator, std::string(), qi::space_type> sender, receiver, id;
  qi::rule<Iterator, unsigned int(), qi::space_type> seqNo;
  qi::rule<Iterator, message(), qi::space_type> start;
};

int
main(int args, char** argv)
{
  typedef std::istreambuf_iterator<char> base_iterator_type;
  typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
  typedef boost::spirit::classic::position_iterator2<forward_iterator_type> pos_iterator_type;
  typedef MyQiGrammar<pos_iterator_type> qi_parser;

  std::string rawMsg = "(ABCZ/Y002-GWI4576)";
  qi_parser myGrammarParser;
  message msg;

  std::istringstream iss(rawMsg);
  base_iterator_type in_begin(iss);
  forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
  forward_iterator_type fwd_end;
  pos_iterator_type pos_begin(fwd_begin, fwd_end);
  pos_iterator_type pos_end;

  std::cout << rawMsg << std::endl;

  try {
    bool msgRes = qi::phrase_parse(pos_begin, pos_end,
                                   myGrammarParser,
                                   qi::space,
                                   msg);

    if(msgRes) {
      std::cout << "Parsing succeeded!" << std::endl;

      if(pos_begin == pos_end) {
        std::cout << "Full match!" << std::endl;
        std::cout << "Title                  : " << msg.title << std::endl;
        std::cout << "Sender                 : " << msg.sender << std::endl;
        std::cout << "Receiver               : " << msg.receiver << std::endl;
        std::cout << "Sequence number        : " << msg.seqNo << std::endl;
        std::cout << "Sender (ref.)          : " << msg.senderRef << std::endl;
        std::cout << "Receiver (ref.)        : " << msg.receiverRef << std::endl;
        std::cout << "Sequence number (ref.) : " << msg.seqNoRef << std::endl;
        std::cout << "Message Identifier     : " << msg.id << std::endl;
      }
    } else {
      std::cout << "Parsing failed!" << std::endl;
      std::cout << "Stopped at: " << pos_begin.get_position().line
                << ":" << pos_begin.get_position().column << std::endl;
    }
  } catch(qi::expectation_failure<pos_iterator_type>& e) {
    const boost::spirit::classic::file_position_base<std::string>& pos = e.first.get_position();
    std::stringstream ss;

    ss << "Parse error at line " << pos.line << " column " << pos.column
       << "\n\t" << e.first.get_currentline()
       << "\n\t" << std::string(pos.column, ' ') << "^--here";

    std::cerr << ss.str() << std::endl;
  }

  return 0;
}

通常,消息格式如下所示:

'('<TITLE><SENDER>'/'<RECEIVER><SEQNO>[<SENDERREF>'/'<RECEIVERREF><SEQNOREF>]'-'<MID>')'

输出显然表明我对可选的消息引用部分做错了:

Parsing succeeded!
Full match!
Title                  : ABC
Sender                 : Z
Receiver               : Y
Sequence number        : 2
Sender (ref.)          :
Receiver (ref.)        : GWI4576             <--- Message identifier
Sequence number (ref.) : 3072563792          <--- uninitialized, can be neglected
Message Identifier     :

如果 rawMsg 包含消息引用,则会有更多错误的成员分配,例如&#34;(ABCZ / Y002Y / Z001-GWI4576)&#34;

Parsing succeeded!
Full match!
Title                  : ABC
Sender                 : Z
Receiver               : Y
Sequence number        : 2
Sender (ref.)          : YZ                  <--- Sender and receiver!?
Receiver (ref.)        : GWI4576             <--- Message identifier
Sequence number (ref.) : 3214704440          <--- uninitialized, but should be 1
Message Identifier     :

我的规则中有什么错误?

此外,我观察到如果解析了不正确的消息格式,那么迭代器的行和列总是设置为 1 ,与错误位置无关:

Parsing failed!
Stopped at: 1:1

为什么?

0 个答案:

没有答案