Boost Spirit 2:跟踪 qi::parser 的解析进度百分比。我的代码有什么问题?

时间:2015-12-13 09:26:55

标签: c++ boost boost-spirit

我正在使用 Boost Spirit 2 进行开发,并尝试参照 example 在我的 PGN 解析器中获取解析进度(稍后还会添加语义动作)(另请参阅 related previous question)。但我始终无法消除编译错误。以下是 cpp 文件:

#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <tuple>

#include <iostream>

// Adapt the AST structs as Boost.Fusion sequences; the members are listed in
// the same order in which the structs declare them.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
namespace qi = boost::spirit::qi;

// Rule attribute types that pair a std::size_t with the AST types.
// NOTE(review): the grammar itself is declared with std::vector<pgn_game>, not
// with these tuples — confirm the two are actually compatible.
typedef std::tuple<std::size_t, game_move> move_t;
typedef std::tuple<std::vector<pgn_tag>, std::vector<move_t>> game_t;
typedef std::tuple<std::size_t, std::vector<game_t>> pgn_t;

// Qi grammar for PGN text: zero or more games, each an optional tag header
// followed by one or more numbered moves. Whitespace is skipped with qi::space.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
    pgn_parser() : pgn_parser::base_type(games) {
        using namespace qi;

        // NOTE(review): filepos is a local of this constructor, yet full_move and
        // games below are built from filepos.current_pos / filepos.save_start_pos.
        // It is destroyed when the constructor returns, i.e. before any parsing
        // happens — this lifetime looks too short; confirm.
        CurrentPos<Iterator> filepos;

        // Empty string used as the attribute of a move that is not present.
        const std::string no_move;
        // Result tokens and the result_t value each one maps to.
        result.add
            ("1-0",     result_t::white_won)
            ("0-1",     result_t::black_won)
            ("1/2-1/2", result_t::draw)
            ("*",       result_t::undecided);

        // A tag is [name "value"]; the value is everything between the two quotes.
        quoted_string    = '"' >> *~char_('"') >> '"';
        tag              = '[' >> +alnum >> quoted_string >> ']';
        header           = +tag;
        // Either castling, or a run of file/piece letters followed by a run of
        // square/capture/promotion characters and an optional "e.p." suffix.
        regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] exposes the matched input (move plus optional '+' or '#') as the attribute.
        single_move      = raw [ regular_move >> -char_("+#") ];
        // Position, move number, then "..." (no white move) or "." plus the white
        // move, then the black move (or none), then an optional result token.
        full_move        = filepos.current_pos >> uint_
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
            >> (single_move | attr(no_move))
            >> -result;

        game_description = +full_move;
        single_game      = -header >> game_description;
        games            = filepos.save_start_pos >> *single_game;

        BOOST_SPIRIT_DEBUG_NODES(
                    (tag)(header)(quoted_string)(regular_move)(single_move)
                    (full_move)(game_description)(single_game)(games)
                )
    }

  private:
    // Rules that use the qi::space skipper.
    // NOTE(review): games is declared with pgn_t() while the grammar base is
    // declared with std::vector<pgn_game> — confirm this mismatch is intended.
    qi::rule<Iterator, pgn_tag(),              qi::space_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>,   qi::space_type> header;

    qi::rule<Iterator, move_t(),               qi::space_type> full_move;
    qi::rule<Iterator, std::vector<move_t>,    qi::space_type> game_description;

    qi::rule<Iterator, game_t(),               qi::space_type> single_game;
    qi::rule<Iterator, pgn_t(),  qi::space_type> games;

    // lexemes
    qi::symbols<char, result_t> result;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator> regular_move;
    qi::rule<Iterator, std::string()> single_move;
};
}

// Opens the file named by inputFilePath and hands the stream to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream(inputFilePath);
    parseInput(pgnStream);
}

// Parses PGN text from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    parseInput(inputFile);
}

// Nothing to release by hand: the only data member cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    loloof64::pgn_parser<It> parser;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    //////////////////////////////////
    std::cout << "About to parse the file" << std::endl;
    //////////////////////////////////

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);

    //////////////////////////////////
    std::cout << "Finished to parse the file" << std::endl;
    //////////////////////////////////

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

以下是头文件(header):

#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/include/phoenix.hpp>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>

namespace loloof64 {

    namespace phx = boost::phoenix;
    namespace qi = boost::spirit::qi;


    /*
     * This class has been taken from http://marko-editor.com/articles/position_tracking/
     *
     * Position tracker made of two rules:
     *  - save_start_pos stores the iterator position at which it is invoked;
     *  - current_pos yields, as a std::size_t, the distance from that stored
     *    position to the position at which it is invoked.
     * Both semantic actions bind `this`, so the rules call back into this very
     * object whenever they are parsed.
    */
    template<typename Iterator>
    struct CurrentPos {
      CurrentPos() {
        // iter_pos exposes the current iterator; the action stores it as the start.
        save_start_pos = qi::omit[boost::spirit::repository::qi::iter_pos[
                phx::bind(&CurrentPos::setStartPos, this, qi::_1)]];
        // The rule's attribute (_val) becomes the offset computed by getCurrentPos.
        current_pos = boost::spirit::repository::qi::iter_pos[
                qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)];
      }

      qi::rule<Iterator> save_start_pos;
      qi::rule<Iterator, std::size_t()> current_pos;

    private:
      // Remembers the position that later offsets are measured from.
      void setStartPos(const Iterator &iterator) {
        start_pos_ = iterator;
      }

      // Offset of `iterator` from the remembered start. std::distance has to walk
      // the range step by step unless Iterator is a random-access iterator.
      std::size_t getCurrentPos(const Iterator &iterator) {
        return std::distance(start_pos_, iterator);
      }

      Iterator start_pos_;
    };

    // Outcome of a game; the comments give the PGN token the parser maps to each value.
    enum result_t {
        white_won,  // "1-0"
        black_won,  // "0-1"
        draw,       // "1/2-1/2"
        undecided   // "*"
    };

    // One tag pair from a game's header section.
    struct pgn_tag {
        std::string key;   // tag name
        std::string value; // tag value (the quoted string)
    };

    // One numbered move entry: the move number with white's and black's moves.
    struct game_move {
        unsigned move_number;
        std::string white_move; // the parser supplies an empty string when the entry starts with "..."
        std::string black_move; // the parser supplies an empty string when no black move follows
        result_t result;        // result token that may follow the move
    };

    // A complete game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    // Parses PGN input on construction and keeps the games it contains.
    //
    // Both constructors may throw PgnParsingException (if bad pgn format) and
    // InputFileException (if missing file).
    class PgnGamesExtractor {
      public:
        // Parses the file found at inputFilePath.
        PgnGamesExtractor(std::string inputFilePath);
        // Parses PGN text read from a stream supplied by the caller.
        PgnGamesExtractor(std::istream &inputFile);
        virtual ~PgnGamesExtractor();

        // Returns a copy of the parsed games.
        std::vector<pgn_game> getGames() const
        {
            return games;
        }

      private:
        void parseInput(std::istream &inputFile);

        std::vector<pgn_game> games;
    };

    // Error raised when the input is not well-formed PGN.
    class PgnParsingException : public virtual std::runtime_error {
      public:
        // explicit: a bare string must never silently convert into an exception.
        // The message is taken by const reference to avoid an extra copy.
        explicit PgnParsingException(const std::string &message) : std::runtime_error(message) {}
    };

    // Error raised when the input file cannot be read.
    class InputFileException : public virtual std::runtime_error {
      public:
        // explicit: a bare string must never silently convert into an exception.
        // The message is taken by const reference to avoid an extra copy.
        explicit InputFileException(const std::string &message) : std::runtime_error(message) {}
    };
}

#endif // PGNGAMESEXTRACTOR_HPP

我没有贴出编译错误信息,因为错误输出太多,而且这些文件很容易自行编译测试。

2 个答案:

答案 0 :(得分:2)

当然,这种做法在流式(streaming)接口上效果不会好。你可以保留起始迭代器,但是

  1. 你无法事先知道流的长度(除非通过带外方式获得)

  2. 每次都计算当前位置(到起始迭代器的距离)会非常低效。

  3. 既然你在评论中提到要解析的是文件,就应该考虑使用内存映射(例如 boost::iostreams::mapped_file_source 或 mmap)。这样,在随机访问迭代器上通过指针运算计算距离是瞬间完成的。

    这是一个工作示例,包含以下更改/注释:

    1. 使用内存映射输入数据 3
    2. save_start_pos 中的 omit[] 没有用处(该规则没有声明属性)
    4. getCurrentPos 的效率极低(以至于仅仅在 full_move 规则中使用 omit[current_pos],就会使解析速度降低几个数量级)。

      这是因为 boost::spirit::istream_iterator 会把此前读取过的所有状态保存在一个 deque 中,而执行 std::distance 时遍历它们是有代价的。
    5. 您的 CurrentPos<Iterator> filepos; 实例在构造函数执行完毕后就离开了作用域!这意味着之后调用 save_start_pos / current_pos 属于未定义行为(Undefined Behaviour)¹。请把它移出构造函数。

    6. 更微妙的一点是:添加语义动作时,要使用 full_move %= ...(请参阅 docs 和 blog)

    7. 您更改了某些规则的类型,以包含位置信息以及AST类型。这既不必要又有缺陷:AST类型与规则的tuple<size_t, T>版本不兼容。

      此外,例如games规则甚至没有公开位置,因为save_start_pos合成了unused_type(没有属性)。

      因此,删除整个元组业务,并在语义操作中使用filepos成员的状态:

          // %= (rather than =) keeps automatic attribute propagation although the
          // rule now contains a semantic action; omit[] drops the position value.
          full_move       %=
                              omit[filepos.current_pos [ reportProgress(_1) ]]  >> 
                              uint_
                              >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
                              >> (single_move | attr(no_move))
                              >> -result;
      
    8. 最后,为了演示如何报告严格递增的进度指示²,我加入了一个简单的 Phoenix actor:

      // Functor that prints a percentage to std::cerr, but only once the value
      // has grown by more than 10 points since the last one printed.
      struct reportProgress_f {
          size_t total_;             // the position that counts as 100 %
          mutable double pct = 0.0;  // last percentage printed; mutable because operator() is const

          // NOTE(review): left non-explicit on purpose — the enclosing parser
          // appears to initialise phx::function<reportProgress_f> from a plain
          // count, which relies on this conversion; confirm before adding explicit.
          reportProgress_f(size_t total) : total_(total) {}

          // pos: current position, in the same unit as the total given at construction.
          template<typename T>
          void operator()(T pos) const {
              const double current = pos * 100.0 / total_;
              const bool worthReporting = (current - pct) > 10;
              if (!worthReporting)
                  return;

              //sleep(1); // because it's way too fast otherwise...
              pct = current;
              // '\r' returns to the start of the line so the figure is overwritten in place.
              std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << std::flush;
          }
      };
      phx::function<reportProgress_f> reportProgress;
      
      注意:reportProgress 在构造时需要知道起始和结束迭代器,请参阅 pgn_parser 的构造函数。

      ¹ 你可以在 the recorded live stream(直播录像)中看到:我在第一次阅读代码时就发现了这个错误,但在让代码通过编译之后却把它忘了。程序于是尽职尽责地崩溃了 :) 然后我才想起来。

      ²即使面对回溯

      ³(并非严格必需,但我想我们的目标并不是让解析慢到真的需要进度指示器吧?)

      **Live On Coliru**

      #ifndef PGNGAMESEXTRACTOR_HPP
      #define PGNGAMESEXTRACTOR_HPP
      
      #include <string>
      #include <vector>
      #include <fstream>
      #include <stdexcept>
      
      #include <boost/spirit/include/qi.hpp>
      #include <boost/spirit/include/phoenix.hpp>
      #include <boost/spirit/repository/include/qi_iter_pos.hpp>
      
      namespace loloof64 {
      
          namespace phx = boost::phoenix;
          namespace qi  = boost::spirit::qi;
          namespace qr  = boost::spirit::repository::qi;
      
          /*
           * This class has been taken from http://marko-editor.com/articles/position_tracking/
           *
           * Position tracker made of two rules:
           *  - save_start_pos stores the iterator position at which it is invoked;
           *  - current_pos yields, as a std::size_t, the distance from that stored
           *    position to the position at which it is invoked.
           * Both semantic actions bind `this`, so the rules call back into this very
           * object whenever they are parsed.
          */
          template<typename Iterator>
          struct CurrentPos {
              CurrentPos() {
                  // NOTE(review): qi::eps matches without consuming input; why it is
                  // appended to both rules is not stated here — confirm before removing.
                  save_start_pos = qr::iter_pos [phx::bind(&CurrentPos::setStartPos, this, qi::_1)] >> qi::eps;
                  current_pos    = qr::iter_pos [qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)] >> qi::eps;
              }
      
              qi::rule<Iterator> save_start_pos;
              qi::rule<Iterator, std::size_t()> current_pos;
      
              private:
              // Remembers the position that later offsets are measured from.
              void setStartPos(const Iterator &iterator) {
                  start_pos_ = iterator;
              }
      
              // Offset of `iterator` from the remembered start. std::distance is a plain
              // subtraction for random-access iterators and a step-by-step walk otherwise.
              std::size_t getCurrentPos(const Iterator &iterator) {
                  return std::distance(start_pos_, iterator);
              }
      
              Iterator start_pos_;
          };
      
          // Outcome of a game; the comments give the PGN token the parser maps to each value.
          enum result_t {
              white_won,  // "1-0"
              black_won,  // "0-1"
              draw,       // "1/2-1/2"
              undecided   // "*"
          };
      
          // One tag pair from a game's header section.
          struct pgn_tag {
              std::string key;   // tag name
              std::string value; // tag value (the quoted string)
          };
      
          // One numbered move entry: the move number with white's and black's moves.
          struct game_move {
              unsigned move_number;
              std::string white_move; // the parser supplies an empty string when the entry starts with "..."
              std::string black_move; // the parser supplies an empty string when no black move follows
              result_t result;        // result token that may follow the move
          };
      
          // A complete game: its header tags followed by its moves.
          struct pgn_game {
              std::vector<pgn_tag> header;
              std::vector<game_move> moves;
          };
      
          // Parses a PGN file on construction and keeps the games it contains.
          //
          // The constructor may throw PgnParsingException (if bad pgn format) and
          // InputFileException (if missing file).
          class PgnGamesExtractor {
            public:
              // Parses the file found at inputFilePath.
              PgnGamesExtractor(std::string const& inputFilePath);
              virtual ~PgnGamesExtractor();

              // Returns a copy of the parsed games.
              std::vector<pgn_game> getGames() const
              {
                  return games;
              }

            private:
              void parseInput(std::string const&);

              std::vector<pgn_game> games;
          };
      
          // Error raised when the input is not well-formed PGN.
          class PgnParsingException : public virtual std::runtime_error {
            public:
              // explicit: a bare string must never silently convert into an exception.
              // The message is taken by const reference to avoid an extra copy.
              explicit PgnParsingException(std::string const& message) : std::runtime_error(message) {}
          };
      
          // Error raised when the input file cannot be read.
          class InputFileException : public virtual std::runtime_error {
            public:
              // explicit: a bare string must never silently convert into an exception.
              // The message is taken by const reference to avoid an extra copy.
              explicit InputFileException(std::string const& message) : std::runtime_error(message) {}
          };
      }
      
      #endif // PGNGAMESEXTRACTOR_HPP
      //#include "pgn_games_extractor.h"
      
      #include <boost/spirit/include/qi.hpp>
      #include <boost/fusion/include/adapt_struct.hpp>
      
      #include <iostream>
      #include <iomanip>
      
      // Adapt the AST structs as Boost.Fusion sequences; the members are listed in
      // the same order in which the structs declare them.
      BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
      BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
      BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
      
      namespace loloof64 {
          namespace qi = boost::spirit::qi;
      
          // Qi grammar for PGN text: zero or more games, each an optional tag header
          // followed by one or more numbered moves. Whitespace is skipped with
          // qi::space. While parsing, every full_move reports its offset from the
          // start of the input to reportProgress.
          template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::space_type> {
              // start/end delimit the whole input; their distance is the total that
              // reportProgress treats as 100 %.
              pgn_parser(Iterator start, Iterator end) 
                  : pgn_parser::base_type(games),
                    reportProgress(std::distance(start, end))
              {
                  using namespace qi;
      
                  // Empty string used as the attribute of a move that is not present.
                  const std::string no_move;
                  // Result tokens and the result_t value each one maps to.
                  result.add
                      ("1-0",     result_t::white_won)
                      ("0-1",     result_t::black_won)
                      ("1/2-1/2", result_t::draw)
                      ("*",       result_t::undecided);
      
                  // A tag is [name "value"]; the value is everything between the two quotes.
                  quoted_string    = '"' >> *~char_('"') >> '"';
                  tag              = '[' >> +alnum >> quoted_string >> ']';
                  header           = +tag;
                  // Either castling, or a run of file/piece letters followed by a run of
                  // square/capture/promotion characters and an optional "e.p." suffix.
                  regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
                  // raw[] exposes the matched input (move plus optional '+' or '#') as the attribute.
                  single_move      = raw [ regular_move >> -char_("+#") ];
                  // %= keeps automatic attribute propagation despite the semantic action.
                  // The position is only passed to reportProgress; omit[] keeps it out
                  // of the game_move attribute.
                  full_move       %=
                                      omit[filepos.current_pos [ reportProgress(_1) ]]  >> 
                                      uint_
                                      >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
                                      >> (single_move | attr(no_move))
                                      >> -result;
      
                  game_description = +full_move;
                  single_game      = -header >> game_description;
                  // Record the start position first so later offsets are measured from it.
                  games            = filepos.save_start_pos >> *single_game;
      
                  BOOST_SPIRIT_DEBUG_NODES(
                              (tag)(header)(quoted_string)(regular_move)(single_move)
                              (full_move)(game_description)(single_game)(games)
                          )
              }
      
          private:
              // Functor that prints a percentage to std::cerr, but only once the value
              // has grown by more than 10 points since the last one printed.
              struct reportProgress_f {
                  size_t total_;             // the position that counts as 100 %
                  mutable double pct = 0.0;  // last percentage printed; mutable because operator() is const
      
                  // NOTE(review): non-explicit; the constructor above initialises
                  // reportProgress from a plain count, which presumably relies on this
                  // conversion — confirm before adding explicit.
                  reportProgress_f(size_t total) : total_(total) {}
      
                  template<typename T>
                  void operator()(T pos) const { 
                      double newpct = pos * 100.0 / total_;
                      if ((newpct - pct) > 10) {
                          //sleep(1); // because it's way too fast otherwise...
                          pct = newpct;
                          // '\r' returns to the start of the line; the trailing blanks
                          // overwrite leftovers of a longer previous message.
                          std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << "    " << std::flush;
                      };
                  }
              };
              phx::function<reportProgress_f> reportProgress;
      
              // A member, so it lives as long as the rules that refer to its sub-rules.
              CurrentPos<Iterator> filepos;
      
              // Rules that use the qi::space skipper.
              qi::rule<Iterator, pgn_tag(),               qi::space_type> tag;
              qi::rule<Iterator, std::vector<pgn_tag>,    qi::space_type> header;
      
              qi::rule<Iterator, game_move(),             qi::space_type> full_move;
              qi::rule<Iterator, std::vector<game_move>,  qi::space_type> game_description;
      
              qi::rule<Iterator, pgn_game(),              qi::space_type> single_game;
              qi::rule<Iterator, std::vector<pgn_game>(), qi::space_type> games;
      
              // lexemes
              qi::symbols<char, result_t> result;
              qi::rule<Iterator, std::string()> quoted_string;
              qi::rule<Iterator> regular_move;
              qi::rule<Iterator, std::string()> single_move;
          };
      }
      
      #include <boost/iostreams/device/mapped_file.hpp>
      
      // Nothing to release by hand: the only data member cleans up after itself.
      loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
      
      // Parses the PGN file found at inputFilePath; all the work is done by parseInput().
      loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string const& inputFilePath)
      {
          parseInput(inputFilePath);
      }
      
      void loloof64::PgnGamesExtractor::parseInput(std::string const& inputFilePath) {
          boost::iostreams::mapped_file_source mf(inputFilePath);
      
          //if (inputFile.fail() || inputFile.bad())
              //throw new InputFileException("Could not read the input file !");
      
          typedef char const* It;
          std::vector<loloof64::pgn_game> temp_games;
      
          /* It iter(inputFile >> std::noskipws), end; */
          auto iter = mf.begin();
          auto end  = mf.end();
          loloof64::pgn_parser<It> parser(iter, end);
      
          //////////////////////////////////
          //std::cout << "About to parse the file" << std::endl;
          //////////////////////////////////
      
          bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
      
          //////////////////////////////////
          //std::cout << "Finished to parse the file" << std::endl;
          //////////////////////////////////
      
          if (success && iter == end) {
              games.swap(temp_games);
          } else {
              std::string error_fragment(iter, end);
              throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
          }
      }
      
      int main() {
          loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
          std::cout << "Parsed " << pge.getGames().size() << " games\n";
          for (auto& g : pge.getGames())
              for (auto& m : g.moves)
                  std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
      }
      

      使用样本输出

      Progress 32.6
      Progress 44.5
      Progress 55.5
      Progress 67.2
      Progress 77.2
      Progress 89.1
      Progress 100.0Parsed 1 games
      1.  e4  e5
      2.  Nf3 Nc6
      3.  d4  exd4
      4.  Bc4 Qf6
      5.  O-O d6
      6.  Ng5 Nh6
      7.  f4  Be7
      8.  e5  Qg6
      9.  exd6    cxd6
      10. c3  dxc3
      11. Nxc3    O-O
      12. Nd5 Bd7
      13. Rf3 Bg4
      14. Bd3 Bxf3
      15. Qxf3    f5
      16. Bc4 Kh8
      17. Nxe7    Nxe7
      18. Qxb7    Qf6
      19. Be3 Rfb8
      20. Qd7 Rd8
      21. Qb7 d5
      22. Bb3 Nc6
      23. Bxd5    Nd4
      24. Rd1 Ne2+
      25. Kf1 Rab8
      26. Qxa7    Rxb2
      27. Ne6 Qxe6
      28. Bxe6    Rxd1+
      29. Kf2 
      
        

      请注意,在终端上,进度指示将使用回车符而不是打印单独的行进行自我更新

答案 1 :(得分:0)

按照 this Sehe video tutorial 解决了问题。另外值得注意的是,由于这次他使用了 boost::iostreams::mapped_file_source,而不是像我那样使用 ifstream,解析速度大大加快了!因此,这个过程已不再需要进度条。

Cpp fileHpp file