在boost.spirit中编码

时间:2010-09-09 11:42:03

标签: c++ encoding boost-spirit

如何在分配a时为值设置编码?我需要设置西里尔文,但我不知道该怎么做

    #include "filter_data.h"
    #include <boost/bind.hpp>
    #include <boost/spirit.hpp>
    #include <boost/spirit/actor.hpp>
    #include <boost/spirit/attribute.hpp>
    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/home/support/char_encoding/standard_wide.hpp>
    #include <boost/spirit/home/support/char_class.hpp>
    #include <string>

    using namespace boost::spirit;
    using char_encoding::standard_wide;
    class filter_grammar : public grammar<filter_grammar>
    {
    public:

        static filter_data _filter_data;

    protected:

        static std::pair<std::wstring,std::wstring> _replace_arg;
        static std::wstring _remove_arg;
        static std::wstring _duplicate_arg;
        static std::wstring _errorstr;

        static void add_replace_arg();
        static void add_remove_arg();
        static void add_duplicate_arg();
        static void err();

    public:
        template<typename ScannerT>
        class definition {
        public:
            definition( const filter_grammar & self );

            rule<ScannerT> const & start() const ;

        private:
            rule<ScannerT> filters, filter_replace, filter_remove, filter_duplicate,errstr,arg;

            typedef definition _self;
        };
    };


    template<typename ScannerT>
    filter_grammar::definition<ScannerT>::definition( const filter_grammar & self )
    {
            filters = *(filter_replace|filter_remove|filter_duplicate|errstr);
            filter_replace = str_p("replace_word")>>blank_p>>arg[assign_a(_replace_arg.first)]>>blank_p>>arg[assign_a(_replace_arg.second)][boost::bind(&filter_grammar::add_replace_arg)]>>!(ch_p('\r'))>>ch_p('\n');
            filter_remove  = str_p("remove_word")>>blank_p>>arg[assign_a(_remove_arg)][boost::bind(&filter_grammar::add_remove_arg)]>>!(ch_p('\r'))>>ch_p('\n');
            filter_duplicate =  str_p("duplicate_word")>>blank_p>>arg[assign_a(_duplicate_arg)][boost::bind(&filter_grammar::add_duplicate_arg)]>>!(ch_p('\r'))>>ch_p('\n');
            errstr = *(arg[assign_a(_errorstr)][boost::bind(&filter_grammar::err)]>>!ch_p('_')>>!arg[assign_a(_errorstr)][boost::bind(&filter_grammar::err)]>>!blank_p)>>!(ch_p('\r'))>>ch_p('\n');

            arg = lexeme_d[+anychar_p];
    }

    template<typename ScannerT>
    rule<ScannerT> const & filter_grammar::definition<ScannerT>::start() const
    {
            return filters;
    }
filter_data filter_grammar::_filter_data;

std::pair<std::wstring,std::wstring> filter_grammar::_replace_arg;
std::wstring filter_grammar::_remove_arg;
std::wstring filter_grammar::_duplicate_arg;
std::wstring filter_grammar::_errorstr;

void filter_grammar::add_replace_arg ()
{
    try
    {
        _filter_data._replace.insert(std::make_pair(_filter_data._total_count,_replace_arg));
        _filter_data._queue.insert(std::make_pair(_filter_data._total_count,std::make_pair(1,_filter_data._total_count)));
        _filter_data._total_count++;
    }
    catch(std::exception& e)
    {
        std::wcerr << "Exception:" << e.what () << std::endl;
    }
}

void filter_grammar::add_remove_arg ()
{
    try
    {
        _filter_data._remove.insert(std::make_pair(_filter_data._total_count,_remove_arg));
        _filter_data._queue.insert(std::make_pair(_filter_data._total_count,std::make_pair(2,_filter_data._total_count)));
        _filter_data._total_count++;
    }
    catch(std::exception& e)
    {
        std::wcerr << "Exception:" << e.what () << std::endl;
    }
}

void filter_grammar::add_duplicate_arg ()
{
    try
    {
        _filter_data._duplicate.insert(std::make_pair(_filter_data._total_count,_duplicate_arg));
        _filter_data._queue.insert(std::make_pair(_filter_data._total_count,std::make_pair(3,_filter_data._total_count)));
        _filter_data._total_count++;
    }
    catch(std::exception& e)
    {
        std::wcerr << "Exception:" << e.what () << std::endl;
    }
}

void filter_grammar::err ()
{
    std::wcerr<<"Error - unknown symbol: "<<_errorstr<<" in filter file"<<std::endl;

}

1 个答案:

答案 0 :(得分:1)

首先,您将Spirit.Classic(旧版本的Pof Spirit)与Spirit.Qi(当前版本)混合使用。请不要这样做,因为它不会起作用,在最好的情况下它不会干扰,但很可能它会破坏。

其次,Spirit.Classic不能很好地支持编码。我建议你完全切换到新版本(Spirit.Qi)。这为您提供了使用积极支持的代码库的额外好处,以及比旧版本快得多的东西。恕我直言,它也更容易使用,但是YMMV。

第三,如果您决定切换到Spirit.Qi,您可能想要使用预先存在的编码。这可以通过显式使用,例如,iso8859_1 :: char_或standard_wide :: char_而不是默认的qi :: char_(相当于ascii :: char_)来完成。 qi :: string具有类似编码的特定对应物。 Spirit.Qi也有一些初步的unicode支持,但这没有记录得太好。如果您对此感兴趣,请查看scheme解析器示例。另一种选择是为Spirit编写自己的编码,但这需要更多解释,所以你最好在Spirit邮件列表上讨论这个。