使用Boost.Xpressive时,为什么匹配结果中存在unmatched_results

时间:2018-08-02 11:58:20

标签: xpressive

使用Xpressive时出现问题。

我定义一个语法,然后使用regex_match()解析输入字符串。

regex_match()返回true,并且填充了“match_results”对象。

但是当我遍历match_results及其nested_results时,发现其中有一些nested_results处于未匹配状态。

我将所有nested_results打印到控制台,其中未匹配的结果以“=”作为前缀。

我怀疑这是由于我在语法定义中误用了keep()造成的。

有人能告诉我原因吗?为什么结果中会有不匹配的nested_results?

这是我的代码:

#include <string>
#include <map>
#include <iostream>
#include <assert.h>
#include <fstream>
#include <boost/xpressive/xpressive.hpp>
#include <boost/xpressive/regex_actions.hpp>

// File-scope using-directives: every xpressive and std name below is written unqualified.
using namespace boost::xpressive;
using namespace std;

// Grammar rules are file-scope objects so they can refer to one another;
// they are default-constructed here and assigned in main().

// Rules for a single attribute term ("attri"): whitespace, attribute name/value,
// the NEAR sub-grammar and the basic string token.
sregex _wsp_, _owsp_;
sregex _attri_, _attri_name_, _attri_name_colon_, _attri_value_;
sregex _near_, _near_expr_, _near_and_, _near_or_, _near_not_, _near_group_, _near_blob_, _near_num_;
sregex _string_;

// Not assigned or referenced anywhere else in the visible code.
sregex _a_, _b_, _c_;

// Rules combining attribute terms with NOT / AND / OR and parentheses ("attri-expr").
sregex _attri_not_, _attri_and_, _attri_or_;
sregex _attri_expr_, _attri_group_, _attri_blob_;

// Debug-only rules: registered in update_regex_table() but never assigned a
// pattern in the visible code.
sregex _wsp1_, _wsp2_, _wsp3_;
sregex _owsp1_, _owsp2_, _owsp3_;

// Maps a regex id to the rule's variable name so output() can print readable labels.
map<regex_id_type, string> regex_table;

// Opened during static initialisation; not read anywhere in the visible code.
ifstream in("input.txt");
// Writing to a result file is disabled; `out` is instead a macro alias for std::cout.
//ofstream out("result.txt");
#define out (std::cout)

// Not used anywhere in the visible code.
int lineno = 0;

void update_regex_table()
{
    regex_table.clear();

    regex_table[_wsp_.regex_id()] = "_wsp_";
    regex_table[_owsp_.regex_id()] = "_owsp_";
    regex_table[_attri_.regex_id()] = "_attri_";
    regex_table[_attri_name_.regex_id()] = "_attri_name_";
    regex_table[_attri_name_colon_.regex_id()] = "_attri_name_colon_";
    regex_table[_attri_value_.regex_id()] = "_attri_value_";
    regex_table[_near_.regex_id()] = "_near_";
    regex_table[_near_expr_.regex_id()] = "_near_expr_";
    regex_table[_near_and_.regex_id()] = "_near_and_";
    regex_table[_near_or_.regex_id()] = "_near_or_";
    regex_table[_near_not_.regex_id()] = "_near_not_";
    regex_table[_near_group_.regex_id()] = "_near_group_";
    regex_table[_near_blob_.regex_id()] = "_near_blob_";
    regex_table[_near_num_.regex_id()] = "_near_num_";
    regex_table[_string_.regex_id()] = "_string_";

    regex_table[_attri_not_.regex_id()] = "_attri_not_";
    regex_table[_attri_and_.regex_id()] = "_attri_and_";
    regex_table[_attri_or_.regex_id()] = "_attri_or_";
    regex_table[_attri_expr_.regex_id()] = "_attri_expr_";
    regex_table[_attri_group_.regex_id()] = "_attri_group_";
    regex_table[_attri_blob_.regex_id()] = "_attri_blob_";

    regex_table[_wsp1_.regex_id()] = "_wsp1_";
    regex_table[_wsp2_.regex_id()] = "_wsp2_";
    regex_table[_wsp3_.regex_id()] = "_wsp3_";
    regex_table[_owsp1_.regex_id()] = "_owsp1_";
    regex_table[_owsp2_.regex_id()] = "_owsp2_";
    regex_table[_owsp3_.regex_id()] = "_owsp3_";
}

/// Looks up the rule name registered for `id` in `regex_table`.
///
/// @param id regex id as reported by `regex_id()`
/// @return the registered name, or an empty string when `id` is unknown
const string get_regex_name(const regex_id_type &id)
{
    const auto entry = regex_table.find(id);
    return entry != regex_table.end() ? entry->second : string();
}

void output(const smatch& what, int &depth)
{
    char prefix = '-';
    if (!what)
        prefix = '='; // the not matched regex.

    string regex_name;
    regex_name = get_regex_name(what.regex_id());

    int tmp = depth;
    for (; tmp > 0; tmp--) { out << prefix; }

    out << "[";
    if (!regex_name.empty())
        out << regex_name;
    else
        out << what.regex_id();
    out << "]";
    out << what[0] << endl;

    const smatch::nested_results_type& nested = what.nested_results();

    auto it = nested.begin();
    for (; it != nested.end(); it++)
    {
        const smatch& in = *it;

        depth++;
        output(*it, depth);
        depth--;
    }
}

// Builds the query grammar as static xpressive regexes, matches one hard-coded
// sample query against it and prints the resulting match tree to `out`.
//
// Operator legend for the static-regex syntax used below (see the Boost.Xpressive
// documentation): `>>` sequence, `|` alternation, prefix `+` one-or-more,
// prefix `*` zero-or-more, prefix `!` optional, keep(...) independent
// (non-backtracking) sub-expression, by_ref(r) reference to a rule that is
// assigned later, [check(...)] semantic predicate on the matched text.
int main()
{
    // grammar definition
    // Mandatory / optional whitespace.
    _wsp_ = keep(+_s);
    _owsp_ = keep(*_s);

    // A word made of bytes \x80-\xFF, word characters or '_', optionally followed
    // by "*". The check() rejects text that is exactly one of the keywords.
    _string_ = keep(+(boost::xpressive::range('\x80', '\xFF') | _w | '_') >> !as_xpr("*"))
        [check(str(_) != "AND" && str(_) != "OR" && str(_) != "NOT")];

    // NEAR sub-grammar: `string (NEAR / <digits> string)*`, then NOT, AND, OR
    // layers on top of it, plus a parenthesised group.
    _near_num_ = keep(+_d);
    _near_ = keep(_string_ >> *(_wsp_ >> "NEAR" >> _owsp_ >> "/" >> _owsp_ >> _near_num_ >> _wsp_ >> _string_));
    // by_ref: _near_group_ is only assigned further down.
    _near_blob_ = _near_ | by_ref(_near_group_);
    _near_not_ = keep(_near_blob_ >> *(_wsp_ >> "NOT" >> _wsp_ >> _near_blob_));
    // The "AND" keyword is optional: two operands separated by whitespace alone are accepted.
    _near_and_ = keep(_near_not_ >> *(_wsp_ >> !("AND" >> _wsp_) >> _near_not_));
    _near_expr_ = keep(_near_and_ >> *(_wsp_ >> "OR" >> _wsp_ >> _near_and_));
    _near_group_ = "(" >> _owsp_ >> _near_expr_ >> _owsp_ >> ")";

    // Attribute term: an optional `name:` prefix followed by a value, or a bare
    // NEAR expression. The name must not be one of the keywords.
    _attri_name_ = keep(+_w)[check(str(_) != "AND" && str(_) != "OR" && str(_) != "NOT")];
    _attri_value_ = _near_blob_;
    _attri_name_colon_ = as_xpr(":");
    _attri_ = (!(_attri_name_ >> _attri_name_colon_ >> _owsp_) >> _attri_value_) | _near_expr_ ;

    // Attribute expressions: NOT, AND (keyword optional), OR layers and a
    // parenthesised group. Unlike the NEAR layers above, keep() here wraps only
    // the repeated tail, not the first operand; the author marked these three
    // lines as the suspected cause of the unmatched nested results.
    _attri_blob_ = _attri_ | by_ref(_attri_group_);
    _attri_not_ = _attri_blob_ >> keep(*(_wsp_ >> "NOT" >> _wsp_ >> _attri_blob_));     // here! keep()
    _attri_and_ = _attri_not_ >> keep(*(_wsp_ >> !("AND" >> _wsp_) >> _attri_not_));    // here! keep()
    _attri_expr_ = _attri_and_ >> keep(*(_wsp_ >> "OR" >> _wsp_ >> _attri_and_));       // here! keep()

    _attri_group_ = "(" >> _owsp_ >> _attri_expr_ >> _owsp_ >> ")";

    // Register the rule names only now, after every rule has been assigned.
    update_regex_table();

    // test
    // Sample query: bare words followed by attribute terms chained with NOT.
    string input = "a b c d e f g h i poi_admin_level_1:a NOT poi_category_id:722 NOT poi_category_id:723 NOT poi_category_id:1";

    smatch what;

    // The whole input must match the top-level rule.
    if (regex_match(input, what, _attri_expr_))
    {
        int depth = 0;
        out << "OK!" << endl;
        output(what, depth);
    }
    else
    {
        // Full match failed: report it, then show whatever regex_search can
        // find in the input, if anything.
        out << "ERROR!" << endl;
        if (regex_search(input, what, _attri_expr_))
        {
            int depth = 0;
            output(what, depth);
        }
    }
    out << "============================================" << endl;


    return 0;
}

0 个答案:

没有答案