使用Xpressive时出现问题。
我定义一个语法,然后使用regex_match()解析输入字符串。
regex_match()的返回值为true,并且会填充“match_results”对象。
但是当我遍历match_results及其nested_results时,发现其中一些nested_results处于未匹配状态。
我将所有nested_results打印到控制台,其中未匹配的结果以“=”开头。
我怀疑这是由于我在语法定义中误用了keep()造成的。
有人能告诉我原因吗?为什么结果中会有不匹配的nested_results?
这是我的代码:
#include <string>
#include <map>
#include <iostream>
#include <assert.h>
#include <fstream>
#include <boost/xpressive/xpressive.hpp>
#include <boost/xpressive/regex_actions.hpp>
using namespace boost::xpressive;
using namespace std;
// ---- Grammar rules (all assigned in main()) --------------------------------
// Whitespace: _wsp_ is one-or-more, _owsp_ is zero-or-more whitespace chars.
sregex _wsp_, _owsp_;
// A single attribute: optional "name:" prefix followed by a value.
sregex _attri_, _attri_name_, _attri_name_colon_, _attri_value_;
// Value sub-grammar: terms joined by NEAR/<num>, layered with NOT / AND / OR
// and parenthesised groups.
sregex _near_, _near_expr_, _near_and_, _near_or_, _near_not_, _near_group_, _near_blob_, _near_num_;
// A single search term.
sregex _string_;
// Not assigned or referenced anywhere in the code visible here.
sregex _a_, _b_, _c_;
// Attribute-expression layer: attributes combined with NOT / AND / OR ...
sregex _attri_not_, _attri_and_, _attri_or_;
// ... plus the top-level expression, parenthesised group and operand rule.
sregex _attri_expr_, _attri_group_, _attri_blob_;
// Debug placeholders: registered in regex_table but never assigned a pattern
// in the code visible here.
sregex _wsp1_, _wsp2_, _wsp3_;
sregex _owsp1_, _owsp2_, _owsp3_;
// Maps a rule's regex_id() to its printable name; filled by update_regex_table().
map<regex_id_type, string> regex_table;
// Opened during static initialization; not read anywhere in the code visible here.
ifstream in("input.txt");
// Output sink. Alternative file sink kept for reference:
//ofstream out("result.txt");
#define out (std::cout)
// Not used anywhere in the code visible here.
int lineno = 0;
void update_regex_table()
{
regex_table.clear();
regex_table[_wsp_.regex_id()] = "_wsp_";
regex_table[_owsp_.regex_id()] = "_owsp_";
regex_table[_attri_.regex_id()] = "_attri_";
regex_table[_attri_name_.regex_id()] = "_attri_name_";
regex_table[_attri_name_colon_.regex_id()] = "_attri_name_colon_";
regex_table[_attri_value_.regex_id()] = "_attri_value_";
regex_table[_near_.regex_id()] = "_near_";
regex_table[_near_expr_.regex_id()] = "_near_expr_";
regex_table[_near_and_.regex_id()] = "_near_and_";
regex_table[_near_or_.regex_id()] = "_near_or_";
regex_table[_near_not_.regex_id()] = "_near_not_";
regex_table[_near_group_.regex_id()] = "_near_group_";
regex_table[_near_blob_.regex_id()] = "_near_blob_";
regex_table[_near_num_.regex_id()] = "_near_num_";
regex_table[_string_.regex_id()] = "_string_";
regex_table[_attri_not_.regex_id()] = "_attri_not_";
regex_table[_attri_and_.regex_id()] = "_attri_and_";
regex_table[_attri_or_.regex_id()] = "_attri_or_";
regex_table[_attri_expr_.regex_id()] = "_attri_expr_";
regex_table[_attri_group_.regex_id()] = "_attri_group_";
regex_table[_attri_blob_.regex_id()] = "_attri_blob_";
regex_table[_wsp1_.regex_id()] = "_wsp1_";
regex_table[_wsp2_.regex_id()] = "_wsp2_";
regex_table[_wsp3_.regex_id()] = "_wsp3_";
regex_table[_owsp1_.regex_id()] = "_owsp1_";
regex_table[_owsp2_.regex_id()] = "_owsp2_";
regex_table[_owsp3_.regex_id()] = "_owsp3_";
}
// Looks up the printable name registered for a regex id.
//
// id - value returned by regex_id() on a regex or match result.
// Returns the name stored in regex_table, or an empty string when the id
// has no entry.
const string get_regex_name(const regex_id_type &id)
{
    const auto found = regex_table.find(id);
    return found != regex_table.end() ? found->second : string();
}
void output(const smatch& what, int &depth)
{
char prefix = '-';
if (!what)
prefix = '='; // the not matched regex.
string regex_name;
regex_name = get_regex_name(what.regex_id());
int tmp = depth;
for (; tmp > 0; tmp--) { out << prefix; }
out << "[";
if (!regex_name.empty())
out << regex_name;
else
out << what.regex_id();
out << "]";
out << what[0] << endl;
const smatch::nested_results_type& nested = what.nested_results();
auto it = nested.begin();
for (; it != nested.end(); it++)
{
const smatch& in = *it;
depth++;
output(*it, depth);
depth--;
}
}
// Builds the attribute-expression grammar, matches one hard-coded query
// string against it, and prints the resulting match tree to `out`.
// Always returns 0.
int main()
{
// grammar definition
// Whitespace rules: one-or-more and zero-or-more whitespace characters,
// each wrapped in keep() (xpressive's independent sub-expression).
_wsp_ = keep(+_s);
_owsp_ = keep(*_s);
// A term: one or more of (byte in \x80-\xFF | word char | '_'), optionally
// followed by "*". check() rejects text equal to AND, OR or NOT.
_string_ = keep(+(boost::xpressive::range('\x80', '\xFF') | _w | '_') >> !as_xpr("*"))
[check(str(_) != "AND" && str(_) != "OR" && str(_) != "NOT")];
// Distance operand of NEAR: one or more digits.
_near_num_ = keep(+_d);
// term ( <ws> NEAR [ws] / [ws] <num> <ws> term )*
_near_ = keep(_string_ >> *(_wsp_ >> "NEAR" >> _owsp_ >> "/" >> _owsp_ >> _near_num_ >> _wsp_ >> _string_));
// Operand of the value sub-grammar. _near_group_ is only assigned further
// down, so it is embedded with by_ref().
_near_blob_ = _near_ | by_ref(_near_group_);
// Operator layers, innermost first: NOT chains, then AND chains (the "AND"
// keyword itself is optional, so bare whitespace between operands also
// matches), then OR chains.
_near_not_ = keep(_near_blob_ >> *(_wsp_ >> "NOT" >> _wsp_ >> _near_blob_));
_near_and_ = keep(_near_not_ >> *(_wsp_ >> !("AND" >> _wsp_) >> _near_not_));
_near_expr_ = keep(_near_and_ >> *(_wsp_ >> "OR" >> _wsp_ >> _near_and_));
// Parenthesised value expression; closes the recursion started in _near_blob_.
_near_group_ = "(" >> _owsp_ >> _near_expr_ >> _owsp_ >> ")";
// Attribute name: one or more word chars, excluding the operator keywords.
_attri_name_ = keep(+_w)[check(str(_) != "AND" && str(_) != "OR" && str(_) != "NOT")];
_attri_value_ = _near_blob_;
_attri_name_colon_ = as_xpr(":");
// Either [name ":" [ws]] value, or a full value expression with no name.
_attri_ = (!(_attri_name_ >> _attri_name_colon_ >> _owsp_) >> _attri_value_) | _near_expr_ ;
// Operand of the attribute layer; _attri_group_ is assigned below, hence by_ref().
_attri_blob_ = _attri_ | by_ref(_attri_group_);
// Same NOT / AND / OR layering as the _near_* rules above.
// NOTE(review): here keep() wraps only the repeated tail, whereas the _near_*
// layers wrap the whole rule. The original author flagged these three lines
// as the suspected cause of the unmatched nested results — confirm.
_attri_not_ = _attri_blob_ >> keep(*(_wsp_ >> "NOT" >> _wsp_ >> _attri_blob_)); // keep() on the tail only
_attri_and_ = _attri_not_ >> keep(*(_wsp_ >> !("AND" >> _wsp_) >> _attri_not_)); // keep() on the tail only
_attri_expr_ = _attri_and_ >> keep(*(_wsp_ >> "OR" >> _wsp_ >> _attri_and_)); // keep() on the tail only
_attri_group_ = "(" >> _owsp_ >> _attri_expr_ >> _owsp_ >> ")";
// Register rule names now that every rule has been assigned.
update_regex_table();
// test
string input = "a b c d e f g h i poi_admin_level_1:a NOT poi_category_id:722 NOT poi_category_id:723 NOT poi_category_id:1";
smatch what;
// regex_match() must consume the whole input to succeed.
if (regex_match(input, what, _attri_expr_))
{
int depth = 0;
out << "OK!" << endl;
output(what, depth);
}
else
{
out << "ERROR!" << endl;
// Fall back to regex_search() and print whatever it finds in the input.
if (regex_search(input, what, _attri_expr_))
{
int depth = 0;
output(what, depth);
}
}
out << "============================================" << endl;
return 0;
}