当我尝试使用boost 1.68正则表达式搜索带有模式的内容时,我写道:
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <boost/regex.hpp>
int main(int argc, char** argv)
{
const std::string pattern("\
(?(DEFINE)(?'NAMESPACE'\\w*::))(?#r)\
(?(DEFINE)(?'CONSTANT'(\"(?:[^\"\\\\]|\\\\.)*\")|(\\d+\\.?\\d*f?)))(?#r)\
(?(DEFINE)(?'VARIABLE'(?P>NAMESPACE)*([A-Za-z_]\\w*\\.)*[A-Za-z_]\\w*))(?#r)\
(?(DEFINE)(?'OPERAND'(\\+|-)*((?P>VARIABLE)|(?P>CONSTANT))))(?#r)\
(?(DEFINE)(?'EXPRESSION'\\s*(?P>OPERAND)\\s*(\\s*[\\*\\+-\\/]\\s*(?P>OPERAND))*))(?#r)\
(?(DEFINE)(?'ARGUMENTS'(?P>EXPRESSION)(,\\s*(?P>EXPRESSION))*))(?#r)\
(?(DEFINE)(?'FUNCTION_CALL'(?P>VARIABLE)\\(\\s*(?P>ARGUMENTS)?\\s*\\)))(?#r)\
(?P>FUNCTION_CALL)");
std::cout << "pattern: " << pattern << std::endl;
boost::regex simple_function(pattern, boost::regex_constants::perl);
std::ifstream file("flask");
if (file.is_open()) {
std::string context((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
boost::smatch results;
boost::match_flag_type flags = boost::match_default | boost::match_single_line;
auto start = context.cbegin();
auto end = context.cend();
int line_n = 0;
try {
while (start < end && boost::regex_search(start, end, results, simple_function, flags)) {
std::cout << '#' << line_n++ << ' ';
std::cout << results[0] << std::endl;
start = (results[0].length() == 0) ? results[0].first + 1 : results[0].second;
}
}
catch (...) {
std::cout << "exception throwed." << std::endl;
}
}
return 0;
}
运行时,它会抛出类型为 boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<std::runtime_error>> 的异常(内存位置 0x00000073378FE638),错误码是 error_stack。
但是我不知道原因:我在 regex101 和 regextester 上用相同的模式和相同的输入(context)测试过这个表达式,只有我的程序会失败并抛出异常。是我做错了什么,还是误解了 boost regex 的用法?有什么办法可以避免 error_stack 吗?
答案 0 :(得分:1)
有趣。为此,我不得不学习正则表达式的全新领域。为此表示敬意。
问题出在递归表达式(Recursive Expressions)上。您必须非常确定模式不会过于随意地递归,否则要么陷入无限递归,要么——就像这里的情况——“仅仅”是在输入较长时递归很容易变得过深。
所以,首先我整理一下:
// Recursive Perl-syntax grammar for a simple function call, written with raw
// string literals so the regex needs no C++ escaping. Each (?(DEFINE)...)
// group declares a named sub-pattern that later groups invoke via (?P>NAME);
// only the final (?P>FUNCTION_CALL) actually consumes input.
const std::string pattern(
    R"((?(DEFINE)(?'NAMESPACE'\w*::)))"
    R"((?(DEFINE)(?'CONSTANT'("(?:[^"\\]|\\.)*")|(\d+\.?\d*f?))))"
    R"((?(DEFINE)(?'VARIABLE'(?P>NAMESPACE)*([A-Za-z_]\w*\.)*[A-Za-z_]\w*)))"
    R"((?(DEFINE)(?'OPERAND'(\+|-)*((?P>VARIABLE)|(?P>CONSTANT)))))"
    // '-' is placed last in the operator class so it is a literal; as `\+-\/`
    // it formed the range '+'..'/' and also accepted ',' and '.' as operators.
    R"((?(DEFINE)(?'EXPRESSION'\s*(?P>OPERAND)\s*(\s*[\*\+\/-]\s*(?P>OPERAND))*)))"
    R"((?(DEFINE)(?'ARGUMENTS'(?P>EXPRESSION)(,\s*(?P>EXPRESSION))*)))"
    R"((?(DEFINE)(?'FUNCTION_CALL'(?P>VARIABLE)\(\s*(?P>ARGUMENTS)?\s*\))))"
    R"((?P>FUNCTION_CALL))");
现在,随着我开始“理解”这个模式,我决定不用正则表达式来描述语法¹,而是把它改写成 Spirit X3 语法:
// Spirit X3 grammar for a simple function call; a port of the named
// sub-patterns of the regex version (the rule names mirror the regex group names).
namespace rules {
using namespace x3;
// One identifier character: alphanumeric or underscore.
auto WORD = (alnum | char_('_'));
// One or more WORD characters followed by "::".
// NOTE(review): not wrapped in lexeme, so under the enclosing skip(space)
// whitespace between the characters is presumably tolerated - confirm intent.
auto NAMESPACE = +WORD >> "::";
// A double-quoted string (no escape handling: it ends at the first '"')
// or a floating-point number.
auto CONSTANT = ( lexeme [ '"' >> *~char_('"') >> '"' ] | double_ );
// Identifier: a letter or underscore followed by WORD characters; lexeme
// keeps the skipper from running inside the identifier.
auto ident = lexeme [ char_("A-Za-z_") >> *WORD ];
// Optional namespace qualifiers, then one or more identifiers separated by '.'.
auto VARIABLE = *NAMESPACE >> ident % '.';
// Any number of unary '+'/'-' signs in front of a variable or constant.
auto OPERAND = *(char_("+-")) >> (VARIABLE | CONSTANT);
// Operands separated by one of the binary operators * + / -.
auto EXPRESSION = OPERAND % char_("*+/-");
// Comma-separated list of expressions.
auto ARGUMENTS = EXPRESSION % ',';
// Callee name followed by a parenthesised, optional argument list.
auto FUNCTION_CALL = VARIABLE >> '(' >> -ARGUMENTS >> ')';
// Entry rule with a std::string attribute: whitespace is skipped between
// tokens and raw[] yields the matched source text rather than the
// sub-parsers' attributes.
auto simple_function = rule<struct simple_function_, std::string> {"simple_function"}
= skip(space) [ x3::raw[FUNCTION_CALL] ];
}
这个版本更准确一些,因为它在更多合适的位置接受空白(skip 与 lexeme 的区别²)。此外,它看起来也没有严重的回溯问题:
#include <iostream>
#include <fstream>
#include <sstream>
#include <iterator>
#include <string>
#include <boost/regex.hpp>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Spirit X3 grammar for a simple function call; a port of the named
// sub-patterns of the regex version (the rule names mirror the regex group names).
namespace rules {
using namespace x3;
// One identifier character: alphanumeric or underscore.
auto WORD = (alnum | char_('_'));
// One or more WORD characters followed by "::".
// NOTE(review): not wrapped in lexeme, so under the enclosing skip(space)
// whitespace between the characters is presumably tolerated - confirm intent.
auto NAMESPACE = +WORD >> "::";
// A double-quoted string (no escape handling: it ends at the first '"')
// or a floating-point number.
auto CONSTANT = ( lexeme [ '"' >> *~char_('"') >> '"' ] | double_ );
// Identifier: a letter or underscore followed by WORD characters; lexeme
// keeps the skipper from running inside the identifier.
auto ident = lexeme [ char_("A-Za-z_") >> *WORD ];
// Optional namespace qualifiers, then one or more identifiers separated by '.'.
auto VARIABLE = *NAMESPACE >> ident % '.';
// Any number of unary '+'/'-' signs in front of a variable or constant.
auto OPERAND = *(char_("+-")) >> (VARIABLE | CONSTANT);
// Operands separated by one of the binary operators * + / -.
auto EXPRESSION = OPERAND % char_("*+/-");
// Comma-separated list of expressions.
auto ARGUMENTS = EXPRESSION % ',';
// Callee name followed by a parenthesised, optional argument list.
auto FUNCTION_CALL = VARIABLE >> '(' >> -ARGUMENTS >> ')';
// Entry rule with a std::string attribute: whitespace is skipped between
// tokens and raw[] yields the matched source text rather than the
// sub-parsers' attributes.
auto simple_function = rule<struct simple_function_, std::string> {"simple_function"}
= skip(space) [ x3::raw[FUNCTION_CALL] ];
}
// Reads the whole file "flask", collects the source text of every function
// call that rules::simple_function recognises, and prints one call per line.
// Returns 1 (with a message on stderr) when the file cannot be opened.
int main()
{
    std::ifstream file("flask");
    if (!file) {
        // Without this check a missing file looked exactly like an empty one.
        std::cerr << "cannot open input file \"flask\"\n";
        return 1;
    }

    std::string const context(std::istreambuf_iterator<char>(file), {});

    // seek[] advances through the input to the next position where the rule
    // matches; the Kleene star repeats that until no further match is found.
    std::vector<std::string> calls;
    parse(context.begin(), context.end(), *x3::seek[rules::simple_function], calls);

    for (auto const& call : calls) {
        std::cout << call << "\n";
    }
}
输出:
anno::copyright_notice("XXXXX")
anno::author("Someone")
anno::contributor("")
state::texture_coordinate(0)
state::texture_tangent_u(0)
state::texture_tangent_v(0)
¹我知道Perl6很棒,但是仍然
附加内容:除了仅仅显示 Spirit X3 匹配到的文本之外,下面对上面的快速移植版本做了一点改进,演示如何使用同样的规则解析出强类型的 AST 数据类型。
所做的更改:
- 修复了标识符上与 lexeme 有关的错误
- 添加了 AST 类型 AST::Variable、AST::Literal(用于字符串或数字字面量)和 AST::FunctionCall
- 支持字符串字面量中的转义(因此 "A\"B" 现在会被正确解析为包含 A"B 的 AST::Literal)
- 启用调试(取消注释 #define BOOST_SPIRIT_X3_DEBUG)后,您可以实际看到这些字面量被解析
同时打印“源”解析和“ AST”解析:
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <fstream>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
namespace x3 = boost::spirit::x3;
// Strongly typed parse results produced by the grammar in namespace `rules`.
namespace AST {
    /// A possibly namespace-qualified, possibly dotted name.
    /// `namespaces` holds the leading qualifiers, `nested_objects` the
    /// dot-separated identifiers that follow them.
    struct Variable {
        std::vector<std::string> namespaces, nested_objects;

        /// Prints every namespace as "[ns]::" followed by the objects as
        /// "[a].[b]...", e.g. "[anno]::[author]".
        friend std::ostream& operator<<(std::ostream& os, Variable const& v) {
            // Iterate by const reference: the strings are only read, so
            // copying each element per iteration was wasted work.
            for (auto const& ns : v.namespaces)
                os << '[' << ns << "]::";

            bool first = true;
            for (auto const& obj : v.nested_objects) {
                os << (first ? "" : ".") << '[' << obj << ']';
                first = false;
            }
            return os;
        }
    };

    /// A literal operand: either a string or a floating-point number.
    using Literal = boost::variant<std::string, double>;

    /// A call: the callee's name plus the source text of each argument.
    struct FunctionCall {
        Variable name;
        std::vector<std::string> arguments;
    };
}
// Adapt the AST structs as Boost.Fusion sequences, naming their members in
// declaration order - presumably so the X3 rules declared with these
// attribute types can fill the members positionally.
BOOST_FUSION_ADAPT_STRUCT(AST::Variable, namespaces, nested_objects)
BOOST_FUSION_ADAPT_STRUCT(AST::FunctionCall, name, arguments)
// Spirit X3 grammar for a simple function call, with rules typed so that a
// match is parsed into the AST:: data types instead of plain text.
namespace rules {
using namespace x3;
// Identifier: a letter or underscore followed by alphanumerics/underscores.
// lexeme disables skipping inside it; raw yields the matched text as the
// rule's std::string attribute.
auto ident = rule<struct ident_, std::string> {"ident"}
= lexeme [ raw [ (alpha|'_') >> *(alnum|'_') ] ];
// A namespace qualifier: an identifier followed by "::".
auto namespace_ = rule<struct namespace_, std::string> {"namespace_"}
= ident >> "::";
// Double-quoted string with backslash escapes: a backslash followed by any
// character, or any character other than '"'.
auto quoted_str = rule<struct quoted_str_, std::string> {"quoted_str"}
= lexeme [ '"' >> *('\\' >> char_ | ~char_('"')) >> '"' ];
// A literal: quoted string or floating-point number, exposed as AST::Literal.
auto constant = rule<struct constant_, AST::Literal> {"constant"}
= quoted_str | double_;
// Optional namespace qualifiers, then one or more identifiers separated by
// '.'; exposed as AST::Variable.
auto variable = rule<struct variable_, AST::Variable> {"variable"}
= *namespace_ >> ident % '.';
// Any number of unary '+'/'-' signs in front of a variable or constant.
// Declared without an attribute type.
auto operand = rule<struct operand_> {"operand"}
= *char_("+-") >> (variable | constant);
// Operands separated by one of the binary operators * + / -; raw exposes
// the whole expression as its source text.
auto expression = rule<struct expression_, std::string> {"expression"}
= raw [ operand % char_("*+/-") ];
// Comma-separated list of expressions.
auto arguments = expression % ',';
// Callee name followed by a parenthesised, optional argument list; exposed
// as AST::FunctionCall.
auto function_call = rule<struct function_call_, AST::FunctionCall> {"function_call"}
= variable >> '(' >> -arguments >> ')';
// Entry point: function_call with whitespace skipped between tokens.
auto simple_function = skip(space) [ function_call ];
}
// Parses the file "flask" twice with the same grammar: first collecting the
// raw source text of each recognised function call, then parsing the calls
// into AST::FunctionCall objects and printing their name and arguments.
// Returns 1 (with a message on stderr) when the file cannot be opened.
int main()
{
    // parsing the raw sources out as string
    {
        std::ifstream file("flask");
        if (!file) {
            std::cerr << "cannot open input file \"flask\"\n";
            return 1;
        }
        // Turn off the stream's own whitespace skipping so that the parser
        // (and its skip(space) directive) sees every character of the input,
        // as recommended for Spirit's stream iterator.
        file.unsetf(std::ios::skipws);

        boost::spirit::istream_iterator f(file), l;
        std::vector<std::string> src;
        parse(f, l, *x3::seek[x3::raw[rules::simple_function]], src);

        for (auto const& call : src)
            std::cout << call << "\n";
    }

    // parsing AST::FunctionCall objects
    {
        std::ifstream file("flask");
        if (!file) {
            std::cerr << "cannot open input file \"flask\"\n";
            return 1;
        }
        file.unsetf(std::ios::skipws);  // see above

        boost::spirit::istream_iterator f(file), l;
        std::vector<AST::FunctionCall> parsed;
        parse(f, l, *x3::seek[rules::simple_function], parsed);

        for (auto const& call : parsed) {
            std::cout << call.name << "\n";
            for (auto const& argument : call.arguments)
                std::cout << " - argument: " << argument << "\n";
        }
    }
}