我有以下字符串:
arg1('value1') arg2('value '')2') arg3('user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21')
提取值的正则表达式如下:
boost::regex re_arg_values("('[^']*(?:''[^']*)*'[^)]*)");
上面的正则表达式正确地提取了值。但是当我包含逗号时,代码会失败。例如:
arg1('value1') arg2('value '')2') arg3('user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21**,**')
如何修改此正则表达式,使其也能匹配包含逗号的值?补充说明:该值可以包含空格、特殊字符以及制表符。代码使用 C++ 编写。
提前致谢。
答案 0 :(得分:1)
我不会在这里使用正则表达式。
目标应该是真正地解析这些值——毫无疑问,它们包含您之后需要解释的有用内容。
我会设计一个数据结构,如:
#include <map>
// Data model for the parsed configuration: a set of named, typed values.
namespace Config {
// Name of a setting (the text before the opening parenthesis).
using Key = std::string;
// A setting's value: holds exactly one of int, std::string or bool.
using Value = boost::variant<int, std::string, bool>;
// A single key/value entry.
using Setting = std::pair<Key, Value>;
// All entries, ordered by key; std::map keeps one value per key.
using Settings = std::map<Key, Value>;
}
针对这个数据结构,您可以使用 Boost Spirit 一一对应地编写解析器:
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
// Boost.Spirit Qi grammar that parses a sequence of `key(value)` entries
// into Config::Settings.
namespace Parser {
// Iterator type every rule below operates on.
using It = std::string::const_iterator;
using namespace Config;
namespace qi = boost::spirit::qi;
// Skipper type for setting_; qi::blank matches spaces and tabs, not newlines.
using Skip = qi::blank_type;
// Single-quoted string. The rule has no skipper, so blanks between the quotes
// are kept. Only the characters matched by qi::char_ end up in the attribute;
// the delimiting quotes and the escape prefixes are literals and are dropped.
// The alternatives are tried in order, so the two escape forms take
// precedence over the plain "non-quote" case.
qi::rule<It, std::string()> quoted_ = "'" >> *(
"'" >> qi::char_("'") // doubled '' yields one literal quote
| '\\' >> qi::char_ // backslash followed by any character yields that character
| ~qi::char_("'") // any character that is not a quote
) >> "'";
// Setting name: one or more characters from the listed set.
qi::rule<It, Key()> key_ = +qi::char_("a-zA-Z0-9_"); // for example
// Value alternatives, tried in this order: integer, quoted string, boolean.
qi::rule<It, Value()> value_ = qi::int_ | quoted_ | qi::bool_;
// One entry of the form key(value); blanks between the tokens are skipped.
qi::rule<It, Setting(), Skip> setting_ = key_ >> '(' >> value_ >> ')';
// Zero or more entries. The blank skipper is installed here via qi::skip,
// which is why this rule itself is declared without a skipper type.
qi::rule<It, Settings()> settings_ = qi::skip(qi::blank) [*setting_];
}
注意:`Value` 中保存的是取消转义之后的“真实”字符串(解析时只会跳过空格和制表符;如果还需要跳过换行符,可将跳过器改为 `qi::space_type`)。
您可以像下面这样使用它:
int main() {
std::string const input = R"( arg1('value1') arg2('value '')2') arg3('user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21**,**'))";
Config::Settings map;
if (parse(input.begin(), input.end(), Parser::settings_, map)) {
for(auto& entry : map)
std::cout << "config setting {" << entry.first << ", " << entry.second << "}\n";
}
}
打印
config setting {arg1, value1}
config setting {arg2, value ')2}
config setting {arg3, user'~!@#$%^&*_~!@#$%^&"*_-=+[{]}|;:<.>?21**,**}
**Live On Coliru**
#include <boost/spirit/include/qi.hpp>
#include <map>
#include <boost/fusion/adapted/std_pair.hpp>
// Data model for the parsed configuration: a set of named, typed values.
namespace Config {
// Name of a setting (the text before the opening parenthesis).
using Key = std::string;
// A setting's value: holds exactly one of int, std::string or bool.
using Value = boost::variant<int, std::string, bool>;
// A single key/value entry.
using Setting = std::pair<Key, Value>;
// All entries, ordered by key; std::map keeps one value per key.
using Settings = std::map<Key, Value>;
}
// Boost.Spirit Qi grammar that parses a sequence of `key(value)` entries
// into Config::Settings.
namespace Parser {
// Iterator type every rule below operates on.
using It = std::string::const_iterator;
using namespace Config;
namespace qi = boost::spirit::qi;
// Skipper type for setting_; qi::blank matches spaces and tabs, not newlines.
using Skip = qi::blank_type;
// Single-quoted string. The rule has no skipper, so blanks between the quotes
// are kept. Only the characters matched by qi::char_ end up in the attribute;
// the delimiting quotes and the escape prefixes are literals and are dropped.
// The alternatives are tried in order, so the two escape forms take
// precedence over the plain "non-quote" case.
qi::rule<It, std::string()> quoted_ = "'" >> *(
"'" >> qi::char_("'") // doubled '' yields one literal quote
| '\\' >> qi::char_ // backslash followed by any character yields that character
| ~qi::char_("'") // any character that is not a quote
) >> "'";
// Setting name: one or more characters from the listed set.
qi::rule<It, Key()> key_ = +qi::char_("a-zA-Z0-9_"); // for example
// Value alternatives, tried in this order: integer, quoted string, boolean.
qi::rule<It, Value()> value_ = qi::int_ | quoted_ | qi::bool_;
// One entry of the form key(value); blanks between the tokens are skipped.
qi::rule<It, Setting(), Skip> setting_ = key_ >> '(' >> value_ >> ')';
// Zero or more entries. The blank skipper is installed here via qi::skip,
// which is why this rule itself is declared without a skipper type.
qi::rule<It, Settings()> settings_ = qi::skip(qi::blank) [*setting_];
}
int main() {
std::string const input = R"( arg1('value1') arg2('value '')2') arg3('user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21**,**'))";
Config::Settings map;
if (parse(input.begin(), input.end(), Parser::settings_, map)) {
for(auto& entry : map)
std::cout << "config setting {" << entry.first << ", " << entry.second << "}\n";
}
}
作为对比,下面是用正则表达式实现的“相同”功能:
**Live On Coliru**
#include <boost/regex.hpp>
#include <boost/range/iterator_range.hpp>
#include <iostream>
#include <map>
namespace Config {
using Key = std::string;
using RawValue = std::string;
using Settings = std::map<Key, RawValue>;
Settings parse(std::string const& input) {
Settings settings;
boost::regex re(R"((\w+)\(('.*?')\))");
auto f = boost::make_regex_iterator(input, re);
for (auto& match : boost::make_iterator_range(f, {}))
settings.emplace(match[1].str(), match[2].str());
return settings;
}
}
int main() {
std::string const input = R"( arg1('value1') arg2('value '')2') arg3('user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21**,**'))";
Config::Settings map = Config::parse(input);
for(auto& entry : map)
std::cout << "config setting {" << entry.first << ", " << entry.second << "}\n";
}
打印
config setting {arg1, 'value1'}
config setting {arg2, 'value ''}
config setting {arg3, 'user\'~!@#$%^&*_~!@#$%^&"*_-=+[{]}\|;:<.>?21**,**'}
注意: