C++ 正则表达式:如何把子匹配捕获到向量中

时间:2015-04-06 12:12:50

标签: c++ regex parsing cookies boost

假设使用的是 ECMAScript 语法的正则表达式(C++11 的 std::regex 或 boost::xpressive)。输入是一个格式为 key1=value1;key2=value2;key3=value3 的字符串,所以正则表达式为

((\w+)\=(\w+)\;)*((\w)+\=(\w)+)\;?

我想把所有的键和值分别放到两个 std::vector<std::string> 中,所以子匹配 1、2、3、4 包含了我需要的一切。但这里有一个问题——\1 可能会匹配多次,也就是说它实际上是一个数组。无论是 C++ 标准正则表达式还是 boost::xpressive,我都没有找到可以取得这些重复捕获的接口。也许接口是有的,只是不那么明显。

1 个答案:

答案 0 :(得分:2)

虽然您一直没有回复需求的确切性质,但我想向您展示我针对 Set-Cookie 响应标头所写的解析器,其语法见 http://tools.ietf.org/html/rfc2109 §4.2.2 “Set-Cookie 语法”。

请注意

  • 这是对规范的直接翻译。
  • 我试图严格遵守规范(关于日期格式,数字格式,区分大小写等)。
  • 另请注意,这借鉴了Boost Spirit库的丰富经验,因此很可能不是每个人都可以通过这种方式将规范转换为工作代码
  • 此外,由于严格遵守规范,许多现实生活中的 cookie 将被拒绝(例如 Set-Cookie: 之后缺少空格的情况)。要让它能应对现实情况,需要做一些调整(具体来说,更改 skipper(跳过器)类型,并把相应的子解析器表达式标记为 lexeme)。
  • 最后,关键是:这只是其中一个RFC 。还有其他版本,它们指定了此处未涉及的变体。在现实生活中,HTTP客户端必须借助启发式方法来完善解密cookie的艺术。它们通常根据版本切换到完全不同的解析代码。

      

    轶事:就在今天,我追踪了我们软件中的一个错误,原因是 OpenJDK 6 悄悄吞掉了 Cookie,而且不抛出任何异常。你猜对了:原因是遇到了不受支持的功能。当然,Java 6 已经过时了,但在我们的下一个版本发布之前,我们不会放弃对它的支持。所以……我们又编写了更多的解析代码。

变式1:无界属性列表

这是最通用的版本,其中 AST 包含一个由 variant 属性组成的列表(与语法所描述的一致):

lexeme

Live On Coliru

// One alias per cookie attribute kind. Each alias instantiates the
// `attribute<tag, V>` template with a tag type from namespace `attr`.

// Attributes with a typed payload.
using expires_av   = attribute<attr::expires, rfc1123date>;
using max_age_av   = attribute<attr::max_age, int>;

// Attributes that rely on attribute<>'s default value type.
using domain_av    = attribute<attr::domain>;
using path_av      = attribute<attr::path>;
using extension_av = attribute<attr::extension>;

// Attributes instantiated with `void` as the value type.
using secure_av    = attribute<attr::secure, void>;
using httponly_av  = attribute<attr::httponly, void>;

// Any single cookie attribute. The order of the alternatives is kept
// exactly as in the original declaration.
using cookie_av = boost::variant<
        expires_av, max_age_av,
        domain_av, path_av,
        secure_av, httponly_av,
        extension_av>;

/// The leading `name=value` part of a cookie.
struct cookie_pair {
    std::string name;   ///< text before the '='
    std::string value;  ///< text after the '='
};

// A parsed cookie: the name/value pair plus every attribute that
// followed it, stored as a list of cookie_av variants.
struct cookie {
    cookie_pair pair;
    std::list<cookie_av> attributes;
};

变式2:固定的可选属性集

这是一个在现实生活中可能更方便的版本。因为,虽然语法不限制说,//#define BOOST_SPIRIT_DEBUG #include <boost/fusion/adapted/struct.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/date_time/posix_time/posix_time_io.hpp> namespace qi = boost::spirit::qi; #include <boost/date_time.hpp> #include <boost/date_time/local_time/local_time.hpp> #include <boost/date_time/posix_time/posix_time.hpp> #include <boost/filesystem/operations.hpp> namespace http { class datetime { using clock = boost::local_time::local_sec_clock; boost::local_time::local_date_time m_dt; public: datetime() : m_dt(clock::local_time(boost::local_time::time_zone_ptr())) { } friend std::ostream& operator<<(std::ostream& os, datetime const& o) { std::ostream imbued(os.rdbuf()); imbued.imbue(std::locale(imbued.getloc(), new boost::local_time::local_time_facet("%a, %d %b %Y %H:%M:%S GMT"))); imbued << o.m_dt; return os; } friend std::istream& operator>>(std::istream& is, datetime& o) { std::istream imbued(is.rdbuf()); imbued.imbue(std::locale(std::locale::classic(), new boost::local_time::local_time_input_facet("%a, %d %b %Y %H:%M:%S GMT"))); imbued >> o.m_dt; return is; } }; } namespace ast { using rfc1123date = http::datetime; // rfc1123 formatting namespace attr { struct expires { static constexpr char const* name() { return "expires"; } }; struct max_age { static constexpr char const* name() { return "max-age"; } }; struct domain { static constexpr char const* name() { return "domain"; } }; struct path { static constexpr char const* name() { return "path"; } }; struct secure { static constexpr char const* name() { return "secure"; } }; struct httponly { static constexpr char const* name() { return "httponly"; } }; struct extension { static constexpr char const* name() { return "extension"; } }; } template <typename tag, typename V = std::string> struct attribute { V value; attribute(V value = {}) : value(std::move(value)) {} friend std::ostream& operator<<(std::ostream& os, attribute const& attr) { return os << "[ " << 
tag::name() << "=" << attr.value << " ]"; } }; template <typename tag> struct attribute<tag, void> { //attribute(bool = true) {}; friend std::ostream& operator<<(std::ostream& os, attribute const&) { return os << "[ attribute: present ]"; } }; using expires_av = attribute<attr::expires, rfc1123date>; using max_age_av = attribute<attr::max_age, int>; using domain_av = attribute<attr::domain>; using path_av = attribute<attr::path>; using secure_av = attribute<attr::secure, void>; using httponly_av = attribute<attr::httponly, void>; using extension_av = attribute<attr::extension>; using cookie_av = boost::variant< expires_av, max_age_av, domain_av, path_av, secure_av, httponly_av, extension_av >; struct cookie_pair { std::string name, value; }; struct cookie { cookie_pair pair; std::list<cookie_av> attributes; }; } BOOST_FUSION_ADAPT_STRUCT(ast::cookie_pair, (std::string, name) (std::string, value) ) BOOST_FUSION_ADAPT_STRUCT(ast::cookie, (ast::cookie_pair, pair) (std::list<ast::cookie_av>, attributes) ) namespace ast { static inline std::ostream& operator<<(std::ostream& os, std::list<cookie_av> const&v) { os << "{"; std::copy(v.begin(), v.end(), std::ostream_iterator<cookie_av>(os, "; ")); return os << "}"; } static inline std::ostream& operator<<(std::ostream& os, cookie_pair const&v) { return os << boost::fusion::as_vector(v); } static inline std::ostream& operator<<(std::ostream& os, cookie const&v) { return os << boost::fusion::as_vector(v); } } template <typename It> struct set_cookie : qi::grammar<It, ast::cookie()> { set_cookie() : set_cookie::base_type(start) { using namespace qi; using boost::proto::deep_copy; ///////////////////////////////////////////////////////////////// // RFC2616 2.2 token #define RFC_CTLs "\x01-\x1f\x7f" constexpr char DQUOTE = '"'; token = +(~char_(RFC_CTLs /*separators:*/ "()<>@,;:\\\"/[]?={} \t") - '\0'); ///////////////////////////////////////////////////////////////// // RFC6265 4.1.1. 
Syntax (set-cookie) set_cookie_header = no_case["set-cookie: "] >> set_cookie_string; set_cookie_string = cookie_pair >> *("; " >> cookie_av); cookie_pair = cookie_name >> '=' >> cookie_value; cookie_name = token; auto cookie_octet = deep_copy(char_("\x21" "\x23-\x2B" "\x2D-\x3A" "\x3C-\x5B" "\x5D-\x7E")); cookie_value = *cookie_octet | (DQUOTE >> *cookie_octet >> DQUOTE); // ; US-ASCII characters excluding CTLs, // ; whitespace DQUOTE, comma, semicolon, // ; and backslash cookie_av = expires_av | max_age_av | domain_av | path_av | secure_av | httponly_av | extension_av ; expires_av = no_case["expires="] >> sane_cookie_date; sane_cookie_date = stream; // TODO <rfc1123_date, defined in [RFC2616], Section 3.3.1> max_age_av = no_case["max-age="] >> !char_('0') >> uint_; // ; In practice, both expires_av and max_age_av // ; are limited to dates representable by the // ; user agent. // non_zero_digit = %x31-39 // ; digits 1 through 9 domain_av = no_case["domain="] >> domain_value; domain_value = raw [ (alpha >> *(alpha|digit|'-')) % '.']; // ; defined in [RFC1034], Section 3.5, as // ; enhanced by [RFC1123], Section 2.1 path_av = no_case["path="] >> path_value; path_value = *(~char_(RFC_CTLs ";") - '\0'); // <any CHAR except CTLs or ";"> secure_av = no_case["secure"] >> attr(ast::secure_av{}); httponly_av = no_case["httponly"] >> attr(ast::httponly_av{}); extension_av = as_string [*(~char_(RFC_CTLs ";") - '\0')]; // <any CHAR except CTLs or ";"> start = set_cookie_header; BOOST_SPIRIT_DEBUG_NODES( (start) (set_cookie_header) (set_cookie_string) (cookie_pair) (cookie_name) (cookie_value) (token) (cookie_av) (expires_av) (sane_cookie_date) (max_age_av) (domain_av) (domain_value) (path_av) (path_value) (secure_av) (httponly_av) (extension_av) ); #undef RFC_CTLs } private: qi::rule<It, ast::cookie()> start; qi::rule<It, std::string()> token, cookie_name, cookie_value, domain_value, path_value; qi::rule<It, ast::cookie()> set_cookie_header, set_cookie_string; qi::rule<It, 
ast::cookie_pair()> cookie_pair; qi::rule<It, ast::cookie_av()> cookie_av; qi::rule<It, ast::expires_av()> expires_av; qi::rule<It, ast::rfc1123date()> sane_cookie_date; qi::rule<It, ast::max_age_av()> max_age_av; // non_zero_digit; qi::rule<It, ast::domain_av()> domain_av; qi::rule<It, ast::path_av()> path_av; qi::rule<It, ast::secure_av()> secure_av; qi::rule<It, ast::httponly_av()> httponly_av; qi::rule<It, ast::extension_av()> extension_av; }; int main() { using It = std::string::const_iterator; for (std::string const s : { "Set-Cookie: name=value", "Set-Cookie: name=value; Path=/; Domain=domain.com", "set-cookie: name=value; path=/; domain=domain.com", //// not actually rfc 6265 conformant: //"Set-Cookie: name=value;path=/;domain=domain.com", // actually a wednesday: "Set-Cookie: name=value; path=/; domain=.mydomain.com; expires=Thu, 01-Jan-2070 00:00:10 GMT; comment=no_comment" }) { It f = s.begin(), l = s.end(); std::cout << "Parsing '" << s << "'\n"; ast::cookie cookie; bool ok = qi::parse(f,l,set_cookie<It>(),cookie); if (ok) { std::cout << " -- Parse success: " << cookie << "\n"; } else std::cout << " -- Parse failure\n"; if (f!=l) std::cout << " -- Remaining unparsed: '" << std::string(f,l) << "'\n"; } } Expires属性只出现1次,但实际上重复这些属性是没有意义的。所以,这是一个变体,它只是解析为HttpOnly属性的集合而不是

optional<>

Live On Coliru

// In this variant the three textual attributes all alias the same type.
// NOTE(review): string_attribute is not defined in this excerpt - see the full listing.
using domain_av = string_attribute;
using path_av = string_attribute;
using extension_av = string_attribute;

/// Fixed set of cookie attributes: each valued attribute is held in an
/// `optional`, and the two flag attributes are plain booleans.
struct cookie_av {
    optional<expires_av>   expires;
    optional<max_age_av>   max_age;
    optional<domain_av>    domain;
    optional<path_av>      path;
    bool                   secure{false};    ///< defaults to false
    bool                   httponly{false};  ///< defaults to false
    optional<extension_av> extension;
};

/// The leading `name=value` part of a cookie.
struct cookie_pair {
    std::string name;   ///< text before the '='
    std::string value;  ///< text after the '='
};

// A parsed cookie: the name/value pair plus a single cookie_av record
// holding its attributes.
struct cookie {
    cookie_pair pair;
    cookie_av   attributes;
};

摘要

Cookie 解析是应该交给库来完成的工作。它看似微不足道,但你必须处理大量(遗留的)RFC,以及更多的实现怪癖。

Cookies不是一件容易的事。