正则表达式不显示IPv6匹配的正确结果

时间:2014-08-20 15:05:38

标签: c++ regex c++11 sample

为什么在此示例中,第二个匹配到的字符串在控制台中显示不正确?

Found 3 words IPv6
2001:0db8:0000:0000:0000:ff00:0042:8329
2001:0db8::   --- wrong output is here
::1
#include <iostream>
#include <iterator>
#include <string>
#include <regex>

// Scans a sample sentence for IPv6 addresses and prints the number of
// matches followed by each matched substring, one per line.
//
// NOTE: this is the program from the question and intentionally keeps its
// behavior. ECMAScript alternation is ordered, so the second alternative
// below accepts "2001:0db8::" before any later alternative gets a chance to
// match the whole compressed address "2001:0db8::ff00:0042:8329".
int main()
{
    const std::string haystack =
        "ipv4 model 127.0.0.1 "
        "live 2001:0db8:0000:0000:0000:ff00:0042:8329 "
        "another 2001:0db8::ff00:0042:8329 "
        "zip form ::1 ";

    // One literal per top-level alternative; adjacent literals concatenate
    // into exactly the same single-line pattern.
    const std::regex address_pattern(
        "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}"
        "|([0-9a-fA-F]{1,4}:){1,7}:"
        "|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}"
        "|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}"
        "|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}"
        "|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}"
        "|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}"
        "|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})"
        "|:((:[0-9a-fA-F]{1,4}){1,7}|:)"
        "|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}"
        "|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
        "|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))");

    const std::sregex_iterator first_match(haystack.cbegin(), haystack.cend(),
                                           address_pattern);
    const std::sregex_iterator no_more_matches;

    // Nothing is printed at all when the text contains no match.
    if (first_match == no_more_matches)
    {
        return 0;
    }

    std::cout << "Found "
              << std::distance(first_match, no_more_matches)
              << " words IPv6\n";

    for (auto match = first_match; match != no_more_matches; ++match)
    {
        std::cout << match->str() << "\n";
    }

    return 0;
}

1 个答案:

答案 0 :(得分:1)

您的前缀分支过早地接受了字符串。正则的多个备选分支按从左到右的顺序尝试,因此表达式组([0-9a-fA-F]{1,4}:){1,7}:会先把压缩符号(::)及其之前的部分(即2001:0db8::)判定为有效地址并消耗掉这段输入;剩余的ff00:0042:8329无法匹配任何其余分支(即使能匹配,也只会被当作另一个独立的地址)。

关于正则表达式写法的几点说明:{7,7}可以简写为{7}。IPv6地址以十六进制文本表示时应使用小写字母(即[0-9a-f]),因此可以去掉对[A-F]的检查。

#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>

// RFC 5952 outlines the canonical formatting for rendering IPv6 addresses
// as text. Hex digits in an address SHOULD be lowercase. An address can be
// shortened ONCE using the symbol '::'. All whitespace inside the pattern
// literal below is layout only and is stripped before the regex is
// compiled, so use \\s if the pattern must match actual whitespace.

// Finds IPv6 addresses in a sample sentence and prints how many were found,
// followed by each matched address on its own line. Prints nothing when the
// text contains no match.
//
// The alternatives are ordered so that every branch containing '::' still
// requires the fields that follow it; a short prefix such as "2001:0db8::"
// is therefore only accepted by the last branch, after all longer forms
// have been tried.
int main() {
  std::string ipv6 =
    "(?:"
    // For the first 6 fields, match addresses with no jump (::)...
    "  (?:                                              (?:[0-9a-f]{1,4}:){6}"
    // ...or a jump.
    "  |                                             :: (?:[0-9a-f]{1,4}:){5}"
    "  | (?:                         [0-9a-f]{1,4})? :: (?:[0-9a-f]{1,4}:){4}"
    "  | (?: (?:[0-9a-f]{1,4}:){0,1} [0-9a-f]{1,4})? :: (?:[0-9a-f]{1,4}:){3}"
    "  | (?: (?:[0-9a-f]{1,4}:){0,2} [0-9a-f]{1,4})? :: (?:[0-9a-f]{1,4}:){2}"
    "  | (?: (?:[0-9a-f]{1,4}:){0,3} [0-9a-f]{1,4})? :: (?:[0-9a-f]{1,4}:)   "
    "  | (?: (?:[0-9a-f]{1,4}:){0,4} [0-9a-f]{1,4})? ::                      "
    "  )                                                                     "
    // Match the base10/16 addresses with no jump (suffix of above).
    "  (?: [0-9a-f]{1,4} : [0-9a-f]{1,4}                                     "
    "      | (?: (?: 25[0-5] | 2[0-4][0-9] | [01]?[0-9]?[0-9])\\.){3}        "
    "        (?: (?: 25[0-5] | 2[0-4][0-9] | [01]?[0-9]?[0-9]))              "
    "  )                                                                     "
    // Not any above. Check to see if jump is between last 2 fields of addr.
    "  | (?: (?:[0-9a-f]{1,4}:){0,5} [0-9a-f]{1,4})? :: [0-9a-f]{1,4}        "
    "  | (?: (?:[0-9a-f]{1,4}:){0,6} [0-9a-f]{1,4})? ::                      "
    ")";
  // End of ipv6 string pattern.

  // Convert the readable pattern above into the actual regex by dropping all
  // layout whitespace. The lambda takes unsigned char because passing a
  // negative plain char to std::isspace is undefined behavior.
  ipv6.erase(std::remove_if(ipv6.begin(), ipv6.end(),
                            [](unsigned char ch) {
                              return std::isspace(ch) != 0;
                            }),
             ipv6.end());

  const std::regex ipv6_pattern(ipv6);
  const std::string test = "ipv4 model 127.0.0.1 "
                           "live 2001:0db8:0000:0000:0000:ff00:0042:8329 "
                           "another 2001:0db8::ff00:0042:8329 "
                           "zip form ::1 ";
  const auto results_begin =
      std::sregex_iterator(test.cbegin(), test.cend(), ipv6_pattern);
  const auto results_end = std::sregex_iterator();

  if (results_begin != results_end) {
    std::cout << "Found "
              << std::distance(results_begin, results_end)
              << " IPv6 address matches.\n";

    // Pre-increment avoids copying the regex iterator on every step.
    for (auto result = results_begin; result != results_end; ++result) {
      std::cout << result->str() << '\n';
    }
  }
}

控制台:

Found 3 IPv6 address matches. 
2001:0db8:0000:0000:0000:ff00:0042:8329
2001:0db8::ff00:0042:8329
::1
相关问题