这个正则表达式会更有效吗?

时间:2009-11-11 15:28:26

标签: javascript regex

我修改了一个正则表达式here。我需要更改它,因为我需要它来匹配以下附加条件:

  1. 仅有月份和年份的日期
  2. 形如 mm dd, yyyy 的完整日期
  3. 仅限年份的日期
  4. 含有无关信息的输入(如 Vol. 51, No. 1, March 2008)
这是我到目前为止写出的正则表达式。我用 RegexBuddy 来帮助我理清逻辑,但它太复杂了,我不确定这是否是最高效的解决方案。

    \b(?:((Jan(uary)?|Feb(ruary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?)|((((Jan(uary)?|Ma(r(ch)?|y)|Jul(y)?|Aug(ust)?|Oct(ober)?|Dec(ember)?) 31)|((Jan(uary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?) (0?[1-9]|([12]\d)|30))|(Feb(ruary)? (0?[1-9]|1\d|2[0-8]|(29(?=, ((1[6-9]|[2-9]\d)(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))))),)) ((1[6-9]|[2-9]\d)\d{2}))|((1[6-9]|[2-9]\d)\d{2})

    有什么方法可以保留原始正则表达式和我的附加条件的功能吗?

    下面是我使用这个正则表达式的代码,或许有助于您了解我想要做什么。parseDate 函数的输出应该是 “yyyy mm dd” 形式的日期字符串(例如,示例 4 应输出 “2008 Mar”):

    /**
     * Generalized RegEx helper: runs a regular expression against a string and
     * returns the text of the whole match.
     *
     * @param {RegExp} ex - Expression to execute.
     * @param {string} haystack - Text to search.
     * @returns {string|undefined} The full match (index 0 of the exec result),
     *     or undefined when there is no match or no haystack was supplied.
     */
    function returnRegExMatch(ex,haystack) {
      // exec() converts its argument to a string, so null/undefined would be
      // searched as the literal text "null"/"undefined" and could match by accident.
      if (haystack === null || haystack === undefined) { return undefined; }
      var needle = ex.exec(haystack);
      return needle ? needle[0] : undefined;
    }
    
    /**
     * Date extraction (uses returnRegExMatch).
     *
     * Finds the first date in free-form text and normalizes it. Recognized
     * forms are "Month d, yyyy", "Month yyyy" and a bare "yyyy", with English
     * month names or abbreviations and years 1600-9999.
     *
     * @param {string} date - Text that may contain a date,
     *     e.g. "Vol. 51, No. 1, March 2008".
     * @returns {string} "yyyy", "yyyy Mon" or "yyyy Mon d" (three-letter month,
     *     day without leading zero), or "" when no date is found.
     */
    function parseDate(date) {
      // Strip anything other than a valid date. The month-based alternative and
      // the bare-year alternative share one group so that the leading \b guards
      // both; otherwise a year could be picked out of the middle of a longer
      // run of digits or letters (e.g. "12008").
      var dateRe = /\b(?:((Jan(uary)?|Feb(ruary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?)|((((Jan(uary)?|Ma(r(ch)?|y)|Jul(y)?|Aug(ust)?|Oct(ober)?|Dec(ember)?) 31)|((Jan(uary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?) (0?[1-9]|([12]\d)|30))|(Feb(ruary)? (0?[1-9]|1\d|2[0-8]|(29(?=, ((1[6-9]|[2-9]\d)(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))))),)) ((1[6-9]|[2-9]\d)\d{2})|((1[6-9]|[2-9]\d)\d{2}))/;
      var matched = returnRegExMatch(dateRe,date);

      // Nothing date-like in the input: stop here instead of running the
      // component expressions against a missing value.
      if (!matched) { return ""; }

      var yearRe = /[0-9][0-9][0-9][0-9]/;
      var monthRe = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/;
      // The trailing comma distinguishes the day from the digits of the year.
      var dayRe = /[0-9]?[0-9],/;

      var year = returnRegExMatch(yearRe,matched);
      var month = returnRegExMatch(monthRe,matched);
      // parseInt drops the comma; it yields NaN (falsy) when there is no day.
      var day = parseInt(returnRegExMatch(dayRe,matched),10);

      var dateReturned = "";
      if (year) { dateReturned = year; }
      if (month) { dateReturned = dateReturned + " " + month; }
      if (month && day) { dateReturned = dateReturned + " " + day; }

      return dateReturned;
    }
    

    谢谢!

    修改 感谢所有花时间回应的人。你们做了我所希望的,指出了我实施中最荒谬的事情。我决定简化主要的正则表达式。结果如下:

    \b(?:(?:Jan(?:uary)?|Feb(?:ruary)?|Ma(?:r(?:ch)?|y)|Apr(?:il)?|Ju(?:(?:ly?)|(?:ne?))|Aug(?:ust)?|Oct(?:ober)?|(?:Sept|Nov|Dec)(?:ember)?) (?:\d{1,2}, )?)?\d{4}
    

    这个版本不再费心去检测因闰年等情况而无效的日期。@Bart 让我确信,这类校验最好用原生 JS 而不是正则表达式来完成。也感谢 @Tim 指出应该使用非捕获括号。

    如果有人对如何改进这个正则表达式还有进一步的建议,请尽管提出。

2 个答案:

答案 0 :(得分:4)

我必须说,我费了不少劲才看懂这个怪物 :)

立即显而易见的两件事:

  1. 如果您之后不打算使用它们的(子)匹配,那么使用非捕获括号 (?:...) 比使用普通括号更高效。

  2. 如果括号嵌套到了十层,说明做法有问题。它也许能用,但维护起来(或者说理解起来)会非常痛苦。

  3. 我会检查RegexMagic是否有更好的方式来获得你需要的东西。但是,由于没有人强迫你在一个正则表达式中做你想要做的所有事情,为什么不把问题分解成组件,每个使用一个更简单的正则表达式呢?

答案 1 :(得分:3)

这样的事情怎么样:

#!/usr/bin/js

/**
 * Maps an English month name or abbreviation to its 1-based month number.
 * The lookup is case-insensitive.
 *
 * @param {string} monthStr - Month name, e.g. "Jan", "january" or "SEPT".
 * @returns {number|undefined} 1-12, or undefined when the input is not a
 *     string or is not a recognized month name.
 */
function getMonth(monthStr) {
    if (typeof monthStr !== 'string') {
        return undefined;
    }

    // A plain object is the appropriate container for string keys.
    var monthMap = {
        jan: 1, january: 1,
        feb: 2, february: 2,
        mar: 3, march: 3,
        apr: 4, april: 4,
        may: 5,
        jun: 6, june: 6,
        jul: 7, july: 7,
        aug: 8, august: 8,
        sep: 9, sept: 9, september: 9,
        oct: 10, october: 10,
        nov: 11, november: 11,
        dec: 12, december: 12
    };

    var key = monthStr.toLowerCase();
    // Own-property check so inherited names such as "constructor" or
    // "toString" are never reported as months.
    return Object.prototype.hasOwnProperty.call(monthMap, key) ? monthMap[key] : undefined;
}

/**
 * Gregorian leap-year test: divisible by 4, except century years, which
 * must also be divisible by 400.
 *
 * @param {number|string} year - Year to test; numeric strings are coerced by `%`.
 * @returns {boolean} true when the year is a leap year.
 */
function isLeapYear(year) {
    // Not a multiple of 4: never a leap year.
    if (year % 4 !== 0) {
        return false;
    }
    // Multiple of 4 that is not a century year: always a leap year.
    if (year % 100 !== 0) {
        return true;
    }
    // Century years count only when divisible by 400.
    return year % 400 === 0;
}

/**
 * Checks whether a value consists solely of decimal digits.
 * Note that "0" and zero-padded values such as "007" also qualify.
 *
 * @param {string|number} str - Value to test; numbers are stringified first.
 * @returns {Array|null} The match result (truthy) when the value is all
 *     digits, otherwise null. Any other input type yields null.
 */
function isPositiveNumber(str) {
    // Missing tokens arrive as undefined; report "no match" instead of
    // throwing on a value that has no match() method.
    if (typeof str !== 'string' && typeof str !== 'number') {
        return null;
    }
    return String(str).match(/^\d+$/);
}

/**
 * Parses a date written as "<month name> <day>, <year>" (the comma is
 * optional) and validates the day against the month length, taking leap
 * years into account. Relies on the sibling helpers getMonth,
 * isPositiveNumber and isLeapYear.
 *
 * @param {string} date - Text such as "January 31, 2009" or "Feb 29 2000".
 * @returns {Date|false} A Date set to the given year, month and day (the
 *     time of day is whatever the clock read when it was created), or false
 *     when the input is not a valid date.
 */
function parseDate(date) {
    if (typeof date !== 'string') {
        return false;
    }

    // Trim first so leading whitespace does not yield an empty first token.
    var tokens = date.replace(/^\s+|\s+$/g, '').split(/,?\s+/);

    // Month, day and year are all required; bail out before handing a
    // missing token to the helpers.
    if (tokens.length < 3) {
        return false;
    }

    var m = getMonth(tokens[0]);
    if (!m || !isPositiveNumber(tokens[1]) || !isPositiveNumber(tokens[2])) {
        return false;
    }

    var d = parseInt(tokens[1], 10);
    var y = parseInt(tokens[2], 10);

    // Days per month, indexed by m - 1; February depends on the year.
    var daysInMonth = [31, isLeapYear(y) ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    var lastDay = daysInMonth[m - 1];

    // Written positively so an unknown month (lastDay undefined) is rejected
    // as well as day 0 and days past the end of the month.
    if (!(d >= 1 && d <= lastDay)) {
        return false;
    }

    var dateObj = new Date();
    // setFullYear assigns year, month and day together; month is 0-based.
    dateObj.setFullYear(y, m - 1, d);

    // A year beyond the range Date can represent leaves an invalid Date.
    return isNaN(dateObj.getTime()) ? false : dateObj;
}

// Sample inputs: a valid date, an impossible day (Nov 31), and Feb 29 in a
// non-leap year, a leap year and a century year that is not a leap year.
var tests = ['January 31, 2009', 'Nov 31, 2009', 'Feb 29, 2001', 'Feb 29, 2000', 'Feb 29, 1900'];

// Index loop rather than for...in: for...in enumerates property names
// (inherited enumerable ones included), not the array's elements.
for(var i = 0; i < tests.length; i++) {
    var date = parseDate(tests[i]);
    // NOTE(review): print() is presumably the built-in of the standalone js
    // shell named in the shebang; confirm before running elsewhere.
    print(date ? tests[i]+" is a valid date, parsed as: "+date : tests[i]+" invalid");
}

输出:

January 31, 2009 is a valid date, parsed as: Sat Jan 31 2009 20:31:33 GMT+0100 (CET)
Nov 31, 2009 invalid
Feb 29, 2001 invalid
Feb 29, 2000 is a valid date, parsed as: Tue Feb 29 2000 20:31:33 GMT+0100 (CET)
Feb 29, 1900 invalid