如何在旧的IE版本中使用String.split的捕获组?

时间:2015-02-05 10:57:33

标签: javascript regex split internet-explorer-7

// Question example 1: split on a case-insensitive capturing group.
var textt = "L'architecture du système d'information devient";
// The separator must be a regex literal. A quoted "/(ARCH)/gi" is an ordinary
// string that split() searches for literally, so it would never match.
var pattern = /(ARCH)/gi;
// In a standards-compliant engine the captured text ("arch") is spliced into
// the result between the surrounding pieces.
var array = textt.split(pattern);
console.log(array);

这导致:

[
    L',
    itecture du système d'information devient
]

预期的结果是:

[
    L',
    arch,
    itecture du système d'information devient
]

另一个例子

// Question example 2: the separator matches at the very start of the string.
var textt = "ARCHIMAG";
// The separator must be a regex literal. A quoted "/(ARCH)/gi" is an ordinary
// string that split() searches for literally, so it would never match.
var pattern = /(ARCH)/gi;
// A standards-compliant split() yields a leading empty string (the text before
// the match), then the captured "ARCH", then the remainder.
var array = textt.split(pattern);
console.log(array);

结果:

[
    IMAG
]

预期是:

[
    ARCH,
    IMAG
]

3 个答案:

答案 0 :(得分:1)

如果您愿意通过特性检测,用一个函数来替代带捕获组的分割,请尝试以下代码。它在脚本加载时检测浏览器是否支持该特性,并为 nonCaptureSplit 分配相应的函数,因此检测只进行一次。

如果模式中包含字母和数字以外的字符(例如标点符号),则需要对模式进行转义。

编辑

如果缺乏对非捕获分割的支持,现在进行完全手动分割。

/**
 * Case-insensitive split that keeps every matched separator in the result,
 * i.e. what `str.split(/(pattern)/i)` returns in a compliant engine.
 *
 * The implementation is chosen once, when the script loads:
 *  - if the native split() splices captured groups into its result, the native
 *    method is used and `pattern` is interpreted as regular-expression source;
 *  - otherwise a manual search is used and `pattern` is matched as literal text.
 *
 * @param {String} str String to split.
 * @param {String} pattern Separator text (see above for how it is interpreted).
 * @returns {Array} Unmatched pieces interleaved with the matched separators,
 *     each separator spelled as it appears in `str`.
 */
var nonCaptureSplit = (function () {

  // Feature test: a compliant split() returns ['', 'a', 'b'] here.
  if ('ab'.split(/(a)/).length === 3) {
    return function (str, pattern) {
      var re = new RegExp('(' + pattern + ')', 'i');
      return str.split(re);
    };
  }

  // Otherwise, do it with a function
  return function (str, pattern) {
    var result = [];
    // Search lower-cased copies so the match is case-insensitive, but cut the
    // pieces out of the original string so their case is preserved.
    // NOTE: assumes toLowerCase() does not change the length of either string.
    var haystack = str.toLowerCase();
    var needle = pattern.toLowerCase();
    var needleLength = needle.length;
    var pos = 0; // start of the not-yet-consumed part of the string
    var hit;
    var k;

    // An empty pattern matches everywhere and would never advance the search.
    // Mirror what split(/()/i) returns instead: the individual characters
    // separated by the (empty) captured match.
    if (needleLength === 0) {
      for (k = 0; k < str.length; k++) {
        if (k > 0) {
          result.push('');
        }
        result.push(str.charAt(k));
      }
      return result;
    }

    while ((hit = haystack.indexOf(needle, pos)) !== -1) {
      // Text before the match, then the match as it appears in `str`
      // (not as the caller spelled `pattern`).
      result.push(str.substring(pos, hit), str.substr(hit, needleLength));
      pos = hit + needleLength;
    }

    // Remainder of the string; '' when the last match ended the string.
    result.push(str.substring(pos));

    return result;
  };
}());

测试:

// Manual checks: alert() shows each returned array as comma-joined text, so an
// empty-string element appears as nothing between two commas. The trailing
// comment on each line is the expected display.
alert(nonCaptureSplit('wa', 'wa'));        // ,wa,
alert(nonCaptureSplit('qwqwaba', 'wa'));   // qwq,wa,ba
alert(nonCaptureSplit('qwqwaba', 'qw'));   // ,qw,,qw,aba
alert(nonCaptureSplit('qwqwaba', 'ba'));   // qwqwa,ba,
alert(nonCaptureSplit('baaqwqbawaba', 'ba')); // ,ba,aqwq,ba,wa,ba,

// The two inputs from the question.
alert(nonCaptureSplit("L'architecture du système d'information devient", "ARCH"));
// L',arch,itecture du système d'information devient

alert(nonCaptureSplit("ARCHIMAG", "ARCH")); // ,ARCH,IMAG

对于具有大量匹配的大字符串,这可能有点低效,但仅限于不支持非捕获拆分的浏览器。测试结果在Safari和IE 6中匹配。请彻底测试,如果有问题请告诉我。

哦,这不是一般解决方案,但它应该适用于与OP类似的有限情况。

答案 1 :(得分:0)

如果您希望解决String.prototype.split中已知的浏览器问题,那么您需要查看Steven Levithan's cross browser fixes,以及他在2007年发表的original post。这段代码也用于他的XRegExp library,ES5 shim等其他库也使用了它。

/*!
 * Cross-Browser Split 1.1.1
 * Copyright 2007-2012 Steven Levithan <stevenlevithan.com>
 * Available under the MIT License
 * ECMAScript compliant, uniform cross-browser split method
 */

/**
 * Splits a string into an array of strings using a regex or string separator. Matches of the
 * separator are not included in the result array. However, if `separator` is a regex that contains
 * capturing groups, backreferences are spliced into the result each time `separator` is matched.
 * Fixes browser bugs compared to the native `String.prototype.split` and can be used reliably
 * cross-browser.
 * @param {String} str String to split.
 * @param {RegExp|String} separator Regex or string to use for separating the string.
 * @param {Number} [limit] Maximum number of items to include in the result array.
 * @returns {Array} Array of substrings.
 * @example
 *
 * // Basic use
 * split('a b c d', ' ');
 * // -> ['a', 'b', 'c', 'd']
 *
 * // With limit
 * split('a b c d', ' ', 2);
 * // -> ['a', 'b']
 *
 * // Backreferences in result array
 * split('..word1 word2..', /([a-z]+)(\d+)/i);
 * // -> ['..', 'word', '1', ' ', 'word', '2', '..']
 */
// Holds the cross-browser split function; an existing value is kept as-is.
var split;

// Avoid running twice; that would break the `nativeSplit` reference
// The function expression below is invoked immediately with no arguments, so
// `undef` is `undefined` inside it, and its return value (`self`) is what gets
// assigned to `split`.
split = split || function (undef) {

    var nativeSplit = String.prototype.split,
        // True when exec() reports a group that did not take part in the match
        // as `undefined`.
        compliantExecNpcg = /()??/.exec("")[1] === undef, // NPCG: nonparticipating capturing group
        self;

    self = function (str, separator, limit) {
        // If `separator` is not a regex, use `nativeSplit`
        if (Object.prototype.toString.call(separator) !== "[object RegExp]") {
            return nativeSplit.call(str, separator, limit);
        }
        var output = [],
            // Rebuild the flag string from the separator's own properties
            flags = (separator.ignoreCase ? "i" : "") +
                    (separator.multiline  ? "m" : "") +
                    (separator.extended   ? "x" : "") + // Proposed for ES6
                    (separator.sticky     ? "y" : ""), // Firefox 3+
            // End position of the previous match that advanced the scan
            lastLastIndex = 0,
            // Make `global` and avoid `lastIndex` issues by working with a copy
            separator = new RegExp(separator.source, flags + "g"),
            separator2, match, lastIndex, lastLength;
        str += ""; // Type-convert
        if (!compliantExecNpcg) {
            // Anchored copy of the separator; it is re-run against the matched
            // text via `replace` below to find out which groups are `undefined`
            // Doesn't need flags gy, but they don't hurt
            separator2 = new RegExp("^" + separator.source + "$(?!\\s)", flags);
        }
        /* Values for `limit`, per the spec:
         * If undefined: 4294967295 // Math.pow(2, 32) - 1
         * If 0, Infinity, or NaN: 0
         * If positive number: limit = Math.floor(limit); if (limit > 4294967295) limit -= 4294967296;
         * If negative number: 4294967296 - Math.floor(Math.abs(limit))
         * If other: Type-convert, then use the above rules
         */
        limit = limit === undef ?
            -1 >>> 0 : // Math.pow(2, 32) - 1
            limit >>> 0; // ToUint32(limit)
        while (match = separator.exec(str)) {
            // `separator.lastIndex` is not reliable cross-browser
            lastIndex = match.index + match[0].length;
            // Only act on matches that end beyond the previous match's end
            if (lastIndex > lastLastIndex) {
                // Text between the previous match and this one
                output.push(str.slice(lastLastIndex, match.index));
                // Fix browsers whose `exec` methods don't consistently return `undefined` for
                // nonparticipating capturing groups
                if (!compliantExecNpcg && match.length > 1) {
                    // The callback's arguments are: whole match, each group,
                    // offset, input string -- hence the `length - 2` bound
                    match[0].replace(separator2, function () {
                        for (var i = 1; i < arguments.length - 2; i++) {
                            if (arguments[i] === undef) {
                                match[i] = undef;
                            }
                        }
                    });
                }
                // Splice the captured groups into the output, unless the match
                // starts at the very end of the string
                if (match.length > 1 && match.index < str.length) {
                    Array.prototype.push.apply(output, match.slice(1));
                }
                lastLength = match[0].length;
                lastLastIndex = lastIndex;
                if (output.length >= limit) {
                    break;
                }
            }
            if (separator.lastIndex === match.index) {
                separator.lastIndex++; // Avoid an infinite loop
            }
        }
        // Trailing piece: when the last match ended exactly at the end of the
        // string, add "" only if that match was non-empty or the separator
        // cannot match the empty string; otherwise add the remaining text
        if (lastLastIndex === str.length) {
            if (lastLength || !separator.test("")) {
                output.push("");
            }
        } else {
            output.push(str.slice(lastLastIndex));
        }
        // Captured groups may have pushed the output past `limit`; trim it
        return output.length > limit ? output.slice(0, limit) : output;
    };

    // For convenience
    // NOTE: this replaces the built-in method for every string in the page
    String.prototype.split = function (separator, limit) {
        return self(this, separator, limit);
    };

    return self;

}();

答案 2 :(得分:0)

这是最好的解决方案:

/**
 * Splits `str` on every match of `separator` and keeps the matched text in the
 * result, without relying on split() handling capturing groups.
 *
 * @param {String} str String to split.
 * @param {String} separator Regular-expression source for the separator.
 * @returns {Array} Unmatched pieces interleaved with the matched separators;
 *     a one-element array holding `str` when nothing matches.
 */
function ieSplit(str, separator) {
    var re = new RegExp(separator, 'g');
    var merge = [];
    var last = 0; // end of the previous match
    var hit;

    // Walk the matches directly instead of replacing them with a sentinel
    // string, which would break on input that already contains the sentinel.
    while ((hit = re.exec(str)) !== null) {
        merge.push(str.substring(last, hit.index), hit[0]);
        last = hit.index + hit[0].length;
        // An empty match does not advance the regex; step past it by hand so
        // the loop terminates.
        if (hit[0].length === 0) {
            re.lastIndex = hit.index + 1;
        }
    }

    // Whatever follows the last match ('' if it ended the string).
    merge.push(str.substring(last));
    return merge;
}