在JavaScript中以n大小的块拆分大字符串

时间:2011-08-11 22:22:36

标签: javascript regex string split

我想将一个非常大的字符串(比方说,10,000个字符)拆分成N个大小的块。

在性能方面,最好的方法是什么?

例如: 将字符串"1234567890"按每块2个字符拆分,将得到["12", "34", "56", "78", "90"]

使用String.prototype.match是否可以做到这一点?如果是这样的话,这会是在性能方面做到这一点的最佳方法吗?

23 个答案:

答案 0 :(得分:366)

您可以这样做:

"1234567890".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "90"]

该方法仍然适用于大小不是块大小的精确倍数的字符串:

"123456789".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "9"]

通常,对于您想要提取最多 n 大小的字符串的任何字符串,您可以这样做:

str.match(/.{1,n}/g); // Replace n with the size of the substring

如果你的字符串可以包含换行符或回车符,你可以这样做:

str.match(/(.|[\r\n]){1,n}/g); // Replace n with the size of the substring

就性能而言,我尝试了大约10k个字符,并且在Chrome上花了一秒多一点。 YMMV。

这也可用于可重复使用的功能:

/**
 * Splits a string into consecutive chunks of at most `length` characters.
 *
 * @param {string} str - The string to split.
 * @param {number} length - Maximum chunk size. It is interpolated into the
 *   regex quantifier `{1,length}`, so it must be a positive integer.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 *   An empty input yields an empty array.
 */
function chunkString(str, length) {
  // `[\s\S]` matches every character, including the line terminators that
  // `.` skips (and would therefore silently drop from the output).
  const chunkPattern = new RegExp('[\\s\\S]{1,' + length + '}', 'g');
  // `match` returns null when nothing matches (empty input); normalise to [].
  return str.match(chunkPattern) || [];
}

答案 1 :(得分:27)

底线:

  • match效率非常低,slice更好,在Firefox上substr / substring更好
  • match对于短字符串来说效率更低(即使使用缓存的正则表达式 - 可能是因为正则表达式解析设置时间)
  • match对于大块大小来说效率更低(可能是由于无法“跳转”)
  • 对于块长度非常小的较长字符串,match在旧IE上优于slice,但在所有其他系统上仍然落败
  • jsperf非常好用

答案 2 :(得分:25)

我创建了几个更快的变体,你可以在jsPerf上查看。我最喜欢的是:

/**
 * Cuts `str` into pieces of `size` characters, filling a pre-sized array.
 *
 * @param {string} str - The string to cut up.
 * @param {number} size - Number of characters per piece.
 * @returns {string[]} The pieces in order; the last one may be shorter.
 */
function chunkSubstr(str, size) {
  const total = Math.ceil(str.length / size);
  const pieces = new Array(total);

  let slot = 0;
  let offset = 0;
  while (slot < total) {
    pieces[slot] = str.substr(offset, size);
    slot += 1;
    offset += size;
  }

  return pieces;
}

答案 3 :(得分:13)

这是最快,最高效的解决方案:

/**
 * Splits `str` into substrings of `len` characters using a pre-sized array.
 *
 * @param {string} str - The string to split.
 * @param {number} len - Number of characters per chunk.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 */
function chunkString(str, len) {
  const count = Math.ceil(str.length / len);
  const out = new Array(count);

  let idx = 0;
  while (idx < count) {
    // Each chunk starts at a multiple of the chunk length.
    const start = idx * len;
    out[idx] = str.substring(start, start + len);
    idx += 1;
  }

  return out;
}

与其他方案比较一下;我赢了:)。

答案 4 :(得分:9)

惊奇!您可以使用split进行拆分。

var parts = "1234567890 ".split(/(.{2})/).filter(O=>O)

结果[ '12', '34', '56', '78', '90', ' ' ]

答案 5 :(得分:4)

// Demo: peel fixed-size chunks off the front of `str` until it is used up.
var str = "123456789";
var chunkSize = 2;
var chunks = [];

// An empty string is falsy, so the loop ends once everything is consumed.
while (str) {
    if (str.length >= chunkSize) {
        chunks.push(str.substr(0, chunkSize));
        str = str.substr(chunkSize);
        continue;
    }
    // Fewer than chunkSize characters remain: keep them as the final chunk.
    chunks.push(str);
    break;
}

alert(chunks); // chunks == 12,34,56,78,9

答案 6 :(得分:3)

我编写了一个扩展函数,所以块长度也可以是一个数组,如[1,3]

/**
 * Splits the string into chunks.
 *
 * @param {number|number[]} len - Either one positive chunk length, or a
 *   non-empty array of lengths that is cycled through repeatedly
 *   (e.g. [1, 3] yields chunks of 1, 3, 1, 3, ... characters).
 * @returns {string[]|undefined} The chunks in order. An empty string always
 *   yields []. Returns undefined when `len` is not usable: a non-positive
 *   number, null, an empty array, or an array that makes no progress
 *   (such as [0]).
 */
String.prototype.chunkString = function(len) {
    if (this.length < 1) {
        return [];
    }

    if (typeof len === 'number' && len > 0) {
        const count = Math.ceil(this.length / len);
        const fixed = new Array(count);
        let offset = 0;
        for (let i = 0; i < count; i++) {
            fixed[i] = this.substring(offset, offset + len);
            offset += len;
        }
        return fixed;
    }

    // `typeof null` is 'object', so null has to be excluded explicitly
    // before `.length` is read; otherwise this line throws a TypeError.
    if (len !== null && typeof len === 'object' && len.length) {
        const total = this.length;
        const chunks = [];
        let consumed = 0;
        do {
            len.forEach((size) => {
                const chunk = this.substring(consumed, consumed + size);
                // Past the end (or for a zero size) the chunk is empty: skip it.
                if (chunk !== '') {
                    chunks.push(chunk);
                    consumed += chunk.length;
                }
            });
            if (consumed === 0) {
                return undefined; // prevent an endless loop when len = [0]
            }
        } while (consumed < total);
        return chunks;
    }

    return undefined;
};

代码

"1234567890123".chunkString([1,3])

将返回:

[ '1', '234', '5', '678', '9', '012', '3' ]

答案 7 :(得分:2)

将大字符串拆分为给定的小字符串。

/**
 * Groups the space-separated words of `str` into chunks without ever
 * splitting a word.
 *
 * Words are appended to the current chunk while that chunk is shorter than
 * `words` characters; the check happens before appending, so a chunk can end
 * up longer than the threshold. Runs of whitespace inside a chunk are
 * collapsed to a single space, and chunks carry no leading or trailing space.
 *
 * @param {string} str - The text to split.
 * @param {number} words - Despite the name, a character-length threshold
 *   for a chunk (it is compared against the chunk's `length`).
 * @returns {string[]} The chunks in order, including the final partial one.
 *   Empty or whitespace-only input yields [].
 */
function chunkSubstr(str, words) {
  const values = [];
  let current = "";

  // Stores the chunk built so far, normalising whitespace and skipping blanks.
  const flush = () => {
    const cleaned = current.replace(/\s+/g, " ").trim();
    if (cleaned !== "") {
      values.push(cleaned);
    }
  };

  for (const word of str.split(" ")) {
    if (current.length < words) {
      current = current === "" ? word : current + " " + word;
    } else {
      flush();
      current = word;
    }
  }
  // The last chunk is still pending when the loop ends; emit it as well.
  flush();

  return values;
}

答案 8 :(得分:1)

我会使用正则表达式...

/**
 * Splits `str` into pieces of at most `chunkLength` characters using a
 * regular expression that also matches line terminators.
 *
 * @param {string} str - The string to split.
 * @param {number|string} chunkLength - Maximum piece size; coerced to a number.
 * @returns {string[]|null} The pieces in order, or null when nothing matches
 *   (for example an empty string).
 */
var chunkStr = function(str, chunkLength) {
    // Unary plus coerces the size to a number before it is interpolated.
    var maxLen = +chunkLength;
    var matcher = new RegExp(`[\\s\\S]{1,${maxLen}}`, 'g');
    return str.match(matcher);
};

答案 9 :(得分:1)

// Chunks `str` into pieces of `chunkSize` characters, collected in `chunks`.
// NOTE(review): `str` is not declared in this snippet; it is expected to
// exist already in the surrounding scope.
// l = total length, lc = current read offset, c = index of the next chunk.
var l = str.length, lc = 0, chunks = [], c = 0, chunkSize = 2;
for (; lc < l; c++) {
  // The first argument reads the old offset; `lc += chunkSize` then advances
  // the offset and doubles as the (exclusive) end index of the slice.
  chunks[c] = str.slice(lc, lc += chunkSize);
}

答案 10 :(得分:1)

您可以在不使用任何正则表达式的情况下使用reduce()

/**
 * Splits `str` into chunks of `n` characters with `reduce`, without any
 * regular expression.
 *
 * The chunks are accumulated directly in an array rather than joined with
 * ',' and split again, so input that itself contains commas is chunked
 * correctly.
 *
 * @param {string} str - The string to split.
 * @param {number} n - Number of characters per chunk.
 * @returns {string[]} The chunks in order; an empty string yields [''].
 */
(str, n) => {
  return str.split('').reduce(
    (chunks, ch, index) => {
      // A new chunk starts at every multiple of n except the very first index.
      if ((index % n) || !(index)) {
        chunks[chunks.length - 1] += ch;
      } else {
        chunks.push(ch);
      }
      return chunks;
    },
    ['']
  );
}

答案 11 :(得分:1)

您绝对可以做类似的事情

let pieces = "1234567890 ".split(/(.{2})/).filter(x => x.length == 2);

得到这个:

[ '12', '34', '56', '78', '90' ]

如果要动态输入/调整块大小以使块大小为n,则可以执行以下操作:

n = 2;
let pieces = "1234567890 ".split(new RegExp("(.{"+n.toString()+"})")).filter(x => x.length == n);

要查找原始字符串中所有可能的大小为n的块,请尝试以下操作:

// Collects every distinct substring of length n by chunking the string once
// per possible starting offset (0 .. n-1) and pooling the results in a Set.
let subs = new Set();
let n = 2;
let str = "1234567890 ";
// Capturing group makes split() keep the n-character matches in its output.
let regex = new RegExp(`(.{${n.toString()}})`);

for (let offset = 0; offset < n; offset += 1) {
    // Only the full-size pieces count; separators and leftovers are dropped.
    str.split(regex).forEach((piece) => {
        if (piece.length === n) {
            subs.add(piece);
        }
    });
    // Shift the reading frame by one character for the next pass.
    str = str.substr(1);
}

您应该最终得到:

[ '12', '23', '34', '45', '56', '67', '78', '89', '90', '0 ' ]

答案 12 :(得分:0)

以原型函数的形式:

/**
 * Splits the string into pieces with a `.{1,size}` regular expression.
 *
 * The size is taken from the single argument when exactly one is passed and
 * its trimmed string form is finite; a non-finite single argument puts the
 * literal `false` into the pattern. With any other argument count the size
 * is 1.
 *
 * @returns {string[]|null} The result of `String.prototype.match` for the
 *   generated pattern.
 */
String.prototype.lsplit = function(...args){
    let maxLen = 1;
    if (args.length == 1) {
        maxLen = isFinite(String(args[0]).trim()) ? args[0] : false;
    }
    return this.match(new RegExp('.{1,' + maxLen + '}', 'g'));
};

答案 13 :(得分:0)

以下是我正在使用的代码,它使用String.prototype.slice

是的,答案很长,因为它试图尽可能接近当前标准,当然包含合理数量的JSDOC条评论。但是,一旦缩小,代码只有828个字节,一旦gzip传输它只有497个字节。

这会将String.prototype添加到/*jslint maxlen:80, browser:true, devel:true */ /* * Properties used by toChunks. */ /*property MAX_SAFE_INTEGER, abs, ceil, configurable, defineProperty, enumerable, floor, length, max, min, pow, prototype, slice, toChunks, value, writable */ /* * Properties used in the testing of toChunks implimentation. */ /*property appendChild, createTextNode, floor, fromCharCode, getElementById, length, log, pow, push, random, toChunks */ (function () { 'use strict'; var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1; /** * Defines a new property directly on an object, or modifies an existing * property on an object, and returns the object. * * @private * @function * @param {Object} object * @param {string} property * @param {Object} descriptor * @return {Object} * @see https://goo.gl/CZnEqg */ function $defineProperty(object, property, descriptor) { if (Object.defineProperty) { Object.defineProperty(object, property, descriptor); } else { object[property] = descriptor.value; } return object; } /** * Returns true if the operands are strictly equal with no type conversion. * * @private * @function * @param {*} a * @param {*} b * @return {boolean} * @see http://www.ecma-international.org/ecma-262/5.1/#sec-11.9.4 */ function $strictEqual(a, b) { return a === b; } /** * Returns true if the operand inputArg is undefined. * * @private * @function * @param {*} inputArg * @return {boolean} */ function $isUndefined(inputArg) { return $strictEqual(typeof inputArg, 'undefined'); } /** * The abstract operation throws an error if its argument is a value that * cannot be converted to an Object, otherwise returns the argument. * * @private * @function * @param {*} inputArg The object to be tested. * @throws {TypeError} If inputArg is null or undefined. * @return {*} The inputArg if coercible. 
* @see https://goo.gl/5GcmVq */ function $requireObjectCoercible(inputArg) { var errStr; if (inputArg === null || $isUndefined(inputArg)) { errStr = 'Cannot convert argument to object: ' + inputArg; throw new TypeError(errStr); } return inputArg; } /** * The abstract operation converts its argument to a value of type string * * @private * @function * @param {*} inputArg * @return {string} * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tostring */ function $toString(inputArg) { var type, val; if (inputArg === null) { val = 'null'; } else { type = typeof inputArg; if (type === 'string') { val = inputArg; } else if (type === 'undefined') { val = type; } else { if (type === 'symbol') { throw new TypeError('Cannot convert symbol to string'); } val = String(inputArg); } } return val; } /** * Returns a string only if the arguments is coercible otherwise throws an * error. * * @private * @function * @param {*} inputArg * @throws {TypeError} If inputArg is null or undefined. * @return {string} */ function $onlyCoercibleToString(inputArg) { return $toString($requireObjectCoercible(inputArg)); } /** * The function evaluates the passed value and converts it to an integer. * * @private * @function * @param {*} inputArg The object to be converted to an integer. * @return {number} If the target value is NaN, null or undefined, 0 is * returned. If the target value is false, 0 is returned * and if true, 1 is returned. * @see http://www.ecma-international.org/ecma-262/5.1/#sec-9.4 */ function $toInteger(inputArg) { var number = +inputArg, val = 0; if ($strictEqual(number, number)) { if (!number || number === Infinity || number === -Infinity) { val = number; } else { val = (number > 0 || -1) * Math.floor(Math.abs(number)); } } return val; } /** * The abstract operation ToLength converts its argument to an integer * suitable for use as the length of an array-like object. * * @private * @function * @param {*} inputArg The object to be converted to a length. 
* @return {number} If len <= +0 then +0 else if len is +INFINITY then * 2^53-1 else min(len, 2^53-1). * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tolength */ function $toLength(inputArg) { return Math.min(Math.max($toInteger(inputArg), 0), MAX_SAFE_INTEGER); } if (!String.prototype.toChunks) { /** * This method chunks a string into an array of strings of a specified * chunk size. * * @function * @this {string} The string to be chunked. * @param {Number} chunkSize The size of the chunks that the string will * be chunked into. * @returns {Array} Returns an array of the chunked string. */ $defineProperty(String.prototype, 'toChunks', { enumerable: false, configurable: true, writable: true, value: function (chunkSize) { var str = $onlyCoercibleToString(this), chunkLength = $toInteger(chunkSize), chunked = [], numChunks, length, index, start, end; if (chunkLength < 1) { return chunked; } length = $toLength(str.length); numChunks = Math.ceil(length / chunkLength); index = 0; start = 0; end = chunkLength; chunked.length = numChunks; while (index < numChunks) { chunked[index] = str.slice(start, end); start = end; end += chunkLength; index += 1; } return chunked; } }); } }()); /* * Some tests */ (function () { 'use strict'; var pre = document.getElementById('out'), chunkSizes = [], maxChunkSize = 512, testString = '', maxTestString = 100000, chunkSize = 0, index = 1; while (chunkSize < maxChunkSize) { chunkSize = Math.pow(2, index); chunkSizes.push(chunkSize); index += 1; } index = 0; while (index < maxTestString) { testString += String.fromCharCode(Math.floor(Math.random() * 95) + 32); index += 1; } function log(result) { pre.appendChild(document.createTextNode(result + '\n')); } function test() { var strLength = testString.length, czLength = chunkSizes.length, czIndex = 0, czValue, result, numChunks, pass; while (czIndex < czLength) { czValue = chunkSizes[czIndex]; numChunks = Math.ceil(strLength / czValue); result = testString.toChunks(czValue); 
czIndex += 1; log('chunksize: ' + czValue); log(' Number of chunks:'); log(' Calculated: ' + numChunks); log(' Actual:' + result.length); pass = result.length === numChunks; log(' First chunk size: ' + result[0].length); pass = pass && result[0].length === czValue; log(' Passed: ' + pass); log(''); } } test(); log(''); log('Simple test result'); log('abcdefghijklmnopqrstuvwxyz'.toChunks(3)); }());(使用可用的Object.defineProperty)的方法是:

  1. toChunks
  2. 已包含许多测试以检查功能。

    担心代码长度会影响性能吗?无需担心,http://jsperf.com/chunk-string/3

    大部分额外代码都是为了确保代码在多个JavaScript环境中响应相同。

    <pre id="out"></pre>
    fmt:formatDate

答案 14 :(得分:0)

    /**
     * Formats the value `a` according to the mask string `b` and returns the
     * result as a string: an optional "-", the integer part, then the decimal
     * part.
     * NOTE(review): this looks like a minified number-mask formatter (masks
     * such as "##,###."); variables are reused heavily, so statement order
     * matters.
     *
     * @param {string} b - The mask. When falsy, `a` is returned unchanged.
     * @param {*} a - The value. When `+a` is NaN, `a` is returned unchanged.
     * @returns {*} The formatted string, or `a` itself in the two cases above.
     */
    window.format = function(b, a) {
        if (!b || isNaN(+a)) return a;
        // A mask that starts with "-" negates the value.
        var a = b.charAt(0) == "-" ? -a : +a,
            // j is truthy when the value is negative; `a` is made absolute here.
            j = a < 0 ? a = -a : 0,
            // All mask characters other than digits, "-", "+" and "#".
            e = b.match(/[^\d\-\+#]/g),
            // h: the last such character, or "." — the mask is split on it below.
            h = e && e[e.length - 1] || ".",
            // e: the first such character when at least two exist, else ",";
            // it is the separator inserted between digit groups further down.
            e = e && e[1] && e[0] || ",",
            b = b.split(h),
            // Round to as many decimals as the mask has characters after h.
            a = a.toFixed(b[1] && b[1].length),
            a = +a + "",
            d = b[1] && b[1].lastIndexOf("0"),
            c = a.split(".");
        if (!c[1] || c[1] && c[1].length <= d) a = (+a).toFixed(d + 1);
        d = b[0].split(e);
        b[0] = d.join("");
        var f = b[0] && b[0].indexOf("0");
        if (f > -1)
            // Left-pad the integer part with "0" up to the mask's zero positions.
            for (; c[0].length < b[0].length - f;) c[0] = "0" + c[0];
        else +c[0] == 0 && (c[0] = "");
        a = a.split(".");
        a[0] = c[0];
        // c becomes the length of the last group in the mask's integer part.
        if (c = d[1] && d[d.length -
                1].length) {
            // Rebuild the integer part, appending separator e after every c digits
            // counted from the right.
            for (var d = a[0], f = "", k = d.length % c, g = 0, i = d.length; g < i; g++) f += d.charAt(g), !((g - k + 1) % c) && g < i - c && (f += e);
            a[0] = f
        }
        a[1] = b[1] && a[1] ? h + a[1] : "";
        return (j ? "-" : "") + a[0] + a[1]
    };

var str="1234567890";
var formatstr=format( "##,###.", str);
alert(formatstr);


This will split the string from the right, inserting a comma after every 3 characters. You can change the grouping position if you want.

答案 15 :(得分:0)

使用slice()方法:

/**
 * Splits `str` into chunks of `chunkSize` characters using `slice()`.
 *
 * @param {string} str - The string to split.
 * @param {number} chunkSize - Characters per chunk; must be at least 1.
 *   A fractional size is truncated, matching how `slice()` treats indices.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 *   An empty string yields [].
 * @throws {RangeError} If `chunkSize` is not a number of at least 1. Such a
 *   size would never consume any input and the loop could not terminate.
 */
function returnChunksArray(str, chunkSize) {
  if (!(chunkSize >= 1)) {
    throw new RangeError('chunkSize must be at least 1');
  }
  const step = Math.trunc(chunkSize);
  const arr = [];
  // Walk the string by offset instead of re-slicing the remainder each time,
  // which would copy the rest of the string on every iteration.
  for (let start = 0; start < str.length; start += step) {
    arr.push(str.slice(start, start + step));
  }
  return arr;
}

使用substring()方法也可以这样做。

/**
 * Splits `str` into chunks of `chunkSize` characters using `substring()`.
 *
 * @param {string} str - The string to split.
 * @param {number} chunkSize - Characters per chunk; must be at least 1.
 *   A fractional size is truncated, matching how `substring()` treats indices.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 *   An empty string yields [].
 * @throws {RangeError} If `chunkSize` is not a number of at least 1. Such a
 *   size would never consume any input and the loop could not terminate.
 */
function returnChunksArray(str, chunkSize) {
  if (!(chunkSize >= 1)) {
    throw new RangeError('chunkSize must be at least 1');
  }
  const step = Math.trunc(chunkSize);
  const arr = [];
  // Walk the string by offset instead of re-cutting the remainder each time,
  // which would copy the rest of the string on every iteration.
  for (let start = 0; start < str.length; start += step) {
    arr.push(str.substring(start, start + step));
  }
  return arr;
}

答案 16 :(得分:0)

这段小代码怎么样:

/**
 * Splits `str` into pieces of at most `size` characters with a regex.
 *
 * @param {string} str - The string to split.
 * @param {number} size - Maximum piece length, used as the quantifier bound.
 * @returns {string[]|null} The pieces in order, or null when nothing matches
 *   (for example an empty string). `.` does not match line terminators, so
 *   those characters do not appear in any piece.
 */
function splitME(str, size) {
    const pattern = '.{1,' + size + '}';
    return str.match(new RegExp(pattern, 'g'));
}

答案 17 :(得分:0)

/**
 * Splits `str` into chunks of at most `chunkSize` characters.
 *
 * @param {string} str - The string to split.
 * @param {number} chunkSize - Maximum chunk length; must be a positive
 *   integer because it is used as a regex quantifier bound.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 *   An empty string yields [].
 */
const getChunksFromString = (str, chunkSize) => {
    // `[\s\S]` matches any character at all; a bare `.` skips line
    // terminators and would drop them from the result.
    const regexChunk = new RegExp(`[\\s\\S]{1,${chunkSize}}`, 'g');
    // `match` returns null when nothing matches (empty input); normalise to [].
    return str.match(regexChunk) || [];
};

根据需要调用

console.log(getChunksFromString("Hello world", 3))   // ["Hel", "lo ", "wor", "ld"]

答案 18 :(得分:0)

我对上述解决方案的疑问是:它们不管在句子中的位置如何,都将字符串分成固定大小的块。

我认为以下更好的方法;尽管它需要一些性能调整:

 static chunkString(str, length, size,delimiter='\n' ) {
        const result = [];
        for (let i = 0; i < str.length; i++) {
            const lastIndex = _.lastIndexOf(str, delimiter,size + i);
            result.push(str.substr(i, lastIndex - i));
            i = lastIndex;
        }
        return result;
    }

答案 19 :(得分:0)

这是经过一些试验后我想到的用于模板字符串的解决方案:

用法:

chunkString(5)`testing123`

/**
 * Creates a chunker for pieces of `nSize` characters. The returned function
 * works both as a template tag (chunkString(5)`testing123`) and as a plain
 * function taking any value, which is converted with `String()`.
 *
 * @param {number} nSize - Characters per chunk; must be positive.
 * @returns {function(*, ...*): string[]} A function returning the chunks in
 *   order. It throws a RangeError when `nSize` is not a positive number,
 *   because such a size could never consume the input.
 */
function chunkString(nSize) {
    return (strToChunk, ...substitutions) => {
        const size = Number(nSize);
        if (!(size > 0)) {
            throw new RangeError('nSize must be a positive number');
        }

        let text;
        if (Array.isArray(strToChunk) && Array.isArray(strToChunk.raw)) {
            // Called as a template tag: interleave the literal parts with the
            // substituted values. String() on the parts array alone would
            // drop the values and join the parts with ",".
            text = String(strToChunk[0]);
            for (let i = 1; i < strToChunk.length; i++) {
                text += String(substitutions[i - 1]) + String(strToChunk[i]);
            }
        } else {
            text = String(strToChunk);
        }

        const result = [];
        for (let start = 0; start < text.length; start += size) {
            result.push(text.slice(start, start + size));
        }
        return result;
    };
}

document.write(chunkString(5)`testing123`);
// returns: testi,ng123

document.write(chunkString(3)`testing123`);
// returns: tes,tin,g12,3

答案 20 :(得分:0)

包括带有预分配的左对齐和右对齐版本。对于小块,其速度与RegExp实现相当,但随着块大小的增长会更快,而且内存效率更高。

/**
 * Chunks a string from the left: every chunk has `size` characters except
 * possibly the last one.
 *
 * @param {string} str - The string to chunk.
 * @param {number} [size=3] - Characters per chunk.
 * @returns {string[]|undefined} The chunks in order, or undefined when `str`
 *   is not a string.
 */
function chunkLeft (str, size = 3) {
  if (typeof str !== 'string') {
    return undefined;
  }

  const total = str.length;
  const pieces = Array(Math.ceil(total / size));

  let slot = 0;
  let cursor = 0;
  while (cursor < total) {
    const next = cursor + size;
    pieces[slot] = str.slice(cursor, next);
    cursor = next;
    slot += 1;
  }

  return pieces;
}

/**
 * Chunks a string from the right: every chunk has `size` characters except
 * possibly the first one, which takes the remainder.
 *
 * @param {string} str - The string to chunk.
 * @param {number} [size=3] - Characters per chunk.
 * @returns {string[]|undefined} The chunks in order, or undefined when `str`
 *   is not a string.
 */
function chunkRight (str, size = 3) {
  if (typeof str !== 'string') {
    return undefined;
  }

  const total = str.length;
  const pieces = Array(Math.ceil(total / size));
  if (total === 0) {
    return pieces;
  }

  // The leading chunk absorbs the remainder, or is full-size when the
  // length divides evenly.
  const head = str.slice(0, total % size || size);
  pieces[0] = head;

  let cursor = head.length;
  for (let slot = 1; cursor < total; slot += 1) {
    const next = cursor + size;
    pieces[slot] = str.slice(cursor, next);
    cursor = next;
  }

  return pieces;
}

console.log(chunkRight())  // undefined
console.log(chunkRight(''))  // []
console.log(chunkRight('1'))  // ["1"]
console.log(chunkRight('123'))  // ["123"]
console.log(chunkRight('1234'))  // ["1", "234"]
console.log(chunkRight('12345'))  // ["12", "345"]
console.log(chunkRight('123456'))  // ["123", "456"]
console.log(chunkRight('1234567'))  // ["1", "234", "567"]

答案 21 :(得分:0)

使用这个 npm 库“chkchars”,但请记住确保给定字符串的长度能被“number”参数整除。

const phrase = "1110010111010011100101110100010000011100101110100111001011101001011101001110010111010001000001110010111010011100101110100"
const number = 7

chkchars.splitToChunks(phrase, number)

// result => ['1110010', '1110100','1110010', '1110100','0100000', '1110010','1110100', '1110010','1110100', '1011101','0011100', '1011101','0001000','0011100','1011101', '0011100','1011101']

// perf => 0.287ms

答案 22 :(得分:-1)

columns: [
  {data: 'job'},
  {data: 'persons.name'}
]