Question

假设像 '這' 这样的非英文字符（NonEnglish）每个占用 2 个字符宽度，英文字符每个占用 1 个字符宽度，最大长度限制为 10 个字符宽度。如何截取字符串中宽度不超过 10 的前面部分？例如，对于下面的示例，如何得到结果 "This這 is"？
我正试图从第一个单词开始使用for循环，但我不知道如何在字符串中获取每个单词...

// Sample input mixing ASCII letters with non-ASCII (CJK, ellipsis) characters.
// Declared with `var` so the assignment no longer creates an implicit global,
// which throws a ReferenceError in strict mode and in ES modules.
var string = "This這 is是 English中文 …";

// Regex source matching a run of one or more characters outside U+0000-U+0080.
var NonEnglish = "[^\u0000-\u0080]+";
// Compiled form of NonEnglish.
var Pattern = new RegExp(NonEnglish);
// Maximum total width allowed, counted in single-width character cells.
var MaxLength = 10;
// Width of one non-English character relative to one English character.
var Ratio = 2;

Answer 1

如果你的意思是想截取字符串中总宽度不超过 10 的那一部分，可以这样做：

// Sample input: ASCII letters mixed with CJK characters and a trailing ellipsis.
var string = "This這 is是 English中文 …";

/**
 * Returns the longest prefix of `string` whose display width does not exceed
 * the limit. Characters outside U+0000-U+0080 count as width 2, every other
 * character counts as width 1.
 *
 * Width is measured per UTF-16 code unit, so a character stored as a
 * surrogate pair counts as two wide units and may be cut in half.
 *
 * @param {string} string - Text to truncate.
 * @param {number} [maxLength=10] - Maximum total width of the returned prefix.
 * @returns {string} The prefix of `string` that fits within `maxLength`.
 */
function check(string, maxLength) {
  var limit = maxLength === undefined ? 10 : maxLength;

  // A character class is required here: without the brackets the pattern
  // would only match the literal three-character sequence NUL, "-", U+0080.
  var wideChar = /[^\u0000-\u0080]/;

  var width = 0;
  var end = 0;
  var len = string.length;

  for (; end < len; end++) {
    var charWidth = wideChar.test(string[end]) ? 2 : 1;

    // Stop BEFORE taking a character that would push the width past the
    // limit, so the returned prefix never overshoots.
    if (width + charWidth > limit) {
      break;
    }
    width += charWidth;
  }

  // `end` is the index of the first character that did not fit.
  return string.slice(0, end);
}

// Display the truncated prefix of the sample string via alert().
alert(check(string));

Live Demo

编辑1：

用 .test 替换了 .match。前者（.match）返回一个完整的数组，而后者（.test）只返回 true 或 false。
改进了RegEx。由于我们只检查一个字符，因此不需要之前的^和+。
用len替换string.length。 Here's why。

Answer 2

我建议采用类似下面的做法（假设您是想把字符串拆分成长度不超过 10 个字节的片段）：

string = "This這 is是 English中文 …";

// Returns the number of bytes `text` occupies when encoded as UTF-8.
function byteCount(text) {
    // encodeURI emits either one literal ASCII character or one "%XX" escape
    // per UTF-8 byte, so each regex match below stands for exactly one byte.
    var encoded = encodeURI(text);
    var byteUnits = encoded.match(/%..|./g);

    // match() yields null when there is nothing to match (empty input).
    if (byteUnits === null) {
        return 0;
    }
    return byteUnits.length;
}

/**
 * Splits `text` into consecutive snippets whose byte size (as measured by
 * byteCount) is at most `targetLen`. Joining the snippets reproduces `text`.
 *
 * Characters are consumed as whole code points, so a surrogate pair is never
 * split between two snippets. A single character that is by itself larger
 * than `targetLen` is emitted as its own (oversized) snippet; this guarantees
 * forward progress instead of looping forever.
 *
 * @param {string} text - The string to break up.
 * @param {number} targetLen - Maximum byte size of each snippet.
 * @returns {string[]} The snippets, in order.
 */
function tokenize(text, targetLen) {
    var result = [];
    var current = "";
    var pos = 0;

    while (pos < text.length) {
        // Take one full code point: a high surrogate followed by a low
        // surrogate belongs together and must be measured as one character.
        var ch = text.charAt(pos);
        var code = text.charCodeAt(pos);
        if (code >= 0xD800 && code <= 0xDBFF && pos + 1 < text.length) {
            var lowCode = text.charCodeAt(pos + 1);
            if (lowCode >= 0xDC00 && lowCode <= 0xDFFF) {
                ch += text.charAt(pos + 1);
            }
        }

        var next = current + ch;
        var size = byteCount(next);

        if (size > targetLen) {
            if (current === "") {
                // The character alone exceeds the target: emit it by itself
                // and move on, otherwise this loop would never advance.
                result.push(ch);
                pos += ch.length;
            } else {
                // Flush what fits and retry the same character against an
                // empty snippet (pos is intentionally left unchanged).
                result.push(current);
                current = "";
            }
        } else if (size === targetLen) {
            // Exactly full: flush including the current character.
            result.push(next);
            current = "";
            pos += ch.length;
        } else {
            current = next;
            pos += ch.length;
        }
    }

    // Flush the trailing partial snippet, if any.
    if (current !== "") {
        result.push(current);
    }

    return result;
}

// Print the snippets produced for the sample string with a target length of 10.
console.log(tokenize(string, 10));

http://jsfiddle.net/5pc6L/

for循环字符串每个单词

2 个答案:

Live Demo