假设像'這'这样的字符(即 NonEnglish,非英文字符)每个占用 2 个字符宽度,而英文字符每个占用 1 个字符宽度,最大长度限制为 10 个字符宽度;如何取得字符串中不超过 10 个宽度的开头部分?
例如,对于下面的示例字符串,如何得到结果 This這 is?
我正试图从第一个单词开始使用for循环,但我不知道如何在字符串中获取每个单词...
// Sample input mixing English (ASCII) text with non-English characters.
// Declared with `var` instead of a bare assignment: assigning to an
// undeclared name creates an implicit global and throws a ReferenceError in
// strict mode / ES modules. `var` also tolerates the later re-declarations
// of `string` further down this page.
var string = "This這 is是 English中文 …";
// Source text of a character class matching a run of one or more characters
// outside the range U+0000..U+0080.
var NonEnglish = "[^\u0000-\u0080]+",
    // Compiled form of NonEnglish.
    Pattern = new RegExp(NonEnglish),
    // Maximum allowed width, in character spaces.
    MaxLength = 10,
    // Width of one non-English character relative to one English character.
    Ratio = 2;
答案 0 :(得分:8)
如果你的意思是想要获得长度达到10的字符串中的那部分,这就是答案:
// Sample input: English (ASCII) text interleaved with non-English characters.
var string = "This這 is是 English中文 …";
/**
 * Return the longest leading part of `string` whose display width does not
 * exceed `maxLength`.
 *
 * Characters up to U+0080 count as width 1; every other character counts as
 * `ratio`. A UTF-16 surrogate pair is treated as one character, so the result
 * never ends in the middle of a pair.
 *
 * @param {string} string - Text to truncate.
 * @param {number} [maxLength=10] - Maximum total width of the result.
 * @param {number} [ratio=2] - Width of one character above U+0080.
 * @returns {string} The leading part of `string` that fits in `maxLength`.
 */
function check(string, maxLength, ratio) {
    var limit = maxLength === undefined ? 10 : maxLength;
    var wideWidth = ratio === undefined ? 2 : ratio;
    var len = string.length;
    var width = 0;
    var i = 0;

    while (i < len) {
        var code = string.charCodeAt(i);

        // A high surrogate followed by a low surrogate is one character
        // occupying two UTF-16 code units; consume both together.
        var units = 1;
        if (code >= 0xD800 && code <= 0xDBFF && i + 1 < len) {
            var low = string.charCodeAt(i + 1);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                units = 2;
            }
        }

        var charWidth = code > 0x80 ? wideWidth : 1;

        // Test BEFORE adding, so a character that lands exactly on the limit
        // is kept and one that would overflow it is dropped.
        if (width + charWidth > limit) {
            break;
        }

        width += charWidth;
        i += units;
    }

    // Everything before index i fits within the limit.
    return string.slice(0, i);
}
// Show the leading part of the sample string returned by check() in an alert dialog.
alert(check(string));
编辑1:
- 用 .test 替换了 .match。前者(.match)返回一个完整的数组,而后者(.test)只返回 true 或 false。
- 去掉了正则表达式中的 ^ 和 +。
- 用 len 替换了循环条件中的 string.length。Here's why。

答案 1 :(得分:0)
我建议采用以下几行(假设您正在尝试将字符串分解为长度<= 10个字节的片段):
// Sample input for tokenize(). Declared with `var` so that the snippet does
// not rely on an implicit global when run on its own (a bare assignment to an
// undeclared name throws a ReferenceError in strict mode / ES modules).
var string = "This這 is是 English中文 …";
/**
 * Count the bytes `text` occupies when encoded as UTF-8.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of UTF-8 bytes.
 * @throws {URIError} If `text` contains a lone surrogate (raised by encodeURI).
 */
function byteCount(text) {
    // encodeURI writes each byte either as a single ASCII character it leaves
    // untouched or as a three-character "%XX" escape, so the number of such
    // tokens equals the number of bytes.
    var encoded = encodeURI(text);
    var tokens = encoded.match(/%..|./g);
    return tokens === null ? 0 : tokens.length;
}
/**
 * Split `text` into consecutive snippets whose UTF-8 size is at most
 * `targetLen` bytes. Concatenating the snippets reproduces `text`.
 *
 * The text is walked one whole character at a time (a UTF-16 surrogate pair
 * counts as one character), so a snippet never ends in the middle of a pair.
 * A single character that is by itself larger than `targetLen` cannot be
 * split; it is emitted as its own, oversized snippet so the function always
 * terminates.
 *
 * @param {string} text - Text to split.
 * @param {number} targetLen - Maximum snippet size in bytes.
 * @returns {string[]} The snippets, in order.
 * @throws {URIError} If `text` contains a lone surrogate (raised by byteCount).
 */
function tokenize(text, targetLen) {
    var result = [];
    var current = "";
    // Running byte size of `current`. UTF-8 sizes of whole characters add up,
    // so the growing snippet never has to be re-encoded.
    var currentBytes = 0;
    var pos = 0;

    while (pos < text.length) {
        // Take one whole character: both halves of a surrogate pair together.
        var ch = text.charAt(pos);
        var code = text.charCodeAt(pos);
        if (code >= 0xD800 && code <= 0xDBFF && pos + 1 < text.length) {
            var low = text.charCodeAt(pos + 1);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                ch += text.charAt(pos + 1);
            }
        }
        var chBytes = byteCount(ch);

        // The character does not fit: close the current snippet first. The
        // non-empty guard is what prevents an endless stream of "" snippets
        // when the character alone is larger than targetLen.
        if (current !== "" && currentBytes + chBytes > targetLen) {
            result.push(current);
            current = "";
            currentBytes = 0;
        }

        current += ch;
        currentBytes += chBytes;

        // Snippet is full (or holds one oversized character): emit it now.
        if (currentBytes >= targetLen) {
            result.push(current);
            current = "";
            currentBytes = 0;
        }

        pos += ch.length;
    }

    // Whatever is left over is shorter than targetLen.
    if (current !== "") {
        result.push(current);
    }
    return result;
}
// Log the array of snippets obtained by splitting the sample string with a target length of 10.
console.log(tokenize(string, 10));