不要计算字符串中的br和nbsp

时间:2018-05-22 07:36:52

标签: javascript

我有一个包含 <br> 和 &nbsp; 标签的字符串。我需要把显示的字符数限制为 100,也就是说只应显示 100 个字符;但由于每个 <br> 会占用 4 个字符,我得到的长度是 108 而不是 100。下面的输出只用一行代码就可以得到:

data.substr(0,100) 

output =>

  

制作一本样本书。

它不仅存在了五个世纪,而且还存在了飞跃

但其中包含 <br> 标签。我不想删除 <br> 和 &nbsp;,只是不希望把它们计入字符数。

预期输出=>

  

制作一本样本书。

它不仅存在了五个世纪,而且还存在于跳跃中。

我已经写了下面这段代码,但它得到的长度是 108:

// Sample markup: plain text interleaved with <br> tags and a single &nbsp; entity.
var data = `it to make a type specimen book. <br><br>It has survived not only five centuries, but also the leap into electronic typesetting, <br><br>remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages<br><br>, and more recently with desktop publishing software like Aldus PageMaker including&nbsp; versions of Lorem Ipsum.`;

/**
 * Returns the trailing words of a space-separated string.
 *
 * At most the last five words are kept, and the very first word is never
 * part of the result (so a one-word input yields an empty array).
 *
 * @param {string} subData - Text to split on single spaces.
 * @returns {string[]} The trailing words.
 */
function test(subData) {
	const words = subData.split(" ");
	const firstKept = Math.max(words.length - 5, 1);
	return words.slice(firstKept);
}

// Turn every &nbsp; entity into an ordinary space.
var docDesc = data.replace(/[&]nbsp[;]/gi, " ");
// Drop every <br ...> tag.
var stringData = docDesc.replace(/[<]br[^>]*[>]/gi, "");
// First 100 characters of the tag-free text.
var subData = stringData.slice(0, 100);

// Locate the tail of the tag-free excerpt inside the ORIGINAL markup and cut
// the original there; the tags in front of that tail are still part of the
// result, which is why the final length exceeds 100.
var lastData = test(subData);
var lastString = lastData.join(" ");
var finalData = data.substring(0, data.indexOf(lastString)).concat(lastString);

console.log(finalData);
console.log(finalData.length);

2 个答案:

答案 0 :(得分:1)

最简单的做法是编写一个类似 substring 的函数,但在计数时不把某些“单词”算进去,就像这样:

/**
 * Works like `str.substring(0, n)`, except that occurrences of the given
 * tokens are copied into the result without counting toward the limit.
 *
 * Tokens are tried in iteration order at every position; the first one that
 * matches is skipped over as a whole. Matching is case-sensitive.
 *
 * @param {string} str - Text to truncate.
 * @param {Iterable<string>} excludes - Tokens (e.g. "<br>", "&nbsp;") that are
 *     kept in the output but not counted. Empty tokens are ignored.
 * @param {number} length - Maximum number of counted characters to keep.
 * @returns {string} The prefix of `str` holding at most `length` counted
 *     characters plus any excluded tokens encountered before the cut.
 */
function substringWithExcludes(str, excludes, length) {
    let idx = 0; // position in str, excluded tokens included
    let len = 0; // number of counted characters consumed so far

    while (idx < str.length && len < length) {
        let match = false;

        for (const exclude of excludes) {
            // An empty token "matches" everywhere without advancing idx,
            // which would make this loop spin forever, so skip it.
            if (exclude.length > 0 && str.startsWith(exclude, idx)) {
                idx += exclude.length;
                match = true;
                break;
            }
        }

        if (!match) {
            // Ordinary character: it counts toward the limit.
            len++;
            idx++;
        }
    }

    return str.substring(0, idx);
}

调用方式如下:

// Sample input: text interleaved with <br> tags, CR/LF line breaks and one &nbsp; entity.
const data = `it to make a type specimen book. <br>\r\n<br>\r\nIt has survived not only five centuries, but also the leap into electronic typesetting, <br>\r\n<br>\r\nremaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages<br>\r\n<br>\r\n, and more recently with desktop publishing software like Aldus PageMaker including&nbsp; versions of Lorem Ipsum.`;

// Request 100 characters, with "\r", "\n", "&nbsp;" and "<br>" passed as the tokens to exclude.
const result = substringWithExcludes(data, ["\r", "\n", "&nbsp;", "<br>"], 100);

len 记录的是不计 <br> 等排除项时的字符串长度,而 idx 则把这些被匹配到的排除项也计算在内。对于每个排除项,先检查它是否在当前位置匹配;如果匹配,就把它的长度加到 idx 上。如果没有任何排除项匹配,则当前字符是需要计入的有效字符(同时增加 len 和 idx)。

当 length 很大且 excludes 很多时,它可能会比较慢,但能完成任务。必要时可以增加对大小写不敏感以及 <br /> 写法的匹配;也可以在需要时用正则表达式匹配来替换 startsWith。

答案 1 :(得分:0)

@DILEEP ,请查看以下代码。

  

如果您在理解代码时遇到任何问题,也请发表评论,我会尽力回答您。

/**
 * Returns a slice of `data` containing a given number of visible characters,
 * where `<br>` tags and whitespace do not count toward that number.
 *
 * Each literal `<br>` is first replaced by a single space. The result then
 * starts at offset `start` of that flattened text and runs up to and
 * including the (`end - start`)-th non-whitespace character. If the text
 * runs out first, everything from `start` onward is returned.
 *
 * Only the exact, lower-case `<br>` is recognised; entities such as `&nbsp;`
 * are not treated specially and count as ordinary characters.
 *
 * @param {string} data - Markup to truncate.
 * @param {number} [start=0] - Offset into the flattened text where the result
 *     begins (negative values are treated as 0).
 * @param {number} [end=100] - `end - start` is the number of non-whitespace
 *     characters to include (100 with the defaults).
 * @returns {string} The truncated text, with `<br>` tags turned into spaces.
 */
function get100charsNoCountBrsAndSpaces(data, start=0, end=100) {
	const whitespace = /\s/; // no g flag, so test() keeps no state between calls
	const flattened = data.split('<br>').join(' ');
	const from = Math.max(0, start);
	const wanted = end - from;

	let counted = 0;
	let cursor = from;

	// Single pass: advance until enough non-whitespace characters were seen
	// or the text is exhausted. This always terminates, including for inputs
	// that are too short or contain no whitespace at all.
	while (cursor < flattened.length && counted < wanted) {
		if (!whitespace.test(flattened[cursor])) {
			counted++;
		}
		cursor++;
	}

	return flattened.slice(from, cursor);
}

/*
	Breaks a string (data) apart at every literal '<br>' and then breaks each
	resulting piece apart at every single space.
	Returns a 2d array: one inner array of words per '<br>'-separated piece
	(an empty piece yields [""]), so the original string can be rebuilt later.
*/
function stringToArrWithNoBrsAndSpaces(data)  {
	const segments = data.split('<br>');
	return segments.map((segment) => segment.split(' '));
}

/*
	Rebuilds a string from a 2d array of words:
	the words of each inner array are joined with a single space, and the
	resulting pieces are joined with '<br>'.
	Has no side effects (the leftover debug logging was removed).
*/
function arrWithNoBrsAndSpacesToString(array)  {
	return array
		.map((words) => words.join(' '))
		.join('<br>');
}

// ********* Testing: stringToArrWithNoBrsAndSpaces()
// Sample markup: plain text interleaved with <br> tags and one &nbsp; entity.
var inputStr = `it to make a type specimen book. <br><br>It has survived not only five centuries, but also the leap into electronic typesetting, <br><br>remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages<br><br>, and more recently with desktop publishing software like Aldus PageMaker including&nbsp; versions of Lorem Ipsum.`
// Split into a 2d array of words and print it.
var arr = stringToArrWithNoBrsAndSpaces(inputStr)
console.log(arr)

// Print a blank separator between the two outputs.
console.log('\n')

// ********* Testing: arrWithNoBrsAndSpacesToString()
// Rebuild a string from the 2d array and print it.
var str = arrWithNoBrsAndSpacesToString(arr)
console.log(str)

// ********* Testing: get100charsNoCountBrsAndSpaces(inputStr)
// Truncate with the default limits, then report the total length, the number
// of whitespace characters, and the number of characters left once spaces are removed.
var finalData = get100charsNoCountBrsAndSpaces(inputStr)
console.log('finalData:', finalData)
console.log('Length:', finalData.length) // 122 (100 chars + 22 spaces), see the next line
console.log('Number of spaces:', finalData.match(/\s/ig).length)
console.log('Number of chars :', finalData.split(' ').join('').length) // 100

/* ...** Output: stringToArrsWithNoBrsAndSpaces(inputStr) **...

[
    [
        "it",
        "to",
        "make",
        "a",
        "type",
        "specimen",
        "book.",
        ""
    ],
    [
        ""
    ],
    [
        "It",
        "has",
        "survived",
        "not",
        "only",
        "five",
        "centuries,",
        "but",
        "also",
        "the",
        "leap",
        "into",
        "electronic",
        "typesetting,",
        ""
    ],
    [
        ""
    ],
    [
        "remaining",
        "essentially",
        "unchanged.",
        "It",
        "was",
        "popularised",
        "in",
        "the",
        "1960s",
        "with",
        "the",
        "release",
        "of",
        "Letraset",
        "sheets",
        "containing",
        "Lorem",
        "Ipsum",
        "passages"
    ],
    [
        ""
    ],
    [
        ",",
        "and",
        "more",
        "recently",
        "with",
        "desktop",
        "publishing",
        "software",
        "like",
        "Aldus",
        "PageMaker",
        "including&nbsp;",
        "versions",
        "of",
        "Lorem",
        "Ipsum."
    ]
]
*/


/* ...** Output: arrWithNoBrsAndSpacesToString(arr) **...

it to make a type specimen book. <br><br>It has survived not only five centuries, but also the leap into electronic typesetting, <br><br>remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages<br><br>, and more recently with desktop publishing software like Aldus PageMaker including&nbsp; versions of Lorem Ipsum.

*/


/* ...** Output: get100charsNoCountBrsAndSpaces(inputStr) **...

it to make a type specimen book. <br><br>It has survived not only five centuries, but also the leap into electronic typesetting, <br><br>remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages<br><br>, and more recently with desktop publishing software like Aldus PageMaker including&nbsp; versions of Lorem Ipsum.
finalData: it to make a type specimen book.   It has survived not only five centuries, but also the leap into electronic typesetting,
Length: 122
Number of spaces: 22
Number of chars : 100

*/

感谢。