正则表达式:匹配未被其他字符包围的单词,也不在HTML标记中

时间:2017-10-19 14:55:56

标签: javascript regex

我不知道我的代码有什么问题。出于某种原因,它只匹配以字母s开头的短语,当它应该与未被其他单词字符包围的特定单词匹配时,并且不在HTML标记中。这是我的代码:



<!DOCTYPE html>
<html>
	<!--
YOUR WINNER REGEX IS...
(?![A-Za-z0-9\-])(?![^<]*>)

Your word before this
-->
	<head>
		<title>Edititing Tool</title>
		<style>
			#content {
				width: 100%;
				height: 100%;
			}
		</style>
	</head>
	<body>
		<h1>EDITING TOOL</h1>
		<h3>Paste in text and it SHOULD highlight each word in red</h3>
		<div id="content" contenteditable onkeyup="contentchange()"></div>
		<script>
			var content = document.getElementById("content");//This is what you type into
			var words, newText;
			function contentchange() {
				//Set variables
				var contentText = content.innerText.split(/\s/g);//split what's inside the div by all whitespace (tabs, spaces, etc.)
				newText = contentText;//make a new variable for that
				words = {};
				for (var i = 0; i < newText.length; i++) {
					//make it all lowercase
					newText[i] = newText[i].toLowerCase();
					//Remove double-quotes
					newText[i] = newText[i].replace('"', "");
					//Remove other punctuation except for single quotes (for contractions) and dashes
					//The reason this regex is so long is because of google drive and unicode. it really just means /[^\w\s'’-]/g
					newText[i] = newText[i].replace(/(?:[\0-\x08\x0E-\x1F!-&\(-,\.\/:-@\[-\^`\{-\x9F\xA1-\u167F\u1681-\u1FFF\u200B-\u2018\u201A-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uD7FF\uE000-\uFEFE\uFF00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g,"");
				}
				//Remove all empty strings
				newText = without(newText, "");
				//Index everything; Make a variable holding how many times the word is used for every word
				for (var i = 0; i < newText.length; i++) {
					if (words[newText[i]]) {
						words[newText[i]]++;
					} else {
						words[newText[i]] = 1;
					}
				}
				var contentHTML = decodeHtml(content.innerHTML);//Take away all &nbsp; (because HTML does that)
				//Get how many total words you have
				var totalWords = 0;
				for (var i = 0; i < Object.keys(words).length; i++) {
					totalWords += words[Object.keys(words)[i]];
				}
				
				for (var i = 0; i < Object.keys(words).length; i++) {
					var currentword = Object.keys(words)[i];
					contentHTML = contentHTML.replace(new RegExp("(^|\s)("+currentword+")(?![A-Za-z\-])(?![^<]*>)", "ig"), function myFunction(x){return "<span style='color: red'>" + x + "</span>"; console.log("BOOBS LALALALA" + x);});
					console.log(currentword);
					console.log(words);
					console.log(i);
				}
				content.innerHTML = contentHTML;
			}
			//Copied from stackoverflow, removes a certain string
			function without(array, what) {
				return array.filter(function(element) { 
					return element !== what;
				});
			}
			//Change "September" to "[Ss][Ee][Pp][Tt][Ee][Mm][Bb][Ee][Rr]"
			function anycaseRegex(string) {
				var returnVal = "";
				for (var j = 0; j < string.length; j++) {
					returnVal += "[" + string.substring(j, j+1).toUpperCase() + string.substring(j, j+1).toLowerCase() + "]";
				}
				return returnVal;
			}
			//Because HTML does that
			function decodeHtml(html) {
				var txt = document.createElement("textarea");
				txt.innerHTML = html;
				return txt.value;
			}
			//PHP REGEX: (?<![A-Za-z0-9\-])[Hh][Ee][Ll][Ll][Oo](?![A-Za-z0-9\-])(?![^<]*>)
		</script>
	</body>
</html>
&#13;
&#13;
&#13;

以下是我使用的几个正则表达式:

  • /[Hh][Ee][Ll][Ll][Oo](?![A-Za-z\-])(?![^<]*>)/g - 匹配hello和ahello,当它只匹配hello

0 个答案:

没有答案