我不知道我的代码有什么问题。出于某种原因,它只匹配以字母s开头的短语,当它应该与未被其他单词字符包围的特定单词匹配时,并且不在HTML标记中。这是我的代码:
<!DOCTYPE html>
<html>
<!--
YOUR WINNER REGEX IS...
(?![A-Za-z0-9\-])(?![^<]*>)
Your word before this
-->
<head>
<title>Edititing Tool</title>
<style>
#content {
width: 100%;
height: 100%;
}
</style>
</head>
<body>
<h1>EDITING TOOL</h1>
<h3>Paste in text and it SHOULD highlight each word in red</h3>
<div id="content" contenteditable onkeyup="contentchange()"></div>
<script>
var content = document.getElementById("content");//This is what you type into
var words, newText;
function contentchange() {
//Set variables
var contentText = content.innerText.split(/\s/g);//split what's inside the div by all whitespace (tabs, spaces, etc.)
newText = contentText;//make a new variable for that
words = {};
for (var i = 0; i < newText.length; i++) {
//make it all lowercase
newText[i] = newText[i].toLowerCase();
//Remove double-quotes
newText[i] = newText[i].replace('"', "");
//Remove other punctuation except for single quotes (for contractions) and dashes
//The reason this regex is so long is because of google drive and unicode. it really just means /[^\w\s'’-]/g
newText[i] = newText[i].replace(/(?:[\0-\x08\x0E-\x1F!-&\(-,\.\/:-@\[-\^`\{-\x9F\xA1-\u167F\u1681-\u1FFF\u200B-\u2018\u201A-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uD7FF\uE000-\uFEFE\uFF00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g,"");
}
//Remove all empty strings
newText = without(newText, "");
//Index everything; Make a variable holding how many times the word is used for every word
for (var i = 0; i < newText.length; i++) {
if (words[newText[i]]) {
words[newText[i]]++;
} else {
words[newText[i]] = 1;
}
}
var contentHTML = decodeHtml(content.innerHTML);//Take away all (because HTML does that)
//Get how many total words you have
var totalWords = 0;
for (var i = 0; i < Object.keys(words).length; i++) {
totalWords += words[Object.keys(words)[i]];
}
for (var i = 0; i < Object.keys(words).length; i++) {
var currentword = Object.keys(words)[i];
contentHTML = contentHTML.replace(new RegExp("(^|\s)("+currentword+")(?![A-Za-z\-])(?![^<]*>)", "ig"), function myFunction(x){return "<span style='color: red'>" + x + "</span>"; console.log("BOOBS LALALALA" + x);});
console.log(currentword);
console.log(words);
console.log(i);
}
content.innerHTML = contentHTML;
}
//Copied from stackoverflow, removes a certain string
function without(array, what) {
return array.filter(function(element) {
return element !== what;
});
}
//Change "September" to "[Ss][Ee][Pp][Tt][Ee][Mm][Bb][Ee][Rr]"
function anycaseRegex(string) {
var returnVal = "";
for (var j = 0; j < string.length; j++) {
returnVal += "[" + string.substring(j, j+1).toUpperCase() + string.substring(j, j+1).toLowerCase() + "]";
}
return returnVal;
}
//Because HTML does that
function decodeHtml(html) {
var txt = document.createElement("textarea");
txt.innerHTML = html;
return txt.value;
}
//PHP REGEX: (?<![A-Za-z0-9\-])[Hh][Ee][Ll][Ll][Oo](?![A-Za-z0-9\-])(?![^<]*>)
</script>
</body>
</html>
&#13;
以下是我使用的几个正则表达式:
/[Hh][Ee][Ll][Ll][Oo](?![A-Za-z\-])(?![^<]*>)/g
- 匹配hello和ahello,当它只匹配hello