根据单词数组替换句子中所有出现的特定单词

时间:2017-09-14 12:43:53

标签: javascript jquery regex

我有一个像这样的数组

// Words that should be removed from a sentence.
var excludeWords = [
  "A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY",
  "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM",
  "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT",
  "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE",
  "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL",
  "WITH", "WHAT", "WHEN", "WHY"
];

我想用一个函数或其他快捷的方法,从句子中删除所有出现的上述单词。如何在不使用任何循环的情况下快速实现?

我目前是这样做的:



// Words to strip from the sentence; the regex below uses the "i" flag, so their case does not matter here.
var excludeWords = ["A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY", "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM", "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT", "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE", "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL", "WITH", "WHAT", "WHEN", "WHY"];
// Sample input text.
var sentence = "The first solution does not work for any UTF-8 alphaben. (It will cut text such as Привіт). I have managed to create function which do not use RegExp and use good UTF-8 support in JavaScript engine. The idea is simple if symbol is equal in uppercase and lowercase it is special character. The only exception is made for whitespace.";

// Run one global, case-insensitive replace per word (requires jQuery for `$`).
$(excludeWords).each(function(index, item) {
  // The pattern has no word-boundary anchors, so it also matches inside longer
  // words (e.g. "SO" and "ON" inside "solution").
  var s = new RegExp(item, "gi");
  sentence = sentence.replace(s, "");
});
// Show the resulting text.
alert(sentence);

<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
&#13;
&#13;
&#13;

但是有没有比循环更好的解决方案?

根据评论更多细节..

永远不应该删除部分单词。它应该只替换一个完整的单词

6 个答案:

答案 0 :(得分:3)

你快到了。诀窍是将所有单词组合成一个大的正则表达式,只进行一次替换。正则中的 \b(在字符串字面量里要写成 \\b)表示单词边界,它确保替换的是整个单词,而不仅仅是子串。

var excludeWords = ["A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY", "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM", "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT", "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE", "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL", "WITH", "WHAT", "WHEN", "WHY"];

var sentence = "The first solution does not work for any UTF-8 alphaben. (It will cut text such as Привіт). I have managed to create function which do not use RegExp and use good UTF-8 support in JavaScript engine. The idea is simple if symbol is equal in uppercase and lowercase it is special character. The only exception is made for whitespace.";

/**
 * Build a single global, case-insensitive regex that matches any of the given
 * words as a whole word.
 *
 * @param {string[]} words - Words to match literally.
 * @returns {RegExp} Regex with the "g" and "i" flags set.
 */
function buildExcludeRegExp(words) {
  const alternatives = [...words]
    // Longest first: alternation picks the first alternative that matches, so
    // "DIDN'T" must be tried before its prefix "DID" (otherwise "didn't" -> "n't").
    .sort((a, b) => b.length - a.length)
    // Escape regex metacharacters so every word is matched literally.
    .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  // With no words, use a pattern that can never match instead of an empty group.
  return new RegExp(alternatives ? `\\b(?:${alternatives})\\b` : '(?!)', 'gi');
}

var re = buildExcludeRegExp(excludeWords);
sentence = sentence.replace(re, "");
console.log(sentence);

请注意,这最终会在字符串中创建连续的空格。可以使用replace(/\s+/g, ' ').trim()轻松删除这些内容。

答案 1 :(得分:1)

您可以添加单词边界\b以仅获取要替换的单词。

&#13;
&#13;
var excludeWords = ["A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY", "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM", "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT", "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE", "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL", "WITH", "WHAT", "WHEN", "WHY"];
var sentence = "The first solution does not work for any UTF-8 alphaben. (It will cut text such as Привіт). I have managed to create function which do not use RegExp and use good UTF-8 support in JavaScript engine. The idea is simple if symbol is equal in uppercase and lowercase it is special character. The only exception is made for whitespace.";

/**
 * Remove every whole-word, case-insensitive occurrence of each word from text.
 *
 * @param {string} text - Input text.
 * @param {string[]} words - Words to remove; matched literally.
 * @returns {string} Text with the words removed (surrounding spaces are kept).
 */
function stripWords(text, words) {
  return words
    .slice()
    // Longest first: removing "DID" before "DIDN'T" would turn "didn't" into "n't".
    .sort(function (a, b) { return b.length - a.length; })
    .reduce(function (result, word) {
      // Escape regex metacharacters so the word is matched literally.
      var literal = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return result.replace(new RegExp('\\b' + literal + '\\b', 'gi'), '');
    }, text);
}

sentence = stripWords(sentence, excludeWords);

console.log(sentence);
&#13;
&#13;
&#13;

答案 2 :(得分:1)

如果我们根据单词边界进行拆分,效果会更好。

// Build the lookup once (not once per token), upper-cased so that the
// comparison below is case-insensitive.
const excludeSet = new Set(excludeWords.map((word) => word.toUpperCase()));

sentence = sentence
  // Splitting on a capturing group keeps both words and separators in the
  // result, and keeps contractions such as "don't" together as one token.
  .split(/(\w+(?:'\w+)*)/)
  .filter((token) => !excludeSet.has(token.toUpperCase()))
  .join('')
  // The "g" flag collapses every run of whitespace, not only the first one.
  .replace(/\s\s+/g, ' ')
  .trim();

答案 3 :(得分:0)

你可以按空格拆分句子,然后在 filter 中检查每个单词是否在数组中。

&#13;
&#13;
var excludeWords = ["A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY", "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM", "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT", "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE", "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL", "WITH", "WHAT", "WHEN", "WHY"];

var sentence = "The first solution does not work for any UTF-8 alphaben. (It will cut text such as Привіт). I have managed to create function which do not use RegExp and use good UTF-8 support in JavaScript engine. The idea is simple if symbol is equal in uppercase and lowercase it is special character. The only exception is made for whitespace.";

/**
 * Drop every space-separated token that equals one of the words, ignoring case.
 * Tokens with attached punctuation (e.g. "(It") are compared as-is and kept.
 *
 * @param {string} text - Input text.
 * @param {string[]} words - Words to drop.
 * @returns {string} Remaining tokens joined by single spaces.
 */
function removeExcluded(text, words) {
  // Normalize the list itself: entries such as "One" are not upper-case, so
  // comparing them against token.toUpperCase() would never match.
  const lookup = new Set(words.map((word) => word.toUpperCase()));
  return text
    .split(" ")
    .filter((token) => !lookup.has(token.toUpperCase()))
    .join(" ");
}

var res = removeExcluded(sentence, excludeWords);

console.log(res)
&#13;
&#13;
&#13;

如果您只是使用正则表达式替换字符串,则会遇到一些问题,例如 solution 最终会变成 luti,因为 so 和 on 都在数组中,所以你需要比较完整的单词。

答案 4 :(得分:0)

您可以创建一个数组值的字符串,然后在其上应用正则表达式并再次将其转换为数组。

var excludeWords = ["A", "ABOUT", "ABOVE", "ACROSS", "ALL", "ALONG", "AM", "AN", "AND", "ANY", "ASK", "AT", "AWAY", "CAN", "DID", "DIDN'T", "DO", "DON'T", "FOR", "FROM", "HAD", "HAS", "HER", "HIS", "IN", "INTO", "IS", "IT", "NONE", "NOT", "OF", "ON", "One", "OUT", "SO", "SOME", "THAT", "THE", "THEIR", "THERE", "THEY", "THESE", "THIS", "TO", "TWIT", "WAS", "WERE", "WEREN'T", "WHICH", "WILL", "WITH", "WHAT", "WHEN", "WHY"];

// Join the words into a regex alternation. A space-joined string would only
// match the whole list verbatim, so nothing would ever be removed.
// Longest first so that "DIDN'T" is tried before its prefix "DID".
var array_to_string = excludeWords
  .slice()
  .sort(function (a, b) { return b.length - a.length; })
  .join('|');
// \b on both sides restricts matches to whole words.
var s = new RegExp('\\b(?:' + array_to_string + ')\\b', "gi");
// `sentence` is not defined in this snippet; it must already hold the input text.
var sentence = sentence.replace(s, "");
// The remaining text split on spaces (may contain empty strings where words were removed).
var excludewords_updated = sentence.split(' ');

所以这就是你如何在没有循环的情况下做到的。

答案 5 :(得分:-1)

您可以使用preg_replace_all("~[\"](.*)[\"]~isuU", $data, $found)(注意:这是 PHP 写法,而且 PHP 中并没有 preg_replace_all 这个函数,这种参数形式对应的是 preg_match_all;它不适用于本问题中的 JavaScript。)