每个起始字母只匹配一个单词

时间:2016-04-04 15:33:15

标签: javascript regex

您可以在此处查看我希望如何过滤字词:

// Demo: keep the first word found for each starting letter (case-insensitive),
// listed in order of first appearance.
var string = 'Take all first words for each letter... this is a test';
var first_letters = {}; // lower-cased first letter -> first word seen with it
var result = [];        // the kept words, in order of first appearance

// match() returns null when the text contains no word characters.
var words = string.match(/\w+/g) || [];
words.forEach(function (word) {
    var first = word[0].toLowerCase();
    // Build the ordered result here rather than enumerating first_letters
    // afterwards: for...in visits integer-like keys such as "1" before all
    // other keys, which would break first-appearance order.
    if (!Object.prototype.hasOwnProperty.call(first_letters, first)) {
        first_letters[first] = word;
        result.push(word);
    }
});

// Render only when a DOM is available; the page supplies <pre id=O>.
if (typeof document !== 'undefined') {
    document.getElementById('O').textContent = result.join(',');
}
<pre id=O>

我正在尝试使用正则表达式获取上面显示的数组result,我开始尝试首先获取首字母:

// match() is a String method: run the regexp against the text itself.
// `words` is the array returned by the earlier match() call and has no match().
result = string.match(/\b(?!\1)(\w)/gi);

(?!\1)并没有像我预期的那样过滤掉重复的首字母。您可以在这里看到实际效果。

欢迎任何帮助,谢谢!

3 个答案:

答案 0 :(得分:1)

This regexp will find unique first letters only:

r = /\b(\w)(?!.*\b\1)/gi;

This regexp says:

Find the first letter after a word break, but only if that same letter does not occur immediately after a word break later in the string (negative lookahead).

string = 'Take all first words for each letter... this is a test';

> string.match(r)
< ["w", "f", "e", "l", "i", "a", "t"]

To find one word per unique first letter:

r = /\b((\w)\w*)(?!.*\b\2)/gi

> string.match(r)
< ["words", "for", "each", "letter", "is", "a", "test"]

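As @karthik manchala mentioned in a comment, because this regexp relies on a lookahead, it can only find the last word starting with each letter (JavaScript regular expressions had no lookbehind when this was written). To find the first word starting with each letter, reverse the order of the words in the string, match, and then reverse the matches back: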

> string . split(' ') . reverse() . join(' ') . match(r) . reverse()
< ["Take", "all", "first", "words", "each", "letter", "is"]

Your initial regexp

/\b(?!\1)(\w)/gi

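did not work because \1 must refer to a capturing group that has already been matched, and here it appears before group 1. In JavaScript, a backreference to a group that has not captured anything yet matches the empty string, so the negative lookahead (?!\1) always fails and the regexp never matches.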

答案 1 :(得分:0)

我做到了!

// Sample sentence passed through the helper pipeline in this snippet.
var string = 'Take all first words for each letter... this is a test';

/**
 * Adds Array.prototype.contains(obj): reports whether the array holds `obj`.
 *
 * Installed with Object.defineProperty so the method is non-enumerable and
 * does not show up in for...in loops over arrays. Elements are compared with
 * strict equality, so values of different types never match (e.g. 0 and '').
 *
 * @param {*} obj - Value to look for.
 * @returns {boolean} true if some element is strictly equal to `obj`.
 */
Object.defineProperty(Array.prototype, 'contains', {
    value: function (obj) {
        var i = this.length;
        while (i--) {
            if (this[i] === obj) {
                return true;
            }
        }
        return false;
    },
    writable: true,
    configurable: true,
    enumerable: false
});

/**
 * Strips a fixed set of punctuation characters from a string.
 *
 * Removed characters: & / \ # , + ( ) $ ~ % . ' " : * ? < > { }
 * Anything not in that list (letters, digits, spaces, '-', '!', ...) is kept.
 *
 * @param {string} string - Text to clean.
 * @returns {string} The text with every listed character deleted.
 */
function removeSpecials(string) {
  var specials = /[&\/\\#,+()$~%.'":*?<>{}]/g;
  // Cutting the text at every special character and gluing the pieces back
  // together deletes exactly those characters.
  var kept = string.split(specials);
  return kept.join('');
}

/**
 * Lower-cases a string by delegating to the value's own toLowerCase() method.
 *
 * @param {string} string - Text to convert.
 * @returns {string} The lower-cased text.
 */
function toLowerCase(string) {
  var lowered = string.toLowerCase();
  return lowered;
}

/**
 * Splits text into words on whitespace.
 *
 * A run of whitespace counts as one separator and empty pieces are dropped,
 * so doubled spaces or leading/trailing whitespace never produce empty words.
 *
 * @param {string} string - Text to split.
 * @returns {string[]} The non-empty words, in their original order.
 */
function getWords(string) {
  return string.split(/\s+/).filter(function (word) {
    return word.length > 0;
  });
}

/**
 * Collects the distinct first characters of the given words, in order of
 * first appearance.
 *
 * Uses the standard Array.prototype.indexOf for the membership check, so it
 * does not depend on any extension of Array.prototype.
 *
 * @param {string[]} array - Words to inspect.
 * @returns {string[]} Each first character once, in first-seen order.
 */
function getChar(array) {
  return array.reduce(function (chars, item) {
    var first = item[0];
    // An empty string has no first character; skip it instead of recording
    // `undefined` in the result.
    if (first !== undefined && chars.indexOf(first) === -1) {
      chars.push(first);
    }
    return chars;
  }, []);
}

// Pipeline, innermost call first: strip punctuation -> lower-case ->
// split into words -> collect the unique first letters, then log them.
console.log(getChar(getWords(toLowerCase(removeSpecials(string)))))

Codepen:http://codepen.io/anon/pen/QNOypw?editors=0010

答案 2 :(得分:0)

Please try:

var string = 'Take all first words for each letter... this is a test';

// Reverse the word order: the regexp below keeps the LAST word for each
// starting letter, which in the reversed text is the FIRST word of the
// original sentence.
var reversed = string.split(" ").reverse().join(" ");

// Group 1 is the whole word, group 2 its first character. The negative
// lookahead rejects a word when a later word starts with the same character
// (case-insensitively, because of the i flag). \w* rather than [a-z]* keeps
// digits and underscores inside the matched word instead of truncating it.
var re = /\b((\w)\w*)(?!.*?\b\2)/gi;

var words = [];
var m;
while ((m = re.exec(reversed)) !== null) {
    words.push(m[0]);
}

// Restore the original word order before printing.
var output = words.reverse().join(",");
if (typeof document !== 'undefined') {
    document.write(output);
} else {
    // No DOM (e.g. Node.js): print to the console instead.
    console.log(output);
}