我正在尝试从 Twitter 推文中识别艺术家。我有一条推文，先使用 Node 的 natural 库对其进行分词，再用 Levenshtein 距离把每个词元（token）与一个艺术家数组进行比较，以便把词元与艺术家匹配起来。我的问题是：我不知道该如何编写这段逻辑——即实际把每个词元与艺术家列表逐一比较，并找出推文所指的那位艺术家。
下面的示例应当识别出艺术家 Clean Bandit。
// Artists performing on Saturday; every tweet token is scored against each name.
const saturday = ["Kanye West", "Pharrell Williams", "Paloma Faith", "Burt Bacharach", "Clean Bandit"];
const tweet = "My queen @graciechatto about to go on The Other Stage at Glastonbury #cleanbandit #glastonbury…";

// Declared explicitly: a bare `tokenizer = ...` creates an implicit global and
// throws a ReferenceError in strict mode / ES modules.
const tokenizer = new natural.WordTokenizer();
// Split the tweet into word tokens.
const tweetTokenised = tokenizer.tokenize(tweet);

/**
 * Lower-cases a string and removes all whitespace so that a hashtag-style
 * token such as "cleanbandit" is comparable with the name "Clean Bandit".
 *
 * @param {string} text - Raw token or artist name.
 * @returns {string} The normalised form used for distance scoring.
 */
const normalise = (text) => text.replace(/\s+/g, '').toLowerCase();

// Artist names do not change per token, so normalise them once up front.
// Both sides must be normalised the same way: stripping whitespace only from
// the token leaves the space inside the artist name, so a multi-word artist
// could never reach distance 0.
const normalisedArtists = saturday.map(normalise);

// Score every token of the tweet against every artist.
for (const token of tweetTokenised) {
  console.log(`${token}--------------------------`);
  const normalisedToken = normalise(token);

  // Distance of the current token to each artist, in `saturday` order.
  // Rebuilt for every token; nothing in this snippet reads it after the
  // inner loop, so code that picks the best match belongs right after it.
  const temp = [];
  saturday.forEach((artist, index) => {
    console.log(`${token}--->${artist}`); // testing purposes
    const score = natural.LevenshteinDistance(normalisedToken, normalisedArtists[index]);
    temp.push({
      key: artist,
      value: score,
    });
  });
}
答案 0 :(得分:0)
// Picks the closest candidate out of `temp` and records it when it is close enough.
// NOTE(review): presumably meant to sit inside the per-token loop, right after
// `temp` has been filled — the closing braces below match scopes opened
// outside this snippet; confirm placement.
// Order the candidates by ascending `value` so the smallest comes first.
temp.sort(function(a, b) {
// parseFloat coerces each `value` to a number before subtracting.
return parseFloat(a.value) - parseFloat(b.value);
});
//console.log(temp);
// Remove and keep the first entry, i.e. the one with the smallest `value`
// now that the array is sorted (this also shortens `temp` by one).
// NOTE(review): `lowest` is not declared in this snippet; unless it is declared
// elsewhere this creates an implicit global (an error in strict mode) — confirm.
lowest = temp.shift();
console.log(lowest);
// Accept the candidate only when its `value` is below 2.
// NOTE(review): `distances` is not declared in this snippet — presumably an
// array created before the loop; verify.
if(lowest.value < 2){
distances.push(lowest);
}
}
console.log("printing distances");
// NOTE(review): if nothing was accepted and `distances` is empty,
// `distances[0]` is undefined and this line throws a TypeError — TODO guard.
console.log(distances[0].key); // `key` of the first accepted entry
}