我有一个字符串列表,我想检查每个字符串是否包含特定的单词;如果包含,就把该字符串拆分成单个单词,并将这些单词添加到关联数组中。
myString
我想遍历myString
,如果它在newWord = {@Arsenal:[{RT:1},{Waiting:1},{for:1},{the:1},{international:1}]}
// Asker's attempt: for every entry of wordtoFind, scan every string in myString.
// Note that z and i are assigned without being declared in this snippet.
for(z=0; z <wordtoFind.length; z++){
for ( i = 0 ; i < myString.length; i++) {
// Matches when the string contains the entry's `key` property as a substring.
if (myString[i].indexOf(wordtoFind[z].key) > -1){
// The array returned by split() is neither stored nor used.
myString[i].split(" ")
}
}
}
中,则将 cns2 <- function(x,y){
  # For every row of x, pick the row of y with the smallest
  # distVincentyEllipsoid distance (computed through distm) and return those
  # rows of y, one per row of x, in the row order of x.
  # NOTE(review): distm and distVincentyEllipsoid are not defined in this
  # snippet - presumably they come from the geosphere package; confirm.

  # Pre-allocate one slot per row of x so the vector is not grown in the loop.
  b <- numeric(nrow(x))
  # seq_len() gives an empty sequence when x has no rows, whereas 1:nrow(x)
  # would iterate over c(1, 0).
  for(i in seq_len(nrow(x))){
    a <- distm(x=x[i,],
               y=y,
               fun = distVincentyEllipsoid)
    # Index of the closest row of y for the i-th row of x.
    b[i] = which.min(a)
  }
  return(y[b,])
}
拆分为单个字并创建一个像
library(microbenchmark)
# Benchmark both implementations on the same inputs.
# NOTE(review): cns is not defined in this snippet - presumably the earlier
# implementation that cns2 is being compared against; confirm.
microbenchmark(cns(x,y), ###where x is your first dataframe, y the second
cns2(x,y)
)
答案 0 :(得分:2)
我认为类似下面的代码可以解决问题,它还会统计句子中每个单词出现的次数。需要注意的是,JavaScript没有像PHP那样的关联数组,它只有对象(objects)和按数字索引的数组(arrays):
// Sample input: the sentences to scan and the words to look for.
var myString = ['RT @Arsenal: Waiting for the international', 'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish'];
var wordtoFind = ['@Arsenal'];
// Maps each search word to an object of the form { otherWord: occurrenceCount }.
var result = {};
// Trailing punctuation is stripped from a token before it is compared with the
// search word, so that "@Arsenal:" is recognised as the search word "@Arsenal"
// and left out of the counts.
var trailingPunctuation = /[:;,.!?]+$/;
for (var i = 0, l = wordtoFind.length; i < l; i++) {
  for (var ii = 0, ll = myString.length; ii < ll; ii++) {
    // Only sentences that mention the search word are tallied.
    if (myString[ii].indexOf(wordtoFind[i]) !== -1) {
      var split = myString[ii].split(' ');
      // Continue from the counts of earlier matching sentences instead of
      // replacing them, so no sentence's words are lost.
      var resultpart = Object.prototype.hasOwnProperty.call(result, wordtoFind[i])
        ? result[wordtoFind[i]]
        : {};
      for (var iii = 0, lll = split.length; iii < lll; iii++) {
        if (split[iii].replace(trailingPunctuation, '') !== wordtoFind[i]) {
          // Called through Object.prototype so that a sentence containing the
          // word "hasOwnProperty" cannot shadow the method.
          if (!Object.prototype.hasOwnProperty.call(resultpart, split[iii])) {
            resultpart[split[iii]] = 0;
          }
          resultpart[split[iii]]++;
        }
      }
      result[wordtoFind[i]] = resultpart;
    }
  }
}
console.log(result);
//{"@Arsenal":{"RT":1,"Waiting":1,"for":1,"the":1,"international":1}}
答案 1 :(得分:1)
此方法使用forEach函数和回调。containsWord函数暂时保留了for循环,以减少回调的使用,当然这也可以改写。
// Sample sentences that will be searched.
var myString = [
'RT @Arsenal: Waiting for the international',
'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish',
'@Arsenal: one two three four two four three four three four'
];
// Words to look for in the sentences above.
var wordtoFind = ['@Arsenal'];
/**
 * Normalises a word before the equality check by removing the first ':' it
 * contains, e.g. '@Arsenal:' becomes '@Arsenal'. Later colons are kept.
 *
 * @param {string} word - a single token of a sentence.
 * @returns {string} the token without its first colon.
 */
function preprocessor(word) {
  var firstColon = /:/;
  var cleaned = word.replace(firstColon, '');
  return cleaned;
}
/**
 * For every search word, tallies the words of each string in `array` that
 * contains that search word.
 *
 * A string qualifies when containsWord(haystack, needle, preprocessor) is
 * truthy; each of its space-separated words is then passed to
 * countOccurence(result, needle, word).
 *
 * @param {string[]} array - the strings to search in.
 * @param {string[]} search - the words to search for.
 * @param {function(Object)} [callback] - called exactly once with the result
 *   after all strings have been processed, including when `array` or `search`
 *   is empty.
 * @param {function(string): string} [preprocessor] - forwarded to containsWord.
 * @returns {Object} the same result object that is handed to `callback`.
 */
function findOccurences(array, search, callback, preprocessor) {
  var result = {};
  // iterate the search strings that should be matched
  search.forEach(function (needle) {
    // iterate the array of strings that should be searched in
    array.forEach(function (haystack) {
      if (!containsWord(haystack, needle, preprocessor)) {
        return;
      }
      // count every word of the matching string under the current needle
      haystack.split(' ').forEach(function (word) {
        countOccurence(result, needle, word);
      });
    });
  });
  // forEach is synchronous, so all work is finished at this point. Reporting
  // here rather than from inside the loops means the callback also fires when
  // there was nothing to iterate over.
  if (callback) {
    callback(result);
  }
  return result;
}
/**
 * Tells whether `haystack` contains `needle` as one of its space-separated
 * words.
 *
 * @param {string} haystack - the sentence to look in.
 * @param {string} needle - the word to look for (compared with ===).
 * @param {function(string): string} [preprocessor] - when given, applied to
 *   each word of the sentence before it is compared with `needle`.
 * @returns {boolean} true as soon as one word matches, false otherwise.
 */
function containsWord(haystack, needle, preprocessor) {
  var tokens = haystack.split(' ');
  // Fall back to comparing the raw token when no preprocessor was supplied.
  var normalise = preprocessor || function (token) {
    return token;
  };
  for (var idx = 0; idx < tokens.length; idx += 1) {
    if (normalise(tokens[idx]) === needle) {
      return true;
    }
  }
  return false;
}
/**
 * Increments the counter of `word` under `result[key]`, creating the per-key
 * container and the counter on first use.
 *
 * @param {Object} result - accumulator; mutated in place.
 * @param {string} key - the search word the count belongs to.
 * @param {string} word - the word whose occurrence is recorded.
 */
function countOccurence(result, key, word) {
  // Called through Object.prototype so that a counted word named
  // "hasOwnProperty" cannot shadow the method on later calls.
  var hasOwn = Object.prototype.hasOwnProperty;
  // A plain object serves as the word -> count map. An array would serialise
  // to [] with JSON.stringify and would treat the word "length" as its own
  // length property.
  if (!hasOwn.call(result, key)) {
    result[key] = {};
  }
  var entry = result[key];
  // set the count to 0 if it doesn't exist yet
  if (!hasOwn.call(entry, word)) {
    entry[word] = 0;
  }
  entry[word]++;
}
// call our function to find the occurences; `preprocessor` is passed as the
// fourth argument and the anonymous function receives the finished result
findOccurences(myString, wordtoFind, function(result) {
// do something with the result
console.log(result);
}, preprocessor);
// output (as recorded by the author):
// NOTE(review): the `[ name: value ]` notation is how an array carrying named
// properties gets displayed - presumably by Node's console.log; confirm. A
// plain-object container would be displayed with `{ }` instead.
/*
{ '@Arsenal':
[ RT: 1,
'@Arsenal:': 2,
Waiting: 1,
for: 1,
the: 1,
international: 1,
one: 1,
two: 2,
three: 3,
four: 4 ] }
*/
如果答案需要澄清,请随意提出任何问题。
我希望这符合您的需求。
答案 2 :(得分:0)
你走在正确的轨道上。您只需将拆分字符串存储到关联数组变量中。
// Collects, in order, every space-separated word of each myString entry that
// contains one of the wordtoFind entries as a substring.
// myString and wordtoFind are expected to be defined before this snippet runs.
var assocArr = [];
// The loop counters are declared explicitly: assigning to undeclared names
// would create implicit globals and throws a ReferenceError in strict mode and
// in ES modules.
for (var z = 0; z < wordtoFind.length; z++) {
  for (var i = 0; i < myString.length; i++) {
    if (myString[i].indexOf(wordtoFind[z]) > -1) {
      myString[i].split(" ").forEach(function (word) {
        assocArr.push(word);
      });
    }
  }
}
答案 3 :(得分:0)
我认为困扰你的关键问题是数据结构。最佳结构应该是这样的:
{
@Arsenal:[
{RT:1, Waiting:1, for:1, the:1, international:1},
{xxx:1, yyy:1, zzz:3}, //for there are multiple ones in 'myString' that contain the same '@Arsenal'
{slkj:1, sldjfl:2, lsdkjf:1} //maybe more
]
someOtherWord:[
{},
{},
....
]
}
代码:
// Holds the final output, keyed by the word that was searched for.
var result = {};
/**
 * Counts how often each space-separated word occurs in `string`, leaving out
 * words that are exactly equal to `key`.
 *
 * The comparison with `key` is an exact match, so a token such as "@Arsenal:"
 * (with a trailing colon) is not equal to "@Arsenal" and is counted.
 *
 * @param {string} string - the sentence to split on single spaces.
 * @param {string} key - the word to leave out of the counts.
 * @returns {Object} a map such as {RT:1, Waiting:1, for:1, the:1, international:1}.
 */
function calculateCount(string, key) {
  var wordCounts = {};
  var hasOwn = Object.prototype.hasOwnProperty;
  string.split(" ").forEach(function (word) {
    if (word === key) {
      return;
    }
    // Own-property check: comparing wordCounts[word] with undefined is fooled
    // by names inherited from Object.prototype such as "constructor" or
    // "toString", which would turn their counts into NaN.
    if (hasOwn.call(wordCounts, word)) {
      wordCounts[word]++;
    } else {
      wordCounts[word] = 1;
    }
  });
  return wordCounts;
}
// For each word to find, build an array holding one word-count object (as
// returned by calculateCount) per string in myString that contains that word
// as a substring, and store the array in result under the word.
// The list is read from `wordtoFind`, the spelling used where the list is
// declared; `wordToFind` is not defined anywhere.
wordtoFind.forEach(function (word) {
  var current = result[word] = [];
  myString.forEach(function (str) {
    if (str.indexOf(word) > -1) {
      current.push(
        calculateCount(str, word)
      );
    }
  });
});