Question

我有一个字符串列表，想检查其中的字符串是否包含特定的单词；如果包含，就把该字符串拆分成单词，并将这些单词添加到关联数组中。

myString

我想循环遍历 myString：如果其中某个字符串包含 wordtoFind 里的单词，就把该字符串拆分为单个单词，并创建一个像下面这样的对象：

newWord = {@Arsenal:[{RT:1},{Waiting:1},{for:1},{the:1},{international:1}]}

我目前的代码：

for(z=0; z <wordtoFind.length; z++){ for ( i = 0 ; i < myString.length; i++) { if (myString[i].indexOf(wordtoFind[z].key) > -1){ myString[i].split(" ") } } }

（注：抓取到的原文在此处还混入了一段与本问题无关的 R 代码：cns2 <- function(x,y){ b <- numeric(length(nrow(y))) for(i in 1:nrow(x)){ a<- distm(x=x[i,], y=y, fun = distVincentyEllipsoid) b[i] = which.min(a) } return(y[b,]) }）

# Load the microbenchmark package, then time cns(x, y) against cns2(x, y).
library(microbenchmark)
microbenchmark(cns(x,y), ###where x is your first dataframe, y the second
               cns2(x,y)
               )

Answer 1

我认为类似下面的代码可以解决问题，它还会统计句子中每个单词的出现次数。需要注意的是，JavaScript 不像 PHP 那样有关联数组，它只有对象（objects）和按数字索引的数组（arrays）：

// Sentences to scan.
var myString = ['RT @Arsenal: Waiting for the international', 'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish'];

// Words to look for; every word that occurs in a sentence becomes a key of `result`.
var wordtoFind = ['@Arsenal'];

// Maps each found search word to an object of { otherWord: occurrenceCount }.
var result = {};

// Trailing punctuation that is ignored when a token is compared with the
// search word, so that "@Arsenal:" is recognised as the search word "@Arsenal".
var trailingPunctuation = /[:;,.!?]+$/;
var hasOwn = Object.prototype.hasOwnProperty;

for (var i = 0, l = wordtoFind.length; i < l; i++) {
    var searchWord = wordtoFind[i];

    for (var ii = 0, ll = myString.length; ii < ll; ii++) {
        if (myString[ii].indexOf(searchWord) !== -1) {
            var split = myString[ii].split(' ');
            // Keep counting into the entry of an earlier matching sentence
            // instead of replacing it, so no sentence's words are lost.
            var resultpart = hasOwn.call(result, searchWord) ? result[searchWord] : {};

            for (var iii = 0, lll = split.length; iii < lll; iii++) {
                var token = split[iii];
                // Consecutive spaces produce empty tokens; they are not words.
                if (token === '') {
                    continue;
                }
                // Leave the search word itself out of the counts, with or
                // without trailing punctuation attached to it.
                if (token === searchWord || token.replace(trailingPunctuation, '') === searchWord) {
                    continue;
                }
                if (!hasOwn.call(resultpart, token)) {
                    resultpart[token] = 0;
                }
                resultpart[token]++;
            }
            result[searchWord] = resultpart;
        }
    }
}

console.log(result);
//{"@Arsenal":{"RT":1,"Waiting":1,"for":1,"the":1,"international":1}}

Answer 2

此方法使用 forEach 函数和回调。containsWord 函数暂时保留了 for 循环，以减少回调的数量；当然，这部分也可以改写。

// Sample sentences to search through.
var myString = [
    "RT @Arsenal: Waiting for the international",
    "We’re hungry for revenge @_nachomonreal on Saturday's match and aiming for a strong finish",
    "@Arsenal: one two three four two four three four three four"
];

// Search terms to look up in the sentences above.
var wordtoFind = ["@Arsenal"];

// Normalises a word before it is compared with a search term:
// the first ':' found in the word is removed, everything else is kept.
function preprocessor(word) {
    var colonAt = word.indexOf(':');
    if (colonAt === -1) {
        return word;
    }
    return word.slice(0, colonAt) + word.slice(colonAt + 1);
}

/**
 * Counts the words of every string in `array` that contains one of the
 * `search` words.
 *
 * @param {string[]} array - strings to search in
 * @param {string[]} search - words to look for
 * @param {function(Object)} [callback] - invoked exactly once with the
 *     result after all strings were processed, also when `array` or
 *     `search` is empty
 * @param {function(string): string} [preprocessor] - forwarded to
 *     containsWord, which applies it to a word before comparing it
 * @returns {Object} the same result object that is passed to `callback`
 */
function findOccurences(array, search, callback, preprocessor) {
    var result = {};
    // iterate the search strings that should be matched
    search.forEach(function (needle) {
        // iterate the array of strings that should be searched in
        array.forEach(function (haystack) {
            if (!containsWord(haystack, needle, preprocessor)) {
                return;
            }
            // iterate every word to count the occurences and write them to the result
            haystack.split(' ').forEach(function (word) {
                countOccurence(result, needle, word);
            });
        });
    });
    // forEach is synchronous, so all work is finished at this point; no
    // iteration counter is needed, and the callback also fires when there
    // was nothing to iterate.
    if (typeof callback === 'function') {
        callback(result);
    }
    return result;
}

/**
 * Tells whether `haystack`, split on single spaces, contains a word equal
 * to `needle`. When a `preprocessor` is given, each word is passed through
 * it before the comparison.
 */
function containsWord(haystack, needle, preprocessor) {
    var tokens = haystack.split(' ');
    for (var idx = 0, total = tokens.length; idx < total; idx += 1) {
        // compare the preprocessed form when a preprocessor was supplied
        var candidate = preprocessor ? preprocessor(tokens[idx]) : tokens[idx];
        if (candidate !== needle) {
            continue;
        }
        // first match decides the answer
        return true;
    }
    return false;
}

/**
 * Increments the counter of `word` inside `result[key]`, creating the
 * container and the counter on first use.
 *
 * The container is a plain object rather than an Array: on an Array the word
 * "length" and numeric words collide with the array's own `length`/indices,
 * and JSON.stringify would drop every named counter. As a consequence
 * console.log renders the container with { } instead of [ ].
 *
 * @param {Object} result - accumulator that is mutated in place
 * @param {string} key - search word the counted word belongs to
 * @param {string} word - word whose occurrence is recorded
 */
function countOccurence(result, key, word) {
    var hasOwn = Object.prototype.hasOwnProperty;
    // add the container to the result if it doesn't exist yet
    if (!hasOwn.call(result, key)) {
        result[key] = {};
    }
    var entry = result[key];
    // set the count to 0 if it doesn't exist yet
    if (!hasOwn.call(entry, word)) {
        entry[word] = 0;
    }
    entry[word]++;
}

// Receives the finished counts; replace the body to do something else with them.
function printResult(result) {
    console.log(result);
}

// Run the search over the sample data, stripping ':' from words before matching.
findOccurences(myString, wordtoFind, printResult, preprocessor);

// output:
/*
 { '@Arsenal':
   [ RT: 1,
    '@Arsenal:': 2,
    Waiting: 1,
    for: 1,
    the: 1,
    international: 1,
    one: 1,
    two: 2,
    three: 3,
    four: 4 ] }
 */

如果答案需要澄清，请随意提出任何问题。

我希望这符合您的需求。

Answer 3

你的思路是对的，只需把拆分后的字符串存入关联数组变量即可。

// Collects, in order, every word of every string that contains a search word.
var assocArr = [];
// The loop counters are declared with `var`: assigning to undeclared names
// creates implicit globals and throws a ReferenceError in strict mode.
for (var z = 0; z < wordtoFind.length; z++) {
    for (var i = 0; i < myString.length; i++) {
        if (myString[i].indexOf(wordtoFind[z]) > -1) {

            myString[i].split(" ").forEach(function (word) {
                assocArr.push(word);
            });

        }
    }
}

Answer 4

我认为困扰你的关键问题是数据结构。最佳结构应该是这样的：

{
    '@Arsenal': [
        {RT:1, Waiting:1, for:1, the:1, international:1},
        {xxx:1, yyy:1, zzz:3}, //one object per string in 'myString' that contains '@Arsenal'
        {slkj:1, sldjfl:2, lsdkjf:1} //maybe more
    ],
    someOtherWord: [
        {},
        {},
        ....
    ]
}

代码：

// Maps each search word to an array of word-count objects, one per matching string.
var result = {};

/**
 * Counts how often each word occurs in `string`, leaving out the search word.
 *
 * Example: calculateCount('RT @Arsenal: Waiting for the international', '@Arsenal')
 * returns {RT:1, Waiting:1, for:1, the:1, international:1}.
 *
 * @param {string} string - sentence that is split on single spaces
 * @param {string} key - search word to exclude from the counts
 * @returns {Object} word -> number of occurrences
 */
function calculateCount(string, key) {
    var wordCounts = {};
    var hasOwn = Object.prototype.hasOwnProperty;
    // Punctuation directly after the search word ("@Arsenal:") must not turn
    // it into a different, counted word.
    var trailingPunctuation = /[:;,.!?]+$/;
    string.split(" ").forEach(function (word) {
        // Consecutive spaces (or an empty sentence) yield empty tokens.
        if (word === "") return;
        if (word === key || word.replace(trailingPunctuation, "") === key) return;
        // Own-property check: words such as "constructor" would otherwise
        // find the inherited member and be incremented to NaN.
        if (hasOwn.call(wordCounts, word)) wordCounts[word]++;
        else wordCounts[word] = 1;
    });
    return wordCounts;
}

// For each search word, collect one word-count object, such as
// {RT:1, Waiting:1, for:1, the:1, international:1}, per string that contains it.
// The list is named `wordtoFind`, matching the spelling used in the question.
wordtoFind.forEach(function (word) {
    var current = result[word] = [];
    myString.forEach(function (str) {
        if (str.indexOf(word) > -1) {
            current.push(
                calculateCount(str, word)
            );
        }
    });
});

将字符串拆分为多维数组

4 个答案: