我正在尝试查找通常在数组中彼此相邻出现的值。
E.G。给定数组:
["dog","cat","goat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"]
它应该返回类似于以下内容的内容:
[[["dog","cat"],4],[["cat","pig"],2],[["dog","cat","pig"],2]]
以下是一些更好的数据:https://pastebin.com/UG4iswrZ
我们将不胜感激。这是我目前在做类似事情时失败的尝试:
function findAssociations(words){
var temp = [],tempStore = [],store = [],found = false;
//loop through the words counting occurrances of words together with a window of 5
for(var i = 0;i<words.length-1;i++){
if(i % 5 == 0){
//on every fith element, loop through store attempting to add combinations of words stored in tempStore
for(var j = 0;j<5;j++){
temp = []
//create the current combination
for(var k = 0;k<j;k++){
temp.push(tempStore[k]);
}
//find if element is already stored, if it is, increment the occurrence counter
for(var k = 0;k<store.length;k++){
if(store[k][0]===temp){
found = true;
store[k][1] = store[k][1]+1;
}
}
//if it isn't add it
if(found == false){
store.push([temp,1]);
}
found == false;
}
tempStore = [];
} else {
//add word to tempStore if it i isnt a multiple of 5
tempStore.push(words[i]);
}
}
}
此脚本不会删除出现一次的组合,不会按出现次数对输出进行排序,也不起作用。这只是可能的解决方案如何工作的概述(如benvc所建议)。
答案 0 :(得分:0)
这是我想出的。它只能找到对,但是您可以根据自己的%
const animals = ['dog','cat','goat','dog','cat','elephant','dog','cat','pig','seal','dog','cat','pig','monkey'];
let pairs = ',';
animals.forEach((animal, i) => {
let separator = ',';
if (i % 2 === 0) {
separator = ';'
}
pairs += animal + separator;
});
const evenPairs = pairs.split(',');
const oddPairs = pairs.split(';');
const allPairs = evenPairs.concat(oddPairs).map(pair => pair.replace(/[;,]/, ' '));
let result = {}
allPairs.forEach(pair => {
if (pair.length) {
if (result[pair] === undefined) {
result[pair] = 1;
} else {
result[pair]++;
}
}
});
导致:
dog: 1
cat elephant: 1
cat goat: 1
cat pig: 2
dog cat: 4
elephant dog: 1
goat dog: 1
monkey : 1
pig monkey: 1
pig seal: 1
seal dog: 1
答案 1 :(得分:0)
您需要弄清楚“亲密”和“亲密”的含义。只要看看第一个邻居,您就可以尝试:
const findAssociations = words => {
const associations = {}
for (let i = 0; i < words.length - 1; i++) {
const word = words[i]
const wordRight = words[i+1]
const wordOne = word < wordRight ? word : wordRight;
const wordTwo = word < wordRight ? wordRight : word;
const keys = Object.keys(associations)
const key = `${wordOne}:${wordTwo}`
if (keys.indexOf(key) >= 0) {
associations[key]++
} else {
associations[key] = 1
}
}
const keys = Object.keys(associations)
const values = Object.values(associations)
const zipped = keys.map((key, index) => [key, values[index]])
zipped.sort((a, b) => a[1] < b[1] ? 1 : -1);
return zipped;
}
答案 2 :(得分:0)
您可以在另一个函数中使用此函数,并且每次将元素添加到["dog", "cat"]
const arr = ["dog", "cat", "goat", "dog", "cat", "dog", "cat", "elephant", "dog", "cat", "pig", "seal", "dog", "cat", "pig", "monkey"]
const findArrayInArray = (arr1, arr2) => {
let count = 0,
arrString1 = arr1.join(""),
arrString2 = arr2.join("");
while (arrString2.indexOf(arrString1) > -1) {
count += 1;
arrString2 = arrString2.replace(arrString1, '');
}
return count;
}
console.log(`["dog", "cat"] exist ${findArrayInArray(["dog", "cat"], arr)} times`)
答案 3 :(得分:0)
假定列表中的每个项目都是集合的定界符,并且每个集合对每个项目计数一次(即["dog", "cat", "goat"]
分别计为["dog", "cat"]
和["dog", "cat", "goat"]
,并假设您不如果不想出现任何单个事件,那么这是一种方法:
const full_list = ["dog","cat","goat","dog","cat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"];
// create list of unique items
const distinct = (value, index, self) => {
return self.indexOf(value) ===index;
}
const unique_items = full_list.filter(distinct);
// get all patterns
var pre_report = {};
for (var i in unique_items) {
item = unique_items[i];
var pattern = [item];
var appending = false;
for (var j = full_list.indexOf(item) + 1; j < full_list.length; ++j) {
const related_item = full_list[j];
if (item == related_item) {
pattern = [item]
continue;
}
pattern.push(related_item);
if (pattern in pre_report) {
++pre_report[pattern];
} else {
pre_report[pattern] = 1;
}
}
}
// filter out only single occurring patterns
var report = {};
for (key in pre_report) {
if (pre_report[key] > 1) {
report[key] = pre_report[key];
}
}
console.log(report);
产生:
{ 'dog,cat': 5, 'dog,cat,pig': 2, 'cat,pig': 2 }
答案 4 :(得分:0)
这是适用于多个小组人数的通用解决方案。
您可以指定组大小范围,例如[2,4]
用于2到4个元素的组,并且出现的次数最少。
然后该函数生成给定大小的所有邻居组,对每个组进行排序并计算重复项。如果组中的顺序很重要,则可以删除排序步骤。
通过创建字典来对重复项进行计数,该字典的键是排序的组元素并与特殊标记结合在一起。字典中的值是计数。
然后返回按出现次数然后按组大小排序的组。
const data = ["dog","cat","goat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"];
function findSimilarNeighbors(groupSizeRange, minOccurences, data) {
const getNeighbors = (size, arr) => arr.reduce((acc, x) => {
acc.push([]);
for (let i = 0; i < size; ++ i) {
const idx = acc.length - i - 1;
(acc[idx] || []).push(x);
}
return acc;
}, []).filter(x => x.length === size);
const groups = [];
for (let groupSize = groupSizeRange[0]; groupSize <= groupSizeRange[1]; ++groupSize) {
groups.push(...getNeighbors(groupSize, data));
}
const groupName = group => group.sort().join('@#@'); // use a separator that won't occur in the strings
const groupsInfo = groups.reduce((acc, group) => {
const name = groupName(group);
acc[name] = acc[name] || {};
acc[name] = { group, count: (acc[name].count || 0) + 1 };
return acc;
}, {});
return Object.values(groupsInfo)
.filter(group => group.count >= minOccurences)
.sort((a, b) => {
const countDiff = b.count - a.count;
return countDiff ? countDiff : b.group.length - a.group.length;
})
.map(({ group, count }) => [group, count]);
};
console.log(findSimilarNeighbors([2, 4], 2, data));
console.log(findSimilarNeighbors([4, 4], 2, data));