查找通常在数组中彼此相邻出现的元素

时间:2019-03-08 16:29:53

标签: javascript node.js

我正在尝试查找通常在数组中彼此相邻出现的值。

例如,给定数组:

["dog","cat","goat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"]

它应该返回类似下面这样的结果:

[[["dog","cat"],4],[["cat","pig"],2],[["dog","cat","pig"],2]]

以下是一些更好的数据:https://pastebin.com/UG4iswrZ

如能提供任何帮助,我将不胜感激。下面是我目前做类似事情的一次失败尝试:

/**
 * Attempted co-occurrence counter: walks `words` and, at every index that is a
 * multiple of 5, tries to record prefixes of the words buffered since the
 * previous multiple of 5, each paired with an occurrence count.
 *
 * NOTE(review): the function has no return statement, so the collected `store`
 * is never handed back to the caller.
 *
 * @param {Array} words - list of words to scan.
 */
function findAssociations(words){
  var temp = [],tempStore = [],store = [],found = false;
  // Loop through the words, counting occurrences of words together within a window of 5.
  // NOTE(review): the bound is words.length-1, so the last element is never visited.
  for(var i = 0;i<words.length-1;i++){
    if(i % 5 == 0){
      // On every fifth index (including i = 0, when tempStore is still empty), loop over
      // prefix lengths and try to add the combinations of words held in tempStore.
      // words[i] itself is not buffered on this branch.
      for(var j = 0;j<5;j++){
        temp = []
        // Build the current combination: the first j buffered words (empty when j is 0;
        // filled with undefined when tempStore holds fewer than j words).
        for(var k = 0;k<j;k++){
          temp.push(tempStore[k]);
        }
        // Look for an already stored combination and, if found, increment its counter.
        // NOTE(review): `===` compares array identity, and temp is a brand-new array on
        // every pass, so this condition is never true.
        for(var k = 0;k<store.length;k++){
          if(store[k][0]===temp){
            found = true;
            store[k][1] = store[k][1]+1;
          }
        }
        // If it was not found, add it with a count of 1.
        if(found == false){
          store.push([temp,1]);
        }
        // NOTE(review): this is a comparison whose result is discarded, not an
        // assignment, so `found` is not reset here.
        found == false;
      }
      // Start buffering the next window from scratch.
      tempStore = [];
    } else {
      // Buffer the word when i is not a multiple of 5.
      tempStore.push(words[i]);
    }
  }
}

此脚本不会删除只出现一次的组合,不会按出现次数对输出进行排序,而且也无法正常运行。它只是(按 benvc 的建议)对一种可能解法的思路概述。

5 个答案:

答案 0 :(得分:0)

这是我想出的办法。它只能找出成对的元素,但您可以根据取模(`%`)时所用的除数对其进行修改,以找出由 3 个、4 个等元素组成的集合。
const animals = ['dog','cat','goat','dog','cat','elephant','dog','cat','pig','seal','dog','cat','pig','monkey'];

// Label every pair of directly adjacent elements as "left right".
// Walking the array index by index (instead of joining it with alternating
// separators and splitting the string again) keeps element boundaries intact:
// no stray one-word entries for the first/last element, and words that contain
// ',' or ';' are handled correctly.
const allPairs = [];
for (let i = 0; i < animals.length - 1; i++) {
  allPairs.push(`${animals[i]} ${animals[i + 1]}`);
}

// Tally how many times each adjacent pair occurs; keys are the "left right" labels.
const result = {};
allPairs.forEach(pair => {
  result[pair] = (result[pair] || 0) + 1;
});

结果(其中 “dog: 1” 和 “monkey : 1” 是首尾元素被单独切分出来而产生的多余条目,并非真正的相邻对):

dog: 1
cat elephant: 1
cat goat: 1
cat pig: 2
dog cat: 4
elephant dog: 1
goat dog: 1
monkey : 1
pig monkey: 1
pig seal: 1
seal dog: 1

https://stackblitz.com/edit/typescript-wvuvnr

答案 1 :(得分:0)

您需要先明确“相邻”指的是什么,以及相隔多近才算数。如果只考虑紧邻的元素,可以尝试:

/**
 * Counts how often two words appear directly next to each other in `words`.
 *
 * Order inside a pair is ignored: the two neighbours are put in ascending
 * order before building the key, so "dog","cat" and "cat","dog" both count
 * towards "cat:dog".
 *
 * @param {string[]} words - sequence of words to scan.
 * @returns {Array<[string, number]>} `[key, count]` entries, where key is
 *   "smaller:larger", sorted by count in descending order. Entries with equal
 *   counts keep the order in which their pair was first seen.
 */
const findAssociations = words => {
    // A Map gives O(1) lookups; the original re-listed all object keys and
    // scanned them on every iteration.
    const associations = new Map();

    for (let i = 0; i < words.length - 1; i++) {
        const word = words[i];
        const wordRight = words[i + 1];

        // Normalise the pair so that its direction does not matter.
        const [wordOne, wordTwo] = word < wordRight ? [word, wordRight] : [wordRight, word];

        const key = `${wordOne}:${wordTwo}`;
        associations.set(key, (associations.get(key) || 0) + 1);
    }

    // A numeric difference returns 0 for ties, which keeps the comparator
    // consistent and lets the stable sort preserve first-seen order.
    return [...associations].sort((a, b) => b[1] - a[1]);
}

https://stackblitz.com/edit/js-3ppdit

答案 2 :(得分:0)

您可以在另一个函数中调用此函数,并且每次向 ["dog", "cat"] 中再添加一个元素:

const arr = ["dog", "cat", "goat", "dog", "cat", "dog", "cat", "elephant", "dog", "cat", "pig", "seal", "dog", "cat", "pig", "monkey"]


/**
 * Counts the non-overlapping occurrences of `arr1` as a contiguous run of
 * elements inside `arr2`.
 *
 * Elements are compared one by one with `===`. Joining both arrays into
 * strings would lose the element boundaries (["do","gcat"] would "match"
 * ["dog","cat"]), and removing a match from the joined string could glue its
 * neighbours into a new, phantom match.
 *
 * @param {Array} arr1 - the sequence to look for.
 * @param {Array} arr2 - the array to search in.
 * @returns {number} number of non-overlapping occurrences; 0 when `arr1` is empty.
 */
const findArrayInArray = (arr1, arr2) => {
  // An empty sequence has no meaningful occurrence count; with the
  // string-based approach it also made the search loop spin forever.
  if (arr1.length === 0) {
    return 0;
  }

  let count = 0;
  let start = 0;
  while (start + arr1.length <= arr2.length) {
    const isMatch = arr1.every((value, offset) => arr2[start + offset] === value);
    if (isMatch) {
      count += 1;
      // Jump past the whole match so occurrences never overlap.
      start += arr1.length;
    } else {
      start += 1;
    }
  }
  return count;
}

console.log(`["dog", "cat"] exist ${findArrayInArray(["dog", "cat"], arr)} times`)

答案 3 :(得分:0)

假定列表中的每个项目都是一个集合的定界符,并且每个集合对每个项目只计数一次(即 ["dog", "cat", "goat"] 会分别计为 ["dog", "cat"] 和 ["dog", "cat", "goat"]),再假设您不需要只出现一次的组合,那么下面是一种方法:

const full_list = ["dog","cat","goat","dog","cat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"];

// Create the list of unique items, keeping the order of first appearance.
const distinct = (value, index, self) => {
    return self.indexOf(value) === index;
}
const unique_items = full_list.filter(distinct);

// Collect all patterns. For each unique item, scan the list from just after
// its first occurrence: every following element extends the current pattern
// (and the extended pattern is counted), while meeting the item itself again
// restarts the pattern from that item.
const pre_report = {};
for (const item of unique_items) {
    let pattern = [item];

    for (let j = full_list.indexOf(item) + 1; j < full_list.length; ++j) {
        const related_item = full_list[j];
        if (item === related_item) {
            pattern = [item];
            continue;
        }

        pattern.push(related_item);
        // Explicit comma-joined key instead of relying on implicit
        // array-to-string coercion of the property name.
        const pattern_key = pattern.join(',');
        pre_report[pattern_key] = (pre_report[pattern_key] || 0) + 1;
    }
}

// Keep only the patterns that occur more than once.
const report = {};
for (const pattern_key of Object.keys(pre_report)) {
    if (pre_report[pattern_key] > 1) {
        report[pattern_key] = pre_report[pattern_key];
    }
}

console.log(report);

产生:

{ 'dog,cat': 5, 'dog,cat,pig': 2, 'cat,pig': 2 }

答案 4 :(得分:0)

这是一个适用于多种组大小的通用解决方案。

您可以指定组大小的范围(例如 [2,4] 表示由 2 到 4 个元素组成的组)以及最少出现次数。

然后该函数生成给定大小的所有邻居组,对每个组进行排序并计算重复项。如果组中的顺序很重要,则可以删除排序步骤。

重复项通过一个字典来计数:字典的键由排序后的组元素用一个特殊分隔标记连接而成,字典中的值则是出现次数。

然后返回按出现次数然后按组大小排序的组。

const data = ["dog","cat","goat","dog","cat","elephant","dog","cat","pig","seal","dog","cat","pig","monkey"];

/**
 * Finds groups of neighbouring elements that occur repeatedly in `data`.
 *
 * For every group size in the inclusive range, all runs of that many
 * consecutive elements are collected. Each run is sorted, so element order
 * inside a group does not matter, and identical sorted runs are counted.
 *
 * @param {number[]} groupSizeRange - `[min, max]` group sizes, both inclusive.
 * @param {number} minOccurences - minimum count a group needs to be reported.
 * @param {Array} data - the sequence to analyse.
 * @returns {Array} `[group, count]` pairs, where `group` is the sorted element
 *   list, ordered by count (descending) and then by group length (descending).
 */
function findSimilarNeighbors(groupSizeRange, minOccurences, data) {
  const smallestSize = groupSizeRange[0];
  const largestSize = groupSizeRange[1];

  // Every run of `size` consecutive elements; runs cut short by the end of
  // the data are dropped.
  const windowsOf = (size) => {
    const windows = [];
    data.forEach((_, start) => {
      const run = data.slice(start, start + size);
      if (run.length === size) {
        windows.push(run);
      }
    });
    return windows;
  };

  const candidates = [];
  for (let size = smallestSize; size <= largestSize; size++) {
    for (const run of windowsOf(size)) {
      candidates.push(run);
    }
  }

  // Tally runs under a key built from their sorted elements; the separator is
  // one that is not expected to occur inside the strings themselves.
  const tally = {};
  for (const group of candidates) {
    const key = group.sort().join('@#@');
    const previous = tally[key] || {};
    tally[key] = { group, count: (previous.count || 0) + 1 };
  }

  const frequent = Object.values(tally).filter(entry => entry.count >= minOccurences);
  frequent.sort((left, right) =>
    (right.count - left.count) || (right.group.length - left.group.length));

  return frequent.map(entry => [entry.group, entry.count]);
}

console.log(findSimilarNeighbors([2, 4], 2, data));
console.log(findSimilarNeighbors([4, 4], 2, data));