如何按键分组(groupBy)来过滤对象数组?

时间:2019-11-28 03:12:03

标签: javascript arrays ecmascript-6 javascript-objects

我遇到了下面这个算法问题,正在尝试用 ES6 来解决,但在分组(groupBy)这一步上卡住了:

// Sample input: each entry has a string `id`, a `time` string, and a numeric `value`.
// The entries below are listed in ascending `time` order.
// NOTE(review): `time` looks like month/day/year followed by a 24-hour clock — confirm the
// intended format, since it is not ISO 8601 and Date parsing of it is engine-dependent.
let data = [
  {id: "2", time: "3/11/2016 02:02:58", value: 7.0},
  {id: "1", time: "3/11/2016 02:12:32", value: 6.5},
  {id: "1", time: "3/11/2016 02:13:11", value: 7.25},
  {id: "4", time: "3/11/2016 02:13:54", value: 8.75},
  {id: "2", time: "3/11/2016 05:02:45", value: 11.0},
  {id: "4", time: "3/11/2016 06:32:42", value: 5.0},
  {id: "2", time: "3/11/2016 06:35:12", value: 2.0},
  {id: "1", time: "3/11/2016 06:45:01", value: 12.0},
  {id: "1", time: "3/11/2016 06:59:59", value: 11.75},
  {id: "2", time: "3/11/2016 07:01:53", value: 1.0},
  {id: "1", time: "3/11/2016 07:02:54", value: 4.5},
  {id: "3", time: "3/11/2016 07:02:54", value: 15.75},
  {id: "6", time: "3/11/2016 07:02:54", value: 14.25},
  {id: "2", time: "3/11/2016 07:03:15", value: 12.0},
  {id: "2", time: "3/11/2016 08:02:22", value: 3.0},
  {id: "2", time: "3/11/2016 09:41:50", value: 4.0},
  {id: "2", time: "3/11/2016 10:02:54", value: 5.0},
  {id: "2", time: "3/11/2016 11:05:35", value: 10.0},
  {id: "2", time: "3/11/2016 13:02:21", value: 6.0},
  {id: "5", time: "3/11/2016 13:02:40", value: 8.0},
  {id: "4", time: "3/11/2016 13:02:55", value: 8.0},
  {id: "5", time: "3/11/2016 13:33:34", value: 8.0},
  {id: "5", time: "3/11/2016 13:42:24", value: 8.0},
  {id: "5", time: "3/11/2016 13:47:44", value: 6.25},
  {id: "5", time: "3/11/2016 14:02:54", value: 4.25},
  {id: "5", time: "3/11/2016 14:03:04", value: 5.25},
  {id: "5", time: "3/11/2016 15:12:55", value: 6.25},
  {id: "2", time: "3/11/2016 16:02:36", value: 8.0},
  {id: "5", time: "3/11/2016 16:22:11", value: 8.5},
  {id: "5", time: "3/11/2016 17:18:19", value: 11.25},
  {id: "5", time: "3/11/2016 18:19:20", value: 9.0},
  {id: "2", time: "3/11/2016 23:59:59", value: 9.0}
];

尝试获取具有以下条件的数组子集:

  1. 在每个一小时的时段内,每个 id 在结果集中只保留值(value)最高的那条记录。
  2. 如果同一 id 在一小时内有多个对象的值并列最高,则只保留时间最早的那一个。
  3. 如果某个 id 在整个对象数组中对应的对象超过 8 个,则将该 id 的记录全部删除。

我知道在 SQL 中对 'value' 使用 MAX、并对 'time' 使用 GROUP BY 就可以很容易地实现,但在这里我被 JS 的循环弄糊涂了。如果可能的话,请给一些提示。

我在尝试以下方法,但似乎还有很长的路要走

// Attempted reduction of `data` into `newArr`, keeping one entry per distinct `time` string.
// NOTE: entries are matched on the full timestamp (to the second) and `id` is ignored, so this
// does not group by id or by one-hour period.
var newArr = [];
data.forEach(function (el) {
    // Index of an already-collected entry whose `time` string is exactly equal, or -1.
    var findIndex = newArr.findIndex(function (item) {
        return item.time === el.time;
    });
    if (findIndex === -1) {
        // First entry seen for this timestamp. The same object reference from `data` is stored,
        // so the assignments in the branches below also modify the element inside `data`.
        newArr.push(el);
    } else if (el.value > newArr[findIndex].value) {
        // A later entry with the same timestamp has a larger value: overwrite the stored value.
        // The stored `id` is left untouched, so it may no longer belong to this value.
        newArr[findIndex].value = el.value;
        // No-op: the match condition above already guarantees both `time` strings are equal.
        newArr[findIndex].time = el.time;
    } else {
        // No-op for the same reason: the two `time` strings are already identical.
        newArr[findIndex].time = el.time;
    }
});

仅是为了使输出更加清晰

[
  { id: "4",time: "3/11/2016 02:13:54",value: 8.75 },
  { id: "1", time: "3/11/2016 06:45:01",value: 12.0 },
  { id: "3",time: "3/11/2016 07:02:54",value: 15.75 },
  { id: "4", time: "3/11/2016 13:02:55", value: 8.0}
]

id 为 "2" 和 "5" 的记录已被删除,因为它们出现的次数超过了 8 次。我在这里(here)找到了一些类似的内容,但那似乎是用 Ruby 写的。

1 个答案:

答案 0 :(得分:1)

您需要
a)根据每个元素的 time 字段(data[i].time)创建 Date 对象。
b)借助一个对象(以 id、年、月、日、小时为键)对数组元素进行归类整理。
c)过滤掉多余的数据。

希望代码本身足够清楚。我尽量把它写得比较通用,这样即使数据跨越多年、多月或多天也可以使用,因为我猜您需要处理的不只是同一天内的各个小时。

filterOutOldestIds有一个非常丑陋的for循环嵌套,但是如果我希望它是动态的但仍可管理,我想不出更好的方法。

// Sample input passed to filterOnMaxValue() at the bottom of this snippet.
// Each entry has a string `id`, a `time` string that is parsed with `new Date(...)`, and a
// numeric `value`. The entries below are listed in ascending `time` order.
let data = [
  {id: "2", time: "3/11/2016 02:02:58", value: 7.0},
  {id: "1", time: "3/11/2016 02:12:32", value: 6.5},
  {id: "1", time: "3/11/2016 02:13:11", value: 7.25},
  {id: "4", time: "3/11/2016 02:13:54", value: 8.75},
  {id: "2", time: "3/11/2016 05:02:45", value: 11.0},
  {id: "4", time: "3/11/2016 06:32:42", value: 5.0},
  {id: "2", time: "3/11/2016 06:35:12", value: 2.0},
  {id: "1", time: "3/11/2016 06:45:01", value: 12.0},
  {id: "1", time: "3/11/2016 06:59:59", value: 11.75},
  {id: "2", time: "3/11/2016 07:01:53", value: 1.0},
  {id: "1", time: "3/11/2016 07:02:54", value: 4.5},
  {id: "3", time: "3/11/2016 07:02:54", value: 15.75},
  {id: "6", time: "3/11/2016 07:02:54", value: 14.25},
  {id: "2", time: "3/11/2016 07:03:15", value: 12.0},
  {id: "2", time: "3/11/2016 08:02:22", value: 3.0},
  {id: "2", time: "3/11/2016 09:41:50", value: 4.0},
  {id: "2", time: "3/11/2016 10:02:54", value: 5.0},
  {id: "2", time: "3/11/2016 11:05:35", value: 10.0},
  {id: "2", time: "3/11/2016 13:02:21", value: 6.0},
  {id: "5", time: "3/11/2016 13:02:40", value: 8.0},
  {id: "4", time: "3/11/2016 13:02:55", value: 8.0},
  {id: "5", time: "3/11/2016 13:33:34", value: 8.0},
  {id: "5", time: "3/11/2016 13:42:24", value: 8.0},
  {id: "5", time: "3/11/2016 13:47:44", value: 6.25},
  {id: "5", time: "3/11/2016 14:02:54", value: 4.25},
  {id: "5", time: "3/11/2016 14:03:04", value: 5.25},
  {id: "5", time: "3/11/2016 15:12:55", value: 6.25},
  {id: "2", time: "3/11/2016 16:02:36", value: 8.0},
  {id: "5", time: "3/11/2016 16:22:11", value: 8.5},
  {id: "5", time: "3/11/2016 17:18:19", value: 11.25},
  {id: "5", time: "3/11/2016 18:19:20", value: 9.0},
  {id: "2", time: "3/11/2016 23:59:59", value: 9.0}
];

/**
 * Reduces `arr` to one entry per id per calendar hour and caps the number of
 * entries returned for each id.
 *
 * Every element is folded into a nested lookup object by
 * setHighestValueBasedOnDate(); the lookup is then flattened back into an
 * array by filterOutOldestIds(), which keeps at most `maxOccurrences`
 * entries per id.
 *
 * Note: an id that exceeds the cap is truncated, not removed entirely.
 *
 * @param {Array<{id: string, time: string, value: number}>} arr - Entries to filter.
 * @param {number} [maxOccurrences=8] - Maximum number of entries kept per id.
 * @returns {Array<{id: string, time: string, value: number}>} The surviving entries
 *   (the same object references that were passed in, not copies).
 */
function filterOnMaxValue(arr, maxOccurrences = 8) {
  // Thread the lookup object through the helper's return value, one element at a time.
  const controlObj = arr.reduce(
    (lookup, el) => setHighestValueBasedOnDate(el, lookup),
    {}
  );

  return filterOutOldestIds(controlObj, maxOccurrences);
}

/**
 * Records `el` in `controlObj` if it is the best entry seen so far for its id
 * within its calendar hour.
 *
 * The slot is addressed as controlObj[id][year][month][dayOfMonth][hour], with
 * all date parts taken from `new Date(el.time)` in local time (month is
 * 0-based, as returned by getMonth()).
 *
 * An entry replaces the stored one when its value is strictly higher, or when
 * the values are equal and its timestamp is earlier, so ties always resolve to
 * the earliest entry regardless of input order.
 *
 * @param {{id: string, time: string, value: number}} el - Entry to record.
 * @param {Object} controlObj - Nested lookup object; mutated in place.
 * @returns {Object} The same `controlObj` instance.
 */
function setHighestValueBasedOnDate(el, controlObj) {
  const date = new Date(el.time);
  const year = date.getFullYear();
  const month = date.getMonth();
  // getDate() is the day of the month. getDay() would be the weekday (0-6),
  // which makes entries one week apart land in the same slot.
  const day = date.getDate();
  const hour = date.getHours();
  const id = el.id;

  createDefaultStructure(id, year, month, day, hour, controlObj);

  const hourSlots = controlObj[id][year][month][day];
  const previousEl = hourSlots[hour];

  const isTieButEarlier =
    Boolean(previousEl) &&
    el.value === previousEl.value &&
    date < new Date(previousEl.time);

  if (!previousEl || el.value > previousEl.value || isTieButEarlier) {
    hourSlots[hour] = el;
  }

  return controlObj;
}

/**
 * Ensures the nested containers controlObj[id][year][month][day] exist,
 * creating empty objects for any missing level. Existing levels are kept.
 *
 * @param {string} id - Entry id (first-level key).
 * @param {number} year - Second-level key.
 * @param {number} month - Third-level key.
 * @param {number} day - Fourth-level key.
 * @param {number} hour - Unused here; the hour slot is filled in by the caller.
 * @param {Object} controlObj - Nested lookup object; mutated in place.
 * @returns {Object} The same `controlObj` instance.
 */
function createDefaultStructure(id, year, month, day, hour, controlObj) {
  controlObj[id] = controlObj[id] || {};
  controlObj[id][year] = controlObj[id][year] || {};
  controlObj[id][year][month] = controlObj[id][year][month] || {};
  controlObj[id][year][month][day] = controlObj[id][year][month][day] || {};

  return controlObj;
}

/**
 * Flattens the nested lookup controlObj[id][year][month][day][hour] into an
 * array, keeping at most `maxAllowedOccurances` entries per id.
 *
 * Entries are visited in object-key enumeration order at every level
 * (integer-like keys enumerate in ascending numeric order), so for each id the
 * first entries in that order are kept and any beyond the limit are dropped.
 *
 * Note: an id that exceeds the limit is truncated, not removed entirely.
 *
 * @param {Object} controlObj - Nested lookup object; not modified.
 * @param {number} maxAllowedOccurances - Maximum number of entries kept per id.
 * @returns {Array} The kept entries, grouped by id in enumeration order.
 */
function filterOutOldestIds(controlObj, maxAllowedOccurances) {
  const sortedArr = [];

  for (const id of Object.keys(controlObj)) {
    const yearsObj = controlObj[id];
    let keptForId = 0;

    // The label lets us abandon the whole walk for this id as soon as the
    // limit is hit, instead of only leaving the innermost loop.
    collectId:
    for (const year of Object.keys(yearsObj)) {
      const monthsObj = yearsObj[year];

      for (const month of Object.keys(monthsObj)) {
        const daysObj = monthsObj[month];

        for (const day of Object.keys(daysObj)) {
          const hoursObj = daysObj[day];

          for (const hour of Object.keys(hoursObj)) {
            if (keptForId < maxAllowedOccurances) {
              sortedArr.push(hoursObj[hour]);
              keptForId++;
            } else {
              break collectId;
            }
          }
        }
      }
    }
  }

  return sortedArr;
}

// Run the filter over the sample `data` array and print the resulting entries.
let sortedData = filterOnMaxValue(data);
console.log(sortedData);
<p>Trying to get subset of array with below condition</p>

<ul>
  <li>For each Id within each one-hour period, only the entry with the most expensive value should be in the result set.</li>
  <li>If more than one object with the same Id ties for the most expensive value in a one-hour period, keep only the earliest one.</li>
  <li>If there are more than 8 objects for an Id in the overall array of objects, remove that Id.</li>
</ul>