Question

我有一个规模较大的数据集（数百万行），其中包含时间戳形式的开始时间（start time）和结束时间（end time）。

我需要找出最有效、最高效的方法来计算这些时间范围相交的次数。开始时间按升序排列，但结束时间不一定按升序排列。

例如：

1 - Start: 1484725031, End: 1484725045
2 - Start: 1484725033, End: 1484725039
3 - Start: 1484725040, End: 1484725049

这里记录 1 和 2 相交，记录 1 和 3 也相交。

目前我只是循环遍历所有时间范围，然后将其与整个列表进行比较，这非常慢......

有关如何改善这一点的任何建议吗？

Answer 1

您可以用一个对象来记录相交情况：只需遍历一次整个数组，对每个元素只向后检查，直到遇到起始值不小于其结束值的元素为止。

此方案把端点相接视为不相交，例如一个范围的 end = 5 与另一个范围的 start = 5 不算重叠。

          1111111111
01234567890123456789   overlapping
---                    1
------                 3
   -----               2
    ------             3
        ---            2
         -----         1
               -----   0

结果对象如下所示

{
    "0|2": {           // 1 intersection
        "0|5": true
    },
    "0|5": {           // { start: 0, end: 5 } intersects with 3 objects
        "0|2": true,   // { start: 0, end: 2 } and
        "3|7": true,   // { start: 3, end: 7 } and
        "4|9": true    // { start: 4, end: 9 } 
    },
    "3|7": {
        "0|5": true,
        "4|9": true
    },
    "4|9": {
        "0|5": true,
        "3|7": true,
        "8|10": true
    },
    "8|10": {
        "4|9": true,
        "9|13": true
    },
    "9|13": {
        "8|10": true
    },
    "15|19": {}        // no intersection
}

// Sample ranges, listed in ascending order of `start`; the early exit in the
// scan below relies on that ordering. { start: 0, end: 2 } is part of the set
// so the output contains the "0|2" entry and seven keys in total.
var data = [
    { start: 0, end: 2 },
    { start: 0, end: 5 },
    { start: 3, end: 7 },
    { start: 4, end: 9 },
    { start: 8, end: 10 },
    { start: 9, end: 13 },
    { start: 15, end: 19 }
];

// Maps "start|end" -> { "start|end": true, ... } for every range it overlaps.
// Ranges with identical start and end share one key.
var intersections = {};

data.forEach(function (a, i, aa) {
    var keyA = a.start + '|' + a.end,
        j,
        keyB;

    // Make sure a range with no overlaps still shows up with an empty entry.
    intersections[keyA] = intersections[keyA] || {};

    // Only look forward, and stop at the first later range that starts at or
    // after this one ends (strict `<`, so touching end points do not overlap).
    for (j = i + 1; j < aa.length && aa[j].start < a.end; j++) {
        keyB = aa[j].start + '|' + aa[j].end;
        // Record the overlap in both directions.
        intersections[keyA][keyB] = true;
        intersections[keyB] = intersections[keyB] || {};
        intersections[keyB][keyA] = true;
    }
});

console.log(intersections);

/* Allows elements with class as-console-wrapper to use up to 100% height and
   sets their top offset to 0; presumably the snippet's console output pane —
   confirm against the hosting page. */
.as-console-wrapper { max-height: 100% !important; top: 0; }

Answer 2

比较每一对可能的组合：如果两者都没有在对方开始之前结束，则它们相交。

＆＃13;

// The three sample ranges from the question, one per line.
var times = [
  { start: 1484725031, end: 1484725045 },
  { start: 1484725033, end: 1484725039 },
  { start: 1484725040, end: 1484725049 }
];

/**
 * Finds which time ranges overlap by comparing every pair once.
 *
 * Each element of `times` is annotated in place with:
 *   - `id`: its index in the array, and
 *   - `intersections`: the ids of every other range it overlaps.
 * Two ranges overlap unless one ends strictly before the other starts, so
 * ranges that merely touch at an end point count as overlapping.
 *
 * @param {Array<{start: number, end: number}>} times - Ranges to compare.
 * @returns {Array} A new array holding the same (now annotated) objects.
 */
function findIntersections(times) {
    var newtimes = times.map(function(a, b) {
      // Fresh list to track this range's intersections.
      a["intersections"] = [];
      // The array index doubles as a unique id.
      a["id"] = b;
      return a;
    });

    for (var timeIndexA = 0; timeIndexA < newtimes.length; timeIndexA++) {
      var timeA = newtimes[timeIndexA];
      // Start at timeIndexA + 1 so each pair is checked exactly once.
      for (var timeIndexB = timeIndexA + 1; timeIndexB < newtimes.length; timeIndexB++) {
        var timeB = newtimes[timeIndexB];
        // If neither range ends before the other starts, they must intersect.
        if (!(timeA.end < timeB.start || timeB.end < timeA.start)) {
          // Record the overlap on both ranges.
          timeA.intersections.push(timeB.id);
          timeB.intersections.push(timeA.id);
        }
      }
    }

    // Hand the result back to the caller instead of only logging it.
    return newtimes;
}
  //Find indexes
var indexed = findIntersections(times);
// Print the value findIntersections returned.
console.log(indexed);

＆＃13;

Answer 3

如你所知，你需要减少比较的次数。通常的做法是：如果已经比较过 J 和 I，就不再比较 I 和 J。这可以通过以下伪代码轻松实现：

for i over all values{
    for j over 0 to i - 1 { // alternatively over i+1 to end
       //compare here
    }
}

这会将比较次数从 N² 降至 N(N-1)/2（复杂度仍然是 O(n²)，但已有所改善）。

幸运的是，您的时间跨度数组按开始时间排序，因此您可以更进一步：

for i over all values{
    for j over i + 1 to end of array {
       if(intersects){
          //do your thing
       }else{
          // break the looping over j,
          // as no new value will start before times[j]
          break;
       }
    }
}

这应该会大大减少计算时间，但具体能减少多少无法预知，因为这取决于数据本身。

Answer 4

// Ranges to examine (empty here). The early `break` below only gives a full
// count if the ranges are ordered by ascending start — assumption, confirm.
let timeIntervals = [(start: TimeInterval, end: TimeInterval)]()

// Number of (earlier, later) pairs where the earlier range ends at or after
// the later range's start.
var count = 0
for (x, earlier) in timeIntervals.enumerated() {
    // Walk only the ranges that come after position x.
    for later in timeIntervals.dropFirst(x + 1) {
        // Stop scanning at the first later range that starts after `earlier` ends.
        guard earlier.end >= later.start else { break }
        count += 1
    }
}

如何有效地计算时间范围的交集？

4 个答案: