如何精简图表数据但保留极值

时间:2016-09-12 23:25:51

标签: javascript node.js statistics outliers

我有一个数据库,其中保存了一个月的数据,每10分钟一条记录。(也就是每10分钟一个数据集)

现在我想在三个图表上显示数据:过去24小时,过去7天和过去30天。

数据如下所示:

{ "data" : 278, "date" : ISODate("2016-08-31T01:51:05.315Z") }
{ "data" : 627, "date" : ISODate("2016-08-31T01:51:06.361Z") }
{ "data" : 146, "date" : ISODate("2016-08-31T01:51:07.938Z") }
// etc

对于24小时图,我只输出过去24小时的数据,这很容易。

对于其他图表,我将数据缩小:

// Rows loaded from the database. The placeholder must be an array: the loop
// below relies on `data.length` and numeric indexing, which a plain object
// does not provide (the loop would silently never run).
const data = []; //data from database
let newData = [];
const interval = 7; //for 7 days the interval is 7, for 30 days it's 30

// Thin the series by keeping only every `interval`-th row.
for (let i = 0; i < data.length; i += interval) {
    newData.push(data[i]);
}

这样做可以正常工作,但 data 为 0、或与其他值的平均水平相差很大的极端事件可能会丢失,具体取决于抽样时恰好取到的时间点。然而,如果不对数据进行稀疏化,就会有大量数据点通过网络发送,并且必须在前端处理。我想避免这种情况。

现在我的问题

如何在保留极值的同时精简这7天的数据?最高效的方式是什么?

附加: 本质上,我认为我正在尝试简化图形以减少点数但保持整体形状。 (如果从纯图像角度来看)

类似于在 Node 中实现 Douglas–Peucker 算法?(原文此处附有一张图片)

1 个答案:

答案 0 :(得分:2)

正如您在评论中提到的,Ramer-Douglas-Peucker(RDP)算法用于处理2D图形中的数据点,但您希望将其用于X值固定的图形数据。我修改了 M Oehm 提供的该算法的 JavaScript 实现,使其在计算中仅考虑垂直(Y)方向的距离。

另一方面,通常建议使用数据平滑来减少图表中的数据点数量(请参阅 csgillespie 的这篇帖子)。

为了比较这两种方法,我做了一个小测试程序。“重置”按钮可创建新的测试数据。可以选择并应用某种算法,以获得按指定间隔精简后的点集。然而,在RDP算法的情况下,得到的点不是均匀间隔的。为了得到与指定间隔相对应的点数,我迭代地运行计算,每次调整 epsilon 值,直到达到正确的点数。

从我的测试中,RDP算法可以提供更好的结果。唯一的缺点是点之间的间距不同。我不认为这是可以避免的,因为我们希望保留原始数据中不均匀分布的极值点。

以下是代码段,在“全页”模式下可以更好地看到:

// XML namespace passed to createElementNS when creating SVG elements.
var svgns = 'http://www.w3.org/2000/svg';
// Outer SVG group; looked up here but not referenced again in the visible script.
var graph = document.getElementById('graph1');
// SVG groups that receive the raw-data markers and the reduced-data markers.
var grpRawData = document.getElementById('grpRawData');
var grpCalculatedData = document.getElementById('grpCalculatedData');

// Controls: reset button, algorithm selector, add/clear buttons.
var btnReset = document.getElementById('btnReset');
var cmbMethod = document.getElementById('cmbMethod');
var btnAddCalculated = document.getElementById('btnAddCalculated');
var btnClearCalculated = document.getElementById('btnClearCalculated');

// Current raw series as an array of {x, y}; replaced by showRawData().
var data = [];
// Number of reduced series drawn since the last reset/clear; used as an index into `colors`.
var calculatedCount = 0;
// Fill colors assigned to successive reduced series.
var colors = ['black', 'red', 'green', 'blue', 'orange', 'purple'];

/**
 * Reads the thinning interval typed into the #txtPeriod input.
 *
 * @returns {number} The interval as a positive integer. Falls back to 1
 *     (keep every point) when the field is empty, non-numeric, zero or
 *     negative.
 */
var getPeriod = function () {
    var period = parseInt(document.getElementById('txtPeriod').value, 10);
    // A period of 0 would make the `i += period` sampling loop never advance,
    // a negative one would index before the start of the data, and NaN would
    // silently yield no points; `NaN >= 1` is false, so all three land here.
    return period >= 1 ? period : 1;
};

/**
 * Empties a container node by removing its children one at a time,
 * always taking the last remaining child.
 *
 * @param {Node} grp - Node whose children are removed.
 */
var clearGroup = function (grp) {
    for (var child = grp.lastChild; child; child = grp.lastChild) {
        grp.removeChild(child);
    }
};

/**
 * Draws one SVG circle per point and appends it to the given group.
 *
 * @param {Element} grp - Group element that receives the markers.
 * @param {Array<{x: number, y: number}>} pts - Points to draw.
 * @param {number} markerSize - Circle radius.
 * @param {string} color - Circle fill.
 */
var showPoints = function (grp, pts, markerSize, color) {
    pts.forEach(function (pt) {
        var circle = document.createElementNS(svgns, 'circle');
        // Key order is the order in which the attributes are set.
        var attributes = { cx: pt.x, cy: pt.y, r: markerSize, fill: color };
        Object.keys(attributes).forEach(function (name) {
            circle.setAttributeNS(null, name, attributes[name]);
        });
        grp.appendChild(circle);
    });
};

// Build a fresh random test series (x = 1..499), store it in `data`
// and plot it as small grey markers in the raw-data group.
var showRawData = function () {
    var points = [];
    data = points;

    // Random-walk offset accumulated from one point to the next.
    var drift = 0;
    var x = 1;
    while (x < 500) {
        drift += 15.0 * (Math.random() * Math.random() - 0.25);
        points.push({
            x: x,
            y: 150 + 30 * Math.sin(x / 200) * Math.sin((x - 37) / 61) + 2 * Math.sin((x - 7) / 11) + drift
        });
        x += 1;
    }

    showPoints(grpRawData, data, 1, '#888');
};

// Gaussian kernel smoother: takes one sample per period, centered in the
// period, and pairs the original x with the kernel-smoothed y at that index.
var createGaussianKernelData = function () {
    var step = getPeriod();
    var samples = [];
    var center = Math.floor(step / 2);
    while (center < data.length) {
        samples.push({ x: data[center].x, y: gaussianKernel(center) });
        center += step;
    }
    return samples;
};

/**
 * Weighted average of the y values around `index`, using the weight
 * exp(-d^2), where d is the distance in samples from `index`.
 *
 * The window spans floor(period / 2) samples on each side; positions that
 * fall outside `data` are skipped.
 *
 * @param {number} index - Index into `data` of the window center.
 * @returns {number} The smoothed y value (NaN if no sample lies in the window).
 */
var gaussianKernel = function (index) {
    var halfRange = Math.floor(getPeriod() / 2);
    // `i` must be declared locally: left undeclared it becomes an implicit
    // global in sloppy mode and a ReferenceError in strict mode / modules.
    var i, distance, factor;
    var totalValue = 0;
    var totalFactor = 0;
    for (i = index - halfRange; i <= index + halfRange; i++) {
        if (0 <= i && i < data.length) {
            distance = Math.abs(i - index);
            factor = Math.exp(-Math.pow(distance, 2));
            totalFactor += factor;
            totalValue += data[i].y * factor;
        }
    }
    // Normalize by the weights actually used so edge windows are not biased low.
    return totalValue / totalFactor;
};

// Ramer-Douglas-Peucker algorithm (vertical-distance variant)
/**
 * Simplifies pts[first..last], measuring each point's deviation as its
 * vertical (y) distance from the straight line joining the two end points.
 *
 * @param {Array<{x: number, y: number}>} pts - Points to simplify.
 * @param {number} first - Index of the first point of the segment.
 * @param {number} last - Index of the last point of the segment.
 * @param {number} eps - Deviation below which the interior points are dropped.
 * @returns {Array<{x: number, y: number}>} The retained points, starting with
 *     pts[first] and NOT including pts[last]; the caller appends the end point.
 */
var ramerDouglasPeuckerRecursive = function (pts, first, last, eps) {
    // No interior point left to examine.
    if (first >= last - 1) {
        return [pts[first]];
    }

    var slope = (pts[last].y - pts[first].y) / (pts[last].x - pts[first].x);

    var x0 = pts[first].x;
    var y0 = pts[first].y;

    var iMax = first;
    var max = -1;
    // `yLine` is declared locally; the original assigned to an undeclared `y`,
    // which leaks a global in sloppy mode and throws in strict mode / modules.
    var p, dy, yLine;

    // Calculate vertical distance
    for (var i = first + 1; i < last; i++) {
        p = pts[i];
        yLine = y0 + slope * (p.x - x0);
        dy = Math.abs(p.y - yLine);

        if (dy > max) {
            max = dy;
            iMax = i;
        }
    }

    // Every interior point is close enough to the chord: keep only the start.
    if (max < eps) {
        return [pts[first]];
    }

    // Otherwise keep the farthest point and simplify both halves around it.
    var p1 = ramerDouglasPeuckerRecursive(pts, first, iMax, eps);
    var p2 = ramerDouglasPeuckerRecursive(pts, iMax, last, eps);

    return p1.concat(p2);
};

/**
 * Runs the Ramer-Douglas-Peucker reduction over a whole point list.
 *
 * @param {Array<{x: number, y: number}>} pts - Points to simplify.
 * @param {number} eps - Deviation threshold passed to the recursive step.
 * @returns {Array<{x: number, y: number}>} The retained points, including
 *     both end points. Lists with fewer than two points are returned as a copy.
 */
var internalRamerDouglasPeucker = function (pts, eps) {
    // Nothing to simplify; this also avoids emitting `undefined` entries for
    // an empty list or the same point twice for a one-element list.
    if (pts.length < 2) {
        return pts.slice();
    }
    // Simplify the list that was passed in, not the global `data`.
    var kept = ramerDouglasPeuckerRecursive(pts, 0, pts.length - 1, eps);
    // The recursive step leaves out the final point, so add it back here.
    return kept.concat([pts[pts.length - 1]]);
};

/**
 * Reduces `data` with the Ramer-Douglas-Peucker step, retuning the tolerance
 * until the result has data.length / period points (rounded) or 20
 * retuning passes have been made.
 *
 * @returns {Array<{x: number, y: number}>} The result of the last pass.
 */
var createRamerDouglasPeuckerData = function () {
    var targetCount = Math.round(data.length / getPeriod());
    var tolerance = getPeriod();
    var reduced = internalRamerDouglasPeucker(data, tolerance);

    // Too many points -> ratio > 1 -> tolerance grows; too few -> it shrinks.
    for (var attempt = 0; attempt < 20 && reduced.length !== targetCount; attempt++) {
        tolerance *= Math.sqrt(reduced.length / targetCount);
        reduced = internalRamerDouglasPeucker(data, tolerance);
    }

    return reduced;
};

// Event handlers

// Reset: remove every marker, restart the color sequence, then generate
// and draw a new random series.
btnReset.addEventListener('click', function onResetClick() {
    clearGroup(grpCalculatedData);
    clearGroup(grpRawData);
    calculatedCount = 0;
    showRawData();
});

// Clear: remove only the reduced-series markers and restart the color sequence.
btnClearCalculated.addEventListener('click', function onClearCalculatedClick() {
    clearGroup(grpCalculatedData);
    calculatedCount = 0;
});

// Add: reduce the current series with the selected method and draw the
// result in the next palette color.
btnAddCalculated.addEventListener('click', function () {
    var points;
    switch (cmbMethod.value) {
        case "Gaussian":
            points = createGaussianKernelData();
            break;
        case "RDP":
            points = createRamerDouglasPeuckerData();
            break;
        default:
            // Unknown method: draw nothing and do not consume a color.
            return;
    }
    // Wrap around the palette so that a seventh (and later) series still gets
    // a valid fill instead of `undefined`.
    showPoints(grpCalculatedData, points, 2, colors[calculatedCount++ % colors.length]);
});

// Draw an initial random series on load.
showRawData();
/* Leave a small vertical gap beneath each row of controls. */
div {
    margin-bottom: 6px;
}
<!-- First control row: reset button, reduction-method selector and interval field. -->
<div>
    <button id="btnReset">Reset</button>&nbsp;
    <select id="cmbMethod">
        <option value="RDP">Ramer-Douglas-Peucker</option>
        <option value="Gaussian">Gaussian kernel</option>
    </select>&nbsp;
    <label for="txtPeriod">Interval: </label>
    <input id="txtPeriod" type="text" style="width: 36px;" value="7" />
</div>
<!-- Second control row: draw another reduced series, or remove the ones drawn so far. -->
<div>
    <button id="btnAddCalculated">Add calculated points</button>
    <button id="btnClearCalculated">Clear calculated points</button>
</div>
<!-- Chart surface. The translate + scale(1,-1) transform flips the y axis so larger y values are drawn higher up. -->
<svg id="svg1" width="765" height="450" viewBox="0 0 510 300">
    <g id="graph1" transform="translate(0,300) scale(1,-1)">
        <rect width="500" height="300" stroke="black" fill="#eee"></rect>
        <!-- Filled by the script: raw series markers, then reduced series markers on top. -->
        <g id="grpRawData"></g>
        <g id="grpCalculatedData"></g>
    </g>
</svg>