删除时间戳仅在毫秒级上不同(采样间隔不一致)的连续重复行 - Python

时间:2019-04-30 16:03:24

标签: python pandas dataframe timestamp

数据框如下所示:

0, 3710.968017578125, 2012-01-07T03:13:43.859Z
1, 3710.968017578125, 2012-01-07T03:13:48.890Z
2, 3712.472900390625, 2012-01-07T03:13:53.906Z
3, 3712.472900390625, 2012-01-07T03:13:58.921Z
4, 3713.110107421875, 2012-01-07T03:14:03.900Z
5, 3713.110107421875, 2012-01-07T03:14:03.937Z
6, 3713.89892578125, 2012-01-07T03:14:13.900Z
7, 3713.89892578125, 2012-01-07T03:14:13.968Z
8, 3713.89892578125, 2012-01-07T03:14:19.000Z
9, 3714.64990234375, 2012-01-07T03:14:24.000Z
10, 3714.64990234375, 2012-01-07T03:14:24.015Z
11, 3714.64990234375, 2012-01-07T03:14:29.000Z
12, 3714.64990234375, 2012-01-07T03:14:29.031Z

有些相邻行的时间戳只在毫秒部分不同,我想删除这些行,只保留时间戳在"秒"这一级上不同的行。另外,有些相邻行的数值相同,但时间戳在秒级上确实不同(如第9到12行),这些行需要保留,因此我不能使用 a.loc[a.shift() != a]

所需的输出将是:

0, 3710.968017578125, 2012-01-07T03:13:43.859Z
1, 3710.968017578125, 2012-01-07T03:13:48.890Z
2, 3712.472900390625, 2012-01-07T03:13:53.906Z
3, 3712.472900390625, 2012-01-07T03:13:58.921Z
4, 3713.110107421875, 2012-01-07T03:14:03.900Z
6, 3713.89892578125, 2012-01-07T03:14:13.900Z
8, 3713.89892578125, 2012-01-07T03:14:19.000Z
9, 3714.64990234375, 2012-01-07T03:14:24.000Z
11, 3714.64990234375, 2012-01-07T03:14:29.000Z

2 个答案:

答案 0 :(得分:1)

尝试:

// Posts strQuery/strCon to the Getdataset1 web method and, on success, rebuilds
// the header/footer rows of #tblrptviewhdr and assembles the DataTables
// configuration object (QbDTdata) for the report grid.
$.ajax({
    type: "POST",
    url: 'wsQueryBuilder.asmx/Getdataset1',
    // Serialise with JSON.stringify so that quotes or backslashes inside the
    // query text cannot produce a malformed request body.
    data: JSON.stringify({ strQuery: strQuery, strCon: strCon }),
    contentType: "application/json; charset=utf-8",
    dataType: "json",
    // NOTE(review): synchronous requests block the UI and are deprecated by
    // jQuery; kept because code after this call may rely on the globals set
    // below being populated when $.ajax returns - confirm before changing.
    async: false,

    success: function(response) {
        $("#spnGetdet").html(response.d);
        // SECURITY(review): eval() executes whatever the server sends back.
        // If response.d is always strict JSON, replace this with
        // JSON.parse(response.d) - confirm the payload format first.
        var combo = eval(response.d);
        var col = [];
        var colno = [];
        var resultColumns = [];
        var trhead = "";
        var trfoot = "";
        var alignColumns = [];
        var totalColumns = [];
        var warpColumns = [];
        // These are assigned without `var` on purpose: they are shared with
        // code outside this callback.
        groupColumn = 0;
        aggrgColumn = 0;
        groupColumnName = "";
        aggrgColumnName = "";
        aggrgType = 0;

        // Clear any rows left over from a previous report.
        $('#tblrptviewhdr thead').find('tr').remove().end()
        $('#tblrptviewhdr tbody').find('tr').remove().end()
        $('#tblrptviewhdr tfoot').find('tr').remove().end()

        var obj;
        var alignobj;
        var totobj;
        // Column 0 is the serial-number column filled in by fnRowCallback.
        obj = {
            sWidth: "20px",
            data: 'id',
            sDefaultContent: '',
        };
        resultColumns.push(obj);
        var cp = 0;
        var selectCtrl = document.getElementById("cboRptGroupBy");

        // Discover the columns from the first record: one <th> per key, and
        // remember which columns are numeric (right-aligned / totalled) and
        // which ones match the selected group-by / aggregate fields.
        for (var key in combo[0]) {
            if (col.indexOf(key) === -1) {
                cp = cp + 1;
                col.push(key);
                colno.push(col.indexOf(key) + 1);
                trhead += ('<th>' + key + '</th>')
                trfoot += ('<th></th>')
                if (typeof combo[0][key] === "number") {
                    alignobj = cp;
                    totobj = 1;
                    alignColumns.push(alignobj);
                } else {
                    totobj = 0;
                }
                obj = { "mDataProp": key };
                resultColumns.push(obj);
                totalColumns.push(totobj);
                warpColumns.push(cp);
                if (selectCtrl.options[selectCtrl.selectedIndex].text == key) {
                    groupColumn = cp;
                    groupColumnName = selectCtrl.options[selectCtrl.selectedIndex].text;
                }

                if (document.getElementById("cboRptAggrgBy").value == key) {
                    aggrgColumn = cp;
                    aggrgColumnName = document.getElementById("cboRptAggrgBy").value;
                }
            }
        }

        // No columns means there is nothing to render.
        if (col.length === 0) {
            return;
        }

        // Build the header/footer exactly once, after all columns are known.
        // (Doing this inside the loop appended one partial row per column.)
        var hasNumericColumns = alignColumns.length > 0;
        var theadervalue = '<tr><th>S.No</th>' + trhead + '</tr>';
        var tfootervalue = hasNumericColumns
            ? '<tr><th>Total </br></th>' + trfoot + '</tr>'
            : '<tr><th></th>' + trfoot + '</tr>';
        // The totals footer is only shown when at least one column is numeric.
        $("#trfooter").css("display", hasNumericColumns ? "" : "none");
        $('#trheader').append(theadervalue)
        $('#trfooter').append(tfootervalue)
        $("#tblrptviewhdr").css("display", "");

        // NOTE(review): this configuration object is built but never passed to
        // a DataTable() call within this snippet - presumably that call was
        // lost when the snippet was extracted; verify against the full page.
        var QbDTdata = {
            data: combo,
            columns: resultColumns,
            "bDestroy": true,
            "bFilter": true,
            "oTableTools": true,
            "bLengthChange": true,
            "bInfo": true,
            "paging": true,
            "colReorder": true,
            "bFooter": true,

            "responsive": true,
            "oLanguage": { "oPaginate": { "sPrevious": "<", "sNext": ">" } },
            "pageLength": true,
            //"iDisplayLength": tblPageLen,
            "aLengthMenu": [
                [10, 25, 50, 100, -1],
                [10, 25, 50, 100, "All"]
            ],
            dom: 'Bfrtip',

            // Do not persist column visibility in the saved state.
            stateSaveParams: function(settings, data) {
                for (var i = 0, ien = data.columns.length; i < ien; i++) {
                    delete data.columns[i].visible;
                }
            },
            // Discard any saved ordering when state is restored.
            "stateLoadParams": function(settings, data) {
                data.order = "";
            },
            // Write the 1-based row number into the first cell of each row.
            "fnRowCallback": function(nRow, aData, iDisplayIndex, iDisplayIndexFull) {
                var index = iDisplayIndexFull + 1;
                $('td:first', nRow).html(index);
                console.log(nRow)
                return nRow;
            },

            "columnDefs": [
                { className: "dt-body-right", "targets": alignColumns },
                { className: "text-nowrap", "targets": warpColumns },
                { "searchable": false, "orderable": true, "targets": [0] }
            ],
            "aDataSort": [groupColumn, 'asc'],
            orderCellsTop: true,
            initComplete: function() {
                var info = this.api().page.info();
                console.log('Total records', info.recordsTotal);
                document.getElementById("lblTotalRecords").innerHTML = "Total Record(s) : " + info.recordsTotal;

                // Per-column filters are only configured when no grouping is active.
                if ($('#chkAddFilter').is(':checked')) {
                    if (groupColumn == 0) {
                        configFilter(this, warpColumns);
                    }
                }
            },

            buttons: [{
                    extend: 'print',
                    text: '<i class="fa fa-print" style="color: #313131;"></i>&nbsp; <u>P</u>rint',
                    css: 'btn btn-primary button-input',
                    key: {
                        key: 'p',
                        altkey: true
                    },
                    message: '',
                    titleAttr: 'print',
                    title: '',
                    header: true,
                    footer: true,
                    autoPrint: true,
                    exportOptions: {
                        columns: ':visible',
                        grouped_array_index: groupColumnName,
                        aggrg_array_index: aggrgColumnName,
                        grouped_index: groupColumn,
                        aggrg_index: aggrgColumn,
                        aggrg_type: document.getElementById("cboRptAggrgType").value,
                        fileexp_type: 1
                    }
                },
                {
                    extend: 'pdfHtml5',
                    text: '<i class="fa fa-file-pdf-o" style="color: #ff2116;"></i>&nbsp; P<u>D</u>F',
                    titleAttr: 'PDF',
                    //download: 'open',
                    filename: frmQryView.hRptHdr.value,
                    key: {
                        key: 'd',
                        altkey: true
                    },
                    header: true,
                    footer: true,
                    orientation: PrnOrientation,
                    pageSize: PrnPaperSize,
                    exportOptions: {
                        columns: ':visible', //PdfExpClm,
                        stripHtml: true,
                        stripNewlines: false,
                        grouped_array_index: groupColumnName,
                        aggrg_array_index: aggrgColumnName,
                        grouped_index: groupColumn,
                        aggrg_index: aggrgColumn,
                        aggrg_type: document.getElementById("cboRptAggrgType").value,
                        fileexp_type: 2
                    }
                }
            ],
            "bStateSave": true,
            "fixedHeader": tblFixHdr
        };
    },
    // jQuery.ajax has no `failure` setting; `error` is the callback that runs
    // when the request fails.
    error: function() {
        alert("No records to display ");
    }
});

我希望这是不言自明的。

答案 1 :(得分:0)

您可以使用以下脚本。我不知道您的数据框的列名,因此在下面自行使用了列名 ['x', 'date_time']。

import pandas as pd

# Sample data from the question. The column names 'x' and 'date_time' are
# invented here because the question does not give the real ones.
df = pd.DataFrame({
    'x': [
        3710.968017578125,
        3710.968017578125,
        3712.472900390625,
        3712.472900390625,
        3713.110107421875,
        3713.110107421875,
        3713.89892578125,
        3713.89892578125,
        3713.89892578125,
        3714.64990234375,
        3714.64990234375,
        3714.64990234375,
        3714.64990234375,
    ],
    'date_time': pd.to_datetime([
        '2012-01-07T03:13:43.859Z',
        '2012-01-07T03:13:48.890Z',
        '2012-01-07T03:13:53.906Z',
        '2012-01-07T03:13:58.921Z',
        '2012-01-07T03:14:03.900Z',
        '2012-01-07T03:14:03.937Z',
        '2012-01-07T03:14:13.900Z',
        '2012-01-07T03:14:13.968Z',
        '2012-01-07T03:14:19.000Z',
        '2012-01-07T03:14:24.000Z',
        '2012-01-07T03:14:24.015Z',
        '2012-01-07T03:14:29.000Z',
        '2012-01-07T03:14:29.031Z',
    ]),
})

# Step 1: time elapsed between each row and the previous row that has the
# same 'x' value. The first row of every group has no predecessor, so its
# difference is NaT.
time_diff = df.groupby('x')['date_time'].diff()

# Step 2: keep a row when it has no predecessor (NaT) or when it lies more
# than one second after it. The full Timedelta is compared rather than its
# `.seconds` attribute: `.seconds` is only the seconds component, so it would
# discard gaps between 1 and 2 seconds and gaps of whole days.
# Step 3 of the original recipe (dropping a temporary 'time_diff' column) is
# unnecessary because the differences never become a column of df.
df = df[time_diff.isna() | (time_diff > pd.Timedelta(seconds=1))]

# Bare expression: displays the result when run in a notebook or REPL.
df