按列名过滤CSV,将具有唯一值(和最大年份字段)的行推送到数组

时间:2015-12-12 23:38:36

标签: javascript csv

我有一个包含4列的csv:type,state,year,value

有多行共享相同的 type 和 state。我想过滤这个 csv,使每个 type/state 组合只保留一行,即年份最新的那一行。我想知道如何以编程方式实现这一点,因为我的数据文件最终会变得长得多。如果可能的话,我想用 javascript 来做。

这是我的数据:

type,state,year,value
    oranges,florida,1990,18.17
    oranges,florida,1980,14.52
    oranges,florida,2000,11.1
    oranges,florida,2010,9.8
    lemons,florida,1990,8.2
    lemons,florida,1980,6.2
    lemons,florida,1985,4.1
    lemons,florida,1987,5.87
    lemons,florida,2002,5.87
    lemons,florida,2003,460
    lemons,florida,2009,730
    lemons,florida,2010,1100
    lemons,florida,2011,1200
    lemons,florida,2012,1200
    lemons,florida,2013,55.5
    lemons,florida,2014,55.1
    lemons,florida,2015,53.1
    limes,florida,1991,49.9
    limes,florida,1992,45.6
    limes,florida,1993,41.8
    limes,florida,2002,3.23
    limes,florida,2003,3.23
    limes,florida,2009,10.767
    limes,florida,2011,34.34492
    oranges,california,1964,49.9
    oranges,california,1965,0.293
    oranges,california,1970,0.293
    oranges,california,1990,16
    oranges,california,1980,16
    oranges,california,1985,29.8
    oranges,california,1987,6.3
    oranges,california,2002,6.31
    oranges,california,2003,6.27
    oranges,california,2008,6.24
    oranges,california,2009,6.38
    oranges,california,2010,10.3
    oranges,california,2011,10.3
    oranges,california,2012,10.4
    oranges,california,2013,8.5
    oranges,california,2014,12.6
    oranges,california,2015,75.1
    lemons,california,1950,74.8
    lemons,california,1955,74.5
    lemons,california,1960,74.2
    lemons,california,1965,72.9
    lemons,california,1970,72.7
    limes,california,1990,72.4
    limes,california,1991,72.1
    limes,california,1992,102.56187
    limes,california,1993,102.25079
    limes,california,1994,96.70884
    limes,california,1995,88

我希望我的结果是:

type,state,year,value
lemons,california,1970,72.7
limes,california,1995,88
oranges,california,2015,75.1
lemons,florida,2015,53.1
limes,florida,2011,34.34492
oranges,florida,2010,9.8

3 个答案:

答案 0 :(得分:0)

  1. 创建空数组
  2. 在循环中逐行获取csv
  3. 用逗号分割,得到水果、州和年份
  4. 如果数组中还没有该水果和州的组合,就把它添加到数组
  5. 否则比较年份,如果更新,就用新值替换数组中的旧值
  6. [编辑]你可以直接遍历由csv转换得到的数组:

    代码示例:

        // Latest known year per "state_fruit" key. A plain object is used
        // because the keys are strings, not numeric array indices.
        var items = {};
        items['florida_oranges'] = 1990;

        // data taken from array list (one parsed CSV row)
        var newYear = 1994;
        var state = 'florida';
        var fruit = 'oranges';

        // checking: store the year when the state/fruit pair has not been
        // seen yet, or when this row is more recent than the stored one
        var index = state + "_" + fruit;
        if (items[index] === undefined || items[index] < newYear) items[index] = newYear;
    

答案 1 :(得分:0)

以下是如何在nodejs中完成的示例。我保持这个例子小而简单...

您需要运行的内容:

  • 需要:
  • 已安装的nodejs
  • 已安装fast-csv模块(npm install fast-csv)

  • 假设输入文件(data.csv)

  • 输出文件为(stats.csv)

//代码段

// Node's file-system module (used when writing the output file) and the
// fast-csv parser/formatter.
const fs = require('fs');
const csv = require('fast-csv');

// Rows kept so far; addOrUpdate() appends to and updates this array.
const stats = [];

// Stream data.csv and handle its rows one at a time.
const rowStream = csv.fromPath("data.csv");

// Every parsed row is folded into `stats`.
rowStream.on("data", (row) => {
  addOrUpdate(row);
});

// Once the whole input has been read, log the result and write it out.
rowStream.on("end", () => {
  console.log(stats);
  writeStats(stats);
});

/**
 * Fold one parsed CSV row into the module-level `stats` array so that it
 * holds a single row per type/state pair: the one with the latest year.
 *
 * If a row with the same type (column 0) and state (column 1) is already
 * stored, its year and value (columns 2 and 3) are overwritten in place when
 * `item` has a later year; otherwise `item` itself is appended.
 *
 * @param {Array} item - Row laid out as [type, state, year, value].
 */
function addOrUpdate(item) {
        for ( var i=0; i<stats.length; i++ ) {
                var kept = stats[i];
                if ( kept[0] === item[0] && kept[1] === item[1] ) {
                        // Compare the YEAR column, and compare it numerically:
                        // the fields are used as parsed (strings when read from
                        // CSV), and comparing the value column would keep the
                        // largest value instead of the most recent row.
                        if ( Number(kept[2]) < Number(item[2]) ) {
                                kept[2] = item[2];
                                kept[3] = item[3];
                        }
                        return;
                }
        }
        // First time this type/state pair is seen.
        stats.push(item);
}

/**
 * Write the given rows to "stats.csv" through a fast-csv write stream
 * created with `headers: false`.
 *
 * @param {Array} stats - Rows to write, one csvStream.write() call per entry.
 */
function writeStats(stats) {
  var csvStream = csv.createWriteStream({headers: false});
  var writableStream = fs.createWriteStream("stats.csv");

  csvStream.pipe(writableStream);
  // `i` is declared locally: the undeclared counter used previously leaked
  // into the global scope and throws a ReferenceError in strict mode.
  for ( var i=0; i<stats.length; i++ ) {
     csvStream.write(stats[i]);
  }
  csvStream.end();

}

答案 2 :(得分:0)

您可以使用此PHP代码执行此操作:

<?php
    // Reduce input.csv (columns: type,state,year,value) to one row per
    // type/state pair -- the row with the greatest year -- and write the
    // header plus the kept rows to output.csv.

    // Put the csv file in an array of rows. Line endings and blank lines are
    // dropped so that every entry parses into a complete row.
    $csvData = array_map('str_getcsv', file("input.csv", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES));
    $csvHeader = array_shift($csvData);    // chop off first element (header) and save it
    $testArray = array();   // kept rows, keyed by "type_state"

    // iterate over rows in csvData
    foreach ($csvData as $row) {
        $key = $row[0] . '_' . $row[1];

        // Keep the row if this type/state pair is new, or if its year is more
        // recent than the stored one. isset() tests the KEY (in_array() only
        // searches values, so it never found an existing pair), and the years
        // are compared as integers rather than as strings.
        if (!isset($testArray[$key]) || (int) $testArray[$key]['year'] < (int) $row[2]) {
            // build multidimensional array
            $testArray[$key] = array(
                'type' => $row[0],
                'state' => $row[1],
                'year' => $row[2],
                'value' => $row[3],
            );
        }
    }

    array_unshift($testArray, $csvHeader);  // put header back
    $fp = fopen('output.csv', 'w'); // open file

    // fill csv file
    foreach ($testArray as $fields) {
        fputcsv($fp, $fields);
    }

    fclose($fp);    // close file
?>