我有一个包含4列的csv:type,state,year,value
有多个行共享相同的类型和状态。我想过滤csv所以我只为每个类型/状态组合获得一行,其中该行具有最近的一年。 我想知道如何以编程方式执行此操作,因为我的数据文件最终会更长,更长。如果可能的话,我想用javascript做这件事。
这是我的数据:
type,state,year,value
oranges,florida,1990,18.17
oranges,florida,1980,14.52
oranges,florida,2000,11.1
oranges,florida,2010,9.8
lemons,florida,1990,8.2
lemons,florida,1980,6.2
lemons,florida,1985,4.1
lemons,florida,1987,5.87
lemons,florida,2002,5.87
lemons,florida,2003,460
lemons,florida,2009,730
lemons,florida,2010,1100
lemons,florida,2011,1200
lemons,florida,2012,1200
lemons,florida,2013,55.5
lemons,florida,2014,55.1
lemons,florida,2015,53.1
limes,florida,1991,49.9
limes,florida,1992,45.6
limes,florida,1993,41.8
limes,florida,2002,3.23
limes,florida,2003,3.23
limes,florida,2009,10.767
limes,florida,2011,34.34492
oranges,california,1964,49.9
oranges,california,1965,0.293
oranges,california,1970,0.293
oranges,california,1990,16
oranges,california,1980,16
oranges,california,1985,29.8
oranges,california,1987,6.3
oranges,california,2002,6.31
oranges,california,2003,6.27
oranges,california,2008,6.24
oranges,california,2009,6.38
oranges,california,2010,10.3
oranges,california,2011,10.3
oranges,california,2012,10.4
oranges,california,2013,8.5
oranges,california,2014,12.6
oranges,california,2015,75.1
lemons,california,1950,74.8
lemons,california,1955,74.5
lemons,california,1960,74.2
lemons,california,1965,72.9
lemons,california,1970,72.7
limes,california,1990,72.4
limes,california,1991,72.1
limes,california,1992,102.56187
limes,california,1993,102.25079
limes,california,1994,96.70884
limes,california,1995,88
我希望我的结果是:
type,state,year,value
lemons,california,1970,72.7
limes,california,1995,88
oranges,california,2015,75.1
lemons,florida,2015,53.1
limes,florida,2011,34.34492
oranges,florida,2010,9.8
答案 0 :(得分:0)
[编辑]你可以直接循环从csv翻译的数组:
代码示例:
var items = [];
items['florida_oranges'] = 1990;
//data taken from array list
var newYear = 1994;
var state = 'florida';
var fruit = 'oranges';
//checking
var index = state + "_" + fruit;
if(items[index] && items[index] < newYear) items[index] = newYear;
答案 1 :(得分:0)
以下是如何在nodejs中完成的示例。我保持这个例子小而简单...
您需要运行的内容:
安装了fast-csv模块(npm install 快速CSV)
假设输入文件(data.csv)
//代码段
var fs = require('fs');
var csv = require('fast-csv');
var stats = [];
csv
.fromPath("data.csv")
.on("data", function(data){
addOrUpdate(data);
})
.on("end", function(){
console.log(stats);
writeStats(stats);
});
function addOrUpdate(item) {
var found = false;
for ( var i=0; i<stats.length; i++ ) {
if ( stats[i][0] === item[0] && stats[i][1] === item[1] ) {
found = true;
if (stats[i][3] < item[3]) {
stats[i][2] = item[2];
stats[i][3] = item[3];
}
break;
}
}
if ( false == found) {
stats.push(item);
}
}
function writeStats(stats) {
var csvStream =
csv
.createWriteStream({headers: false}),
writableStream = fs.createWriteStream("stats.csv");
csvStream.pipe(writableStream);
for ( i=0; i<stats.length; i++ ) {
csvStream.write(stats[i]);
}
csvStream.end();
}
答案 2 :(得分:0)
您可以使用此PHP代码执行此操作:
<?php
$csvData = array_map('str_getcsv', file("input.csv")); // put csv file in an array
$csvHeader = array_shift($csvData); // chop off first element (header) and save in array
$testArray = array(); // create array
// iterate over rows in csvData
for ($i = 0; $i < count($csvData); $i++) {
// if state and type are NOT in array or, year selected row is greater than the one in array
if (!in_array($csvData[$i][0] . '_' . $csvData[$i][1], $testArray) || $testArray[$csvData[$i][0] . '_' . $csvData[$i][1]]['year'] > $csvData[$i][2]) {
// build multidimensional array
$testArray[$csvData[$i][0] . '_' . $csvData[$i][1]] = array(
'type' => $csvData[$i][0],
'state' => $csvData[$i][1],
'year' => $csvData[$i][2],
'value' => $csvData[$i][3],
);
}
}
array_unshift($testArray, $csvHeader); // put header back
$fp = fopen('output.csv', 'w'); // open file
// fill csv file
foreach($testArray as $fields) {
fputcsv($fp, $fields);
}
fclose($fp); // close file
?>