基于字段值获取散列中的最大值

时间:2017-08-23 23:08:14

标签: perl hash hashmap

我想根据字段值获取散列中的最大值。首先,我通过指定键和值把每一行放入散列;然后我想对散列进行排序,并且对每一组键(date、name、type、ref)只把 val_dens 字段值最大的那一行打印到输出文件中:

我的CSV文件是这样的:

date;name;type;ref;val_dens;val_number;val_tech;rate
08.18.2017;FBT;F13;G45G6;44.19;21.11;78.21;42
08.18.2017;FBT;F13;G45G6;55.39;24.14;56.22;32
08.18.2017;FRT;B14;KY5F6;0.19;27.23;77.27;62
08.18.2017;FBT;F13;G45G6;11.91;26.16;36.21;72
08.18.2017;FBR;F12;561G6;12.51;21.13;76.2;18
08.18.2017;FBR;F12;561G6;27.65;21.11;71.7;27
08.18.2017;FBR;F12;561G6;9.15;18.21;77.1;11
08.18.2017;FRT;B14;KY5F6;0.65;24.14;75.21;52
08.18.2017;FRT;B14;KY5F6;0.14;27.11;74.12;12
08.18.2017;FBT;F13;G45G6;14.54;62.18;16.24;52
08.18.2017;FBR;F12;561G6;11.18;17.12;73.4;18
08.18.2017;FBR;F12;561G6;25.12;15.23;72.5;17
08.18.2017;FRT;B14;KY5F6;0.25;26.67;72.26;72
08.18.2017;FBT;F13;G45G6;22.39;52.14;26.25;12
08.18.2017;FBT;F13;G45G6;11.42;82.19;76.21;32

我的预期输出是:

date;name;type;ref;val_dens;val_number;val_tech;rate
08.18.2017;FBR;F12;561G6;27.65;21.11;71.7;27
08.18.2017;FBT;F13;G45G6;55.39;24.14;56.22;32
08.18.2017;FRT;B14;KY5F6;0.65;24.14;75.21;52

我的代码是:

#!C:\Perl64\bin\perl.exe
# Reads a semicolon-separated file and, for every distinct
# (date, name, type, ref) combination, keeps only the row whose val_dens is
# numerically largest. The surviving rows are written to the output file,
# sorted by the combined key, below a fixed header line.
use strict;
use warnings;

my $input_file  = "C:\\my_file.csv";
my $output_file = 'C:\\output.csv';

# Columns that form the grouping key, and columns that form the payload.
my @key_cols   = qw( date name type ref );
my @value_cols = qw( val_dens val_number val_tech rate );

# Combined key => { dens => val_dens of the best row so far,
#                   rest => that row's value columns joined with ';' }.
my %best_for;

# Column name => position in a row; filled in once, from the header line.
my %index;

open my $info, '<', $input_file or die "Could not open input file: $!";
while ( my $line = <$info> ) {

    # Strip the line ending (LF or CRLF) so the last column stays clean and
    # no blank lines end up in the output.
    $line =~ s/\r?\n\z//;
    next if $line eq '';

    # The -1 limit keeps trailing empty fields instead of dropping them.
    my @fields = split /;/, $line, -1;

    # The first non-empty line is the header: learn the column positions
    # from it once, then move on to the data rows.
    if ( !%index ) {
        @index{@fields} = ( 0 .. $#fields );
        for my $col ( @key_cols, @value_cols ) {
            die "Missing column '$col' in header of input file\n"
                unless exists $index{$col};
        }
        next;
    }

    my @keys   = @fields[ @index{@key_cols} ];
    my @values = @fields[ @index{@value_cols} ];

    # Skip rows that are too short to contain every required column.
    next if grep { !defined } @keys, @values;

    my $key  = join ';', @keys;
    my $dens = $values[0];

    # Keep this row only if it is the first one for its key or it has a
    # larger val_dens than the row kept so far.
    if ( !exists $best_for{$key} or $dens > $best_for{$key}{dens} ) {
        $best_for{$key} = { dens => $dens, rest => join( ';', @values ) };
    }
}
close $info;

open( my $fh, '>', $output_file )
    or die "Could not open file '$output_file' $!";

# print header
print $fh "date;name;type;ref;val_dens;val_number;val_tech;rate" . "\n";

# Print the row with the maximum val_dens for each key, in sorted key order.
foreach my $key ( sort keys %best_for ) {
    print $fh "$key;$best_for{$key}{rest}\n";
}

# Buffered write errors only surface when the handle is closed.
close $fh or die "Could not write file '$output_file' $!";

谢谢

1 个答案:

答案 0 :(得分:1)

# For every distinct (date, name, type, ref) combination in the input CSV,
# keeps the row with the numerically largest val_dens and writes those rows,
# sorted by the combined key, to the output CSV.
use strict;
use warnings;

use Text::CSV_XS qw( );

my $qfn_in  = "C:\\my_file.csv";
my $qfn_out = "C:\\output.csv";

open(my $fh_in, '<', $qfn_in)
   or die("Can't open \"$qfn_in\": $!\n");

open(my $fh_out, '>', $qfn_out)
   or die("Can't create \"$qfn_out\": $!\n");

my $csv = Text::CSV_XS->new({
   auto_diag => 2,
   binary    => 1,
   sep_char  => ';',
});

# Consume the header line so getline_hr can return rows keyed by column name.
$csv->header($fh_in);

# Combined key => row (hash ref) with the largest val_dens seen so far.
my %data;
while (my $row = $csv->getline_hr($fh_in)) {
   my $key = join(';', @$row{qw( date name type ref )});
   $data{$key} = $row
      if !exists($data{$key})
      || $data{$key}{val_dens} < $row->{val_dens};
}

my @output_cols = qw( date name type ref val_dens val_number val_tech rate );
$csv->say($fh_out, \@output_cols);

# Hash order is not stable, so sort the keys to get deterministic output.
for my $key (sort(keys(%data))) {
   $csv->say($fh_out, [ @{ $data{$key} }{@output_cols} ]);
}

# Buffered write errors only surface when the handle is closed.
close($fh_out)
   or die("Can't write \"$qfn_out\": $!\n");