我有一个CSV文件,在标题行和数据之前还有一些注释文本,我想把它读入一个哈希以便进一步处理。哈希的主键由两个数据值组合而成。我该怎么做?
示例CSV
#
#
#
#
Description information of source of file.
index,label,bit,desc,mnemonic
6,370,11,three,THRE
9,240,23,four,FOR
11,120,n/a,five,FIV
示例所需哈希
( '37011' => { 'index' => '6', 'label' => '370', 'bit' => '11', 'desc' => 'three', 'mnemonic' => 'THRE'}, '24023' => {'index' => '9', 'label' => '240', 'bit' => '23', 'desc' => 'four', 'mnemonic' => 'FOR'}, '120n/a' => {'index' => '11', 'label' => '120', 'bit' => 'n/a', 'desc' => 'five', 'mnemonic' => 'FIV'} )
答案 0 :(得分:10)
您需要Text::CSV模块:
#!/usr/bin/env perl
# Read a CSV file that has free-form preamble lines before its header row and
# build a hash of row records keyed by the concatenation of the "label" and
# "bit" columns. The resulting hash is dumped to STDOUT.
use strict;
use warnings;
use Data::Dumper;
use Text::CSV;

my $filename = 'test.csv';

# Decode strictly as UTF-8; change the layer if the file uses another encoding.
open(my $fh, '<:encoding(UTF-8)', $filename)
    or die "Can't open $filename: $!";

# Skip the preamble: the header row is recognised by its first column name.
my $header;
while (my $line = <$fh>) {
    if ($line =~ /^index,/) {
        $header = $line;
        last;
    }
}
defined $header
    or die "No header line starting with 'index,' found in $filename\n";
$header =~ s/\r?\n\z//;    # parse() wants the bare line, without its EOL

# binary => 1 lets fields contain non-ASCII characters and embedded newlines.
my $csv = Text::CSV->new({ binary => 1 })
    or die "Text::CSV error: " . Text::CSV->error_diag;

# Use the header fields as the column names returned by getline_hr().
$csv->parse($header)
    or die "Can't parse header line: " . $csv->error_diag;
$csv->column_names([$csv->fields]);

# Parse the remaining rows; each row is a hash ref keyed by column name.
my %record_for;
while (my $row = $csv->getline_hr($fh)) {
    my $pkey = $row->{label} . $row->{bit};
    # Plain concatenation can collide; report it instead of losing a row silently.
    warn "Duplicate key '$pkey' at line $.; keeping the later row\n"
        if exists $record_for{$pkey};
    $record_for{$pkey} = $row;
}

# getline_hr() returns false on both EOF and error; tell the two apart.
# In void context error_diag() prints the diagnostic to STDERR.
$csv->eof or $csv->error_diag;
close $fh;

print Dumper \%record_for;
答案 1 :(得分:3)
您可以随时执行以下操作:
#!/usr/bin/env perl
# Build a hash of records from the CSV embedded after __DATA__. Lines before
# the header row are ignored; each record is keyed by the concatenation of its
# second and third columns (label . bit).
use strict;
use warnings;

my %hash;

# Consume the preamble up to and including the header row.
my $labels;
while (my $line = <DATA>) {
    if ($line =~ /^index,/) {
        $labels = $line;
        last;
    }
}
defined $labels or die "No header line found in DATA\n";

# chomp (not chop) so a final line without a newline keeps its last character.
chomp $labels;
my @columns = split /,/, $labels;    # split the header once, not per row

while (my $line = <DATA>) {
    chomp $line;
    next if $line eq '';             # ignore blank lines
    my @values = split /,/, $line;
    my %record;
    @record{@columns} = @values;     # pair column names with values by position
    $hash{ $values[1] . $values[2] } = \%record;
}

# Print one line per record, in sorted key order and header column order so
# the output is reproducible from run to run.
for my $key (sort keys %hash) {
    my $record = $hash{$key};
    print "$key :: ";
    print $_ . "=>" . $record->{$_} . " " for @columns;
    print "\n";
}
__DATA__
#
#
#
#
Description information of source of file.
index,label,bit,desc,mnemonic
6,370,11,three,THRE
9,240,23,four,FOR
11,120,n/a,five,FIV
答案 2 :(得分:2)
Text::CSV::Simple自2005年以来一直存在......
来自文档:
# Map the fields to a hash (excerpt quoted from the Text::CSV::Simple docs).
# $datafile is not defined in this excerpt; presumably it is the path of the
# CSV file to read.
my $parser = Text::CSV::Simple->new;
# Name the columns by position. NOTE(review): per the module documentation a
# column named 'null' is discarded -- confirm against the installed version.
$parser->field_map(qw/id name null town/);
# NOTE(review): with a field_map set, each element of @data should be a hash
# ref keyed by the names above -- confirm in the module docs.
my @data = $parser->read_file($datafile);
...简单!
答案 3 :(得分:1)
# Parse a delimited text file that starts with a header row, invoking a
# callback once per data line.
#
# Arguments:
#   $f  - path of the file to read
#   $s  - code ref; called with a hash ref mapping the lower-cased header
#         names to the fields of the current line
#   %op - options; `delim` is the field delimiter (a string or a regex) and
#         defaults to a tab
#
# While the callback runs, $_ holds the current (chomped) line and $. its
# line number. The caller's own $_ is restored when the sub returns.
# Dies if the file cannot be opened. Returns nothing.
sub parse_csv {
    my ($f, $s, %op) = @_;
    my $delim = $op{delim} || "\t";

    open my $in, '<', $f or die "Can't open $f: $!";

    # Lines are read into $_ on purpose so the callback can see them, but the
    # global is localised so the caller's value survives.
    local $_;
    $_ = <$in>;
    if (!defined $_) {    # empty file: no header, nothing to do
        close $in;
        return;
    }
    chomp;

    # Header names: strip every double quote and lower-case.
    my @header = map { (my $name = $_) =~ s/"//g; lc $name } split /$delim/;

    # R compat: write.csv() leaves the name of the row-names column empty.
    $header[0] = "id" if !@header || $header[0] eq "";

    while (<$in>) {
        chomp;
        my @fields = split /$delim/;

        # Remove one enclosing double quote from each end of every field.
        for my $field (@fields) {
            $field =~ s/^"//;
            $field =~ s/"$//;
        }

        # R compat: write.table() omits the header of the row-names column,
        # which is the FIRST column of every data row, so the missing name
        # belongs at the front of the header.
        unshift @header, "id" if @header == @fields - 1;

        my %record;
        @record{ @header[0 .. $#fields] } = @fields;
        $s->(\%record);
    }

    close $in;
    return;
}
# Example: stop at the first record and dump it for inspection.
parse_csv(
    'file.txt',
    sub {
        my ($record) = @_;
        die Dumper($record);
    },
);
请注意,`$.` 和 `$_` 等变量在回调子程序中仍然可用。