
时间:2014-09-26 11:07:05

标签: perl csv





Id  Index Fragment
11    A        abc  
12    B        pqr 
13    D        asd


Id  Index Fragment    
12    E       pol
15    G       pqr   
17    H       trw 


Fragment Id_file1 File_1 Id_file_2 File_2
abc         11        1               0
pqr         12        1     15        1     
asd         13        1               0     
pol                   0     12        1
trw                   0     17        1

use strict;
use warnings;
use feature qw(say);
use autodie;
use Text::CSV_XS;

# Names of the three input files whose contents are compared.
use constant {
    FILE_1  => "1.csv",
    FILE_2  => "2.csv",
    FILE_3  => "3.csv",
};

# Maps each input line (the whole chomped line is the key) to a score that
# encodes which files contain it: +1 for FILE_1, +10 for FILE_2, +100 for
# FILE_3.  A line present in all three files therefore ends up with 111.
my %hash;

# The three files are processed identically, so one loop handles them all
# instead of three copies of the same code.
for my $source ( [ FILE_1, 1 ], [ FILE_2, 10 ], [ FILE_3, 100 ] ) {
    my ( $path, $weight ) = @{$source};

    # Lines already counted for this file; a line repeated inside a single
    # file must add the file's weight only once or the score becomes
    # ambiguous (e.g. 20 instead of 10).
    my %seen_in_file;

    open my $fh, "<", $path;    # autodie reports failures
    while ( my $value = <$fh> ) {
        chomp $value;
        next if $seen_in_file{$value}++;
        $hash{$value} += $weight;
    }
    close $fh;
}

# Print one line per key: the key followed by three 0/1 presence flags.
# The score stored in %hash uses the ones digit for file 1, the tens digit
# for file 2 and the hundreds digit for file 3, so each flag is read from
# its own digit.  This also reports partial combinations (e.g. 11 = files
# 1 and 2) correctly instead of printing "1 1 1" for every multi-file key.
# NOTE(review): the flag columns are emitted in the order file 3, file 2,
# file 1, matching the original single-file branches; confirm that this is
# the intended column order.
for my $k ( sort keys %hash ) {
    my $score = $hash{$k};
    my @flags = map { ( int( $score / $_ ) % 10 ) ? 1 : 0 } 100, 10, 1;
    say join "\t", $k, @flags;
}

# Write a header row followed by every key of %hash (sorted) to final.csv.
# A lexical handle and three-argument open replace the bareword/two-argument
# form, and each line is terminated explicitly instead of changing the
# global output separator $, for the rest of the program.
open my $final_fh, '>', 'final.csv'
    or die "Cannot open final.csv for writing: $!\n";
print {$final_fh} "fragment\t1\t2\t3 \n";
print {$final_fh} "$_ \n" for sort keys %hash;

# Buffered write errors only surface when the handle is closed.
close $final_fh or die "Cannot close final.csv: $!\n";

2 个答案:

答案 0 :(得分:1)



use strict;
use warnings;

# put our files in an array
my @files = ('1.csv', '2.csv', '3.csv');

# Hash of hashes of hashes: fragment => file name => { ix => ..., id => ... }
my %hash;

# Every file is parsed the same way, so run the same code on each of them
# rather than repeating it per file.
for my $f (@files) {
    open my $fh, "<", $f or die "Could not open $f: $!";
    while (my $val = <$fh>) {
        # skip the first (header) line of each file
        next if $. == 1;
        chomp $val;
        # split the line by the tabs
        my ($id, $ix, $frag) = split(/\t/, $val);
        # ignore blank or short lines that carry no fragment column
        next unless defined $frag && length $frag;
        # keys are the fragment, then the file name; the index and the id
        # are stored, but this can be altered for files of a different
        # format or to keep different data
        $hash{$frag}{$f} = { ix => $ix, id => $id };
    }
    # closing the handle explicitly also resets the line counter $.
    close $fh or die "Could not close $f: $!";
}


use feature qw(say);    # say is not available unless the feature is enabled

# NOTE: $x and $y are placeholders for fragment names chosen by the reader;
# they must be declared and assigned before these examples are run.

# get the ID of the fragment $x in 2.csv
# The exists checks stop the lookup from autovivifying $hash{$x} and
# $hash{$x}{"2.csv"}, which would otherwise make the fragment appear to be
# present in 2.csv when the output is generated later.
if ( exists $hash{$x} && exists $hash{$x}{"2.csv"} ) {
    say $hash{$x}{"2.csv"}{id};
}

# check if fragment $y exists in 3.csv, and print the index if so
# (exists on the first level keeps an unknown $y from being added to %hash)
if ( exists $hash{$y} && $hash{$y}{"3.csv"} ) {
    say $hash{$y}{"3.csv"}{ix};
}


# set up the output file
open my $out, '>', 'final.csv'
    or die "Cannot open final.csv for writing: $!\n";

# print out a header row
# map applies the code within the brackets to every element of @files, so
# each file contributes two columns: "ID_<file name>" and "<file name>".
# join glues the resulting items together with "\t".
print { $out } join("\t", "Fragment", map { "ID_$_\t$_" } @files) . "\n";

# now, output our data, one row per fragment
for my $frag ( sort keys %hash ) {
    my @columns = ($frag);

    # check which files the fragment appears in
    for my $file (@files) {
        my $entry = $hash{$frag}{$file};
        if ($entry) {
            # present in that file: the ID, then '1'
            push @columns, $entry->{id} // '', 1;
        }
        else {
            # absent: nothing in the ID column, and 0 in the file column
            push @columns, '', 0;
        }
    }

    # Joining the columns avoids the stray trailing tab that gave every
    # data row one more column than the header row.
    print { $out } join("\t", @columns) . "\n";
}

# buffered write errors only surface when the handle is closed
close $out or die "Cannot close final.csv: $!\n";

答案 1 :(得分:0)


  1. 将文件名放入一个数组(直接写在文件作用域中,或从命令行参数读取),这样就不必为每个文件重复同样的代码。 my @files = ( "file1", "file2", "file3");
  2. 循环遍历此列表,打开每个文件,并将其中的片段添加到一个哈希中:以片段字符串作为键,值是一个结构列表,记录该片段出现在哪个文件中以及对应的 Id、行号和索引。
  3. 所以哈希在最后看起来像这样:

    # Illustrative final shape: each fragment maps to a list with one record
    # per occurrence (fileIdx is the position of the file in @files).
    %hash = (
      "abc"  => [ {fileIdx => 0, id => 11, line => 1, ind => "A"} ] ,
      "pqr"  => [ {fileIdx => 0, id => 12, line => 2, ind => "B"}, 
                  {fileIdx => 1, id => 15, line => 2, ind => "G"}]
    );
  4. 之后你要做的只是遍历哈希,并迭代每个键对应的结构列表。