有许多文本文件包含
答案 0 :(得分:1)
这是另一种选择。输出将写入STDOUT并按您的要求进行格式化:
#!/usr/bin/perl
use strict;
use warnings;

# Merge the per-file counts of several "<index>.txt" files into one table.
#
# Each input record is expected to look like "k;p;c;count". The output (on
# STDOUT) has a header line "classname: <index> <index> ..." followed by one
# line per distinct "k;p;c;" name, listing that name's count in every file
# (0 when the file does not contain the name), comma separated.

my $nfiles = 4;      # number of input files; change as per your need
my $finit  = 701;    # the index of the first file

my %names;             # every kpc_name seen in any file
my $hash_ref = {};     # $hash_ref->{file name}{kpc_name} = count
my @files;             # file names in the order they were read

# Start the header line (the file indexes are appended while reading).
print STDOUT "classname:";

for my $i (0 .. $nfiles - 1) {
    my $index = $finit + $i;
    print STDOUT " $index";

    # Remember the real file name: the output loop below must look the
    # counts up under exactly the key they are stored under here.
    my $file_name = "$index.txt";
    push @files, $file_name;

    open my $fh, '<', $file_name or die $!;
    while (my $rec = <$fh>) {
        chomp $rec;
        $rec =~ s/^\s+//;    # strip leading white space
        $rec =~ s/\s+$//;    # strip trailing white space
        next unless $rec;    # skip blank lines

        my ($k, $p, $c, $seq_count) = split /;/, $rec;
        my $kpc_name = "$k;$p;$c;";

        $hash_ref->{$file_name}{$kpc_name} = $seq_count;
        $names{$kpc_name}++;
    }
    close($fh);
}

# Finish the header line.
print STDOUT "\n";

# One output line per kpc_name, sorted alphabetically.
foreach my $kpc_name (sort keys %names) {
    my @counts;
    foreach my $file_name (@files) {
        # Use the stored count if the file has that kpc_name, 0 otherwise.
        push @counts,
            exists $hash_ref->{$file_name}{$kpc_name}
            ? $hash_ref->{$file_name}{$kpc_name}
            : 0;
    }
    # Each count is preceded by a space; counts are separated by commas.
    print STDOUT $kpc_name, join(",", map { " $_" } @counts), "\n";
}
答案 1 :(得分:0)
试试这个。如果数据文件不在运行脚本时所在的当前目录中,可以把该目录作为命令行参数传入;如果不传参数,则默认使用当前目录。
它还默认打印到命令行,可以重定向到您选择的文件。
Ex. perl progname.pl > output.tsv
#!/usr/bin/perl
use strict;
use warnings;

# Build a tab-separated table of per-file counts for every known class.
#
# Usage: perl progname.pl [directory] > output.tsv
#
# The directory (default "./") must contain "uniqueclasses.txt" plus any
# number of data files named like 70N50N_classes.txt. The first six
# characters of each data file name become its column heading.

# Directory to read from: first argument, or the current directory.
my $dir = shift || "./";
# Accept the directory with or without a trailing slash, because file
# names are appended to it directly below.
$dir .= '/' unless $dir =~ m{/\z};

# Collect the data files. The pattern is anchored and the dot escaped so
# only names that really start with the six digit prefix are accepted;
# sorting makes the column order deterministic (readdir order is not).
opendir(my $dh, $dir) or die "Could not open directory: $!";
my @files = sort grep { /\A70\d50\d_classes\.txt\z/ } readdir $dh;
closedir $dh;

# Read the list of classes. Each entry of @classes is
#   [ "<field0> <field1>", { class => <field2>, found => 0 } ]
# and later gains one "<column> => <count>" pair per file that mentions it.
my @classes;
open(my $class_fh, "<", $dir . 'uniqueclasses.txt') or die "Could not open file: $!";
while (my $rec = <$class_fh>) {
    chomp $rec;
    my @line = split /;\s?/, $rec;
    next if @line < 3;    # blank or incomplete line: no class name to use
    push @classes, [join(" ", @line[0,1]), {'class' => $line[2], 'found' => 0}];
}
close $class_fh;

# Go through the data files, only caring about class (third field) and
# count (fourth field) of every line.
for my $file (@files) {
    my $key = substr($file, 0, 6);    # column heading for this file
    open(my $data_fh, "<", $dir . $file) or die "Could not open file: $!";
    while (my $rec = <$data_fh>) {
        chomp $rec;
        my @line = split /\s/, $rec;
        next unless defined $line[2];    # blank or short line: no class field
        for my $class (@classes) {
            my $info = $class->[1];
            next unless $info->{'class'} eq $line[2];
            $info->{$key}    = $line[3];
            $info->{'found'} = 1;
        }
    }
    close $data_fh;
}

# Header line: leading tabs, then one heading per data file.
print "\t\t\t\t";
my @columns = map { substr($_, 0, 6) } @files;
print "\t$_" for @columns;
print "\n";

# One row per class that was found in at least one file. Every row starts
# with its own newline, matching the original output layout.
for my $row (@classes) {
    my ($label, $info) = @{$row};
    next unless $info->{'found'};
    print "\n", $label, "\t", $info->{'class'};
    for my $column (@columns) {
        # Use the stored count when there is one, 0 otherwise.
        print "\t", (defined($info->{$column}) ? $info->{$column} : 0);
    }
}