数据涉及两条条带:bar1(hs1)分为 350 个部分,bar2(hs2)分为 700 个部分。f1c、f2c 和 f3c 各列下的值都对应某一条带上的一段区间(从 range1 到 range2)。对于落在同一区间内的值,我想把它们并排列在同一行中。
hs f1c range1 range2
hs1 p32 0 200
hs1 p31 200 300
hs1 p30 300 350
hs2 p32 0 300
hs2 p31 300 500
hs2 p30 500 700
f2c hs range1 range2
DDX11L1 hs1 20 50
FAM41C hs1 50 70
WASH7P hs1 70 120
FAM138A hs1 180 250
OR4F5 hs2 0 50
KLHL17 hs2 50 100
PLEKHN1 hs2 100 150
LOC729737 hs2 300 500
HES4 hs2 500 600
ISG15 hs2 600 700
hs range1 range2 f3c
hs1 0 200 -1
hs1 200 350 -2
hs2 0 500 -1
hs2 500 700 -2
hs f1c f2c range1 range2 f3c
hs1 p32 n 0 20 -1 // From the 1st line of file3, and the 1st line of file1
hs1 p32 DDX11L1 20 50 -1 // From the 1st line of file1, 1st line of file2 and 1st line of file3
hs1 p32 FAM41C 50 70 -1 // From the 1st line of file1, 2nd line of file2 and 1st line of file3
hs1 p32 WASH7P 70 120 -1 // 1st line file1, 3rd line file2, first line file3
hs1 p32 n 120 180 -1 // 1st line file1, 1st line file3
hs1 p32 FAM138A 180 200 -1 // 1st line file1, 4th line file2, 1st line file3
hs1 p31 FAM138A 200 250 -2 // 2nd line file1, 4th line file2, 2nd line file3
hs1 p31 n 250 300 -2 // 2nd line file1, 2nd line file3
hs1 p30 n 300 350 -2 // 3rd line file1, 2nd line file3
hs2 p32 OR4F5 0 50 -1
hs2 p32 KLHL17 50 100 -1
hs2 p32 PLEKHN1 100 150 -1
hs2 p32 n 150 300 -1
hs2 p31 LOC729737 300 500 -1
hs2 p30 HES4 500 600 -2
hs2 p30 ISG15 600 700 -2
答案 0 :(得分:2)
use strict;
use warnings 'all';
use autodie;
use Readonly::Tiny 'Readonly';
# Merge several whitespace-separated tables that each describe ranges
# (`range1` .. `range2`) on a named bar (`hs`). Every distinct boundary
# value seen for a bar splits it into elementary ranges; one output line is
# printed per elementary range, combining the columns of every input row
# whose own range covers it. Columns with no covering row print as `n`.

Readonly my @FILES       => qw/ file1.txt file2.txt file3.txt /;
Readonly my $FORMAT      => "%-6s%-6s%-10s%-5d%-5d%d\n";
Readonly my @OUTPUT      => qw/ hs f1c f2c range1 range2 f3c /;
Readonly my @KEY_COLUMNS => qw/ hs range1 range2 /;

my %data;      # All the data for each value of `hs`
my %bounds;    # All the values of `range1` or `range2` for each value of `hs`
my %heads;     # All the headers found in any of the files
               # (collected here; not consulted later in this script)

# From each file, read the header line and use the
# headers as keys for the data hashes representing each line
for my $file ( @FILES ) {

    open my $fh, '<', $file;    # Errors handled by `autodie`

    # An empty file yields no header line; treat it as having no columns
    # instead of warning on an undefined value
    my @head = split ' ', <$fh> // '';
    @heads{@head} = ();

    while ( <$fh> ) {

        next unless /\S/;       # Skip blank lines

        my %row;
        @row{@head} = split;

        my ($hs, $r1, $r2) = @row{ @KEY_COLUMNS };

        push @{ $data{$hs} }, \%row;

        # Record both ends of the range as boundaries for this `hs`
        ++$bounds{$hs}{$_} for $r1, $r2;
    }

    close $fh;
}

# Change the `%bounds` hash values from
# hashes to sorted arrays of the boundary values
for ( values %bounds ) {

    my @vals = sort {
        # Compare on the digits only, so stray non-digit
        # characters in a boundary value do not disturb the order
        my ($aa, $bb) = map { tr/0-9//cdr } $a, $b;
        $aa <=> $bb;
    } keys %$_;

    $_ = \@vals;    # `$_` aliases the hash value, so this replaces it in place
}

# Work through the `%bounds` hash
# printing a line of output for each range
for my $hs ( sort keys %bounds ) {

    my $bounds = $bounds{$hs};
    my $data   = $data{$hs};

    # Each pair of consecutive boundaries is one elementary range
    for my $i ( 1 .. $#$bounds ) {

        my ($r1, $r2) = map { $bounds->[$_] } $i-1, $i;

        # Every input row whose range fully covers the elementary range
        my @matches = grep {
            $r1 >= $_->{range1} and $r2 <= $_->{range2}
        } @$data;

        # Merge the columns of all matching rows into one output row
        my %row;
        for my $match ( @matches ) {
            @row{ keys %$match } = values %$match;
        }

        @row{ @KEY_COLUMNS } = ($hs, $r1, $r2);    # Overwrite in the new key values

        printf $FORMAT, map { $_ // 'n' } @row{ @OUTPUT };
    }
}
hs1 p32 n 0 20 -1
hs1 p32 DDX11L1 20 50 -1
hs1 p32 FAM41C 50 70 -1
hs1 p32 WASH7P 70 120 -1
hs1 p32 n 120 180 -1
hs1 p32 FAM138A 180 200 -1
hs1 p31 FAM138A 200 250 -2
hs1 p31 n 250 300 -2
hs1 p30 n 300 350 -2
hs2 p32 OR4F5 0 50 -1
hs2 p32 KLHL17 50 100 -1
hs2 p32 PLEKHN1 100 150 -1
hs2 p32 n 150 300 -1
hs2 p31 LOC729737 300 500 -1
hs2 p30 HES4 500 600 -2
hs2 p30 ISG15 600 700 -2