此代码用于读取此子例程中的blast文件
parse_blast($filename, \$beginning, \$ending, \@HSPs);
似乎无法通过引用获得@HSPs值,我无法找到它无法获得价值的原因。所有子程序似乎工作我也可以直接在sub parse_one_HSP中打印值。
use strict;
use warnings;
my $filename = q/..\test\input.txt/;
my ($beginning, $ending);
my @HSPs;
parse_blast($filename, \$beginning, \$ending, \@HSPs); #Can't get the HSPs value
print $beginning;
foreach my $t(@HSPs){
print_HSP(%{$t}); #can't print anything here
}
print $ending;
sub print_HSP{
my(%HSP)=@_;
print "\n-> Expect value: $HSP{expect}\n";
print "\n-> Query string: $HSP{query}\n";
print "\n-> Query range: $HSP{query_range}\n";
print "\n-> Subject String: $HSP{subject}\n";
print "\n-> Subject range: $HSP{subject_range}\n";
}
sub parse_blast {
my ($filename, $beginning_ref, $ending_ref, $HSPs) = @_;
# parse the blast output into 3 sections
my ($part1, $part2, $part3); # beginning, alignments and ending
my $in_beginning = 0;
my $in_alignment = 0;
my $in_ending = 0;
open IN, $filename || die;
while (<IN>) {
if (/^T?BLAST[NPX]/) {$in_beginning = 1}
if (/^ALIGNMENTS/) {$in_beginning = 0; $in_alignment = 1; next}
if (/^\s\sDatabase/) {$in_alignment = 0; $in_ending = 1;}
if ($in_beginning) {$part1 .= $_;}
if ($in_alignment) {$part2 .= $_;}
if ($in_ending) {$part3 .= $_;}
}
close IN;
$$beginning_ref = $part1;
$$ending_ref = $part3;
# split the alignments into an array
my @alignments;
split_alignments($part2, \@alignments);
# parse each alignment
foreach my $alignment (@alignments) {
parse_one_alignment($alignment, \@$HSPs);
}
}
sub split_alignments{
my ($alignments, $aligns) = @_;
my @alignment;
while ($alignments =~ /^>.*\n(^(?!>).*\n)+/gm) {
push @$aligns, $&;
}
}
sub parse_one_alignment{
my ($align, $HSPs) = @_;
my ($part1, $part2) = $align =~ /(.*?)( Score =.*)/s;
while ($part2 =~ /^ Score =.*\n(^(?! Score =).*\n)+/mg) {
my %hsp;
parse_one_HSP($part2, \%hsp);
push @$HSPs, %hsp;
}
}
sub parse_one_HSP {
my ($data, $hsp) = @_;
#my $hsp = shift; # reference to hash
# parsing one HSP ...
# declare and initialize variables
my($expect) = '';
my($query) = '';
my($query_range) = '';
my($subject) = '';
my($subject_range) = '';
($expect) = ($data =~ /Expect = (\S+)/);
$query = join ( '' , ($data =~ /^Query.*\n/gm) );
$subject = join ( '' , ($data =~ /^Sbjct.*\n/gm) );
$query_range = join('..', ($data =~ /(\d+).*\D(\d+)/s));
$subject_range = join('..', ($data =~ /(\d+).*\D(\d+)/s));
$query =~ s/[^acgt]//g;
$subject =~ s/[^acgt]//g;
$hsp->{expect} = $expect;
$hsp->{query} = $query;
$hsp->{query_range} = $query_range;
$hsp->{subject} = $subject;
$hsp->{subject_range} = $subject_range;
#print_HSP(%$hsp);
}
答案 0 :(得分:2)
对标量和散列使用太多引用会导致混淆哪个变量是什么。此外,您应该避免在函数参数中传递返回变量。关于Perl的一个很酷的事情是你可以从函数中返回多个变量,这与C。不同。
这是对代码的清理。我无法测试它,因为我没有任何输入数据,但它应该按照您的期望进行:
use strict;
use warnings;
my $filename = q/..\test\input.txt/;
my ($beginning, $ending, @HSPs) = parse_blast($filename);
print $beginning;
foreach my $t (@HSPs){
print_HSP($t); #can't print anything here
}
print $ending;
sub print_HSP{
my($HSP) = @_;
print "\n-> Expect value: $HSP->{expect}\n";
print "\n-> Query string: $HSP->{query}\n";
print "\n-> Query range: $HSP->{query_range}\n";
print "\n-> Subject String: $HSP->{subject}\n";
print "\n-> Subject range: $HSP->{subject_range}\n";
}
sub parse_blast {
my ($filename) = @_;
# parse the blast output into 3 sections
my ($beginning, $alignments, $ending) = ("","","");
my $in_beginning = 0;
my $in_alignment = 0;
my $in_ending = 0;
open(my $IN, "<", $filename) || die;
while (<$IN>) {
if (/^T?BLAST[NPX]/) {
$in_beginning = 1;
}
if (/^ALIGNMENTS/) {
$in_beginning = 0;
$in_alignment = 1;
next;
}
if (/^\s\sDatabase/) {
$in_alignment = 0;
$in_ending = 1;
}
$beginning .= $_ if $in_beginning;
$alignments .= $_ if $in_alignment;
$ending .= $_ if $in_ending;
}
close $IN;
# split the alignments into an array
# and parse each alignment
my @HSPs;
foreach my $alignment (split_alignments($alignments)) {
push @HSPs, parse_one_alignment($alignment);
}
return ($beginning, $ending, @HSPs);
}
sub split_alignments{
my ($alignments) = @_;
my @aligns = ($alignments =~ /^>.*\n (?: ^(?!>).*\n )+/gmx);
return @aligns;
}
sub parse_one_alignment{
my ($align) = @_;
my ($part2) = $align =~ /( Score =.*)/s;
my @HSPs;
while ($part2 =~ /^\s Score \s =.*\n (?:^(?!\s Score \s =).*\n)+/mgx) {
push @HSPs, parse_one_HSP($part2);
}
return @HSPs;
}
sub parse_one_HSP {
my ($data) = @_;
my ($expect) = ($data =~ /Expect = (\S+)/);
my $query = join '' , ($data =~ /^Query.*\n/gm);
$query =~ tr/acgt//cd;
# FIXME "Sbjct" contains a "c"
my $subject = join '' , ($data =~ /^Sbjct.*\n/gm);
$subject =~ tr/acgt//cd;
my $query_range = join '..', ($data =~ /(\d+).*\D(\d+)/s);
my $subject_range = join '..', ($data =~ /(\d+).*\D(\d+)/s);
return {
expect => $expect,
query => $query,
query_range => $query_range,
subject => $subject,
subject_range => $subject_range,
};
}