perl子例程引用无法获取值

时间:2014-04-25 04:04:41

标签: perl

此代码用于读取此子例程中的blast文件

parse_blast($filename, \$beginning, \$ending, \@HSPs);

似乎无法通过引用获得@HSPs值,我无法找到它无法获得价值的原因。所有子程序似乎工作我也可以直接在sub parse_one_HSP中打印值。

use strict;
use warnings;
my $filename = q/..\test\input.txt/;
my ($beginning, $ending);
my @HSPs;

parse_blast($filename, \$beginning, \$ending, \@HSPs); #Can't get the HSPs value
print $beginning;

foreach my $t(@HSPs){
    print_HSP(%{$t});  #can't print anything here
}
print $ending; 


sub print_HSP{
    my(%HSP)=@_;
    print "\n-> Expect value:   $HSP{expect}\n";
    print "\n-> Query string:   $HSP{query}\n";
    print "\n-> Query range:    $HSP{query_range}\n";
    print "\n-> Subject String: $HSP{subject}\n";
    print "\n-> Subject range:  $HSP{subject_range}\n";
}

sub parse_blast {
    my ($filename, $beginning_ref, $ending_ref, $HSPs) = @_;
    # parse the blast output into 3 sections
    my ($part1, $part2, $part3); # beginning, alignments and ending
    my $in_beginning = 0;
    my $in_alignment = 0;
    my $in_ending = 0;
    open IN, $filename || die;
    while (<IN>) {
        if (/^T?BLAST[NPX]/) {$in_beginning = 1}
        if (/^ALIGNMENTS/) {$in_beginning = 0; $in_alignment = 1; next}
        if (/^\s\sDatabase/) {$in_alignment = 0; $in_ending = 1;}
        if ($in_beginning) {$part1 .= $_;}
        if ($in_alignment) {$part2 .= $_;}
        if ($in_ending) {$part3 .= $_;}
    }
    close IN;

    $$beginning_ref = $part1;
    $$ending_ref = $part3;
    # split the alignments into an array
    my @alignments;
    split_alignments($part2, \@alignments);
    # parse each alignment
    foreach my $alignment (@alignments) {
        parse_one_alignment($alignment, \@$HSPs);
    }
}

sub split_alignments{
    my ($alignments, $aligns) = @_;
    my @alignment;
    while ($alignments =~ /^>.*\n(^(?!>).*\n)+/gm) {
        push @$aligns, $&;
    }
}

sub parse_one_alignment{
    my ($align, $HSPs) = @_;
    my ($part1, $part2) = $align =~ /(.*?)( Score =.*)/s;

    while ($part2 =~ /^ Score =.*\n(^(?! Score =).*\n)+/mg) {
        my %hsp;
        parse_one_HSP($part2, \%hsp);

        push @$HSPs, %hsp;
    }
}

sub parse_one_HSP {
    my ($data, $hsp) = @_;
    #my $hsp = shift; # reference to hash

    # parsing one HSP ...

    # declare and initialize variables
    my($expect) = '';
    my($query) = '';
    my($query_range) = '';
    my($subject) = '';
    my($subject_range) = '';

    ($expect) = ($data =~ /Expect = (\S+)/);

    $query = join ( '' , ($data =~ /^Query.*\n/gm) );

    $subject = join ( '' , ($data =~ /^Sbjct.*\n/gm) );

    $query_range = join('..', ($data =~ /(\d+).*\D(\d+)/s));

    $subject_range = join('..', ($data =~ /(\d+).*\D(\d+)/s));

    $query =~ s/[^acgt]//g;

    $subject =~ s/[^acgt]//g;

    $hsp->{expect} = $expect;
    $hsp->{query} = $query;
    $hsp->{query_range} = $query_range;
    $hsp->{subject} = $subject;
    $hsp->{subject_range} = $subject_range;
    #print_HSP(%$hsp);
}

1 个答案:

答案 0 :(得分:2)

对标量和散列使用太多引用会导致混淆哪个变量是什么。此外,您应该避免在函数参数中传递返回变量。关于Perl的一个很酷的事情是你可以从函数中返回多个变量,这与C。不同。

这是对代码的清理。我无法测试它,因为我没有任何输入数据,但它应该按照您的期望进行:

use strict;
use warnings;

my $filename = q/..\test\input.txt/;
my ($beginning, $ending, @HSPs) = parse_blast($filename); 

print $beginning;    
foreach my $t (@HSPs){
   print_HSP($t);  #can't print anything here
}
print $ending; 

sub print_HSP{
    my($HSP) = @_;
    print "\n-> Expect value:   $HSP->{expect}\n";
    print "\n-> Query string:   $HSP->{query}\n";
    print "\n-> Query range:    $HSP->{query_range}\n";
    print "\n-> Subject String: $HSP->{subject}\n";
    print "\n-> Subject range:  $HSP->{subject_range}\n";
}

sub parse_blast {
    my ($filename) = @_;
    # parse the blast output into 3 sections
    my ($beginning, $alignments, $ending) = ("","","");
    my $in_beginning = 0;
    my $in_alignment = 0;
    my $in_ending = 0;
    open(my $IN, "<", $filename) || die;
    while (<$IN>) {
        if (/^T?BLAST[NPX]/) {
            $in_beginning = 1;
        }
        if (/^ALIGNMENTS/) {
            $in_beginning = 0;
            $in_alignment = 1;
            next;
        }
        if (/^\s\sDatabase/) {
            $in_alignment = 0;
            $in_ending = 1;
        }

        $beginning  .= $_ if $in_beginning;
        $alignments .= $_ if $in_alignment;
        $ending     .= $_ if $in_ending;
    }
    close $IN;

    # split the alignments into an array
    # and parse each alignment
    my @HSPs;
    foreach my $alignment (split_alignments($alignments)) {
        push @HSPs, parse_one_alignment($alignment);
    }
    return ($beginning, $ending, @HSPs);
}

sub split_alignments{
    my ($alignments) = @_;
    my @aligns = ($alignments =~ /^>.*\n (?: ^(?!>).*\n )+/gmx);
    return @aligns;
}

sub parse_one_alignment{
    my ($align) = @_;
    my ($part2) = $align =~ /( Score =.*)/s;

    my @HSPs;
    while ($part2 =~ /^\s Score \s =.*\n (?:^(?!\s Score \s =).*\n)+/mgx) {
        push @HSPs, parse_one_HSP($part2);
    }
    return @HSPs;
}

sub parse_one_HSP {
    my ($data) = @_;

    my ($expect) = ($data =~ /Expect = (\S+)/);

    my $query = join '' , ($data =~ /^Query.*\n/gm);
    $query =~ tr/acgt//cd;

    # FIXME "Sbjct" contains a "c"
    my $subject = join '' , ($data =~ /^Sbjct.*\n/gm);
    $subject =~ tr/acgt//cd;

    my $query_range = join '..', ($data =~ /(\d+).*\D(\d+)/s);

    my $subject_range = join '..', ($data =~ /(\d+).*\D(\d+)/s);

    return {
        expect        => $expect,
        query         => $query,
        query_range   => $query_range,
        subject       => $subject,
        subject_range => $subject_range,
    };
}