我有一个perl脚本,它给出了multifasta(.txt格式)输入文件中存在的所有序列的总和结果。请帮助我修改代码,使其分别为每个序列提供结果。
输入文件seq:
>seq1
GAA
>seq2
ALL
预期产出:
seq1
sum=1
abs=21
seq2
sum=1
abs=21
实际脚本:
#!/usr/bin/perl
# Reads a FASTA-style text file whose name is typed on STDIN and counts how
# often each of 22 two-letter motifs occurs (case-insensitively) over the whole
# file.  Prints the per-motif counts, their sum, and "abs" (the number of
# motifs that never occur), then writes sum and abs to countbase.txt.
use strict;
use warnings;

# Motifs to count, in the order they are reported.
my @motifs = qw(
    GA AL MM DE DV VD DW QD SD DD ED
    DY VE EN II KE NV VP FV SS WK KK
);

print "Please type the filename of the DNA sequence MMta: ";
my $filename = <STDIN>;
$filename = '' unless defined $filename;    # EOF on STDIN: let the open below fail
chomp $filename;

# Three-argument open so characters in the typed name are never read as a mode.
my $in;
unless ( open($in, '<', $filename) ) {
    print "Cannot open file \"$filename\"\n\n";
    exit;
}
my @lines = <$in>;
close($in);

my $contents = join('', @lines);
print "\n DNA:\n\n$contents\n";

# Header lines (starting with '>') carry names, not residues, so they are left
# out of the text that is scanned for motifs.
my $residues = join('', grep { !/^>/ } @lines);

# Each motif is scanned independently, so a residue can take part in two
# different motifs (e.g. "GAL" counts once for GA and once for AL).
my %count_of;
for my $motif (@motifs) {
    # A list assignment evaluated in scalar context yields the match count.
    my $hits = () = $residues =~ /$motif/ig;
    $count_of{$motif} = $hits;
}

my $sum = 0;
$sum += $count_of{$_} for @motifs;

# Compare numerically: looking for the character "0" in the printed counts
# would also hit 10, 20, 100, ...
my $abs = grep { $count_of{$_} == 0 } @motifs;

# Each count is printed under its own motif name.
print "$_ = $count_of{$_}\n" for @motifs;
print "sum=$sum\n";
print "abs=$abs";

my $outputfile = "countbase.txt";
my $out;
unless ( open($out, '>', $outputfile) ) {
    print "Cannot open file \"$outputfile\" to write\nto!!\n\n";
    exit;
}
print {$out} "$sum\n$abs";
# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot close file \"$outputfile\": $!\n";
exit;
答案 0 :(得分:0)
这是一种完成工作的方法:
#!/usr/bin/perl
# Reads a multi-FASTA file whose name is typed on STDIN and, for every
# sequence in it, writes to countbase.txt the sequence name, "sum" (total
# occurrences of the 22 two-letter motifs below, case-insensitive) and "abs"
# (how many of those motifs do not occur in that sequence).
use strict;
use warnings;
use List::Util qw(sum);

# Motifs to look for in each sequence.
my @motifs = qw(
    GA AL MM DE DV VD DW QD SD DD ED
    DY VE EN II KE NV VP FV SS WK KK
);

print "Please type the filename of the DNA sequence MMta: ";
my $file = <STDIN>;
die "No filename given\n" unless defined $file;
chomp $file;

# Three-argument open with an explicit error check.
open my $fh_in, '<', $file or die "Unable to open '$file': $!";

my @names;          # sequence names in the order they first appear
my %residues_of;    # sequence name => residues of that record, joined
my $seq;            # name of the record currently being read

while (my $line = <$fh_in>) {
    $line =~ s/\s+\z//;    # drops the line ending, including a stray CR

    # A header line starts a new record; it is never scanned for motifs.
    if ($line =~ /^>/) {
        my ($name) = $line =~ /^>\s*(\S+)/;
        $seq = defined $name ? $name : '';
        unless (exists $residues_of{$seq}) {
            push @names, $seq;
            $residues_of{$seq} = '';
        }
        next;
    }

    # Anything before the first header belongs to no sequence.
    next unless defined $seq;

    # Joining the lines lets a motif split across a line wrap be found.
    $residues_of{$seq} .= $line;
}
close $fh_in;

open my $fh_out, '>', 'countbase.txt' or die "Unable to open 'countbase.txt': $!";

# Report every sequence, in input order, including those with no motif at all.
for my $name (@names) {
    my %count;
    for my $motif (@motifs) {
        # Each motif is scanned on its own so that neighbouring motifs sharing
        # a residue (e.g. GA and AL in "GAL") are both counted.  A list
        # assignment evaluated in scalar context yields the match count.
        my $hits = () = $residues_of{$name} =~ /$motif/ig;
        $count{$motif} = $hits if $hits;
    }
    my $sum = sum(0, values %count);    # leading 0 keeps the sum defined when empty
    my $abs = @motifs - keys %count;
    print {$fh_out} $name, "\nsum = ", $sum, "\nabs = ", $abs, "\n";
}

# Buffered write errors only surface when the handle is closed.
close $fh_out or die "Unable to close 'countbase.txt': $!";