我在这个程序中同时处理两个文件时遇到了问题。我想读取文件 $Q 和文件 $s 的内容。
# Asker's script, kept exactly as posted (it does not compile; see the notes
# marked NOTE(review) below). It builds a k-mer position table from one input,
# builds another from each line of a second input, and then tries to extend
# shared k-mers to the left and right to print matches longer than $t.

# Read the k-mer length ($k) and the length threshold ($t) via the <> operator.
print "Input the K value \n";
$k = <>;
chomp $k;
print "Input T\n";
$t = <>;
chomp $t;
# %Qkmer maps each k-mer of the query input to the counter value ($i) at which
# it was first seen; $query accumulates the query text (it starts as one space).
%Qkmer = ();
$i = 1;
$query=' ';
# NOTE(review): the handle IN is read here, but no open() for it is visible in
# this snippet.
while ($line=<IN>) {
chomp($line);
# Skip lines that begin with '>'.
if ($line=~ m/^>/ ) {
next;
}
$query=$query.$line;
# Strip whitespace that follows the start of the string or a newline.
$line=~ s/(^|\n)[\n\s]*/$1/g;
# Sliding window: take the first $k characters, then drop the first character.
while (length($line) >= $k) {
$line =~ m/(.{$k})/;
if (! defined $Qkmer{$1}) {#only the first occurrence of each k-mer is recorded
$Qkmer{$1} = $i;
}
# $i advances once per window and is not reset between lines.
$i++;
$line = substr($line, 1, length($line) -1);
}
}
# NOTE(review): the result of open() is not checked.
open(MYDATA, '<', "data.txt");
# NOTE(review): the backslash after the opening brace looks like a stray
# character; it ends up in front of the chomp() call on the next line.
while ($line=<MYDATA>) { \
chomp($line);
%Skmer = (); # Reset the Skmer hash for every input line.
$j = 1;
if ($line=~ m/^>/ ) { #if the line starts with >
next; #skip it and continue with the next line
}
$line=~ s/^\s+|\s+$//g ; #trim leading and trailing whitespace from the line
while (length($line) >= $k) {
$line =~ m/(.{$k})/;#capture the first $k characters of what is left of the line
$Skmer{$1} = $j; #store the counter $j for this k-mer (a later occurrence overwrites an earlier one)
$j++;
$line = substr($line, 1, length($line) -1); #this removes the first character in the current string
}
# NOTE(review): when the loop above ends, $line is shorter than $k characters,
# yet it is copied into $S1 further down.
#
# The ###(56)### and ###(83)### tags mark the lines named in the error messages
# quoted in the question. As pasted, the leading '#' turns the whole next line
# into a comment. The statement after the tag is the one reported as
# 'syntax error ... near "$Skmerkey("': there is no space or 'my' between the
# loop variable and the parenthesised list.
###(56)###for($Skmerkey(keys %Skmer)){
$i=$Skmer{$Skmerkey};
# NOTE(review): $Qkmer($Skmerkey) uses parentheses, whereas the hash is indexed
# with braces ($Qkmer{$1}) earlier in the script. If the key is absent, $j keeps
# whatever value it had before.
if(defined $Qkmer($Skmerkey)){
$j=$Qkmer($Skmerkey);
}
$S1=$line;
$S2=$query;
@arrayS1= split(//, $S1);
# NOTE(review): this fills @array2, but the loops below read @arrayS2.
@array2= split(//, $S2);
# Count how many characters match when walking left from positions $i / $j.
# NOTE(review): neither extension loop checks the array bounds.
$l=0;
while($arrayS1[$i-$l] eq $arrayS2[$j-$l]){
$l++;
}
$start=$i-$l;
# Count how many characters match when walking right past the k-mer.
$m=0;
while ($arrayS1[$i+$k+$m] eq $arrayS2[$j+$k+$m]) {
$m++;
}
$length=$l+$k+$m;
$match= substr($S1, $start, $length);
# Only matches longer than the threshold $t are printed.
if($length>$t){
$longest=length($match);
print "Longest: $match of length $longest \n";
}
}
}###(83)###
输入文件只包含字母串。例如:
文件1:
ahhtsagnchjgstffhjyfcsghnvzfhg
文件2:
ggujvfbgfgkjfcijjjffcvvafcsghnvzfhgvugxckugcbhfcgh
ghnvzfhgvugxckHhfgjgcfujvftjbvdtkhvddgjcdgjxdjkfrh
ajdbvciyqdanvkjghnvzfhgvugxc
对于文件1中每个长度为 $k 的单词,我先在文件2中找到它的匹配位置,然后从该单词的左侧和右侧继续向外扩展匹配。最终输出是文件1和文件2之间基于 $k 的最长匹配。
现在运行这段代码时出现了语法错误,我不明白为什么,因为在我看来代码是正确的:
syntax error at testk.pl line 56, near "$Skmerkey("
syntax error at testk.pl line 83, near "}"
谢谢。
答案 0 :(得分:0)
use strict;       # <--- Always use this
use warnings;     # <--- and this
use Data::Dumper; # only used by the commented-out debug print near the end

# Find substrings of at least $k characters shared between the rows of "File1"
# (the S-file) and the rows of "File2" (the Q-file).
#
# Step 1: index every $k-letter window ("word") of each Q row by its text.
# Step 2: slide a $k-letter window over each S row; for every Q occurrence of
#         that word, extend the match to the right for as long as both rows
#         agree, then print it. The tails of a reported match are remembered
#         so that its suffixes are not reported again as separate matches.

my $k = 3;        # word length

my %kmer;         # word => list of [row number, position] pairs in the Q-file
my @Q;            # non-empty rows of the Q-file, indexed by row number

# Use a lexical handle ($IN) instead of a bareword handle, and fail loudly
# when the file cannot be opened instead of silently reading nothing.
open(my $IN, '<', "File2") or die "Cannot open File2: $!";
my $line = 0;     # row number (header and empty rows are not counted)
while (<$IN>) {
    chomp;
    next if /^>/;             # skip header rows
    s/^\s+|\s+$//g;           # trim leading/trailing whitespace
    next if $_ eq '';         # skip empty rows (but keep a row that is just "0")
    push @Q, $_;              # store source row
    my $pos = 0;
    # The lookahead captures $k letters at every offset:
    # a floating window that advances one symbol at a time.
    for my $word (/(?=(.{$k}))/g) {
        push @{ $kmer{$word} }, [ $line, $pos ];   # row number and position of "word"
        $pos++;
    }
    $line++;
}
close($IN);

open($IN, '<', "File1") or die "Cannot open File1: $!";
$line = 0;
while (<$IN>) {               # Read S-file
    chomp;
    next if /^>/;
    s/^\s+|\s+$//g;
    next if $_ eq '';
    my $s      = $_;          # current row of S-file
    my $len    = length($s);  # length of that row
    my $pos    = 0;           # position of the current window in $s
    my @ignore = ();          # [Q row, Q position, remaining length] of match tails

    for my $word ($s =~ /(?=(.{$k}))/g) {
        next if !$kmer{$word};    # "word" not found in Q, try the next window

        # $kmer{$word} holds every [row, position] where the word occurs in Q.
        for my $hit (@{ $kmer{$word} }) {
            my ($qline, $qpos) = @{$hit};
            # print "test $qline:$qpos ";

            # This row/position is the tail of a match that was already
            # reported from an earlier window, so do not report it again.
            if (grep { $_->[0] == $qline && $_->[1] == $qpos } @ignore) {
                # print "Ignore match tail $qline:$qpos\n";
                next;
            }

            # The first $k letters are known to be equal; extend to the right
            # while both rows have characters left and those characters agree.
            my $j    = $k;
            my $qlen = length($Q[$qline]);
            $j++ while ( $pos + $j < $len && $qpos + $j < $qlen
                && substr($s, $pos + $j, 1) eq substr($Q[$qline], $qpos + $j, 1) );

            print "MATCH FOUND: S-file line $line pos $pos, Q-file line $qline pos $qpos: ",
                substr($s, $pos, $j), "\n";
            push @ignore, [ $qline, $qpos, $j ];   # store position and length of match
        }
    }
    continue {    # the continue block also runs after "next" in the window loop
        $pos++;
        # Shift every remembered tail one symbol to the right and shorten it
        # by one, then drop tails that have become shorter than a word.
        for my $tail (@ignore) {
            $tail->[1]++;
            $tail->[2]--;
        }
        @ignore = grep { $_->[2] >= $k } @ignore;
        # print Dumper(\@ignore);
    }
    $line++;
}
close($IN);