尝试安装perl包myconfig的问题

时间:2017-06-01 06:03:51

标签: perl install installation-package

我正在尝试运行一个脚本,并收到一条错误信息,表示我错过了Myconfig "无法在/ paths / to / packages"中找到MyConfig.pm。 我想安装它,但无法找到我从哪里下载它... 我使用的计算机没有互联网连接,所以我不能使用cpanm或apt-get,我打算下载文件并使用make等安装它。

    #!/usr/bin/perl -w

use strict;
use warnings;

use File::Basename;
use Getopt::Long;

use MyConfig;
use Bed;
use ScanStatistics;

use Data::Dumper;

my $prog = basename ($0);

my $verbose = 0;
my $big = 0;
my $minBlockSize = 2000000;
my $separateStrand = 0;

my $valleySeeking = 0;  #0 or 1
my $valleyDepth = 0.9;  #0.9 as default

my $geneBedFile = "";
my $useExpr = 0;
my $pvalueThreshold = 0.01;
my $multiTestCorrection = 0;

my $minPH = 2;
my $maxPH = 5000;
my $maxGap = -1; #no merge by default, default 25 recommended if merge

my $outBoundaryBedFile = "";
my $outHalfPHBedFile = "";

my $prefix = "Peak";

my $cache = getDefaultCache ($prog);
my $keepCache = 0;

my @ARGV0 = @ARGV;

GetOptions (
    'big'=>\$big,
    'p:f'=>\$pvalueThreshold,
    'ss'=>\$separateStrand,
    'valley-seeking'=>\$valleySeeking,
    'valley-depth:f'=>\$valleyDepth,
    'gene:s'=>\$geneBedFile,
    'use-expr'=>\$useExpr,
    'multi-test'=>\$multiTestCorrection,
    'minPH:i'=>\$minPH,
    'maxPH:i'=>\$maxPH,
    'gap:i'=>\$maxGap,
    'out-boundary:s'=>\$outBoundaryBedFile,
    'out-half-PH:s'=>\$outHalfPHBedFile,
    'prefix:s'=>\$prefix,
    'c|cache:s'=>\$cache,
    'keep-cache'=>\$keepCache,
    'v'=>\$verbose);


if (@ARGV != 2)
{
    print "detecting peaks from CLIP data\n";
    print "Usage: $prog [options] <tag.bed> <peak.bed>\n";
    print " <tag.bed> : BED file of unique CLIP tags, input\n";
    print " <peak.bed>: BED file of called peaks, output\n";
    print "Options:\n";
    #print " -e          : expression values indicated in the gene bed file\n";
    print " -big                   : big input file\n";
    print " -ss                    : separate the two strands\n";
    print " --valley-seeking       : find candidate peaks by valley seeking\n";
    print " --valley-depth [float] : depth of valley if valley seeking ($valleyDepth)\n";
    print " --out-boundary [string]: output cluster boundaries\n";
    print " --out-half-PH  [string]: output half peak height boundaries\n";
    print " --gene         [string]: gene bed file for scan statistics\n";
    print " --use-expr             : use expression levels given in the score column in the gene bed file for normalization\n";
    print " -p             [float] : threshold of p-value to call peak ($pvalueThreshold)\n";    
    print " --multi-test           : do Bonferroni multiple test correction\n";
    print " -minPH         [int]   : min peak height ($minPH)\n";
    print " -maxPH         [int]   : max peak height ($maxPH)\n";
    print " -gap           [int]   : merge cluster peaks closer than the gap ($maxGap, no merge if < 0)\n";
    print " --prefix       [string]: prefix of peak id ($prefix)\n";
    print " -c             [dir]   : cache dir\n";
    print " --keep-cache           : keep cache when the job done\n";
    print " -v                     : verbose\n";
    exit (0);
}

print "CMD=$prog ", join(" ", @ARGV0), "\n" if $verbose;

my ($tagBedFile, $outBedFile) = @ARGV;

#check parameters

if ($geneBedFile ne '')
{
    Carp::croak "$geneBedFile does not exist\n" unless -f $geneBedFile;
}

if ($valleySeeking)
{
    Carp::croak "valley-depth must be >= 0.9\n" unless $valleyDepth >= 0.9;
}
else
{
    Carp::croak "must specify --gene or --valley-seeking\n" unless $geneBedFile ne '';
    Carp::croak "gene Bed file $geneBedFile does not exist\n" unless -f $geneBedFile;
    warn "peak merging is strongly recommended (e.g. --gap 25) without valley seeking\n" if $maxGap < 0;
}


if ($outBoundaryBedFile ne '' || $outHalfPHBedFile ne '')
{
    Carp::croak "must enable --valley-seeking\n" unless $valleySeeking;
}

if ($minPH < 2)
{
    warn "minPH must be >=2\n";
    $minPH = 2;
}

my $cmdDir = dirname ($0);

my $bigFlag = $big ? "-big" : "";
my $verboseFlag = $verbose ? "-v" : "";
my $ssFlag = $separateStrand ? "-ss" : "";

Carp::croak "$cache already exists\n" if -d $cache || -f $cache;
system ("mkdir $cache");


##
print "generate exact tag coverage profile ...\n" if $verbose;

my $tagExactCountBedFile = "$cache/tag.count.exact.bed";
my $tmpPeakBedFile = "$cache/tmp.peak.bed";

my $cmd = "perl $cmdDir/tag2profile.pl $verboseFlag $bigFlag -exact $ssFlag -of bed $tagBedFile $tagExactCountBedFile";
my $ret = system ($cmd);
Carp::croak "CMD=$cmd failed:$?\n" unless $ret == 0;


print "extract candidate peaks ...\n" if $verbose;

if ($valleySeeking)
{
    my %tagCount;
    my $cachePeak = "$cache/cache_peak";
    system ("mkdir $cachePeak");
    if ($big)
    {
        my $ret = splitBedFileByChrom ($tagExactCountBedFile, $cachePeak, v=>$verbose, "sort"=>1);
        %tagCount = %$ret;
    } 
    else
    {
        my $tags = readBedFile ($tagExactCountBedFile, $verbose);
        foreach my $t (@$tags)
        {
            my $chrom = $t->{"chrom"};
            push @{$tagCount{$chrom}}, $t;
        }
    }

    print "get positions with non-zero tag count broken down into chromosomes ...\n" if $verbose;

    foreach my $chrom (sort keys %tagCount)
    {

        my $n = $tagCount{$chrom};
        $n = ref($n) eq 'HASH' ? $n = $n->{'n'} : @$n;
        print "$chrom : $n tags\n" if $verbose;
    }

    my @strand = ('b');
    @strand = qw(+ -) if $separateStrand;

    my $fout;
    open ($fout, ">$tmpPeakBedFile") || Carp::croak "cannot open file $tmpPeakBedFile to write\n";
    foreach my $s (@strand)
    {
        print "#processing strand $s ...\n" if $verbose;
        my @chroms = sort keys %tagCount;

        foreach my $chrom (@chroms)
        {
            print "processing chrom $chrom ...\n" if $verbose;
            if ($big)
            {
                my $tmpFile = $tagCount{$chrom}->{'f'};
                my $fin;
                open ($fin, "<$tmpFile") || Carp::croak "cannot open file $tmpFile to read\n";

                my $iter = 0;
                while (my $block = readNextBedBlock ($fin, 1, 0, minBlockSize=> $minBlockSize))
                {
                    my $n = @$block;
                    print "block $iter: $n positions\n" if $verbose;
                    $iter++;
                    my $peaks = extractPeaks ($block, $s, $valleyDepth);
                    foreach my $p (@$peaks)
                    {
                        $p->{'name'} .= "#" . join ("#", $p->{'leftBoundary'}, $p->{'rightBoundary'}+1, $p->{'leftHalfPH'}, $p->{'rightHalfPH'}+1);
                        print $fout bedToLine ($p), "\n" if $p->{'score'} >= $minPH;
                    }
                }
                close ($fin);
            }
            else
            {
                my $tags = $tagCount{$chrom};
                my $n = @$tags;
                print "$n tags loaded on chromosome $chrom\n" if $verbose;
                my $peaks = extractPeaks ($tags, $s, $valleyDepth);

                foreach my $p (@$peaks)
                {
                    $p->{'name'} .= "#" . join ("#", $p->{'leftBoundary'}, $p->{'rightBoundary'}+1, $p->{'leftHalfPH'}, $p->{'rightHalfPH'}+1);
                    print $fout bedToLine ($p), "\n" if $p->{'score'} >= $minPH;
                }
            }
        }
    }
    close ($fout);
}
else
{
    my $cmd = "awk '{if (\$5>=$minPH) {print \$0}}' $tagExactCountBedFile > $tmpPeakBedFile";
    system ($cmd);
}


$cmd = "wc -l $tmpPeakBedFile";
my $n = `$cmd`;
$n=~/^(\d+)/;
$n = $1;


if ($n == 0)
{
    Carp::croak "no peaks found\n";
    system ("rm -rf $cache") unless $keepCache;
    exit (0);
}

print "$n candidate peaks with PH>=$minPH detected\n" if $verbose;

#assign candidate peaks to genes and assess statistical significance

if (-f $geneBedFile)
{
    print "estimating average tag size ...\n" if $verbose;
    my $tagSize =  `awk 'BEGIN{s=0;n=0;} {s=s+\$3-\$2; n=n+1} END {print s/n}' $tagBedFile`;
    chomp $tagSize;
    print "average tag size = $tagSize\n" if $verbose;

    print "get exons in genes ... \n" if $verbose;
    my $ts2geneFile = "$cache/ts2gene.txt";
    my $cleanGeneBedFile = "$cache/gene.clean.bed";

    my $cmd = "awk '{print \$4\"\\t\"\$4}' $geneBedFile | sort | uniq > $ts2geneFile";
    system ($cmd);

    $cmd = "perl $cmdDir/combineTranscripts.pl $verboseFlag  $geneBedFile $ts2geneFile $cleanGeneBedFile";
    system ($cmd);

    my $exonBedFile = "$cache/exon.bed";
    $cmd = "perl $cmdDir/gene2ExonIntron.pl $verboseFlag -nid -oe $exonBedFile $cleanGeneBedFile";
    system ($cmd);


    print "count tag number for each exon/gene ...\n" if $verbose;
    my $exonTagCountBedFile = "$cache/tag.count.exon.bed";

    $cmd = "perl $cmdDir/tag2profile.pl $bigFlag $verboseFlag -region $exonBedFile $ssFlag -of bed $tagBedFile $exonTagCountBedFile";
    print $cmd, "\n";
    system ($cmd);

    my $fin;
    my $fout;

    my %geneTagCountHash;

    open ($fin, "<$exonTagCountBedFile") || Carp::croak "cannot open file $exonTagCountBedFile\n";
    while (my $line = <$fin>)
    {
        chomp $line;
        my $r = lineToBed ($line);

        my $geneId = $r->{"name"};
        $geneTagCountHash{$geneId}->{'size'} += ($r->{'chromEnd'} - $r->{'chromStart'} + 1);
        $geneTagCountHash{$geneId}->{'count'} += $r->{'score'};
    }
    close ($fin);

    my $geneTagCountTotal = 0;
    map {$geneTagCountTotal += $geneTagCountHash{$_}->{'count'}} keys %geneTagCountHash;

    Carp::croak "no tags??" if $geneTagCountTotal <= 0;

    print "total number of tags overlapping with specified reions: $geneTagCountTotal\n" if $verbose;

    my $geneExpressionTotal = 0;
    if ($useExpr)
    {
        print "reading gene expression levels from $geneBedFile ...\n" if $verbose;
        open ($fin, "<$geneBedFile") || Carp::croak "cannot open file $geneBedFile to read\n";
        while (my $line = <$fin>)
        {
            chomp $line;
            next if $line =~/^\s*$/;
            next if $line =~/^track/;
            next if $line =~/^#/;

            my @cols = split (/\s+/, $line);
            Carp::croak "no score column in $geneBedFile ...\n" if @cols < 5;

            my $geneId = $cols[3];
            my $expr = $cols[4];

            Carp::croak "expression value for gene $geneId is negative\n" if $expr < 0;
            $geneTagCountHash{$geneId}->{'expr'} = $expr if exists $geneTagCountHash{$geneId};
        }
        close ($fin);

        map {$geneExpressionTotal += $geneTagCountHash{$_}->{'expr'}} keys %geneTagCountHash;
        Carp::croak "all genes have no expression??\n" if $geneExpressionTotal <= 0;

        print "total gene expression level: $geneExpressionTotal\n" if $verbose;
    }

    print "calculating expected peak height for each gene ...\n" if $verbose;


#my $geneTagCountFile = "$cache/tag.count.gene.txt";
#open ($fout, ">$geneTagCountFile") || Carp::croak "cannot open file $geneTagCountFile to write\n";

#if ($useExpr)
#{
#   print $fout "#", join ("\t", "gene id", "size", "tag number", "tag density", "expression", "expexpted PH"), "\n";
#}
#else
#{
#   print $fout "#", join ("\t", "gene id", "size", "tag number", "tag density", "expexpted PH"), "\n";
#}

    foreach my $geneId (sort keys %geneTagCountHash)
    {
        my $g = $geneTagCountHash{$geneId};
        $g->{'density'} = $g->{'count'} / $g->{'size'};
        $g->{'PH0'} = $useExpr ? ($geneTagCountTotal * $g->{'expr'} / $geneExpressionTotal / $g->{'size'} * $tagSize) : ($g->{'density'} * $tagSize);

#   if ($useExpr)
#   {
#       print $fout join ("\t", $geneId, $g->{'size'}, $g->{'count'}, $g->{'density'}, $g->{'expr'}, $g->{'PH0'}), "\n";
#   }
#   else
#   {
#       print $fout join ("\t", $geneId, $g->{'size'}, $g->{'count'}, $g->{'density'}, $g->{'PH0'}), "\n";
#   }

    }
#close ($fout);

#my $effectiveGeneNum = `awk '{if(\$3>0) {print \$0}}' $geneTagCountFile | wc -l`;
#$effectiveGeneNum =~/\s*(\d+)\s*/;
#$effectiveGeneNum = $1;

    my $effectiveGeneNum = 0;

    if ($useExpr)
    {
        map {$effectiveGeneNum++ if $geneTagCountHash{$_}->{'expr'} > 0} keys %geneTagCountHash;
    }
    else
    {
        map {$effectiveGeneNum++ if $geneTagCountHash{$_}->{'count'} > 0} keys %geneTagCountHash;
    }

    print "effective gene number: $effectiveGeneNum\n" if $verbose;


    #this part can be improved
    #e.g. 1. if two genes overlap with each other resulting in ambiguity, exonic region should have higher priority
    #     2. peaks in the extended 3' UTR region

    print "match peaks with genes ...\n" if $verbose;
    my $peak2geneBedFile = "$cache/peak2gene.bed";
    $cmd = "perl $cmdDir/tagoverlap.pl $bigFlag $verboseFlag -d \"@@\" --keep-score -region $cleanGeneBedFile $ssFlag $tmpPeakBedFile $peak2geneBedFile";
    system ($cmd);


    print "calculate pvalues ...\n" if $verbose;

    $tmpPeakBedFile = "$cache/tmp.peak.sig.bed";
    open ($fin, "<$peak2geneBedFile") || Carp::croak "cannot open file $peak2geneBedFile to read\n";
    open ($fout, ">$tmpPeakBedFile") || Carp::croak "cannot open file $tmpPeakBedFile to write\n";

    my %resultHash;
    my $i = 0;

    while (my $line = <$fin>)
    {
        chomp $line;

        print "$i ...\n" if $verbose && $i % 5000 == 0;
        $i++;

        my $peak = lineToBed ($line);
        my $name = $peak->{'name'};
        my $peakHeight = $peak->{'score'};

        my ($peakId, $geneId) = split ("@@", $name);
        my ($leftBoundary, $rightBoundary, $leftHalfPH, $rightHalfPH);

        ($peakId, $leftBoundary, $rightBoundary, $leftHalfPH, $rightHalfPH) = split ("#", $peakId) if $valleySeeking;

        next unless exists $geneTagCountHash{$geneId};

        my $expectedPeakHeight = $geneTagCountHash{$geneId}->{'PH0'}; #expected PH
        next unless $expectedPeakHeight > 0; #otherwise no tag on the exonic region of the gene
        next unless $peakHeight > $expectedPeakHeight;#otherwise it cannot be significant

        my $geneSize = $geneTagCountHash{$geneId}->{'size'};

        #print "$i: $peakHeight, $expectedPeakHeight\n";
        if ($peakHeight > $maxPH)
        {
            print "$i: peak height out of range: ", bedToLine ($peak), "\n" if $verbose;
            next;
        }

        my $pvalue = exists $resultHash{$geneId}{$peakHeight} ? 
            $resultHash{$geneId}{$peakHeight} : calcScanStatistic ($peakHeight, $expectedPeakHeight, $geneSize / $tagSize);

        $resultHash{$geneId}{$peakHeight} = $pvalue unless exists $resultHash{$geneId}{$peakHeight};

        $pvalue *= $effectiveGeneNum if $multiTestCorrection;
        $pvalue = 1e-100 if $pvalue == 0;

        $peak->{'name'} = $peakId . "[gene=$geneId][PH=$peakHeight][PH0=" . 
            sprintf ("%.2f", $expectedPeakHeight) . "][P=" .sprintf ("%.2e", $pvalue) . "]";

        $peak->{'name'} .= "#" . join("#", $leftBoundary, $rightBoundary, $leftHalfPH, $rightHalfPH) if $valleySeeking;
        $peak->{'score'} = $peakHeight; #-log ($pvalue) / log(10);

        print $fout bedToLine ($peak), "\n" if $pvalue <= $pvalueThreshold;
    }

    close ($fin);
    close ($fout);
}


if ($maxGap >=0)
{
    print "merge peaks ...\n" if $verbose;
    my $tmpPeakMergedBedFile = "$cache/tmp.peak.sig.merge.bed";
    $cmd = "perl $cmdDir/bedUniq.pl $ssFlag -c maxScore -maxgap $maxGap $verboseFlag $tmpPeakBedFile $tmpPeakMergedBedFile";
    system ($cmd);
    $tmpPeakBedFile = $tmpPeakMergedBedFile;
}

print "generating output...\n" if $verbose;
#output, some formatting issues
my $fin;
my $fout;
my $foutPeak;
my $foutBoundary;
my $foutHalfPH;

open ($fin, "<$tmpPeakBedFile") || Carp::croak "cannot open file $tmpPeakBedFile to read\n";
open ($fout, ">$outBedFile") || Carp::croak "cannot open file $outBedFile to write\n";

if ($outBoundaryBedFile ne '')
{
    open ($foutBoundary, ">$outBoundaryBedFile") || Carp::croak "cannot open file $outBoundaryBedFile to write\n";
}

if ($outHalfPHBedFile ne '')
{
    open ($foutHalfPH, ">$outHalfPHBedFile") || Carp::croak "cannot open file $outHalfPHBedFile to write\n";
}

my $iter = 0;
while (my $line = <$fin>)
{
    chomp $line;
    next if $line=~/^\s*$/;

    $iter++;
    my $p = lineToBed ($line);
    my $peakId = $p->{'name'};
    my ($leftBoundary, $rightBoundary, $leftHalfPH, $rightHalfPH);
    ($peakId, $leftBoundary, $rightBoundary, $leftHalfPH, $rightHalfPH) = split ("#", $peakId) if $valleySeeking;

    $p->{'name'} = sprintf("%s_%d", $prefix, $iter);
    if ($geneBedFile ne '')
    {
        $peakId =~/(\[\S*)$/;
        my $sig = $1;
        $p->{'name'} .= $sig;
    }
    print $fout bedToLine ($p), "\n";

    my %pCopy = %$p;
    if ($outBoundaryBedFile ne '')
    {
        $pCopy{'chromStart'} = $leftBoundary;
        $pCopy{'chromEnd'} = $rightBoundary - 1;
        print $foutBoundary bedToLine (\%pCopy), "\n";
    }

    if ($outHalfPHBedFile ne '')
    {
        $pCopy{'chromStart'} = $leftHalfPH;
        $pCopy{'chromEnd'} = $rightHalfPH - 1;
        print $foutHalfPH bedToLine (\%pCopy), "\n";
    }
}

close ($fin);
close ($fout);
close ($foutBoundary) if $outBoundaryBedFile ne '';
close ($foutHalfPH) if $outHalfPHBedFile ne '';

system ("rm -rf $cache") unless $keepCache;



sub extractPeaks
{
    my ($tags, $strand, $valleyDepth) = @_;

    my @tags;
    #extract the tags on the specified strand
    foreach my $t (@$tags)
    {
        $t->{"strand"} = '+' unless exists $t->{"strand"};
        next if $strand ne 'b' && $strand ne $t->{"strand"};
        push @tags, $t;
    }

    #sort the tags
    @tags = sort {$a->{'chromStart'} <=> $b->{'chromStart'}} @tags; 

    my @peaks;

    my @currBlock;
    my $currEnd = -1;
    foreach my $t (@tags)
    {
        if ($t->{'chromStart'} > $currEnd + 1)
        {
            if (@currBlock > 0)
            {
                my $ret = extractPeaksBlock (\@currBlock, $valleyDepth);
                push @peaks, @$ret;

                #start a new block
                @currBlock = ();
            }
        }
        push @currBlock, $t;
        $currEnd = $t->{'chromEnd'};
    }

    #the last block
    my $ret = extractPeaksBlock (\@currBlock, $valleyDepth);
    push @peaks, @$ret;

    return \@peaks;
}


#an upstream bounding local maixma is an upstream local maxima whose PH is greater than the current local maxima
#simiarly, a downstream bounding local maxima is a downstream local maxima whose PH is greater than the current local maxima

#a peak is a local maxima satisfying
#a local minima with PH< valley height can be found between the upstream bounding and the current local maxima
#a local minima with PH< valley height can be found between the downstream bounding and the current local maxima


sub extractPeaksBlock
{
    my ($tags, $valleyDepth) = @_;

    if (@$tags == 1)
    {
        my $t = $tags->[0];
        $t->{'leftHalfPH'} = $t->{'chromStart'};
        $t->{'rightHalfPH'} = $t->{'chromEnd'};
        $t->{'leftBoundary'} = $t->{'chromStart'};
        $t->{'rightBoundary'} = $t->{'chromEnd'};

        return $tags;
    }
    my $valleyHeight = 1 - $valleyDepth;


    #Carp::croak Dumper ($tags), "\n";

    #label local maxima and minima
    #assuming the tags are already sorted here
    my @maxima;
    my @minima;
    for (my $i = 0; $i < @$tags; $i++)
    {
        $tags->[$i]->{'idx'} = $i;
        if ($i == 0)
        {
            if ($tags->[$i]->{'score'} > $tags->[$i+1]->{'score'})
            {
                push @maxima, $i;
                $tags->[$i]->{'maxima'} = $#maxima;
            }
            elsif ($tags->[$i]->{'score'} < $tags->[$i+1]->{'score'})
            {
                push @minima, $i;
                $tags->[$i]->{'minima'} = $#minima;
            }
        }
        elsif ($i > 0 && $i < @$tags - 1)
        {
            if ($tags->[$i]->{'score'} > $tags->[$i+1]->{'score'} && $tags->[$i]->{'score'} >= $tags->[$i-1]->{'score'})
            {
                push @maxima, $i;
                $tags->[$i]->{'maxima'} = $#maxima;
            }
            elsif ($tags->[$i]->{'score'} < $tags->[$i+1]->{'score'} && $tags->[$i]->{'score'} <= $tags->[$i-1]->{'score'})
            {
                push @minima, $i;
                $tags->[$i]->{'minima'} = $#minima;
            }
        }
        else
        {
            if ($tags->[$i]->{'score'} >= $tags->[$i-1]->{'score'})
            {
                push @maxima, $i;
                $tags->[$i]->{'maxima'} = $#maxima;
            }
            elsif ($tags->[$i]->{'score'} <= $tags->[$i-1]->{'score'})
            {
                push @minima, $i;
                $tags->[$i]->{'minima'} = $#minima;
            }
        }
    }
    #Carp::croak Dumper ($tags), "\n";

    my @peaks;
    #find the closest neighbor maxima which has bigger peak height
    for (my $i = 0; $i < @maxima; $i++)
    {
        #this is naive implementation, could improve speed further
        my $t1 = $tags->[$maxima[$i]];
        for (my $j = $i - 1; $j>= 0; $j--)
        {
            my $t2 = $tags->[$maxima[$j]];

            #to break ties
            #if both PH and size are equal, we go further on the right, but not on the left
            if ($t1->{'score'} < $t2->{'score'} || ($t1->{'score'} == $t2->{'score'} && $t1->{'chromEnd'} - $t1->{'chromStart'} < $t2->{'chromEnd'} - $t2->{'chromStart'}))
            {
                $t1->{'prev'} = $maxima[$j];
                last;
            }
        }

        for (my $j = $i + 1; $j < @maxima; $j++)
        {
            my $t2 = $tags->[$maxima[$j]];
            #if both PH and size are equal, we go further on the right, but not on the left
            if ($t1->{'score'} < $t2->{'score'} || ($t1->{'score'} == $t2->{'score'} && $t1->{'chromEnd'} - $t1->{'chromStart'} <= $t2->{'chromEnd'} - $t2->{'chromStart'}))
            {
                $t1->{'next'} = $maxima[$j];
                last;
            }
        }
    }
    #Carp::croak Dumper ($tags), "\n";

    #find if a valley exists surrounding each local maxima
    for (my $i = 0; $i < @maxima; $i++)
    {
        my $p1 = $tags->[$maxima[$i]];

        my $leftValleyFound = 0;
        my $rightValleyFound = 0;

        if (exists $p1->{'prev'})
        {
            my $prev = $p1->{'prev'};
            for (my $j = $maxima[$i] - 1; $j >= $prev+1; $j--)
            {
                my $v = $tags->[$j];
                next unless exists $v->{'minima'};
                if ($v->{'score'} / $p1->{'score'} <= $valleyHeight)
                {
                    $leftValleyFound = 1;
                    last;
                }
            }
        }
        else
        {
            $leftValleyFound = 1;
        }

        if (exists $p1->{'next'})
        {
            my $next = $p1->{'next'};
            for (my $j = $maxima[$i] + 1; $j < $next; $j++)
            {
                my $v = $tags->[$j];
                next unless exists $v->{'minima'};
                if ($v->{'score'} / $p1->{'score'} <= $valleyHeight)
                {
                    $rightValleyFound = 1;
                    last;
                }
            }
        }
        else
        {
            $rightValleyFound = 1;
        }

        push @peaks, $p1 if $leftValleyFound && $rightValleyFound;
    }

    for (my $i = 0; $i < @peaks; $i++)
    {
        my $p1 = $peaks[$i];
        #search peak boundary and half PH boundary
        my $leftBoundaryTag = $p1;
        my $leftHalfPHTag = 0;

        my $left = exists $p1->{'prev'} ? $p1->{'prev'} + 1 : 0;
        if ($i > 0)
        {
            $left = $peaks[$i-1]->{'idx'} + 1 if $left < $peaks[$i-1]->{'idx'} + 1;
        }
        for (my $j = $p1->{'idx'} - 1; $j >= $left; $j--)
        {
            my $v = $tags->[$j];
            if ($v->{'score'} / $p1->{'score'} <= 0.5)
            {
                #the first time the coverage reduced to 1/2PH
                $leftHalfPHTag = $v unless $leftHalfPHTag;
            }

            if ($leftBoundaryTag->{'score'} > $v->{'score'})
            {
                $leftBoundaryTag = $v;
            }
        }
        $leftHalfPHTag = $leftBoundaryTag unless $leftHalfPHTag;

        my $rightBoundaryTag = $p1;
        my $rightHalfPHTag = 0;
        my $right = exists $p1->{'next'} ? $p1->{'next'} - 1 : $#$tags;
        if ($i < @peaks - 1)
        {
            $right = $peaks[$i+1]->{'idx'} - 1 if $right > $peaks[$i+1]->{'idx'} - 1;
        }

        for (my $j = $p1->{'idx'} + 1; $j <= $right; $j++)
        {
            my $v = $tags->[$j];
            if ($v->{'score'} / $p1->{'score'} <= 0.5)
            {
                $rightHalfPHTag = $v unless $rightHalfPHTag;
            }

            if ($rightBoundaryTag->{'score'} > $v->{'score'})
            {
                $rightBoundaryTag = $v;
            }
        }
        $rightHalfPHTag = $rightBoundaryTag unless $rightHalfPHTag;
        $p1->{'leftHalfPH'} = $leftHalfPHTag->{'idx'} < $p1->{'idx'} ? ($leftHalfPHTag->{'chromEnd'} + 1) : $leftHalfPHTag->{'chromStart'};
        $p1->{'rightHalfPH'} = $rightHalfPHTag->{'idx'} > $p1->{'idx'} ? ($rightHalfPHTag->{'chromStart'} - 1) : $rightHalfPHTag->{'chromEnd'};
        $p1->{'leftBoundary'} = $leftBoundaryTag->{'idx'} < $p1->{'idx'} ? $leftBoundaryTag->{'chromEnd'} + 1 : $leftBoundaryTag->{'chromStart'};
        $p1->{'rightBoundary'} = $rightBoundaryTag->{'idx'} > $p1->{'idx'} ? $rightBoundaryTag->{'chromStart'} - 1 : $rightBoundaryTag->{'chromEnd'};

    }

=debug
    if ($tags->[0]->{'chrom'} eq 'chr1' && $tags->[0]->{'strand'} eq '+' && $tags->[0]->{'chromStart'} > 66439247 && $tags->[$#$tags]->{'chromEnd'} < 66439590)
    {
        print Dumper ($tags), "\n";
        print "maxima=", join ("\t", @maxima), "\n";
        print "minima=", join ("\t", @minima), "\n";
        Carp::croak Dumper (\@peaks), "\n";
    }
=cut
    return \@peaks;
}

0 个答案:

没有答案