将整个r脚本集成到perl中

时间:2016-07-19 10:09:34

标签: r perl

我的代码:

cds_data=read.table("D:/Analysis/Coverage_DB/UCSC/UCSC_CDS_modif_canonical.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(cds_data)=c("chr","start","end","gene","cds_no","strand")

panel_data=read.table("D:/Analysis/Coverage_DB/UCSC/NMCNL.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(panel_data)=c("chr","start","end","gene","some_no","strand")

summary_data=read.table("D:/Analysis/Coverage_DB/UCSC/AvgDepth_Summery_Mean.txt",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(summary_data)=c("chr","start","end","gene","avg_depth")

sample_data=read.table("D:/Analysis/Coverage_DB/UCSC/R715_R715_7485_50972_LIB26004_CNLV2.Coverage_AvrgDep.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(sample_data)=c("chr","start","end","gene","avg_depth")

gene="RAB27A";
#gene="BRCA1";
gene_cds_coord =cds_data[cds_data$gene == gene,]
gene_panel_coord = panel_data[panel_data$gene == gene,];
gene_summary_coord = summary_data[gsub(" ","",summary_data$gene) == gene,];
gene_sample_coord = sample_data[gsub(" ","",sample_data$gene) == gene,];

normalizeValues <- function(x){(x-min(x))/(max(x)-min(x))}

chromsomeToTranscript = function(pos_n, gene_model){
   intron_pos = gene_model[-nrow(gene_model),];
   intron_pos$start = gene_model$end[-length(gene_model$end)] + 1;
   intron_pos$end = gene_model$start[2:length(gene_model$end)] - 1;
   out=numeric();
   max_pos = max(gene_model$end);
   min_pos = min(gene_model$start);
   for (pos in pos_n) {
    if (pos >= max_pos) { pos = max_pos; }
    if (pos <= min_pos) { pos = min_pos; }
    withinExonCond   = pos >= gene_model$start & pos <= gene_model$end
    withinIntronCond = pos >= intron_pos$start & pos <= intron_pos$end

    cds_pos=c(1,cumsum(gene_model$end-gene_model$start));

        rel_pos = gene_model[withinExonCond,];
        cds_start_pos=cds_pos[rel_pos$cds_no + 1];
        offset=pos-rel_pos$start;
        transcriptPos = cds_start_pos + offset;
    }else if (any(withinIntronCond)) {
         rel_pos = intron_pos[withinIntronCond,];
        transcriptPos = cds_pos[rel_pos$cds_no + 2];

   }else {
        stop(paste("The input position could not mapped to this gene",pos));
    }
    out = append(out,transcriptPos);
   }
    return (out);
}

cds_pos=c(1,cumsum(gene_cds_coord$end-gene_cds_coord$start))

norm_cds_pos = normalizeValues(cds_pos) * 100;
gene_model = gene_cds_coord;


#plot(c(0,100),c(0,10),axes=FALSE);

plot(c(-4,100),c(1,10), axes=FALSE)
#text(-4,9.5,"CDS track",cex=0.5,font=2)
txt="CDS track";
text(-4,9.5-strheight(txt,units="figure"),txt,cex=0.5,font=2)
xleft=norm_cds_pos[-length(norm_cds_pos)];
yleft=rep(9,length(xleft)-1);
xright= norm_cds_pos[2:length(norm_cds_pos)]
yright= rep(10,length(xleft)-1);
rect(xleft,yleft,xright,yright,col="grey");
# Panel plot
xleft=chromsomeToTranscript(gene_panel_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(7,length(xleft)-1);
xright= chromsomeToTranscript(gene_panel_coord$end,gene_model)*100/max(cds_pos)
yright= rep(7.5,length(xleft)-1);
rect(xleft,yleft,xright,yright,col="red");
txt="Panel track";
text(-4,7.25-strheight(txt,units="figure"),txt,cex=0.5,font=2)

#CDS Plot
xleft=chromsomeToTranscript(gene_summary_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(4,length(xleft));
xright=chromsomeToTranscript(gene_summary_coord$end-4,gene_model)*100/max(cds_pos);
yright=normalizeValues(gene_summary_coord$avg_depth)*2 + 4;
rect(xleft,yleft,xright,yright,col="blue");
txt="Avg. depth track";
text(-4,5-strheight(txt,units="figure"),txt,cex=0.5,font=2)

# Sample Coverage plot
xleft=chromsomeToTranscript(gene_sample_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(1,length(xleft));
xright=chromsomeToTranscript(gene_sample_coord$end-4,gene_model)*100/max(cds_pos);
yright=normalizeValues(gene_sample_coord$avg_depth)*2 + 1;
rect(xleft,yleft,xright,yright,col="green");
txt="Sample depth";
text(-4,2-strheight(txt,units="figure"),txt,cex=0.5,font=2)

我想将相同的脚本放入PERL脚本的末尾,以便我可以使用输出文件(perl)作为R脚本的输入并重新使用R中的Perl ARGV []。

我所知道的只是

use Statistics::R;
my $R = Statistics::R->new();

每个R行的语法在放入Perl时会发生变化,我不知道该怎么做。

下面是我的Perl脚本,后跟R

perl脚本

my $sample_name = $ARGV[0];
my $gene_name = $ARGV[1];
my $bedpath = "/NGS_STORE/ARCHIVE1/Cov_InputBed_Repository";

my $fn = glob "$bedpath/*$sample_name*.bed";


print "$fn\n";

open (SB,"$fn") or die "Cant open $sample_name\n";

open (SBO,">$sample_name\_$gene_name\_Depth.bed") or die "Cant open \n";

while (my $l = <SB>)
{
        chomp($l);
        @d = split("\t",$l);

        if($d[3] eq $gene_name)
        {
        print SBO "$l\n";
        }
}

r script

sample_data=read.table("$sample_name\_$gene_name\_Depth.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)    
colnames(sample_data)=c("chr","start","end","gene","avg_depth")

gene= $gene_name

gene_cds_coord =cds_data[cds_data$gene == gene,]

可以看到

open (SBO,">$sample_name\_$gene_name\_Depth.bed") or die "Cant open \n";

如上所述在R中打开,与$gene_name

相同

0 个答案:

没有答案