我的代码:
cds_data=read.table("D:/Analysis/Coverage_DB/UCSC/UCSC_CDS_modif_canonical.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(cds_data)=c("chr","start","end","gene","cds_no","strand")
panel_data=read.table("D:/Analysis/Coverage_DB/UCSC/NMCNL.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(panel_data)=c("chr","start","end","gene","some_no","strand")
summary_data=read.table("D:/Analysis/Coverage_DB/UCSC/AvgDepth_Summery_Mean.txt",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(summary_data)=c("chr","start","end","gene","avg_depth")
sample_data=read.table("D:/Analysis/Coverage_DB/UCSC/R715_R715_7485_50972_LIB26004_CNLV2.Coverage_AvrgDep.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(sample_data)=c("chr","start","end","gene","avg_depth")
gene="RAB27A";
#gene="BRCA1";
gene_cds_coord =cds_data[cds_data$gene == gene,]
gene_panel_coord = panel_data[panel_data$gene == gene,];
gene_summary_coord = summary_data[gsub(" ","",summary_data$gene) == gene,];
gene_sample_coord = sample_data[gsub(" ","",sample_data$gene) == gene,];
normalizeValues <- function(x){(x-min(x))/(max(x)-min(x))}
chromsomeToTranscript = function(pos_n, gene_model){
intron_pos = gene_model[-nrow(gene_model),];
intron_pos$start = gene_model$end[-length(gene_model$end)] + 1;
intron_pos$end = gene_model$start[2:length(gene_model$end)] - 1;
out=numeric();
max_pos = max(gene_model$end);
min_pos = min(gene_model$start);
for (pos in pos_n) {
if (pos >= max_pos) { pos = max_pos; }
if (pos <= min_pos) { pos = min_pos; }
withinExonCond = pos >= gene_model$start & pos <= gene_model$end
withinIntronCond = pos >= intron_pos$start & pos <= intron_pos$end
cds_pos=c(1,cumsum(gene_model$end-gene_model$start));
rel_pos = gene_model[withinExonCond,];
cds_start_pos=cds_pos[rel_pos$cds_no + 1];
offset=pos-rel_pos$start;
transcriptPos = cds_start_pos + offset;
}else if (any(withinIntronCond)) {
rel_pos = intron_pos[withinIntronCond,];
transcriptPos = cds_pos[rel_pos$cds_no + 2];
}else {
stop(paste("The input position could not mapped to this gene",pos));
}
out = append(out,transcriptPos);
}
return (out);
}
cds_pos=c(1,cumsum(gene_cds_coord$end-gene_cds_coord$start))
norm_cds_pos = normalizeValues(cds_pos) * 100;
gene_model = gene_cds_coord;
#plot(c(0,100),c(0,10),axes=FALSE);
plot(c(-4,100),c(1,10), axes=FALSE)
#text(-4,9.5,"CDS track",cex=0.5,font=2)
txt="CDS track";
text(-4,9.5-strheight(txt,units="figure"),txt,cex=0.5,font=2)
xleft=norm_cds_pos[-length(norm_cds_pos)];
yleft=rep(9,length(xleft)-1);
xright= norm_cds_pos[2:length(norm_cds_pos)]
yright= rep(10,length(xleft)-1);
rect(xleft,yleft,xright,yright,col="grey");
# Panel plot
xleft=chromsomeToTranscript(gene_panel_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(7,length(xleft)-1);
xright= chromsomeToTranscript(gene_panel_coord$end,gene_model)*100/max(cds_pos)
yright= rep(7.5,length(xleft)-1);
rect(xleft,yleft,xright,yright,col="red");
txt="Panel track";
text(-4,7.25-strheight(txt,units="figure"),txt,cex=0.5,font=2)
#CDS Plot
xleft=chromsomeToTranscript(gene_summary_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(4,length(xleft));
xright=chromsomeToTranscript(gene_summary_coord$end-4,gene_model)*100/max(cds_pos);
yright=normalizeValues(gene_summary_coord$avg_depth)*2 + 4;
rect(xleft,yleft,xright,yright,col="blue");
txt="Avg. depth track";
text(-4,5-strheight(txt,units="figure"),txt,cex=0.5,font=2)
# Sample Coverage plot
xleft=chromsomeToTranscript(gene_sample_coord$start,gene_model)*100/max(cds_pos);
yleft=rep(1,length(xleft));
xright=chromsomeToTranscript(gene_sample_coord$end-4,gene_model)*100/max(cds_pos);
yright=normalizeValues(gene_sample_coord$avg_depth)*2 + 1;
rect(xleft,yleft,xright,yright,col="green");
txt="Sample depth";
text(-4,2-strheight(txt,units="figure"),txt,cex=0.5,font=2)
我想将相同的脚本放入PERL脚本的末尾,以便我可以使用输出文件(perl)作为R脚本的输入并重新使用R中的Perl ARGV []。
我所知道的只是
use Statistics::R;
my $R = Statistics::R->new();
每个R行的语法在放入Perl时会发生变化,我不知道该怎么做。
下面是我的Perl脚本,后跟R
perl脚本
my $sample_name = $ARGV[0];
my $gene_name = $ARGV[1];
my $bedpath = "/NGS_STORE/ARCHIVE1/Cov_InputBed_Repository";
my $fn = glob "$bedpath/*$sample_name*.bed";
print "$fn\n";
open (SB,"$fn") or die "Cant open $sample_name\n";
open (SBO,">$sample_name\_$gene_name\_Depth.bed") or die "Cant open \n";
while (my $l = <SB>)
{
chomp($l);
@d = split("\t",$l);
if($d[3] eq $gene_name)
{
print SBO "$l\n";
}
}
r script
sample_data=read.table("$sample_name\_$gene_name\_Depth.bed",sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(sample_data)=c("chr","start","end","gene","avg_depth")
gene= $gene_name
gene_cds_coord =cds_data[cds_data$gene == gene,]
可以看到
open (SBO,">$sample_name\_$gene_name\_Depth.bed") or die "Cant open \n";
如上所述在R中打开,与$gene_name