我在一台 Unix 服务器上用 Rscript 命令运行一个 R 脚本。该程序统计重新编码后的基因型之间的转换（Ts）和颠换（Tv）差异数。脚本看起来运行正常，但会随机暂停一段不确定的时间（此时 CPU 占用率降到 0.0%）。有时它过一阵子会自行恢复运行，但我找不出暂停发生的规律。
暂停只会发生在下面这个 for 循环中：
# Pairwise genotype differences between different individuals.
#
# For every circular column offset i, each genotype column is compared with
# the column i places to its right (wrapping around at the end). The absolute
# differences are tallied per column and written to row i of the result
# matrices TCcount, TAcount, TGcount, CGcount, CAcount, GAcount, GAcpg, TCcpg,
# Ts, Tv, TsCpG, CTnCpG and GAnCpG, which must already exist.
#
# Inputs read from the calling environment: mdata (genotype codes from
# column 4 onwards), pos_cpg (one flag per row of mdata) and the code sets
# TC/TC2, TA/TA2, TG/TG2, CG/CG2, CA/CA2 and GA/GA2.

# Weighted per-column tally of difference codes: a cell whose value is in
# `single` counts once, a cell whose value is in `double` counts twice.
# When `cpg_mask` (logical, one entry per row of `d`) is supplied, the same
# tally restricted to the flagged rows is returned as `cpg`.
tally_codes <- function(d, single, double, cpg_mask = NULL) {
  one <- d %in% single
  two <- d %in% double
  # %in% returns a plain vector; restore the matrix shape for colSums().
  dim(one) <- dim(d)
  dim(two) <- dim(d)
  cpg <- NULL
  if (!is.null(cpg_mask)) {
    # A vector of length nrow(d) is recycled down every column.
    cpg <- colSums(one & cpg_mask) + 2 * colSums(two & cpg_mask)
  }
  list(total = colSums(one) + 2 * colSums(two), cpg = cpg)
}

# Extract the genotype columns once instead of on every iteration.
# as.matrix() leaves a matrix unchanged and keeps %in% element-wise should
# mdata arrive as a data frame.
geno <- as.matrix(mdata[, 4:ncol(mdata), drop = FALSE])
n_ind <- ncol(geno)

# Rows flagged as CpG positions.
# NOTE(review): assumes pos_cpg holds exactly 0/1 - confirm.
is_cpg <- as.vector(as.matrix(pos_cpg)) == 1
stopifnot(length(is_cpg) == nrow(geno))

# The code sets are not modified inside the loop, so they are written once
# rather than on every iteration.
# NOTE(review): the per-type count matrices (TCcount, TAcount, ...) are never
# written to disk - confirm whether these saves were meant for them.
save(TC, file = 'TC.Rda')
save(TA, file = 'TA.Rda')
save(TG, file = 'TG.Rda')
save(CA, file = 'CA.Rda')
save(CG, file = 'CG.Rda')
save(GA, file = 'GA.Rda')

for (i in seq_len(n_ind - 1L)) {
  # Start the clock
  ptm <- proc.time()
  print(i)
  print(ptm)

  # Column j is compared with column j + i, wrapping around at the end.
  partner <- c(seq.int(i + 1L, n_ind), seq_len(i))
  diffs <- abs(geno - geno[, partner, drop = FALSE])

  tc <- tally_codes(diffs, TC, TC2, is_cpg)
  TCcount[i, ] <- tc$total
  print('TC gelukt')
  TAcount[i, ] <- tally_codes(diffs, TA, TA2)$total
  print('TA gelukt')
  TGcount[i, ] <- tally_codes(diffs, TG, TG2)$total
  print('TG gelukt')
  CGcount[i, ] <- tally_codes(diffs, CG, CG2)$total
  print('CG gelukt')
  CAcount[i, ] <- tally_codes(diffs, CA, CA2)$total
  print('CA gelukt')
  ga <- tally_codes(diffs, GA, GA2, is_cpg)
  GAcount[i, ] <- ga$total
  print('GA gelukt')

  # CpG-restricted tallies count a hit only where the row is flagged as CpG.
  # The previous single-weight GA term had a misplaced parenthesis: because
  # `-` binds tighter than `==`, it reduced to the plain GA hit matrix and
  # ignored the CpG flag. All four terms now use the same `hit & CpG` rule.
  GAcpg[i, ] <- ga$cpg
  print('GAcpg gelukt')
  TCcpg[i, ] <- tc$cpg
  print('TCcpg gelukt')

  ratio1 <- mean(CAcount[i, ] / TAcount[i, ])
  ratio2 <- mean(TGcount[i, ] / CGcount[i, ])
  print(ratio1)
  print(ratio2)

  # Release the large temporaries before the next iteration.
  rm(diffs, tc, ga)

  Ts[i, ] <- TCcount[i, ] + GAcount[i, ]
  print('Ts gelukt')
  Tv[i, ] <- TAcount[i, ] + TGcount[i, ] + CAcount[i, ] + CGcount[i, ]
  print('Tv gelukt')
  TsCpG[i, ] <- TCcpg[i, ] + GAcpg[i, ]
  print('TsCpG gelukt')
  CTnCpG[i, ] <- TCcount[i, ] - TCcpg[i, ]
  print('CTnCpG gelukt')
  GAnCpG[i, ] <- GAcount[i, ] - GAcpg[i, ]
  print('GAnCpG gelukt')

  # Stop the clock
  print(proc.time())
  tijd <- proc.time() - ptm
  print(paste(i, 'gedaan', 'in', tijd[3], sep = ' '))

  # Checkpoint the derived results after every offset.
  # NOTE(review): rewriting five full matrices on each iteration is heavy
  # I/O and a candidate cause of stalls on a slow or shared filesystem;
  # consider checkpointing less often.
  save(CTnCpG, file = 'CTnCpG.Rda')
  save(GAnCpG, file = 'GAnCpG.Rda')
  save(Ts, file = 'Ts.Rda')
  save(Tv, file = 'Tv.Rda')
  save(TsCpG, file = 'TsCpG.Rda')
  print('alles opgeslaan')
}
print 语句是调试输出，用于确定暂停发生的位置。矩阵 mdata 从第 4 列到倒数第二列存放的是以数字重新编码的基因型，最后一列是一个 0 或 1 的值，表示该位置是否为已知的 CpG 位点。
有人知道这个脚本为什么会随机暂停吗？