以下是我用来把一个数据框转换成另一个数据框的代码:
uncentered.cov.mat [[ k ]] <- cov ( scaled.snp.by.pop [[ k ]] )
这是scaled.snp.by.pop的样子:
` [1914,] 2.4523662 2.44082577 1.6878051 1.11283738
[1915,] 1.3423300 1.61079519 1.3087711 1.43821057
[1916,] 0.2200832 0.07922990 0.0000000 0.56592730
[1917,] 0.2200832 0.07922990 0.0000000 0.56592730
[1918,] 0.8896991 0.72065699 1.0009125 1.57286191
[1919,] 2.1540474 2.65870923 2.0771166 2.57166807
[1920,] 2.1540474 2.65870923 2.0771166 2.57166807
[1921,] 2.1540474 2.65870923 2.0771166 2.57166807
[1922,] 0.5598038 0.92703437 0.7053522 0.38386478
[1923,] 0.1299660 0.42108943 0.0000000 1.89378890
[ reached getOption("max.print") -- omitted 2056889 rows ]`
`nrow(scaled.snp.by.pop) is NULL`
`ncol(scaled.snp.by.pop) is NULL`
`class(scaled.snp.by.pop) is "list"`
转化后,uncentered.cov.mat看起来像这样:
head(uncentered.cov.mat)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[4,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[5,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[6,] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
[1,] NA NA NA NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA NA NA NA
[4,] NA NA NA NA NA NA NA NA NA NA NA NA
[5,] NA NA NA NA NA NA NA NA NA NA NA NA
[6,] NA NA NA NA NA NA NA NA NA NA NA NA
[,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
[1,] NA NA NA NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA NA NA NA
[4,] NA NA NA NA NA NA NA NA NA NA NA NA
[5,] NA NA NA NA NA NA NA NA NA NA NA NA
[6,] NA NA NA NA NA NA NA NA NA NA NA NA
[,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
[1,] NA NA NA NA NA NA NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA NA NA NA NA NA
[3,] NA NA NA NA NA NA NA NA NA NA NA NA
[4,] NA NA NA NA NA NA NA NA NA NA NA NA
[5,] NA NA NA NA NA NA NA NA NA NA NA NA
[6,] NA NA NA NA NA NA NA NA NA NA NA NA
[,51] [,52]
[1,] NA NA
[2,] NA NA
[3,] NA NA
[4,] NA NA
[5,] NA NA
[6,] NA NA
nrow(uncentered.cov.mat) is 52
ncol(uncentered.cov.mat) is 52
class(uncentered.cov.mat) is "matrix"
我的老板说,最上面的那条命令(cov)得到的矩阵中,所有单元格的值都应该大于0。到底哪里出了问题?是什么导致了NA?我不知道该从哪里查起。
完整代码是:
# SampleCovSNPs: for each of `cycles` iterations, draw SNP ids from `match.pop`
# that fall in the same bin combinations (columns `bin.names`) as the rows of
# `gwas.data`, fetch their per-population frequencies through an external Perl
# script, and compute a population-by-population covariance matrix of the
# variance-scaled frequencies.
#
# Arguments (as used in the visible code):
#   gwas.data         data frame; must contain the columns named in `bin.names`.
#   match.pop         data frame with a `SNP` column and the `bin.names` columns.
#   pop.names         vector of population names; only its length is used here.
#   bin.names         character vector of binning column names.
#   SNPs.per.cycle    not referenced in the visible part of the function; the
#                     value 5000 is hard-coded below instead.
#   cycles            number of sampling iterations.
#   path              directory where cov.SNPs<k> / cov.samples<k> are written.
#   full.dataset.file file passed to Scripts/sampleSNPs.pl.
#   env.var.data      list whose first element has a `CLST` column; used to
#                     filter populations.
#
# NOTE(review): the snippet ends inside the function (its closing brace and
# return value are not shown), so the return contract cannot be documented.
SampleCovSNPs <- function ( gwas.data , match.pop , pop.names , bin.names , SNPs.per.cycle , cycles , path , full.dataset.file , env.var.data ) {
#recover()
num.pops <- length ( pop.names )
#T.mat <- matrix ( rep ( c ( ( num.pops - 1 ) / num.pops , rep ( - 1 / ( num.pops ) , times = num.pops ) ) , times = num.pops ) , ncol = num.pops , nrow = num.pops )
# Contingency table of the GWAS SNPs over every combination of bin levels.
gwas.cont.table <- table ( gwas.data [ , bin.names ] )
# Linear indices of the non-empty cells of that table.
my.cov.bins <- which ( gwas.cont.table != 0 )
# Number of replicate draws per GWAS SNP so that about 5000 SNPs are sampled.
total.cov.reps <- ceiling ( 5000 / nrow ( gwas.data ) )
# Number of SNPs to draw from each bin combination.
this.many <- total.cov.reps * gwas.cont.table
# Per-cycle results, each indexed by k.
snp.by.pop <- list ()
epsilon.cov.snps <- list ()
var.cov.snps <- list ()
scaled.snp.by.pop <- list ()
uncentered.cov.mat <- list ()
for ( k in 1 : cycles ) {
sampled.SNPs <- list ()
j = 1
for ( BIN in my.cov.bins ) {
# Bare expression: has no effect inside the loop (nothing is printed or stored).
this.many [ BIN ]
# Translate the linear index BIN into one level label per table dimension.
this.bin <- mapply ( function ( x , y ) x [ y ] ,
x = dimnames ( gwas.cont.table ) , y = arrayInd ( BIN , dim ( this.many ) ) , SIMPLIFY = FALSE )
# One logical vector per bin column: does the match.pop row carry this level?
in.this.bin <- list ()
for ( i in 1 : length ( this.bin ) ) {
in.this.bin [[ i ]] <- match.pop [ , bin.names [ i ] ] %in% this.bin [ i ]
}
in.this.bin <- do.call ( cbind , in.this.bin )
# Keep SNPs whose rows match on every bin column.
matched.SNPs <- match.pop [ rowSums ( in.this.bin ) == length ( bin.names ) , ]$SNP
# Draw with replacement, so the same SNP id can appear several times.
# NOTE(review): if matched.SNPs is a single number, sample() would draw from
# 1:that number; presumably SNP ids are strings/factors - confirm.
sampled.SNPs [[ j ]] <- as.character ( sample ( matched.SNPs , this.many [ BIN ] , replace = T ) )
j = j + 1
}
# How many times each SNP id was drawn (names come out sorted by table()).
sampled.SNPs.count <- table ( unlist ( sampled.SNPs ) )
sampled.SNPs <- unlist ( sampled.SNPs )
# Write the drawn ids, one per line, to <path>/cov.SNPs<k>.
write ( unlist ( sampled.SNPs ) , file = paste ( path , "/cov.SNPs" , k , sep = "" ) , ncolumns = 1 )
# Run the external Perl script on that id file and the full dataset, redirecting
# its output to <path>/cov.samples<k>.
# NOTE(review): the command line is pasted together unquoted, so paths with
# spaces or shell metacharacters would break it; the exit status is not checked.
system ( paste ( "Scripts/sampleSNPs.pl " , path , "/cov.SNPs" , k , " " , full.dataset.file , " > " , path , "/cov.samples" , k , sep = "" ) )
sampled.cov.data <- read.table ( paste ( path , "/cov.samples" , k , sep = "" ) , stringsAsFactors = F , h = T )
# Keep the first five columns and give them fixed names.
sampled.cov.data <- sampled.cov.data [ , 1 : 5 ]
colnames ( sampled.cov.data ) <- c ( "SNP" , "CLST" , "A1" , "A2" , "FRQ" )
# Restrict to populations listed in the first element of env.var.data.
sampled.cov.data <- sampled.cov.data [ sampled.cov.data$CLST %in% env.var.data[[1]]$CLST , ]
# Values that cannot be parsed as numbers become NA here (with a warning) and
# would propagate into every later step.
sampled.cov.data$FRQ <- as.numeric ( sampled.cov.data$FRQ )
#sampled.SNPs <- read.table ( paste ( path , "/cov.SNPs" , k , sep = "" ) , stringsAsFactors = F )
# Sort by SNP, then population, so each SNP's frequencies are in a fixed order.
sampled.cov.data <- sampled.cov.data [ with ( sampled.cov.data , order ( SNP , CLST ) ) , ]
# One frequency vector per SNP id.
split.sampled.cov.data <- split ( sampled.cov.data$FRQ , sampled.cov.data$SNP )
# Repeat each SNP's frequency vector as many times as that SNP was drawn.
# NOTE(review): assumes the SNP ids in the script output are exactly the drawn
# ids, in the same order as names(sampled.SNPs.count); otherwise mapply pairs
# the wrong counts with the wrong SNPs - confirm.
cov.freqs <- mapply ( rep , x = split.sampled.cov.data , times = sampled.SNPs.count )
# Reshape to one row per drawn SNP and one column per population.
# NOTE(review): assumes every SNP has exactly num.pops frequencies after the
# CLST filter; otherwise rows would be misaligned - confirm.
snp.by.pop [[ k ]] <- t ( matrix ( unlist ( cov.freqs ) , nrow = num.pops ) )
# Mean frequency across populations for each SNP (row).
epsilon.cov.snps [[ k ]] <- apply ( snp.by.pop [[ k ]] , 1 , mean )
# average of ratios
# eps * (1 - eps) is 0 whenever the mean frequency of a SNP is exactly 0 or 1.
var.cov.snps [[ k ]] <- epsilon.cov.snps [[ k ]] * ( 1 - epsilon.cov.snps [[ k ]] )
# Divide each row by the square root of its eps * (1 - eps); the vector is
# recycled down the columns, so element [i, j] is divided by entry i.
# Rows with zero variance yield NaN (0 / 0) or Inf; NA frequencies yield NA.
scaled.snp.by.pop [[ k ]] <- snp.by.pop [[ k ]] / c ( sqrt ( var.cov.snps [[ k ]] ))
# Covariance between population columns (num.pops x num.pops). With the default
# use = "everything", cov() returns NA for any pair of columns that contains an
# NA/NaN, so one bad row per column is enough to make the whole matrix NA.
# NOTE(review): likely cause of the all-NA result - check
# anyNA ( scaled.snp.by.pop [[ k ]] ) and for zero entries in var.cov.snps [[ k ]].
# NOTE(review): cov() centers each column, although the variable is named
# "uncentered" - confirm intent.
# The closing brace below ends the `for ( k ... )` loop, not the function.
uncentered.cov.mat [[ k ]] <- cov ( scaled.snp.by.pop [[ k ]] )}
这里是输入的文件
head(gwas.data)
SNP A1 A2 EFF FRQ MAF MA.EFF IMP BVAL
1481742 rs10 C A 0.024016597 0.9446 0.0554 -0.024016597 1 572
1782250 rs1000000 G A -0.005167638 0.7827 0.2173 0.005167638 0 994
240861 rs10000010 T C 0.010812834 0.5065 0.4935 -0.010812834 0 950
1264831 rs10000017 C T 0.020455267 0.7814 0.2186 -0.020455267 1 919
1229400 rs1000002 C T 0.007209298 0.5116 0.4884 -0.007209298 1 459
1268202 rs10000023 G T 0.004429735 0.4021 0.4021 0.004429735 0 913
MAF.BINS IMP.BINS BVAL.BINS
1481742 (0.04,0.06] (0.5,1] (500,600]
1782250 (0.2,0.22] [-0.001,0.5] (900,1e+03]
240861 (0.48,0.5] [-0.001,0.5] (900,1e+03]
head(match.pop)
SNP CLST A1 A2 FRQ IMP POS CHR BVAL MAF MAF.BINS IMP.BINS
1 rs4030303 French G A 1 1 72434 1 994 0 [0,0.02] (0.5,1]
2 rs1933024 French G A 1 1 -1 -1 -1 0 [0,0.02] (0.5,1]
3 rs6594028 French A G 0 1 554461 1 988 0 [0,0.02] (0.5,1]
4 rs10458597 French C T 1 1 554484 1 988 0 [0,0.02] (0.5,1]
BVAL.BINS
1 (900,1e+03]
2 <NA>
3 (900,1e+03]
4 (900,1e+03]
head(pop.names)
[1] "Adygei" "Balochi" "BantuKenya" "BantuSouthAfrica"
[5] "Basque" "Bedouin"
head(bin.names)
[1] "MAF.BINS" "IMP.BINS" "BVAL.BINS"
SNPs.per.cycle = 5000
cycles = 1
path =输出路径
full.dataset.file =完整数据集的路径
head(env.var.data)
35 Palestinian -0.090108195 2
36 Papuan 2.923203965 6
37 Pathan 0.617956167 3
38 Pima -1.640273697 5
39 Russian 1.183447644 1
40 San -2.376985590 7