在R中的协方差矩阵中获得NA

时间:2015-11-27 19:27:45

标签: r matrix null covariance

以下是我用来把一个数据框转换成另一个数据框的代码:

uncentered.cov.mat [[ k ]] <- cov ( scaled.snp.by.pop [[ k ]] )

这是scaled.snp.by.pop的样子:

` [1914,]  2.4523662  2.44082577  1.6878051  1.11283738
   [1915,]  1.3423300  1.61079519  1.3087711  1.43821057
   [1916,]  0.2200832  0.07922990  0.0000000  0.56592730
   [1917,]  0.2200832  0.07922990  0.0000000  0.56592730
   [1918,]  0.8896991  0.72065699  1.0009125  1.57286191
   [1919,]  2.1540474  2.65870923  2.0771166  2.57166807
   [1920,]  2.1540474  2.65870923  2.0771166  2.57166807
   [1921,]  2.1540474  2.65870923  2.0771166  2.57166807
   [1922,]  0.5598038  0.92703437  0.7053522  0.38386478
   [1923,]  0.1299660  0.42108943  0.0000000  1.89378890
 [ reached getOption("max.print") -- omitted 2056889 rows ]`


`nrow(scaled.snp.by.pop) is NULL`

`ncol(scaled.snp.by.pop) is NULL`

`class(scaled.snp.by.pop) is "list"`
转化后,

uncentered.cov.mat看起来像这样:

head(uncentered.cov.mat)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
[2,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
[3,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
[4,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
[5,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
[6,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
     [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
[1,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[2,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[3,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[4,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[5,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[6,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
     [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
[1,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[2,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[3,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[4,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[5,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[6,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
     [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
[1,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[2,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[3,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[4,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[5,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
[6,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
     [,51] [,52]
[1,]    NA    NA
[2,]    NA    NA
[3,]    NA    NA
[4,]    NA    NA
[5,]    NA    NA
[6,]    NA    NA

nrow(uncentered.cov.mat) is 52

ncol(uncentered.cov.mat) is 52

class(uncentered.cov.mat) is "matrix"

根据我老板的说法,上面的命令应该让矩阵中所有单元格的值都大于0。出了什么问题?是不是有什么东西导致了NA,而我不知道该从哪里查起?

完整代码是:

# SampleCovSNPs: for each of `cycles` rounds, draw a bin-matched random set of
# SNPs, fetch their per-population frequencies through an external script, and
# compute a population-by-population covariance matrix from the scaled
# frequencies.
#
# Arguments (as used by the visible code):
#   gwas.data         data frame; its `bin.names` columns are cross-tabulated
#                     to decide how many SNPs to draw from each bin combination.
#   match.pop         data frame with a `SNP` column and the `bin.names`
#                     columns; the pool that SNPs are sampled from.
#   pop.names         vector of population names; only its length is used here.
#   bin.names         names of the binning columns shared by gwas.data and
#                     match.pop.
#   SNPs.per.cycle    not referenced in the visible code (a literal 5000 is
#                     used instead) -- NOTE(review): confirm whether intended.
#   cycles            number of sampling rounds.
#   path              directory for the cov.SNPs<k> / cov.samples<k> files.
#   full.dataset.file path handed to Scripts/sampleSNPs.pl.
#   env.var.data      list; element 1 must have a `CLST` column, used to
#                     filter populations.
#
# NOTE(review): the listing is truncated -- the `}` on the last line closes the
# `for` loop only, so the function's closing brace and return value are not
# shown here.
SampleCovSNPs <- function ( gwas.data , match.pop , pop.names , bin.names , SNPs.per.cycle , cycles , path , full.dataset.file , env.var.data ) {

    #recover()
    num.pops <- length ( pop.names )
    #T.mat <- matrix ( rep ( c ( ( num.pops - 1 ) / num.pops , rep ( - 1 / ( num.pops ) , times = num.pops ) ) , times = num.pops ) , ncol = num.pops , nrow = num.pops )
    # Contingency table of GWAS rows over every combination of bin labels.
    gwas.cont.table <- table ( gwas.data [ , bin.names ] ) 
    # Linear indices of the non-empty cells of that table.
    my.cov.bins <- which ( gwas.cont.table != 0 )
    # Replication factor so that roughly 5000 SNPs are drawn per cycle.
    total.cov.reps <- ceiling ( 5000 / nrow ( gwas.data ) )
    # Number of SNPs to sample from each bin combination.
    this.many <- total.cov.reps * gwas.cont.table   



    # Per-cycle results, indexed by k.
    snp.by.pop <- list ()
    epsilon.cov.snps <- list ()
    var.cov.snps <- list ()
    scaled.snp.by.pop <- list ()
    uncentered.cov.mat <- list ()
    for ( k in 1 : cycles ) {
        sampled.SNPs <- list ()
        j = 1 
        for ( BIN in my.cov.bins ) {
            # Bare expression with no effect inside a loop (leftover debugging).
            this.many [ BIN ]
            # Translate the linear cell index into one bin label per dimension.
            this.bin <- mapply ( function ( x , y ) x [ y ] ,
            x = dimnames ( gwas.cont.table ) , y = arrayInd ( BIN , dim ( this.many ) ) , SIMPLIFY = FALSE )
            in.this.bin <- list ()
            # One logical vector per bin column: does the match.pop row carry this label?
            for ( i in 1 : length ( this.bin ) ) {
                in.this.bin [[ i ]] <- match.pop [ , bin.names [ i ] ] %in% this.bin [ i ] 
            }
            in.this.bin <- do.call ( cbind , in.this.bin )
            # Keep SNPs whose row matches on every bin column.
            matched.SNPs <- match.pop [ rowSums ( in.this.bin ) == length ( bin.names ) , ]$SNP
            # Sample with replacement, so the same SNP may be drawn more than once.
            sampled.SNPs [[ j ]] <- as.character ( sample ( matched.SNPs , this.many [ BIN ] , replace = T ) )
            j = j + 1   
        }
        # How many times each distinct SNP was drawn in this cycle.
        sampled.SNPs.count <- table ( unlist ( sampled.SNPs ) )
        sampled.SNPs <- unlist ( sampled.SNPs )
        # Write the sampled IDs to <path>/cov.SNPs<k>, one per line.
        write ( unlist ( sampled.SNPs ) , file = paste ( path , "/cov.SNPs" , k , sep = "" ) , ncolumns = 1 )
        # Run the external script on that list and full.dataset.file; its output
        # is redirected to <path>/cov.samples<k>.
        system ( paste ( "Scripts/sampleSNPs.pl " , path , "/cov.SNPs" , k , " " , full.dataset.file , " > " , path , "/cov.samples" , k , sep = "" ) )
        # `h = T` relies on partial matching of read.table's `header` argument.
        sampled.cov.data <- read.table ( paste ( path , "/cov.samples" , k , sep = "" ) , stringsAsFactors = F , h = T )
        # Keep the first five columns and give them fixed names.
        sampled.cov.data <- sampled.cov.data [ , 1 : 5 ]
        colnames ( sampled.cov.data ) <- c ( "SNP" , "CLST" , "A1" , "A2" , "FRQ" )
        # Restrict to populations listed in env.var.data[[1]]$CLST.
        sampled.cov.data <- sampled.cov.data [ sampled.cov.data$CLST %in% env.var.data[[1]]$CLST , ]
        # Non-numeric FRQ entries become NA here (with a coercion warning).
        sampled.cov.data$FRQ <- as.numeric ( sampled.cov.data$FRQ )
        #sampled.SNPs <- read.table ( paste ( path , "/cov.SNPs" , k , sep = "" ) , stringsAsFactors = F ) 
        # Sort so each SNP's frequencies are contiguous and ordered by population.
        sampled.cov.data <- sampled.cov.data [ with ( sampled.cov.data , order ( SNP , CLST ) ) , ]
        # One frequency vector per SNP.
        split.sampled.cov.data <- split ( sampled.cov.data$FRQ , sampled.cov.data$SNP )
        # Repeat each SNP's vector as many times as it was sampled.
        # NOTE(review): assumes split() and table() yield the same SNPs in the
        # same order -- confirm; mapply pairs the two by position.
        cov.freqs <- mapply ( rep , x = split.sampled.cov.data , times = sampled.SNPs.count )
        # Reshape to one row per sampled SNP copy and one column per population.
        # NOTE(review): assumes every SNP has exactly num.pops frequencies after
        # the CLST filter -- confirm, otherwise rows are misaligned.
        snp.by.pop [[ k ]] <- t ( matrix ( unlist ( cov.freqs ) , nrow = num.pops ) )
        # Mean frequency of each SNP across populations.
        epsilon.cov.snps [[ k ]] <- apply ( snp.by.pop [[ k ]] , 1 , mean )

        # average of ratios
        var.cov.snps [[ k ]] <- epsilon.cov.snps [[ k ]] * ( 1 - epsilon.cov.snps [[ k ]] )
        # Divide every row by sqrt(var) of that row; the vector recycles down
        # the columns, so element i is applied to row i.
        # NOTE(review): a row whose mean is exactly 0 or 1 has var == 0, so the
        # division gives NaN (0/0) or Inf; an NA in FRQ propagates the same way.
        scaled.snp.by.pop [[ k ]] <- snp.by.pop [[ k ]] / c ( sqrt ( var.cov.snps [[ k ]] ))
        # cov() centres the columns despite the variable name. With the default
        # use = "everything", any column holding an NA/NaN yields NA, so a
        # single bad row can blank the whole matrix -- likely the cause of the
        # all-NA result; check with sum ( ! is.finite ( scaled.snp.by.pop [[ k ]] ) ).
        uncentered.cov.mat [[ k ]] <- cov ( scaled.snp.by.pop [[ k ]] )}

这里是输入的文件

head(gwas.data)
               SNP A1 A2          EFF    FRQ    MAF       MA.EFF IMP BVAL
1481742       rs10  C  A  0.024016597 0.9446 0.0554 -0.024016597   1  572
1782250  rs1000000  G  A -0.005167638 0.7827 0.2173  0.005167638   0  994
240861  rs10000010  T  C  0.010812834 0.5065 0.4935 -0.010812834   0  950
1264831 rs10000017  C  T  0.020455267 0.7814 0.2186 -0.020455267   1  919
1229400  rs1000002  C  T  0.007209298 0.5116 0.4884 -0.007209298   1  459
1268202 rs10000023  G  T  0.004429735 0.4021 0.4021  0.004429735   0  913
           MAF.BINS     IMP.BINS   BVAL.BINS
1481742 (0.04,0.06]      (0.5,1]   (500,600]
1782250  (0.2,0.22] [-0.001,0.5] (900,1e+03]
240861   (0.48,0.5] [-0.001,0.5] (900,1e+03]

head(match.pop)
         SNP   CLST A1 A2 FRQ IMP    POS CHR BVAL MAF MAF.BINS IMP.BINS
1  rs4030303 French  G  A   1   1  72434   1  994   0 [0,0.02]  (0.5,1]
2  rs1933024 French  G  A   1   1     -1  -1   -1   0 [0,0.02]  (0.5,1]
3  rs6594028 French  A  G   0   1 554461   1  988   0 [0,0.02]  (0.5,1]
4 rs10458597 French  C  T   1   1 554484   1  988   0 [0,0.02]  (0.5,1]

    BVAL.BINS
1 (900,1e+03]
2        <NA>
3 (900,1e+03]
4 (900,1e+03]

head(pop.names)
[1] "Adygei"           "Balochi"          "BantuKenya"       "BantuSouthAfrica"
[5] "Basque"           "Bedouin" 

head(bin.names)
[1] "MAF.BINS"  "IMP.BINS"  "BVAL.BINS"   

SNPs.per.cycle = 5000

cycles = 1

path =输出路径

full.dataset.file =完整数据集的路径

head(env.var.data)
35      Palestinian -0.090108195   2
36           Papuan  2.923203965   6
37           Pathan  0.617956167   3
38             Pima -1.640273697   5
39          Russian  1.183447644   1
40              San -2.376985590   7

0 个答案:

没有答案