在数据框中执行多个 t 检验(t.test)。

时间:2017-06-14 19:01:50

标签: r for-loop statistics

我想对数据集进行一系列测试:

具体而言,我想对每个基因分别做一次检验,把大鼠 1–5 与大鼠 6–10 这两组进行比较。

我试图这样做:

# Asker's original attempt, reproduced verbatim (this is the code that fails).
# NOTE(review): the leading ">" on the next line looks like a console-prompt or
# quoting artifact rather than R code - confirm before running.
# goi[-1] drops the first column of goi.
>goi2 <- (goi[-1])
# NOTE(review): these two lines select ROWS 1-5 and 6-10, while the loop below
# indexes columns 1..92; per the data listing the rats are the columns and the
# genes are the rows, so the groups are presumably sliced on the wrong axis.
control <- goi2[1:5,]
stress <- goi2[6:10,]

for (i in 1:92){
  x <- control[,i]
  y <- stress[,i]
  # NOTE(review): `<=` is the less-than-or-equal comparison, not the assignment
  # operator `<-`, so the t.test result is never stored.
  x <= t.test(x, y)
 # print(x=i)
}

但是我收到了这个错误:

  

错误:无法使用矩阵或数组进行列索引

我尝试了几种类似的写法,但始终弄不清楚为什么它不起作用。

我完全是 R 的新手,但在一般编程方面并不是新手。

数据集:

  

基因,Rat_1,Rat_2,Rat_3,Rat_4,Rat_5,Rat_6,Rat_7,Rat_8,Rat_9,Rat_10   Oprd1,2.746,1.387,2.25,3.363,3.191,2.432,1.985,1.75,2.752,1.771   Grin2a,3.134,2.644,2.962,5.168,2.484,3.54,2.596,1.535,3.197,2.232   Grin2d(2),4.496,5.528,2.631,4.684,3.934,6.047,0.98,0.077,4.381,2.327   Oprm1,1.998,1.804,1.611,1.712,3.672,3.215,0.249,1.248,1.758,2.671   SCN2B,137.35,97.158,113.65,141.93,77.295,133.02,88.872,75.586,108.96,97.626   Ntf3,0.989,1.835,1.604,1.133,0.889,0.782,0.918,2.241,2.216,3.921   SCN1A(2),9.224,7.369,10.145,14.242,17.262,11.535,8.144,7.166,13.625,6.604   NTRK2(2),21.929,17.018,14.799,19.783,14.632,24.421,14.235,9.344,16.658,17.913   CACNA1C,4.585,3.637,3.948,4.135,3.403,5.381,4.193,3.162,3.455,3.695   GRIN2B,3.273,2.57,2.101,2.922,1.826,3.338,2.121,1.416,2.973,2.005   SCN9A(2),0.319,0,0,0.453,0.434,0.376,0,0,0.346,0.469   GRIA4(2),10.867,8.156,7.889,9.236,14.134,10.574,8.404,8.179,9.442,7.982   Cacna1e(2),1.805,1.783,2.045,1.968,1.405,1.807,0.973,0.993,0.857,1.769   Gria3,4.237,4.188,3.901,5.221,6.439,3.993,3.421,4.012,4.452,4.631   Gria1,8.284,7.942,7.557,12.001,3.976,9.472,7.653,4.16,7.971,5.381   Kcnj5,3.089,2.046,3.332,3.392,2.168,3.786,3.865,1.414,2.37,2.009   Cacna1b(2),11.071,8.716,8.246,9.594,7.189,11.62,6.028,4.481,9.307,9.074   SCN5A,1.301,1.017,0.714,1.401,0.449,1.183,1.065,0.292,0.823,0.714   SCN2A(2),3.286,2.119,2.257,2.024,1.902,3.441,1.327,1.072,2.576,2.09   SCN10A,0.037,0.069,0.087,0.076,0.082,0.095,0.052,0.019,0.078,0.045   Cacna1g(2),6.543,5.095,5.463,8.404,3.084,7.359,5.746,4.682,5.969,4.315   Cacna1e(3),5.37,4.002,3.313,4.803,2.665,5.623,3.296,1.953,3.827,4.092   BDNF(4),0.869,0.509,0.996,1.032,0.256,0.742,0.498,0.531,0.994,0.473   SCN4A,0.284,0.278,0.359,0.45,0.761,0.31,0.319,0.27,0.366,0.273   SCN5A(2),0.256,0.477,0.587,0.283,0,0.564,0.044,0.023,0.204,0.15   Gabra1,51.019,44.3,57.609,81.522,40.853,64.921,68.263,31.766,58.006,39.518   Scn8a,6.854,14.666,5.416,12.347,4.823,14.935,7.014,16.684,9.686,17.44   
Kcnj3,17.047,14.3,13.741,14.363,14.01,13.268,12.172,10.718,15.374,13.048   Slc6a2,107.9,69.941,91.704,36.411,112.57,114.5,23.398,63.848,53.323,135.26   GRIN3A,6.952,5.676,7.301,12.557,3.65,10.628,9.783,4.286,8.015,4.499   Cnr1,20.261,16.981,19.996,26.469,12.709,24.705,25.548,10.61,19.746,14.64   SCN1B,13.732,15.763,5.03,20.68,17.788,14.959,16.298,24.682,22.477,15.117   GRIA1(2),2.709,3.667,2.51,2.9,2.134,1.93,4.308,2.59,2.487,1.742   SCN3A(2),1.439,2.614,0,0.352,0,1.358,1.027,0,0.452,0.586   Scn11a,0.058,0.292,0.036,0.127,0.058,0.06,0.074,0.164,0.047,0.05   GRIA1(3),25.283,17.779,22.725,32.705,8.823,28.727,26.915,12.876,23.545,17.879   Cacna1f,0.056,0.067,0.14,0.123,0.04,0.182,0.072,0.083,0.077,0.097   CACNA1A,20.791,19.816,17.613,21.663,15.697,22.824,16.737,16.719,16.604,20.469   Gria4,8.51,7.107,8.342,9.338,7.46,8.877,7.673,6.341,8.393,9.555   Scn8a,6.738,14.706,4.172,11.467,2.552,10.757,6.021,15.222,3.588,11.333   Grin2d,20.398,15.794,22.521,24.693,16.97,24.108,24.19,21.016,18.314,19.044   GRIA3(2),15.301,13.087,13.918,14.433,12.282,14.914,12.198,11.602,13.738,15.481   OPRK1(2),6.66,4.97,7.604,10.281,2.151,10.462,10.278,1.525,6.869,4.902   SCN1B(3),46.553,42.795,49.498,55.558,64.101,38.178,44.1,59.033,43.837,39.382   Cacna1h,9.145,7.295,8.7,8.028,5.415,10.799,8.21,6.332,8.455,7.683   SCN2A,36.803,29.975,30.609,38.334,19.053,39.127,31.146,23.066,30.896,32.345   Cacna1g,5.489,5.213,6.24,7.896,3.97,4.876,6.283,5.464,6.08,3.692   NTRK2(3),147.81,152.45,153.46,136.09,181.1,156.85,219.8,164.53,156.64,147.92   SCN1A,9.222,9.162,9.659,13.83,12.679,8.088,11.45,10.406,9.503,6.827   GRIN1(3),69.943,68.01,76.358,81.029,63.692,83.424,70.981,80.088,69.821,70.764   Grin3b(2),2.065,1.265,1.45,1.576,3.875,1.441,1.822,1.964,2.286,0.965   GABRA2(2),2.268,1.251,1.638,2.844,2.93,2.934,3.725,1.724,1.455,2.674   Scn1b2(2),161.76,164.24,213.24,209.19,235.38,172.98,207.33,216.96,198.26,130.93   OPRM1(2),4.046,5.181,2.362,1.925,0.806,2.232,1.178,1.491,3.259,3.751   
CACNA1C(3),0.077,0.194,0.23,0,0.132,0.127,0,0.035,0.09,0.092   Ntrk2,27.139,26.028,23.881,27.22,22.259,30.728,22.381,19.782,24.704,30.85   Cacna1d(2),2.126,2.263,2.038,2.1,1.995,2.966,1.943,2.01,2.317,2.214   SCN3A,21.272,16.356,16.245,14.875,11.825,19.753,10.994,11.08,16.905,19.832   GRIN1(2),76.771,65.788,66.059,78.716,33.91,88.228,73.859,47.717,70.674,61.275   Grina,672.31,705.45,679.04,623.4,597.51,742.12,619.74,662.95,665.18,781.29   Cacna1e,2.448,1.981,1.506,2.003,1.318,3.052,1.953,0.814,2.17,2.482   BDNF(2),1.853,2.128,2.553,1.996,0.663,2.5,2.385,0.468,1.922,1.481   FOS,18.402,24.653,23.038,20.615,8.027,38.444,20.836,11.756,20.823,20.296   Scn4b,23.772,27.874,25.388,25.109,51.926,20.291,25.521,28.701,30.256,17.344   SLC6A2(3),480.05,455.95,307.6,186.82,376.96,447.61,123.5,409.58,347.86,681.04   NTF3(3),1.87,3.561,2.421,3.133,2.134,2.327,1.712,2.32,1.735,3.497   BDNF(3),0.319,0.09,0.665,0.187,0.107,0.185,0.394,0.264,0.21,0.345   Scn3b,112.86,115.29,99.711,96.245,71.741,122.34,85.875,88.906,102.88,132.13   GRIN2C,14.224,15.944,15.473,21.936,32.732,13.98,20.168,23.958,14.541,17.402   Gabrd,0.701,3.542,0.532,5.222,5.593,0.133,2.954,0.961,0.506,2.152   Cacna1b,16.935,15.764,14.475,15.639,10.655,19.408,14.115,14.079,14.26,16.737   Slc18a2,433.92,429.22,293.57,164.53,287.51,370.72,93.973,283.12,321.49,551.07   Cacnb1(2),16.456,5.099,16.969,4.469,12.471,5.143,14.017,10.049,17.537,4.26   Gabrg1,40.614,37.373,43.103,39.253,47.768,41.202,51.665,37.74,42.17,39.097   Grin1,1.235,0.812,0.909,1.605,0.513,1.371,1.596,1.346,1.213,0.922   SLC6A2(2),138.21,136.75,34.759,38.393,25.89,87.126,0,0.467,99.703,137.66   Galr3,2.691,2.51,2.517,4.446,0.727,2.933,4.041,2.08,2.638,1.456   OPRM1(3),7.273,7.676,7.08,6.196,5.515,9.023,2.57,4.8,7.699,10.471   Gabrq,70.623,67.728,51.095,42.456,43.156,77.924,28.63,32.975,54.192,87.697   GRIA4(3),25.846,26.045,24.37,37.866,18.037,26.907,31.423,21.292,26.795,24.642   CACNA1C(2),0.644,0.894,0.831,1.084,0.721,1.026,0.817,0.371,1.333,1.015   
Cacna1d(3),0.299,0.406,0.127,0.319,0.319,0.231,0.178,0.075,0.18,0.405   Cacnb1,47.24,51.505,42.702,48.718,33.28,60.334,38.611,41.827,40.352,56.132   Scn7a,2.351,2.38,2.114,1.96,0.316,2.647,1.945,1.219,2.559,1.498   Cacna1d,2.661,2.733,2.714,2.649,2.403,2.923,3.216,2.768,2.401,2.302   Gabra2,25.209,26.731,23.249,25.599,20.17,22.928,24.072,18.664,23.808,23.306   SCN9A,3.209,3.106,3.212,3.206,1.094,3.35,3.994,1.934,2.883,2.046   NTF3(2),2.347,2.282,2.112,1.025,1.762,2.029,0.501,1.652,2.717,1.982   Gria2,12.726,12.997,12.74,15.615,7.156,14.375,13.387,11.682,12.968,11.332   BDNF,0.703,0.777,1.034,0.571,0.166,1.164,0.549,0.325,0.801,1.12   GRIA2(2),17.769,17.694,16.62,18.603,11.295,19.926,18.044,13.594,16.946,17.712   BDNF(5),1.321,2.152,1.882,2.397,1.598,3.072,3.038,1.53,2.04,1.464

1 个答案:

答案 0 :(得分:2)

以下是一个使用基础 R 的可运行示例,所用数据为 goi:

str(goi)
# 'data.frame': 92 obs. of  11 variables:
#  $ Gene  : chr  "Oprd1" "Grin2a" "Grin2d(2)" "Oprm1" ...
#  $ Rat_1 : num  2.75 3.13 4.5 2 137.35 ...
#  $ Rat_2 : num  1.39 2.64 5.53 1.8 97.16 ...
#  $ Rat_3 : num  2.25 2.96 2.63 1.61 113.65 ...
#  $ Rat_4 : num  3.36 5.17 4.68 1.71 141.93 ...
#  $ Rat_5 : num  3.19 2.48 3.93 3.67 77.3 ...
#  $ Rat_6 : num  2.43 3.54 6.05 3.21 133.02 ...
#  $ Rat_7 : num  1.985 2.596 0.98 0.249 88.872 ...
#  $ Rat_8 : num  1.75 1.535 0.077 1.248 75.586 ...
#  $ Rat_9 : num  2.75 3.2 4.38 1.76 108.96 ...
#  $ Rat_10: num  1.77 2.23 2.33 2.67 97.63 ...
# Split the measurement columns into the two groups to compare:
# columns 2-6 (Rat_1 .. Rat_5) form the control group and
# columns 7-11 (Rat_6 .. Rat_10) form the stress group.
# Column 1 (Gene) is left out of both.
control <- goi[2:6]
stress <- goi[7:11]

现在,我们不使用 for 循环逐个处理每次的返回值,而是一次性完成所有计算,把每次检验的完整结果对象存入一个 list 中,这样之后仍然可以从所有检验结果中提取我们想要的任何内容。

# Run one two-sample t-test per row of goi (i.e. per gene) and keep the full
# result object of every test in a list, in row order.
# control[i, ] and stress[i, ] are the i-th gene's values in the two groups.
# The expression text is kept as-is because t.test records it in `data.name`.
results <- lapply(seq_len(nrow(goi)),
                  function(i) t.test(control[i,], stress[i,]))
length(results)
# [1] 92

results的每个元素都是t.test一次调用的返回值。

results[[1]]
#   Welch Two Sample t-test
# data:  control[i, ] and stress[i, ]
# t = 1.1034, df = 6.2218, p-value = 0.3107
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -0.5386851  1.4374851
# sample estimates:
# mean of x mean of y 
#    2.5874    2.1380 

您可以访问测试结果的任何组件:

names(results[[1]])
# [1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"   
# [6] "null.value"  "alternative" "method"      "data.name"  
head( sapply(results, `[[`, "p.value") )
# [1] 0.3107098 0.3083295 0.2626753 0.6245368 0.4406157 0.2800657
head( t(sapply(results, `[[`, "conf.int")) )
#             [,1]       [,2]
# [1,]  -0.5386851  1.4374851
# [2,]  -0.7513650  2.0681650
# [3,]  -1.5018657  4.4862657
# [4,]  -1.1880098  1.8504098
# [5,] -23.5402499 48.8678499
# [6,]  -2.2762668  0.8250668

注意:R 的许多细微之处之一是,*apply 系列函数返回的矩阵,在有些人看来与应有的形状相比是转置过的。正因为如此,凡是返回 matrix 的调用,最好在外面再套一层 t(...)。(这是一个按下“我相信”按钮、然后继续往下看的好时机。)

您可以将所有这些结果合并到一个data.frame中,例如:

# Relabel the columns of a matrix as <nameroot>1, <nameroot>2, ... and drop
# any existing row names.
#   x        : a matrix (anything for which ncol() and dimnames<- work)
#   nameroot : prefix used for every generated column name
# Returns x with its dimnames replaced.
namefunc <- function(x, nameroot) {
  col_labels <- paste0(nameroot, seq_len(ncol(x)))
  dimnames(x) <- list(NULL, col_labels)
  x
}

(这是一个小辅助函数,用来让下面的代码稍微容易阅读一些。它采用的是一种非常简单的命名约定,只是为了保证列名唯一。)

# Combine the key numbers of every test into one data.frame, one row per test:
#   statistic   - the t statistic
#   p.value     - the p-value
#   parameter   - the degrees of freedom
#   conf1/conf2 - lower / upper bound of the confidence interval
#   est1/est2   - the two sample estimates (mean of x, mean of y)
# vapply() is used instead of sapply() so every extraction has a fixed type
# and length (numeric(1) or numeric(2)) regardless of how many tests there are.
# The two-value components come back as a 2 x n matrix, hence the t().
test_results <- cbind.data.frame(
  statistic = vapply(results, `[[`, numeric(1), "statistic"),
  p.value = vapply(results, `[[`, numeric(1), "p.value"),
  parameter = vapply(results, `[[`, numeric(1), "parameter"),
  namefunc(t(vapply(results, `[[`, numeric(2), "conf.int")), "conf"),
  namefunc(t(vapply(results, `[[`, numeric(2), "estimate")), "est")
)
head(test_results)
#    statistic   p.value parameter       conf1      conf2     est1     est2
# 1  1.1033554 0.3107098  6.221806  -0.5386851  1.4374851   2.5874   2.1380
# 2  1.0948456 0.3083295  7.312678  -0.7513650  2.0681650   3.2784   2.6200
# 3  1.2480711 0.2626753  5.480699  -1.5018657  4.4862657   4.2546   2.7624
# 4  0.5107431 0.6245368  7.337202  -1.1880098  1.8504098   2.1594   1.8282
# 5  0.8134064 0.4406157  7.633546 -23.5402499 48.8678499 113.4766 100.8128
# 6 -1.2161356 0.2800657  4.824393  -2.2762668  0.8250668   1.2900   2.0156

正如 RobertMc 所建议的,这里当然也可以使用 tidyverse 中的软件包。为此,我推荐 dplyr 和 tidyr,不过 broom 或许也能派上用场。