两个数据集之间的t.test - 逐行

时间:2016-07-29 07:20:28

标签: r

我认为这个标题解释了一切。我想在两个数据集之间进行t.test。我想逐行比较。

让我们使用mtcars以及一个稍作修改的版本mtcars_mod

structure(list(mpg = c(21, 25, 22.8, 21.4, 18.7, 18.1, 14.3, 
                       24.4, 24.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 
                       36.4, 31.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 
                       29.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 
                                                8, 8, 8, 8, 7, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), 
               disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 
                        167.6, 167.6, 275.8, 275.8, 275.8, 6, 460, 440, 78.7, 75.7, 
                        71.1, 120.1, 318, 304, 350, 400, 79, 15, 97, 351, 145, 
                        301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 
                                          123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 
                                          150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, 
                                                                                                    3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 
                                                                                                    3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 
                                                                                                    3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
                                          ), wt = c(2.62, 2.875, 2.32, 7, 3.44, 3.46, 3.57, 3.19, 
                                                    3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 
                                                    1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 
                                                    1.513, 3.17, 2.77, 6, 2.78), qsec = c(16.46, 17.02, 18.61, 
                                                                                             114, 17.02, 20.22, 15.84, 12, 22.9, 18.3, 18.9, 17.4, 17.6, 
                                                                                             18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 
                                                                                             32, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
                                                    ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
                                                              0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, 
                                                                                                                      1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
                                                                                                                      0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, 
                                                                                                                                                                    3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 
                                                                                                                                                                    3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, 
                                                                                                                                                                                                         2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 
                                                                                                                                                                                                         2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp", 
                                                                                                                                                                                                                                        "drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4", 
                                                                                                                                                                                                                                                                                                         "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", 
                                                                                                                                                                                                                                                                                                         "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", 
                                                                                                                                                                                                                                                                                                         "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", 
                                                                                                                                                                                                                                                                                                         "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", 
                                                                                                                                                                                                                                                                                                         "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", 
                                                                                                                                                                                                                                                                                                         "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", 
                                                                                                                                                                                                                                                                                                         "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", 
                                                                                                                                                                                                                                                                                                         "Volvo 142E"), class = "data.frame"

我尝试在循环中执行此操作,但我不知道如何存储结果。我只得到最后一个值...

# Row-wise t-test attempt: for every row z, columns 1-7 of mtcars are compared
# against the same row and columns of mtcars_mod.
for(z in 1:nrow(mtcars)){
  vec_1 <- mtcars[z,1:7]
  vec_2 <- mtcars_mod[z,1:7]
  # [3] picks the third element of the list returned by t.test() (the p-value);
  # unlist() flattens it to a length-one named numeric.
  # NOTE: vec_results is reassigned on every iteration, so once the loop ends
  # it only holds the value computed for the last row.
  vec_results <- unlist(t.test(vec_1, vec_2)[3])

} 

有人可以告诉我如何纠正我的循环吗?我更倾向于使用apply系列函数,但仍然想知道我的循环错在哪里...

1 个答案:

答案 0 :(得分:3)

(我将直接使用我自己修改的mtcarsmod……抱歉,你给出的数据至少缺少一个右括号,而且——虽然我很清楚这是怎么造成的——它在SO的窗口里显示得很难看!)

# Seed the RNG so the jittered copy below is reproducible.
set.seed(42)
# Build a noisy copy of mtcars: every column is passed through jitter() with
# factor = 5, then the list of columns is converted back into a data frame.
mtcarsmod <- as.data.frame(
  lapply(mtcars, function(column) jitter(column, factor = 5))
)
# Preview the first rows of the modified data.
head(mtcarsmod)
#    mpg  cyl disp    hp drat   wt qsec     vs     am gear  carb
# 1 21.1 5.55  160 109.7 3.89 2.62 16.5 -0.373  0.221 3.68 3.861
# 2 21.1 6.74  160 110.0 3.90 2.88 17.0  0.641  1.080 3.06 3.788
# 3 22.8 2.02  108  93.5 3.86 2.32 18.6  0.614  1.142 4.73 0.284
# 4 21.5 7.33  258 110.2 3.08 3.21 19.4  0.371  0.238 3.46 0.560
# 5 18.7 6.03  360 175.3 3.15 3.44 17.0 -0.903  0.430 2.63 2.130
# 6 18.1 4.83  225 104.4 2.77 3.46 20.2  0.491 -0.753 2.77 1.870

您应该使用sapply或它的某个同类函数(例如lapply、vapply)来代替循环。至于您的循环:每次迭代都会给vec_results重新赋值,所以循环结束后只剩下最后一行的结果。

# Run one t.test per row (columns 1-7 of each data set) and collect the
# p-values into a single vector whose elements are all named "p.value".
sapply(seq_len(nrow(mtcars)), function(row_idx) {
  row_test <- t.test(mtcars[row_idx, 1:7], mtcarsmod[row_idx, 1:7])
  # Selecting by name yields the same one-element list as positional [3].
  unlist(row_test["p.value"])
})
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.998   0.998   0.992   0.996   0.998   0.995   0.999   1.000   0.999   0.998   0.995 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.995   0.999   0.999   0.998   0.999   0.997   0.999   0.995   0.997   0.995   0.999 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.997   0.998   1.000   0.990   0.997   0.999   0.999   0.995   0.997   0.995 

使用lapply的一个好处是可以保留并利用检验结果中的更多信息。例如:

# Keep the complete htest object for every row rather than only its p-value.
# The argument name `r` and the call expressions are kept as-is because
# t.test() deparses them into the `data.name` field of each result.
ret <- lapply(
  seq_len(nrow(mtcars)),
  function(r) {
    t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
  }
)
# Inspect the structure of the first two results.
str(ret[seq_len(2)])
# List of 2
#  $ :List of 9
#   ..$ statistic  : Named num 0.0024
#   .. ..- attr(*, "names")= chr "t"
#   ..$ parameter  : Named num 12
#   .. ..- attr(*, "names")= chr "df"
#   ..$ p.value    : num 0.998
#   ..$ conf.int   : atomic [1:2] -73.4 73.5
#   .. ..- attr(*, "conf.level")= num 0.95
#   ..$ estimate   : Named num [1:2] 45.7 45.6
#   .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
#   ..$ null.value : Named num 0
#   .. ..- attr(*, "names")= chr "difference in means"
#   ..$ alternative: chr "two.sided"
#   ..$ method     : chr "Welch Two Sample t-test"
#   ..$ data.name  : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
#   ..- attr(*, "class")= chr "htest"
#  $ :List of 9
#   ..$ statistic  : Named num -0.00311
#   .. ..- attr(*, "names")= chr "t"
#   ..$ parameter  : Named num 12
#   .. ..- attr(*, "names")= chr "df"
#   ..$ p.value    : num 0.998
#   ..$ conf.int   : atomic [1:2] -73.4 73.2
#   .. ..- attr(*, "conf.level")= num 0.95
#   ..$ estimate   : Named num [1:2] 45.8 45.9
#   .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
#   ..$ null.value : Named num 0
#   .. ..- attr(*, "names")= chr "difference in means"
#   ..$ alternative: chr "two.sided"
#   ..$ method     : chr "Welch Two Sample t-test"
#   ..$ data.name  : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
#   ..- attr(*, "class")= chr "htest"
# p-value of the first row's test, looked up by exact element name.
ret[[1]][["p.value"]]
# [1] 0.998

你仍然可以从结果中轻松获得p值的向量:

# Pull the p.value element out of every stored test result.
sapply(ret, function(test_result) test_result[["p.value"]])
#  [1] 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 0.995 0.999 0.999
# [15] 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 0.997 0.998 1.000 0.990 0.997 0.999
# [29] 0.999 0.995 0.997 0.995