使用带有NA值的summarise_at()

时间:2017-09-17 10:39:12

标签: r dplyr tidyverse

鉴于此类数据:

library(tidyverse)
# Small reproducible tibble: columns A-K plus three "_num" columns, with NA
# values in rows 2 and 3. The list is already named, so no separate `.Names`
# attribute is required.
e <- structure(
  list(
    A = 1:3,
    B = c(24.4510633333333, NA, NA),
    C = c(22.7447195, 22.448911, NA),
    D = c(22.6726856666667, NA, NA),
    E = c(23.1775613333333, 23.1209583333333, 23.357228),
    F = c(1.273502, NA, NA),
    G = c(-0.432841833333331, -0.672047333333335, NA),
    H = c(-0.504875666666667, NA, NA),
    I = c(0.901974333333332, NA, NA),
    J = c(0.239205500000004, 0, NA),
    K = c(0, NA, NA),
    L_num = c(0.535153869932122, NA, NA),
    M_num = c(0.847211748030606, 1, NA),
    N_num = c(1, NA, NA)
  ),
  row.names = c(NA, -3L),
  class = c("tbl_df", "tbl", "data.frame")
)

为什么这样可行:

# Column means of every column whose name contains "_num", skipping NA values.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
e %>% select_at(vars(contains("_num"))) %>% colMeans(na.rm = TRUE)
        L_num     M_num     N_num 
    0.5351539 0.9236059 1.0000000 

但下面这些写法都不行:

# Average every column whose name contains "_num"; `na.rm=TRUE` is handed on
# to mean() through the `...` argument of summarise_at().
e %>% summarise_at(vars(contains("_num")), mean, na.rm=TRUE)
# A tibble: 1 x 3
  L_num M_num N_num
  <dbl> <dbl> <dbl>
1    NA    NA    NA

# Same summary written with funs(): the name `mean_meas` is appended to each
# selected column name as a suffix in the result.
e %>% summarise_at(vars(contains("_num")), funs(mean_meas = mean(., na.rm = TRUE)))
# A tibble: 1 x 3
  L_num_mean_meas M_num_mean_meas N_num_mean_meas
            <dbl>           <dbl>           <dbl>
1              NA              NA              NA

当数据中存在NA时,使用summarise_at()的正确方法是什么?

编辑:我看到我的示例没有充分重现问题。我在创建MWE时遇到了问题,但这是一个更具体的例子。

我编写了一个函数来获取原始数据并处理我的数据,使其看起来像这样:

    # Example data: 15 rows (5 samples x 3 replicates) and 17 columns, given as
    # dput()-style output. The `spec` attribute holds one collector entry per
    # column (it looks like a readr column specification -- TODO confirm), so
    # its `cols` list must name all 17 columns, including `condition`.
    example <- structure(list(sample = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), 
type = c("A", "A", "A", "B", "B", "B", "A", "A", "A", "C", "C", "C", "C", "C", "C"), 
condition = c("one", "one", "one", "two", "two", "two", "two", "two", "two", "one", "one", "one", "two", "two", "two"), 
replicate = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), G1 = c(NA, NA, NA, 27, 27.1, 29.4, 25.3, 23.3, 24.8, NA, NA, NA, 24.5, NA, NA), 
G2 = c(23.3, 25.1, 24.6, 27.3, 25.2, 23.6, 23.6, 24.3, 23.3, 25.6, 24.4, 26.4, 22.7, 22.4, NA), G3 = c(24, 25.7, 24.5, 27.7, 25.6, 24.1, 23.9, 25.2, 23.7, 24.7, 23.3, 25.4, 22.7, NA, NA), 
ref = c(23.2, 23.2, 23.3, 22.9, 23.4, 23.4, 23.2, 22.9, 23.3, 23.4, 23.2, 23.5, 23.2, 23.1, 23.4), 
G1_t1 = c(NA, NA, NA, 4, 3.7, 6, 2.1, 0.4, 1.5, NA, NA, NA, 1.3, NA, NA), G2_t1 = c(0.1, 2, 1.3, 4.4, 1.8, 0.2, 0.5, 1.4, 0, 2.1, 1.2, 2.9, -0.4, -0.7, NA), 
G3_t1 = c(0.8, 2.5, 1.2, 4.8, 2.2, 0.7, 0.7, 2.3, 0.4, 1.2, 0, 1.9, -0.5, NA, NA), 
G1_t1_t2 = c(NA, NA, NA, 3.7, 3.3, 5.6, 1.7, 0, 1.1, NA, NA, NA, 0.9, NA, NA), 
G2_t1_t2 = c(0.7, 2.6, 2, 5, 2.5, 0.9, 1.1, 2, 0.6, 2.8, 1.8, 3.6, 0.2, 0, NA), 
G3_t1_t2 = c(1.3, 3, 1.7, 5.3, 2.7, 1.2, 1.2, 2.8, 0.9, 1.7, 0.6, 2.4, 0, NA, NA), 
G1_t1_t2_t3 = c(NA, NA, NA, 0.1, 0.1, 0, 0.3, 1, 0.5, NA, NA, NA, 0.5, NA, NA), 
G2_t1_t2_t3 = c(0.6, 0.2, 0.3, 0, 0.2, 0.5, 0.5, 0.2, 0.6, 0.1, 0.3, 0.1, 0.8, 1, NA), 
G3_t1_t2_t3 = c(0.4, 0.1, 0.3, 0, 0.2, 0.4, 0.4, 0.1, 0.5, 0.3, 0.7, 0.2, 1, NA, NA)), 
.Names = c("sample", "type", "condition", "replicate", "G1", "G2", "G3", "ref", "G1_t1", "G2_t1", "G3_t1", "G1_t1_t2", "G2_t1_t2", "G3_t1_t2", "G1_t1_t2_t3", "G2_t1_t2_t3", "G3_t1_t2_t3"), 
class = c("tbl_df", "tbl", "data.frame"), 
row.names = c(NA, -15L), 
spec = structure(list(cols = structure(list(
sample = structure(list(), class = c("collector_integer", "collector")), 
type = structure(list(), class = c("collector_character", "collector")), 
condition = structure(list(), class = c("collector_character", "collector")), 
replicate = structure(list(), class = c("collector_integer", "collector")), 
G1 = structure(list(), class = c("collector_double", "collector")), 
G2 = structure(list(), class = c("collector_double", "collector")), 
G3 = structure(list(), class = c("collector_double", "collector")), 
ref = structure(list(), class = c("collector_double", "collector")), 
G1_t1 = structure(list(), class = c("collector_double", "collector")), 
G2_t1 = structure(list(), class = c("collector_double", "collector")),
G3_t1 = structure(list(), class = c("collector_double", "collector")), 
G1_t1_t2 = structure(list(), class = c("collector_double", "collector")), 
G2_t1_t2 = structure(list(), class = c("collector_double", "collector")), 
G3_t1_t2 = structure(list(), class = c("collector_double", "collector")), 
G1_t1_t2_t3 = structure(list(), class = c("collector_double", "collector")), 
G2_t1_t2_t3 = structure(list(), class = c("collector_double", "collector")), 
G3_t1_t2_t3 = structure(list(), class = c("collector_double", "collector"))), 
.Names = c("sample", "type", "condition", "replicate", "G1", "G2", "G3", "ref", "G1_t1", "G2_t1", "G3_t1", "G1_t1_t2", "G2_t1_t2", "G3_t1_t2", "G1_t1_t2_t3", "G2_t1_t2_t3", "G3_t1_t2_t3")), 
default = structure(list(), class = c("collector_guess", "collector"))), 
.Names = c("cols", "default"), class = "col_spec"))

然后我想这样做:

# For each sample/type/condition combination, nest all remaining columns into
# a list-column `data`, then summarise every column whose name contains "t3":
#   - `mean`: the mean with NA values removed (columns suffixed `_mean_expr`)
#   - `se`:   sd / sqrt(number of non-NA values) (columns suffixed `_se`)
# Finally unnest both summaries so they sit side by side, one row per group.
# Note: sd() returns NA when fewer than two non-NA values remain in a group.
example %>% 
  nest(-sample, -type, -condition) %>% 
  mutate(mean = map(data, ~ summarise_at(.x, vars(contains("t3")),
                             funs(mean_expr = mean(., na.rm = TRUE) ))),
         se = map(data, ~ summarise_at(.x, vars(contains("t3")),
                           funs(se = sd(., na.rm = TRUE)/sqrt(sum(!is.na(.))) ))) ) %>% 
  unnest(mean, se)

上面的代码在这个MWE中运行正常。但是,当我在自己的函数中使用相同的代码时,结果的最后一行全部是NA,而前面的行都正常。我不明白为什么会这样。

0 个答案:

没有答案