给定如下数据:
library(tidyverse)
# Small 3-row example tibble: an integer id column A, numeric columns B-K, and
# three columns whose names end in "_num" (L_num, M_num, N_num). Most columns
# contain NA values.
e <- structure(list(A = 1:3, B = c(24.4510633333333, NA, NA), C = c(22.7447195, 22.448911, NA), D = c(22.6726856666667, NA, NA), E = c(23.1775613333333, 23.1209583333333, 23.357228), F = c(1.273502, NA, NA), G = c(-0.432841833333331, -0.672047333333335, NA), H = c(-0.504875666666667, NA, NA), I = c(0.901974333333332,NA, NA), J = c(0.239205500000004, 0, NA), K = c(0, NA, NA), L_num = c(0.535153869932122, NA, NA), M_num = c(0.847211748030606, 1, NA), N_num = c(1, NA, NA)), .Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L_num", "M_num", "N_num"), row.names = c(NA, -3L), class = c("tbl_df","tbl", "data.frame"))
为什么这样可行:
# Keep only the columns whose name contains "_num", then take the column means
# with NA values dropped. TRUE is spelled out because T is an ordinary,
# reassignable variable.
e %>% select_at(vars(contains("_num"))) %>% colMeans(na.rm = TRUE)
L_num M_num N_num
0.5351539 0.9236059 1.0000000
但下面这些写法都不行:
# Attempt 1: hand mean() to summarise_at() for the "_num" columns and supply
# na.rm = TRUE as an extra argument.
e %>% summarise_at(vars(contains("_num")), mean, na.rm=TRUE)
# A tibble: 1 x 3
L_num M_num N_num
<dbl> <dbl> <dbl>
1 NA NA NA
# Attempt 2: wrap the call in funs() so na.rm = TRUE is written inside the
# mean() call itself; the name `mean_meas` is appended to each output column
# name (e.g. L_num_mean_meas).
e %>% summarise_at(vars(contains("_num")), funs(mean_meas = mean(., na.rm = TRUE)))
# A tibble: 1 x 3
L_num_mean_meas M_num_mean_meas N_num_mean_meas
<dbl> <dbl> <dbl>
1 NA NA NA
当数据中存在 NA 时,使用 summarise_at() 的正确方法是什么?
编辑:我发现上面的示例并不能充分重现该问题。我在构造最小可复现示例(MWE)时遇到了困难,下面是一个更具体的例子。
我编写了一个函数来获取原始数据并处理我的数据,使其看起来像这样:
# Example data: 15 rows (5 samples x 3 replicates) with identifying columns
# (sample, type, condition, replicate), measurements G1-G3 and ref, and derived
# columns with the suffixes _t1, _t1_t2 and _t1_t2_t3. Several measurement
# columns contain NA values.
# The `spec` attribute holds one collector entry per column; the entry for
# `condition` was missing, which left the call with unbalanced parentheses and
# out of step with the 17 names listed in `.Names`.
example <- structure(list(sample = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L),
type = c("A", "A", "A", "B", "B", "B", "A", "A", "A", "C", "C", "C", "C", "C", "C"),
condition = c("one", "one", "one", "two", "two", "two", "two", "two", "two", "one", "one", "one", "two", "two", "two"),
replicate = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), G1 = c(NA, NA, NA, 27, 27.1, 29.4, 25.3, 23.3, 24.8, NA, NA, NA, 24.5, NA, NA),
G2 = c(23.3, 25.1, 24.6, 27.3, 25.2, 23.6, 23.6, 24.3, 23.3, 25.6, 24.4, 26.4, 22.7, 22.4, NA), G3 = c(24, 25.7, 24.5, 27.7, 25.6, 24.1, 23.9, 25.2, 23.7, 24.7, 23.3, 25.4, 22.7, NA, NA),
ref = c(23.2, 23.2, 23.3, 22.9, 23.4, 23.4, 23.2, 22.9, 23.3, 23.4, 23.2, 23.5, 23.2, 23.1, 23.4),
G1_t1 = c(NA, NA, NA, 4, 3.7, 6, 2.1, 0.4, 1.5, NA, NA, NA, 1.3, NA, NA), G2_t1 = c(0.1, 2, 1.3, 4.4, 1.8, 0.2, 0.5, 1.4, 0, 2.1, 1.2, 2.9, -0.4, -0.7, NA),
G3_t1 = c(0.8, 2.5, 1.2, 4.8, 2.2, 0.7, 0.7, 2.3, 0.4, 1.2, 0, 1.9, -0.5, NA, NA),
G1_t1_t2 = c(NA, NA, NA, 3.7, 3.3, 5.6, 1.7, 0, 1.1, NA, NA, NA, 0.9, NA, NA),
G2_t1_t2 = c(0.7, 2.6, 2, 5, 2.5, 0.9, 1.1, 2, 0.6, 2.8, 1.8, 3.6, 0.2, 0, NA),
G3_t1_t2 = c(1.3, 3, 1.7, 5.3, 2.7, 1.2, 1.2, 2.8, 0.9, 1.7, 0.6, 2.4, 0, NA, NA),
G1_t1_t2_t3 = c(NA, NA, NA, 0.1, 0.1, 0, 0.3, 1, 0.5, NA, NA, NA, 0.5, NA, NA),
G2_t1_t2_t3 = c(0.6, 0.2, 0.3, 0, 0.2, 0.5, 0.5, 0.2, 0.6, 0.1, 0.3, 0.1, 0.8, 1, NA),
G3_t1_t2_t3 = c(0.4, 0.1, 0.3, 0, 0.2, 0.4, 0.4, 0.1, 0.5, 0.3, 0.7, 0.2, 1, NA, NA)),
.Names = c("sample", "type", "condition", "replicate", "G1", "G2", "G3", "ref", "G1_t1", "G2_t1", "G3_t1", "G1_t1_t2", "G2_t1_t2", "G3_t1_t2", "G1_t1_t2_t3", "G2_t1_t2_t3", "G3_t1_t2_t3"),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -15L),
spec = structure(list(cols = structure(list(
sample = structure(list(), class = c("collector_integer", "collector")),
type = structure(list(), class = c("collector_character", "collector")),
condition = structure(list(), class = c("collector_character", "collector")),
replicate = structure(list(), class = c("collector_integer", "collector")),
G1 = structure(list(), class = c("collector_double", "collector")),
G2 = structure(list(), class = c("collector_double", "collector")),
G3 = structure(list(), class = c("collector_double", "collector")),
ref = structure(list(), class = c("collector_double", "collector")),
G1_t1 = structure(list(), class = c("collector_double", "collector")),
G2_t1 = structure(list(), class = c("collector_double", "collector")),
G3_t1 = structure(list(), class = c("collector_double", "collector")),
G1_t1_t2 = structure(list(), class = c("collector_double", "collector")),
G2_t1_t2 = structure(list(), class = c("collector_double", "collector")),
G3_t1_t2 = structure(list(), class = c("collector_double", "collector")),
G1_t1_t2_t3 = structure(list(), class = c("collector_double", "collector")),
G2_t1_t2_t3 = structure(list(), class = c("collector_double", "collector")),
G3_t1_t2_t3 = structure(list(), class = c("collector_double", "collector"))),
.Names = c("sample", "type", "condition", "replicate", "G1", "G2", "G3", "ref", "G1_t1", "G2_t1", "G3_t1", "G1_t1_t2", "G2_t1_t2", "G3_t1_t2", "G1_t1_t2_t3", "G2_t1_t2_t3", "G3_t1_t2_t3")),
default = structure(list(), class = c("collector_guess", "collector"))),
.Names = c("cols", "default"), class = "col_spec"))
然后我想这样做:
# For each sample/type/condition combination, summarise every column whose
# name contains "t3": its mean and its standard error, both ignoring NA.
example %>%
# Pack all columns other than sample, type and condition into the list-column
# `data` (one nested tibble per combination).
nest(-sample, -type, -condition) %>%
# `mean`: per nested tibble, the NA-omitting mean of each "t3" column.
mutate(mean = map(data, ~ summarise_at(.x, vars(contains("t3")),
funs(mean_expr = mean(., na.rm = TRUE) ))),
# `se`: standard deviation divided by the square root of the number of
# non-missing values, so NA entries do not count toward n.
se = map(data, ~ summarise_at(.x, vars(contains("t3")),
funs(se = sd(., na.rm = TRUE)/sqrt(sum(!is.na(.))) ))) ) %>%
# Expand both list-columns back into regular columns.
unnest(mean, se)
这段代码在这个 MWE 中运行正常。但是,当我在自己的函数中使用相同的代码时,结果的最后一行全是 NA,而前面各行都正常。我不明白为什么会这样。