我有以下数据框
# Example data (dput output): an 8-row data.frame.
# Sample_1 .. Sample_6 are stored as factors, not numbers: their levels are
# numeric-looking strings (some with a leading space, e.g. " 3") plus a
# "soil" level that no row uses, so they must be converted via
# as.character() before any arithmetic.
# Genus and Group are plain character columns (Group is "Soil" or "Water").
a <-
structure(list(Sample_1 = structure(c(Bacteria_A = 1L, Bacteria_B = 2L,
Bacteria_C = 3L, `4` = 1L, `5` = 2L, `6` = 2L, `7` = 3L, `8` = 1L
), .Label = c("12", "23", "25", "soil"), class = "factor"), Sample_2 = structure(c(Bacteria_A = 3L,
Bacteria_B = 2L, Bacteria_C = 1L, `4` = 3L, `5` = 2L, `6` = 2L,
`7` = 1L, `8` = 3L), .Label = c("10", "12", "23", "soil"), class = "factor"),
Sample_3 = structure(c(Bacteria_A = 2L, Bacteria_B = 1L,
Bacteria_C = 3L, `4` = 2L, `5` = 1L, `6` = 1L, `7` = 3L,
`8` = 2L), .Label = c("33", "45", "50", "soil"), class = "factor"),
Sample_4 = structure(c(Bacteria_A = 1L, Bacteria_B = 3L,
Bacteria_C = 2L, `4` = 1L, `5` = 3L, `6` = 3L, `7` = 2L,
`8` = 1L), .Label = c("32", "38", "44", "soil"), class = "factor"),
Sample_5 = structure(c(Bacteria_A = 2L, Bacteria_B = 3L,
Bacteria_C = 1L, `4` = 2L, `5` = 3L, `6` = 3L, `7` = 1L,
`8` = 2L), .Label = c(" 3", "34", "55", "soil"), class = "factor"),
Sample_6 = structure(c(Bacteria_A = 1L, Bacteria_B = 2L,
Bacteria_C = 3L, `4` = 1L, `5` = 2L, `6` = 2L, `7` = 3L,
`8` = 1L), .Label = c(" 0", " 3", "34", "soil"), class = "factor"),
Genus = c("Bacteria_A", "Bacteria_B", "Bacteria_C", "Bacteria_A",
"Bacteria_B", "Bacteria_B", "Bacteria_C", "Bacteria_A"),
Group = c("Soil", "Soil", "Soil", "Water", "Water", "Water",
"Water", "Water")), row.names = c(NA, 8L), class = "data.frame")
> a
Sample_1 Sample_2 Sample_3 Sample_4 Sample_5 Sample_6 Genus Group
1 12 23 45 32 34 0 Bacteria_A Soil
2 23 12 33 44 55 3 Bacteria_B Soil
3 25 10 50 38 3 34 Bacteria_C Soil
4 12 23 45 32 34 0 Bacteria_A Water
5 23 12 33 44 55 3 Bacteria_B Water
6 23 12 33 44 55 3 Bacteria_B Water
7 25 10 50 38 3 34 Bacteria_C Water
8 12 23 45 32 34 0 Bacteria_A Water
我想比较每种细菌在土壤(Soil)和水(Water)这两种处理之间的差异(处理效应)。例如,用 wilcox.test 比较 Bacteria_A 在土壤组和水组中的数值。我该怎么做?
到目前为止,我尝试用 pivot_wider 把数据框转换为宽格式,以细菌名称作为列名:
# Names of the sample columns: every column except the last two
# (Genus and Group). head(x, -2L) drops the trailing two elements and,
# unlike 1:(ncol(a) - 2), never counts backwards when there are too few
# columns (1:0 would yield c(1, 0) and silently select the first column).
nms <- head(colnames(a), -2L)
> nms
[1] "Sample_1" "Sample_2" "Sample_3" "Sample_4" "Sample_5" "Sample_6"
# NOTE(review): failing attempt from the question, kept verbatim so that the
# error shown below stays reproducible.
# Why it fails: there is no `%>%` after the pivot_wider() call, so the
# pipeline ends on that line and `group_by(name) %>% ...` is evaluated as a
# separate expression. group_by() then receives `name` as its data argument,
# which does not exist -> "object 'name' not found".
# Even with the pipe restored, `name` and `value` are the default output
# columns of pivot_longer(), not of pivot_wider(), and the Group column needed
# by `value ~ Group` must remain a column for the test to work.
d <- a %>%
pivot_wider(names_from = Genus, values_from=nms )
group_by(name) %>%
summarise(mean_Soil = mean(value[Group == "Soil"]),
mean_Water= mean(value[Group == "Water"]),
pvalue = wilcox.test(value ~ Group)$p.value)
Error in group_by(name) : object 'name' not found
预期输出将类似于以下内容(此示例中为假值)。只是为了说明所需的输出。
#> # A tibble: 3 x 4
#> name mean_soil mean_water pvalue
#> <chr> <dbl> <dbl> <dbl>
#> 1 Bacteria_A 24.3 24 0.936
#> 2 Bacteria_B 28.3 29 0.873
#> 3 Bacteria_C 26.7 23.8 0.748
答案 0 :(得分:0)
您需要使用 pivot_longer 而不是 pivot_wider,因为 summarise 是按列进行计算的。然后将所有值转换为数值型(在您的示例中它们是因子 factor):
# Long format: one row per (original row, sample) pair. Genus and Group are
# kept as identifier columns; the sample column names go to `name` and the
# measurements to `value` (the pivot_longer() defaults).
a_longer <- a %>%
  pivot_longer(cols = -c(Genus, Group)) %>%
  # The sample columns are factors, so go through as.character() first:
  # as.numeric() on a factor would return the internal level codes instead
  # of the numbers shown in the labels.
  mutate(value = as.numeric(as.character(value)))
由于 mean 和 wilcox.test 实际上使用了两种不同的分组(前者按 Genus 和 Group 分组,后者只按 Genus 分组),因此我建议从这里将 summarise 分成两部分,然后再把两张结果表连接在一起:
# Per-genus summary table: the mean of `value` in each Group (one
# mean_<Group> column per group) joined with the p-value of a Wilcoxon test
# comparing the groups within that genus.
full_join(
  # Part 1: means need the finer grouping (Genus x Group), then are spread
  # into one column per Group.
  a_longer %>%
    group_by(Genus, Group) %>%
    summarise(mean = mean(value)) %>%
    # summarise() only peels off the last grouping variable; ungroup so the
    # reshaped table (and therefore the join result) is not left grouped by
    # Genus.
    ungroup() %>%
    pivot_wider(
      names_from = Group,
      names_prefix = "mean_",
      values_from = mean
    ),
  # Part 2: the test needs both groups in the same slice, so group by Genus
  # only and let the formula split on Group.
  a_longer %>%
    group_by(Genus) %>%
    summarise(pvalue = wilcox.test(value ~ Group)$p.value),
  # Explicit key: avoids the "Joining, by = ..." message and guards against
  # accidentally joining on additional shared columns.
  by = "Genus"
)