Question

我想根据发现的物种，统计各个类别出现的频数。

下面是数据框。我想按每个 type_gite 统计记录数，并分别统计：仅发现 aegypti 的记录、仅发现 albopictus 的记录，以及两个物种同时被发现（混合）的记录。

type_gite            aegypti          albopictus                    total 
recipient_abandonne       19                   0                       19
recipient_stockage         0                   2                        2
recipient_stockage         8                   0                        8
recipient_stockage        36                   0                       36
recipient_stockage        13                   0                       13
recipient_stockage         1                   3                        4
autres                     0                   1                        1
autres                     0                   9                        9
recipient_abandonne        3                   0                        3

我希望得到的结果如下：

type_gite             aegypti  albopictus  mixed  total
recipient_abandonne         2           0      0      2
recipient_stockage          3           1      1      5
autres                      0           2      0      2
total                       5           3      1      9

哪种代码或聚合函数最适合？

Answer 1

这是我想出的：

# Count, for each type_gite, the sites where only aegypti was collected, only
# albopictus was collected, or both species were collected together ("Mixed"),
# then append a Total row and a Total column.

# Load dependencies up front: case_when() and %>% come from dplyr,
# adorn_totals() from janitor (rownames_to_column() is called via tibble::).
library(dplyr)
library(janitor)

# Create data (one row per sampled site)
df <- data.frame(
  type_gite = c(
    "recipient_abandonne", "recipient_stockage", "recipient_stockage",
    "recipient_stockage", "recipient_stockage", "recipient_stockage",
    "autres", "autres", "recipient_abandonne"
  ),
  aegyti_collected = c(19, 0, 8, 36, 13, 1, 0, 0, 3),
  albopictus_collected = c(0, 2, 0, 0, 0, 3, 1, 9, 0),
  total_collected = c(19, 2, 8, 36, 13, 4, 1, 9, 3)
)

# Classify each site. case_when() checks conditions in order, so "Mixed"
# (both counts > 0) comes first; a site with neither species is labelled
# "None" instead of being counted as a single-species site.
df$label <- case_when(
  df$aegyti_collected > 0 & df$albopictus_collected > 0 ~ "Mixed",
  df$aegyti_collected > 0 ~ "Aegyti Only",
  df$albopictus_collected > 0 ~ "Albopictus Only",
  TRUE ~ "None"
)

# Frequency table: rows are type_gite, columns are labels. data.frame() makes
# the column names syntactic (e.g. "Aegyti Only" becomes "Aegyti.Only").
df <- data.frame(rbind(table(df$type_gite, df$label)))

# Move the row names back into a type_gite column, then add both the Total
# row and the Total column requested in the question.
df <- df %>%
  tibble::rownames_to_column(var = "type_gite") %>%
  adorn_totals(c("row", "col"))

Answer 2

您可以使用dplyr和janitor（以获得Total行）来实现所需的功能：

#install.packages("janitor")
#install.packages("dplyr") 
library(dplyr)

df1 %>%
  # The precomputed total is not needed for the per-species indicators.
  select(-total_collected) %>%
  group_by(type_gite) %>%
  # mixed is 1 when both species were collected on the same row, else 0.
  mutate(mixed = as.integer(aegyti_collected * albopictus_collected > 0)) %>%
  # Turn each species count into a 0/1 flag that is 1 only when that species
  # is present and the row is not mixed.
  mutate_at(
    vars(aegyti_collected:albopictus_collected),
    list(~ as.integer(. > 0) * !mixed)
  ) %>%
  # Sum the flags within each type_gite, then append a Total row and column.
  summarise_all(sum) %>%
  janitor::adorn_totals(c("row", "col"))

    #>            type_gite aegyti_collected albopictus_collected mixed Total
    #>               autres                0                    2     0     2
    #>  recipient_abandonne                2                    0     0     2
    #>   recipient_stockage                3                    1     1     5
    #>                Total                5                    3     1     9

数据：

# Example data: one row per sampled site, with the site type stored as a
# factor and the per-species and total counts stored as doubles.
df1 <- data.frame(
  type_gite = factor(
    c(
      "recipient_abandonne", "recipient_stockage", "recipient_stockage",
      "recipient_stockage", "recipient_stockage", "recipient_stockage",
      "autres", "autres", "recipient_abandonne"
    ),
    levels = c("autres", "recipient_abandonne", "recipient_stockage")
  ),
  aegyti_collected = c(19, 0, 8, 36, 13, 1, 0, 0, 3),
  albopictus_collected = c(0, 2, 0, 0, 0, 3, 1, 9, 0),
  total_collected = c(19, 2, 8, 36, 13, 4, 1, 9, 3)
)

（由 reprex 包 v0.2.1 于 2019-04-30 创建）

Answer 3

我认为您正在寻找类似的东西。我以一些随机虚拟数据为例。

// Looks up the element whose id is "BUTTON IDs" and calls click() on it.
// NOTE(review): this is JavaScript, not the R example with dummy data that
// the answer text announces — presumably page-scraping residue; confirm
// against the original answer.
document.getElementById("BUTTON IDs").click();

计算变量组的频率

3 个答案: