我正在寻找一种在 R 中高效地推导所有可能组合的方法。我有一个包含 10 列的数据框,想基于其中的 1 列或 2 列,统计取值为 Reported 的所有组合的出现次数(例如,在 04:00 时 whit_spouse 和 whit_alone 同时等于 Reported 的情况)。
# Sample data from the question, in dput() form: a 6-row data.frame with the
# columns id, time, whereat and seven whit_* factors whose levels are
# "Not reported" / "Reported". Every row here has time "04:00" (level 1 of
# the 144 ten-minute slots, which start at 04:00 and wrap round to 03:50)
# and whereat "At home".
# NOTE(review): the expression is not assigned to a name; the answers further
# down operate on an object called `df` -- presumably this data; confirm.
structure(list(id = 1:6, time = structure(c(1L, 1L, 1L, 1L, 1L,
1L), .Label = c("04:00", "04:10", "04:20", "04:30", "04:40",
"04:50", "05:00", "05:10", "05:20", "05:30", "05:40", "05:50",
"06:00", "06:10", "06:20", "06:30", "06:40", "06:50", "07:00",
"07:10", "07:20", "07:30", "07:40", "07:50", "08:00", "08:10",
"08:20", "08:30", "08:40", "08:50", "09:00", "09:10", "09:20",
"09:30", "09:40", "09:50", "10:00", "10:10", "10:20", "10:30",
"10:40", "10:50", "11:00", "11:10", "11:20", "11:30", "11:40",
"11:50", "12:00", "12:10", "12:20", "12:30", "12:40", "12:50",
"13:00", "13:10", "13:20", "13:30", "13:40", "13:50", "14:00",
"14:10", "14:20", "14:30", "14:40", "14:50", "15:00", "15:10",
"15:20", "15:30", "15:40", "15:50", "16:00", "16:10", "16:20",
"16:30", "16:40", "16:50", "17:00", "17:10", "17:20", "17:30",
"17:40", "17:50", "18:00", "18:10", "18:20", "18:30", "18:40",
"18:50", "19:00", "19:10", "19:20", "19:30", "19:40", "19:50",
"20:00", "20:10", "20:20", "20:30", "20:40", "20:50", "21:00",
"21:10", "21:20", "21:30", "21:40", "21:50", "22:00", "22:10",
"22:20", "22:30", "22:40", "22:50", "23:00", "23:10", "23:20",
"23:30", "23:40", "23:50", "00:00", "00:10", "00:20", "00:30",
"00:40", "00:50", "01:00", "01:10", "01:20", "01:30", "01:40",
"01:50", "02:00", "02:10", "02:20", "02:30", "02:40", "02:50",
"03:00", "03:10", "03:20", "03:30", "03:40", "03:50"), class = "factor"),
whereat = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("At home",
"Not At home"), class = "factor"), whit_spouse = structure(c(2L,
2L, 2L, 1L, 2L, 2L), .Label = c("Not reported", "Reported"
), class = "factor"), whit_alone = structure(c(1L, 1L, 1L,
2L, 1L, 1L), .Label = c("Not reported", "Reported"), class = "factor"),
whit_mother = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not reported",
"Reported"), class = "factor"), whit_father = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Not reported", "Reported"
), class = "factor"), whit_otherpeople = structure(c(1L,
1L, 2L, 1L, 1L, 1L), .Label = c("Not reported", "Reported"
), class = "factor"), whit_otherpeopleoutsidehh = structure(c(1L,
1L, 2L, 1L, 1L, 1L), .Label = c("Not reported", "Reported"
), class = "factor"), whit_child_con = structure(c(1L, 1L,
2L, 1L, 1L, 1L), .Label = c("Not reported", "Reported"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
输入示例:
id time whereat whit_spouse whit_alone whit_mother whit_father
1 04:00 At home Reported Not reported Reported Reported...
2 04:00 At home Reported Not reported Not reported Not reported ....
输出示例:
id time whereat Together (Reported) Frequency
1 04:00 At home whit_mother, whit_father 1
2 04:00 At home - 0
最后,我想确定最常见的组合,例如 Together 列中出现次数最多的那一组。
答案 0 :(得分:2)
您可以使用 tidyverse 来做到这一点。下面的代码按 `time`、`whit_spouse` 和 `whit_alone` 统计每种组合的出现次数。如果要对其他类型的变量进行汇总,可以使用 `summarize()`。
library(tidyverse)

# Count how often each (time, whit_alone, whit_spouse) combination occurs.
# count() does the grouping on the listed columns itself and, for an
# ungrouped input, returns an ungrouped result -- unlike
# group_by() %>% count(), which leaves the output grouped so that later
# verbs are silently applied per group.
# sort = TRUE orders the rows by descending count, so the most frequent
# combination is the first row. The counts are in column `n`.
df %>%
  count(time, whit_alone, whit_spouse, sort = TRUE)
答案 1 :(得分:1)
我们也可以使用 data.table:
library(data.table)

# setDT() converts df to a data.table by reference (df itself is changed,
# no copy is made).
setDT(df)

# .N is the number of rows in each group; `by` lists the columns whose
# value combinations define the groups.
df[, .N, by = .(time, whit_alone, whit_spouse)]