我想使一个变量与data.frame中的所有其他变量的交叉表。
library(tidyverse)
library(janitor)
humans <- starwars %>%
filter(species == "Human")
humans %>%
janitor::tabyl(gender, eye_color)
gender blue blue-gray brown dark hazel yellow
female 3 0 5 0 1 0
male 9 1 12 1 1 2
humans %>%
dplyr::select_if(is.character) %>%
dplyr::select(-name, -gender) %>%
purrr::map(.f = ~janitor::tabyl(dat = humans, gender, .x))
Error: Unknown columns `blond`, `none`, `brown`, `brown, grey`, `brown` and ...
Call `rlang::last_error()` to see a backtrace
答案 0 :(得分:3)
假设我们需要带有“性别”的成对表
humans %>%
dplyr::select_if(is.character) %>%
dplyr::select(-name, -gender) %>%
imap(~ tibble(!! .y := .x) %>%
mutate(gender = humans[['gender']]) %>%
janitor::tabyl(!!rlang::sym(names(.)[1]), gender))
#$hair_color
# hair_color female male
# auburn 1 0
# auburn, grey 0 1
# auburn, white 0 1
# black 1 7
# blond 0 3
# brown 6 8
# brown, grey 0 1
# grey 0 1
# none 0 3
# white 1 1
#$skin_color
# skin_color female male
# dark 0 4
# fair 3 13
# light 6 5
#...
xtable::xtableList
要求list
元素中的名称必须相同。为此,请在list
元素上更改第一列名称,然后创建一个标识符列
library(xtable)
humans %>%
dplyr::select_if(is.character) %>%
dplyr::select(-name, -gender) %>%
imap(~ tibble(!! .y := .x) %>%
mutate(gender = humans[['gender']]) %>%
janitor::tabyl(!!rlang::sym(names(.)[1]), gender) %>%
mutate(colNname = .y) %>%
rename_at(1, ~ 'Variable')) %>%
xtableList
答案 1 :(得分:3)
tably
将名称作为参数,然后您将向量传递给它。
如果使用imap
,则可以访问列的名称,可以将其转换为符号,并且由于janitor
支持准引号,因此您可以编写:
humans %>%
select_if(is.character) %>%
select(-name, -gender) %>%
imap(.f = ~janitor::tabyl(dat = humans, !!sym(.y), gender))
#$`hair_color`
# hair_color female male
# auburn 1 0
# auburn, grey 0 1
# auburn, white 0 1
# black 1 7
# blond 0 3
# brown 6 8
# brown, grey 0 1
# grey 0 1
# none 0 3
# white 1 1
#
# $skin_color
# skin_color female male
# dark 0 4
# fair 3 13
有趣的是,tabyl.data.frame
调用了一个对符号起作用的未导出函数,因此通过直接调用它,我们可以跳过取消引号的操作并使用基数R。
cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
lapply(cols, function(x) janitor:::tabyl_2way(humans, as.name(x), quote(gender)))
# [[1]]
# hair_color female male
# auburn 1 0
# auburn, grey 0 1
# auburn, white 0 1
# black 1 7
# blond 0 3
# brown 6 8
# brown, grey 0 1
# grey 0 1
# none 0 3
# white 1 1
#
# [[2]]
# skin_color female male
# dark 0 4
要使其与xtable
一起使用,@ akrun的建议在这里也适用:
humans %>%
select_if(is.character) %>%
select(-name, -gender) %>%
imap(.f = ~tabyl(dat = humans, !!sym(.y), gender) %>% rename_at(1,~"x")) %>%
xtableList
或
cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
l <- lapply(cols, function(x) {
res <- janitor:::tabyl_2way(humans, as.name(x), quote(gender))
names(res)[1] <- "x"
res
})
xtableList(l)
答案 2 :(得分:0)
仅使用data.table
(和一个%>%
):
library(data.table)
swDT <- data.table(starwars)
setkey(swDT, gender, hair_color)
swDT[species == "Human"
][CJ(gender, hair_color, unique =TRUE), .N, .EACHI] %>%
dcast(hair_color ~ gender, value.var = "N")
hair_color female male
1: auburn 1 0
2: auburn, grey 0 1
3: auburn, white 0 1
4: black 1 7
5: blond 0 3
6: brown 6 8
7: brown, grey 0 1
8: grey 0 1
9: none 0 3
10: white 1 1
答案 3 :(得分:0)
starwars
中的列表列增加了复杂性,但这是mtcars
的示例:将交叉表cyl
与所有其他变量进行比较。
mtcars %>%
tidyr::gather(var, value, -cyl) %>%
janitor::tabyl(cyl, value, var, show_missing_levels = FALSE) %>%
purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y))
返回交叉表列表。 cyl x am,cyl x carb等:
$`am`
am
cyl 0 1
4 3 8
6 4 3
8 12 2
$carb
carb
cyl 1 2 3 4 6 8
4 5 6 0 0 0 0
6 2 0 0 4 1 0
8 0 4 3 6 0 1
...
如果您要进一步处理这些data.frame,则可能会更友好地找到该标题选项:
purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y, placement = "combined"))
哪个给你:
$vs
cyl/vs 0 1
4 1 10
6 3 4
8 14 0