我有一个包含样本分类的数据框:
Seq_ID Family Father Mother Sex Role Type
<chr> <dbl> <chr> <chr> <chr> <chr> <chr>
1 SSC02219 11000. 0 0 Male Father Parent
2 SSC02217 11000. 0 0 Female Mother Parent
3 SSC02254 11000. SSC02219 SSC02217 Male Proband Child
4 SSC02220 11000. SSC02219 SSC02217 Female Sibling Child
5 SSC02184 11001. 0 0 Male Father Parent
6 SSC02181 11001. 0 0 Female Mother Parent
7 SSC02178 11001. SSC02184 SSC02181 Male Proband Child
8 SSC03092 11002. 0 0 Male Father Parent
9 SSC03078 11002. 0 0 Female Mother Parent
10 SSC03070 11002. SSC03092 SSC03078 Female Proband Child
目前,为了从上面的原始数据框(a)得到按 Role 和 Sex 汇总的计数表(b),我必须这样做:
# Load the tidyverse (dplyr, tibble, ...) and janitor, which supplies
# tabyl() and adorn_totals().
library(tidyverse)
library(janitor)
# Part 1: cross-tabulate Role by Sex and append a "Total" row and column.
sample.df %>% tabyl(Role, Sex) %>%
adorn_totals(where=c("row", "col") ) %>%
# Reorder the columns by position (presumably to Role, Total, Male, Female;
# this depends on the column order tabyl() produced).
as.tibble() %>% select(1,4,3,2) %>%
# Part 2
# Label every row by hand; the vector has to match the table's row order
# exactly (five rows, the last one being the totals row).
mutate(type=c("parent", "parent", "child", "child", " ")) %>%
# Join the table with a per-type sum of Total. No `by` is given, so the join
# falls back to the columns both tables share (presumably just `type`).
inner_join(., group_by(., type) %>%
summarise(total=sum(Total))) %>%
# Move the two newly added columns (positions 5 and 6) to the front.
select(5,6,1,2,3,4)
我觉得这种做法相当笨拙(像是临时拼凑出来的)。在 dplyr 中有没有更直接的方法来完成第二部分(代码中的 Part 2)?
答案 0 :(得分:2)
这是一个选项。`as.tibble` 没有必要。当有很多类别需要归入 "parent" 或 "child" 时,用 `mutate` 配合 `case_when` 更易于管理。我们不需要 `inner_join`,因为可以使用 `group_by` 和 `mutate` 来计算 `total`。最后,我喜欢在使用 `select` 函数时写出列名,因为这样将来更容易阅读;不过你也可以使用列索引,只要你确信无论在管道操作中加入哪些新的分析步骤,列索引都保持不变。
library(tidyverse)
library(janitor)

# Part 1: count samples per Role and Sex, append a "Total" row and column,
# and order the columns by name instead of by position.
role_by_sex <- sample.df %>%
  tabyl(Role, Sex) %>%
  adorn_totals(where = c("row", "col")) %>%
  select(Role, Total, Male, Female)

# Part 2
# Classify each Role; any row that matches neither set (the totals row)
# gets the single-space label.
typed_counts <- mutate(
  role_by_sex,
  type = case_when(
    Role %in% c("Mother", "Father") ~ "parent",
    Role %in% c("Proband", "Sibling") ~ "child",
    TRUE ~ " "
  )
)

# A grouped mutate() keeps every row while attaching the per-type sum of
# Total, so no separate summarise() + join is needed.
typed_counts %>%
  group_by(type) %>%
  mutate(total = sum(Total)) %>%
  ungroup() %>%
  select(type, total, Role, Total, Male, Female)
# # A tibble: 5 x 6
# type total Role Total Male Female
# <chr> <dbl> <chr> <dbl> <dbl> <dbl>
# 1 parent 6. Father 3. 3. 0.
# 2 parent 6. Mother 3. 0. 3.
# 3 child 4. Proband 3. 2. 1.
# 4 child 4. Sibling 1. 0. 1.
# 5 " " 10. Total 10. 5. 5.
**数据**
library(tidyverse)
library(janitor)

# Rebuild the example data from its printed form. The header line names seven
# columns while every data line has eight fields, so read.table() takes the
# leading index as row names. The result is then converted to a tibble.
sample.df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "Seq_ID Family Father Mother Sex Role Type
1 SSC02219 11000 0 0 Male Father Parent
2 SSC02217 11000 0 0 Female Mother Parent
3 SSC02254 11000 SSC02219 SSC02217 Male Proband Child
4 SSC02220 11000 SSC02219 SSC02217 Female Sibling Child
5 SSC02184 11001 0 0 Male Father Parent
6 SSC02181 11001 0 0 Female Mother Parent
7 SSC02178 11001 SSC02184 SSC02181 Male Proband Child
8 SSC03092 11002 0 0 Male Father Parent
9 SSC03078 11002 0 0 Female Mother Parent
10 SSC03070 11002 SSC03092 SSC03078 Female Proband Child "
) %>%
  as_tibble()
答案 1 :(得分:1)
另一种选择是使用 knitr 的 `kable` 并配合 kableExtra 生成 HTML 表格,再用 `collapse_rows` 合并重复的单元格:
library(janitor)
library(tidyverse)
library(kableExtra)
library(knitr)

# Role-by-Sex counts with a "Total" row and column appended.
role_counts <- sample.df %>%
  tabyl(Role, Sex) %>%
  adorn_totals(where = c("row", "col"))

# Part 2
# Tag each Role as parent/child (empty string for anything else), then
# attach the per-type sum of Total to every row of that type.
summary_tbl <- role_counts %>%
  mutate(
    type = case_when(
      Role %in% c("Father", "Mother") ~ "parent",
      Role %in% c("Proband", "Sibling") ~ "child",
      TRUE ~ ""
    )
  ) %>%
  group_by(type) %>%
  mutate(total = sum(Total)) %>%
  ungroup()

# Render as a styled HTML table and merge repeated cells in columns 5 and 6,
# i.e. the `type` and `total` columns appended above.
summary_tbl %>%
  kable(format = "html") %>%
  kable_styling(bootstrap_options = c("striped", "bordered")) %>%
  collapse_rows(columns = c(5, 6))
输出为:
示例数据:
# Example data: ten samples from three families, one column per field.
# Built directly as a list carrying the tibble class vector, so no package
# is needed to construct it.
sample.df <- structure(
  list(
    Seq_ID = c(
      "SSC02219", "SSC02217", "SSC02254", "SSC02220", "SSC02184",
      "SSC02181", "SSC02178", "SSC03092", "SSC03078", "SSC03070"
    ),
    Family = c(
      11000L, 11000L, 11000L, 11000L, 11001L,
      11001L, 11001L, 11002L, 11002L, 11002L
    ),
    Father = c(
      "0", "0", "SSC02219", "SSC02219", "0",
      "0", "SSC02184", "0", "0", "SSC03092"
    ),
    Mother = c(
      "0", "0", "SSC02217", "SSC02217", "0",
      "0", "SSC02181", "0", "0", "SSC03078"
    ),
    Sex = c(
      "Male", "Female", "Male", "Female", "Male",
      "Female", "Male", "Male", "Female", "Female"
    ),
    Role = c(
      "Father", "Mother", "Proband", "Sibling", "Father",
      "Mother", "Proband", "Father", "Mother", "Proband"
    ),
    Type = c(
      "Parent", "Parent", "Child", "Child", "Parent",
      "Parent", "Child", "Parent", "Parent", "Child"
    )
  ),
  # Character row names "1" through "10".
  row.names = as.character(1:10),
  class = c("tbl_df", "tbl", "data.frame")
)