我在Swirl中遇到了问题:《R编程环境》课程第2周,第12章“数据处理”。
Swirl希望我制作一张看起来完全像这样的表:
Pclass agecat Sex N survivors perc_survived
<int> <fctr> <chr> <int> <int> <dbl>
1 Under 15 female 2 1 50.000000
1 Under 15 male 3 3 100.000000
1 15 to 50 female 70 68 97.142857
1 15 to 50 male 72 32 44.444444
1 Over 50 female 13 13 100.000000
1 Over 50 male 26 5 19.230769
到目前为止,
>library(dplyr)
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)
>
> head (titanic_4)
# A tibble: 6 x 6
# Groups: Pclass, agecat [3]
Pclass agecat Sex N survivors perc_survived
<int> <fctr> <chr> <int> <int> <dbl>
1 1 Under 15 female 2 1 50.000000
2 1 Under 15 male 3 3 100.000000
3 1 15 to 50 female 70 68 97.142857
4 1 15 to 50 male 72 32 44.444444
5 1 Over 50 female 13 13 100.000000
6 1 Over 50 male 26 5 19.230769
>
现在,看起来我需要弄清楚如何在管道中再添加一个步骤,使表格的所有列都居中对齐。
到目前为止,我已经尝试将scale(center = TRUE, scale = TRUE)放到管道的末尾,结果报错:Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric,因为scale仅适用于数值矩阵。
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)%>%
+ scale(center = TRUE, scale = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric
>
> head (titanic_4)
# A tibble: 6 x 6
# Groups: Pclass, agecat [3]
Pclass agecat Sex N survivors perc_survived
<int> <fctr> <chr> <int> <int> <dbl>
1 1 Under 15 female 2 1 50.000000
2 1 Under 15 male 3 3 100.000000
3 1 15 to 50 female 70 68 97.142857
4 1 15 to 50 male 72 32 44.444444
5 1 Over 50 female 13 13 100.000000
6 1 Over 50 male 26 5 19.230769
接下来,我尝试将format(justify = center)放到管道的末尾,但结果并不是我想要的。
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)%>%
+ format(justify=center)
> print (titanic_4)
[1] "# A tibble: 18 x 6" "# Groups: Pclass, agecat [9]"
[3] " Pclass agecat Sex N survivors perc_survived" " <int> <fctr> <chr> <int> <int> <dbl>"
[5] " 1 1 Under 15 female 2 1 50.0000000" " 2 1 Under 15 male 3 3 100.0000000"
[7] " 3 1 15 to 50 female 70 68 97.1428571" " 4 1 15 to 50 male 72 32 44.4444444"
[9] " 5 1 Over 50 female 13 13 100.0000000" " 6 1 Over 50 male 26 5 19.2307692"
[11] " 7 2 Under 15 female 10 10 100.0000000" " 8 2 Under 15 male 9 9 100.0000000"
[13] " 9 2 15 to 50 female 61 56 91.8032787" "10 2 15 to 50 male 78 5 6.4102564"
[15] "11 2 Over 50 female 3 2 66.6666667" "12 2 Over 50 male 12 1 8.3333333"
[17] "13 3 Under 15 female 27 13 48.1481481" "14 3 Under 15 male 27 9 33.3333333"
[19] "15 3 15 to 50 female 74 33 44.5945946" "16 3 15 to 50 male 217 29 13.3640553"
[21] "17 3 Over 50 female 1 1 100.0000000" "18 3 Over 50 male 9 0 0.0000000"
这同样不是Swirl想要的结果。接下来,我尝试了format.data.frame(digits = 6, justify = "centre", width = 10)
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)%>%
+ format.data.frame(digits=6, justify= "centre", width=10)
> head (titanic_4)
Pclass agecat Sex N survivors perc_survived
1 1 Under 15 female 2 1 50.00000
2 1 Under 15 male 3 3 100.00000
3 1 15 to 50 female 70 68 97.14286
4 1 15 to 50 male 72 32 44.44444
5 1 Over 50 female 13 13 100.00000
6 1 Over 50 male 26 5 19.23077
接下来我尝试了format(digits = 6, justify = "centre", width = 10)
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)%>%
+ format(digits=6, justify= "centre", width=10)
> head (titanic_4)
[1] "# A\n# tibble:\n# 18 x\n# 6" "# Groups: \n# Pclass,\n# agecat\n# [9]"
[3] " Pclass" " <int>"
[5] " 1 1" " 2 1"
接下来是
> titanic_4 <- titanic %>%
+ select(Survived, Pclass, Age, Sex) %>%
+ filter(!is.na(Age)) %>%
+ mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150),
+ include.lowest = TRUE,
+ labels = c("Under 15", "15 to 50",
+ "Over 50"))) %>%
+ group_by(Pclass,agecat,Sex) %>%
+ summarize(N=n(), survivors = sum(Survived))%>%
+ mutate(perc_survived = survivors/N*100)%>%
+ format(justify= "centre")
> head (titanic_4)
[1] "# A tibble: 18 x 6" "# Groups: Pclass, agecat [9]"
[3] " Pclass agecat Sex N survivors perc_survived" " <int> <fctr> <chr> <int> <int> <dbl>"
[5] " 1 1 Under 15 female 2 1 50.0000000" " 2 1 Under 15 male 3 3 100.0000000"
对于如何让我的列居中对齐、使其与问题开头的表格一致,大家有什么想法吗?
谢谢,
德鲁
当我试图将我的答案提交给Swirl时,Swirl给了我一个提示。
> submit()
| Sourcing your script...
| Keep trying!
| Check the example output in the comments of the script I opened for you. Make sure the columns in your output are in the same order as shown
| in the example output in the script comments. Try running your code in the console and printing out the result to determine whether it's
| successfully generating the desired final data frame before you submit it.
来自评论的建议:42先生注意到我的表包含rownames,而Swirl的表没有rownames。我尝试按照他的建议删除rownames,但都没有成功。
我试图关闭head中的rownames:
> head(titanic_4,rownames=FALSE)
# A tibble: 6 x 6
# Groups: Pclass, agecat [3]
Pclass agecat Sex N survivors perc_survived
<int> <fctr> <chr> <int> <int> <dbl>
1 1 Under 15 female 2 1 50.000000
2 1 Under 15 male 3 3 100.000000
3 1 15 to 50 female 70 68 97.142857
4 1 15 to 50 male 72 32 44.444444
5 1 Over 50 female 13 13 100.000000
6 1 Over 50 male 26 5 19.230769
我也试过print:
> print( titanic_4, row.names=FALSE)
# A tibble: 18 x 6
# Groups: Pclass, agecat [9]
Pclass agecat Sex N survivors perc_survived
<int> <fctr> <chr> <int> <int> <dbl>
1 1 Under 15 female 2 1 50.0000000
2 1 Under 15 male 3 3 100.0000000
3 1 15 to 50 female 70 68 97.1428571
4 1 15 to 50 male 72 32 44.4444444
5 1 Over 50 female 13 13 100.0000000
6 1 Over 50 male 26 5 19.2307692
print.data.table:
> print.data.table(titanic_4,row.names=FALSE)
Error in print.data.table(titanic_4, row.names = FALSE) :
could not find function "print.data.table"