Question

我在 swirl 课程中遇到了问题：《R编程环境》第2周，第12章“数据处理”。

Swirl希望我制作一张看起来完全像这样的表：

 Pclass   agecat    Sex      N     survivors   perc_survived
 <int>   <fctr>    <chr>   <int>     <int>         <dbl>
   1    Under 15  female     2         1        50.000000
   1    Under 15    male     3         3       100.000000
   1    15 to 50  female    70        68        97.142857
   1    15 to 50    male    72        32        44.444444
   1    Over 50   female    13        13       100.000000
   1    Over 50     male    26         5        19.230769

到目前为止，我的代码如下：

>library(dplyr)
> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)
> 
> head (titanic_4)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769
>

现在，看起来我需要弄清楚如何在管道末尾再添加一个步骤，使表格的所有列都居中对齐。

到目前为止，我已经尝试把 scale(center = TRUE, scale = TRUE) 放到管道的末尾，结果报错：Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric，因为 scale 只适用于数值矩阵。

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     scale(center = TRUE, scale = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric
> 
> head (titanic_4)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769

接下来，我尝试把 format(justify = center) 放到管道的末尾，但结果并不是我想要的。

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(justify=center)
> print (titanic_4)
 [1] "# A tibble: 18 x 6"                                      "# Groups:   Pclass, agecat [9]"                         
 [3] "   Pclass   agecat    Sex     N survivors perc_survived" "    <int>   <fctr>  <chr> <int>     <int>         <dbl>"
 [5] " 1      1 Under 15 female     2         1    50.0000000" " 2      1 Under 15   male     3         3   100.0000000"
 [7] " 3      1 15 to 50 female    70        68    97.1428571" " 4      1 15 to 50   male    72        32    44.4444444"
 [9] " 5      1  Over 50 female    13        13   100.0000000" " 6      1  Over 50   male    26         5    19.2307692"
[11] " 7      2 Under 15 female    10        10   100.0000000" " 8      2 Under 15   male     9         9   100.0000000"
[13] " 9      2 15 to 50 female    61        56    91.8032787" "10      2 15 to 50   male    78         5     6.4102564"
[15] "11      2  Over 50 female     3         2    66.6666667" "12      2  Over 50   male    12         1     8.3333333"
[17] "13      3 Under 15 female    27        13    48.1481481" "14      3 Under 15   male    27         9    33.3333333"
[19] "15      3 15 to 50 female    74        33    44.5945946" "16      3 15 to 50   male   217        29    13.3640553"
[21] "17      3  Over 50 female     1         1   100.0000000" "18      3  Over 50   male     9         0     0.0000000"

这同样不是 swirl 想要的结果。接下来，我尝试了 format.data.frame(digits = 6, justify = "centre", width = 10)：

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format.data.frame(digits=6, justify= "centre", width=10)
> head (titanic_4)
      Pclass     agecat        Sex          N  survivors perc_survived
1          1  Under 15    female            2          1      50.00000
2          1  Under 15     male             3          3     100.00000
3          1  15 to 50    female           70         68      97.14286
4          1  15 to 50     male            72         32      44.44444
5          1  Over 50     female           13         13     100.00000
6          1  Over 50      male            26          5      19.23077

接下来我尝试了 format(digits = 6, justify = "centre", width = 10)：

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(digits=6, justify= "centre", width=10)
> head (titanic_4)
[1] "# A\n#   tibble:\n#   18 x\n#   6"             "# Groups:  \n#   Pclass,\n#   agecat\n#   [9]"
[3] "   Pclass"                                     "    <int>"                                    
[5] " 1      1"                                     " 2      1"

接下来我尝试了 format(justify = "centre")：

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(justify= "centre")
> head (titanic_4)
[1] "# A tibble: 18 x 6"                                      "# Groups:   Pclass, agecat [9]"                         
[3] "   Pclass   agecat    Sex     N survivors perc_survived" "    <int>   <fctr>  <chr> <int>     <int>         <dbl>"
[5] " 1      1 Under 15 female     2         1    50.0000000" " 2      1 Under 15   male     3         3   100.0000000"

有什么办法能让我的各列居中对齐，与问题开头那张表的列保持一致吗？

谢谢，

德鲁

当我试图把答案提交给 swirl 时，swirl 给了我一条提示。

> submit()

| Sourcing your script...


| Keep trying!

| Check the example output in the comments of the script I opened for you. Make sure the columns in your output are in the same order as shown
| in the example output in the script comments. Try running your code in the console and printing out the result to determine whether it's
| successfully generating the desired final data frame before you submit it.

根据评论中的建议：42先生注意到我的表带有行名（rownames），而 swirl 的示例表没有行名。我按照他的建议尝试去掉行名，但都没有成功。

我试图关闭head中的rownames：

> head(titanic_4,rownames=FALSE)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769

我也试过 print：

> print( titanic_4, row.names=FALSE)
# A tibble: 18 x 6
# Groups:   Pclass, agecat [9]
   Pclass   agecat    Sex     N survivors perc_survived
    <int>   <fctr>  <chr> <int>     <int>         <dbl>
 1      1 Under 15 female     2         1    50.0000000
 2      1 Under 15   male     3         3   100.0000000
 3      1 15 to 50 female    70        68    97.1428571
 4      1 15 to 50   male    72        32    44.4444444
 5      1  Over 50 female    13        13   100.0000000
 6      1  Over 50   male    26         5    19.2307692

我还试了 print.data.table：

> print.data.table(titanic_4,row.names=FALSE)
Error in print.data.table(titanic_4, row.names = FALSE) : 
  could not find function "print.data.table"

表格居中对齐

0 个答案: