考虑以下数据:
library(dplyr)
# Sample contingency table: rows B1..B3 plus a "Total" row; a "Total" column
# followed by the categories A1..A7. All columns are stored as doubles.
df <- data.frame(
  Total = c(3450, 1728, 122, 5300),
  A1 = c(1092, 497, 4, 1593),
  A2 = c(596, 156, 29, 781),
  A3 = c(801, 417, 36, 1254),
  A4 = c(107, 11, 0, 118),
  A5 = c(614, 217, 21, 852),
  A6 = c(132, 47, 0, 179),
  A7 = c(108, 383, 32, 523),
  row.names = c("B1", "B2", "B3", "Total")
)
Total A1 A2 A3 A4 A5 A6 A7
B1 3450 1092 596 801 107 614 132 108
B2 1728 497 156 417 11 217 47 383
B3 122 4 29 36 0 21 0 32
Total 5300 1593 781 1254 118 852 179 523
问题如下:以 Total 行作为分母,计算同一列中每个单元格占该列 Total 行数值的百分比。
在这种情况下,我会做
# Keep the row labels: the matrix returned by apply() below does not carry
# them, so they are re-attached afterwards.
df_names <- rownames(df)

# Denominators: the "Total" row, as a one-row matrix.
total_row <- as.matrix(df[rownames(df) == "Total", ])

# Divide each column by its total, scale to percent, round to two decimals.
pct <- round(
  sweep(df, MARGIN = 2, STATS = total_row, FUN = "/") * 100,
  digits = 2
)

# Append "%" to every value, column by column (yields a character matrix).
df2 <- apply(pct, MARGIN = 2, FUN = function(column) paste0(column, "%"))
rownames(df2) <- df_names
df2 <- as.data.frame(df2)
Total A1 A2 A3 A4 A5 A6 A7
B1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
B2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
B3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
Total 100% 100% 100% 100% 100% 100% 100% 100%
这种写法虽然可行,但我更希望使用 %>% 管道来写:
# NOTE: this snippet intentionally reproduces the error shown right after it.
# %>% has higher precedence than *, so the pipe chain starts from the bare
# number 100 instead of from the result of sweep(...) * 100; apply() then
# receives a plain number, which has no dim attribute.
df3 <- sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>%
round(digits = 2) %>%
apply(., MARGIN = 2, FUN = paste0, "%")
Error in apply(., MARGIN = 2, FUN = paste0, "%") :
dim(X) must have a positive length
为什么上面使用 %>% 的写法不起作用?
答案 0 :(得分:4)
问题出在末尾的数字 100 以及它进入管道的方式:%>% 的优先级高于 *,所以被送入管道的只有 100,而不是 sweep(...) * 100 的结果。
用 () 把整个表达式括起来之后再接 apply 就可以正常工作;或者采用下面这种更简洁的写法:
# The parentheses make the complete percentage table (not just the trailing
# 100) the left-hand side of the first %>%.
(
  sweep(
    df,
    MARGIN = 2,
    STATS = as.matrix(df[rownames(df) == "Total", ]),
    FUN = "/"
  ) * 100
) %>%
  round(digits = 2) %>%
  # Format every column as "<value>%".
  mutate_all(function(value) sprintf("%s%%", value))
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%
%>% 接受两个参数 lhs 和 rhs,并会创建一个环境;其中左侧(lhs)是输入值。下面的例子没有加括号,可以看到进入管道的 lhs 只是 100:
# Demonstration without parentheses: %>% binds tighter than *, so only the
# literal 100 is piped into print (the "[1] 100" line in the output). The
# product sweep(...) * 100 is computed afterwards and auto-printed.
sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>% print
[1] 100
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
现在加上圆括号:
# Same demonstration with the multiplication parenthesised: print now receives
# the full percentage table, so it is printed once and no "[1] 100" appears.
(sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100) %>% print
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
使用花括号 {} 也可以:
# Alternative: pipe the raw ratios into a brace block, where `.` refers to the
# piped-in table, and do the scaling, rounding and formatting inside it.
sweep(
  df,
  MARGIN = 2,
  STATS = as.matrix(df[rownames(df) == "Total", ]),
  FUN = "/"
) %>% {
  round(. * 100, digits = 2) %>%
    mutate_all(function(value) sprintf("%s%%", value))
}
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%