对于示例数据框:
# Example data: a 78-row tibble (classes tbl_df / tbl / data.frame) in
# dput() form, with four columns:
#   name   - character label, "a".."z"
#   amount - double, the value to be averaged
#   decile - integer decile group, 1..10
#   time   - integer year, 2016..2018
# The trailing `spec` attribute is a col_spec column specification
# (presumably left over from reading the data with readr - confirm).
df1 <- structure(list(name = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z", "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z", "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z"), amount = c(5.5, 5.4, 5.2, 5.3, 5.1,
5.1, 5, 5, 4.9, 4.5, 6, 5.9, 5.7, 5.4, 5.3, 5.1, 5.6, 5.4, 5.3,
5.6, 4.6, 4.2, 4.5, 4.2, 4, 3.8, 6, 5.8, 5.7, 5.6, 5.3, 5.6,
5.4, 5.5, 5.4, 5.1, 9, 8.8, 8.6, 8.4, 8.2, 8, 7.8, 7.6, 7.4,
7.2, 6, 5.75, 5.5, 5.25, 5, 4.75, 10, 8.9, 7.8, 6.7, 5.6, 4.5,
3.4, 2.3, 1.2, 0.1, 6, 5.8, 5.7, 5.6, 5.5, 5.5, 5.4, 5.6, 5.8,
5.1, 6, 5.5, 5.4, 5.3, 5.2, 5.1), decile = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L), time = c(2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L)), .Names = c("name", "amount",
"decile", "time"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-78L), spec = structure(list(cols = structure(list(name = structure(list(), class = c("collector_character",
"collector")), amount = structure(list(), class = c("collector_double",
"collector")), decile = structure(list(), class = c("collector_integer",
"collector")), time = structure(list(), class = c("collector_integer",
"collector"))), .Names = c("name", "amount", "decile", "time"
)), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"))
我想生成一个汇总表,按年份分别列出相邻十分位组之间的平均差异:十分位1和2(即十分位1组的平均结果减去十分位2组的平均结果)、2和3、3和4、4和5、5和6、6和7、7和8、8和9,以及9和10。
有人有什么建议吗?
答案 0 :(得分:5)
使用df['a'] = df['a'].str.replace('[^a-zA-Z]+', '')
print (df)
a
0 apple
1 orangemg
:
dplyr
答案 1 :(得分:1)
您也可以这样做:
library(tidyverse)
# Add one column per pair of adjacent deciles (mean_1_2, ..., mean_9_10).
# Within each year (`time`), a column holds the mean `amount` of decile i
# minus the mean `amount` of decile i + 1, repeated on every row of that year.
# df1 is left grouped by `time`, exactly as before.
#
# This replaces the deprecated `mutate_(.dots = ...)`, which assembled the
# expressions as strings, with `mutate()` plus tidy-eval injection.
# Grouping is done once instead of on every iteration; the result is the same.
df1 <- df1 %>%
  group_by(time)
for (i in seq_len(9L)) {
  # Name of the new column for this decile pair, e.g. "mean_1_2".
  diff_col <- paste0("mean_", i, "_", i + 1L)
  df1 <- df1 %>%
    mutate(
      # `!!diff_col :=` sets a computed column name. `!!i` injects the loop
      # value as a constant, so it cannot be mistaken for a column called `i`.
      !!diff_col := mean(amount[decile == !!i], na.rm = TRUE) -
        mean(amount[decile == !!(i + 1L)], na.rm = TRUE)
    )
}
输出为:
# A tibble: 78 x 13
# Groups: time [3]
name amount decile time mean_1_2 mean_2_3 mean_3_4 mean_4_5 mean_5_6 mean_6_7 mean_7_8 mean_8_9 mean_9_10
<chr> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 5.5 1 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
2 b 5.4 2 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
3 c 5.2 3 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
4 d 5.3 4 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
5 e 5.1 5 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
6 f 5.1 6 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
7 g 5 7 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
8 h 5 8 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
9 i 4.9 9 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
10 j 4.5 10 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
# ... with 68 more rows
您可以按如下方式得到每一年的完整汇总:
# Collapse to one row per year: drop the grouping, keep only `time` and the
# columns whose names start with "mean", then remove the duplicated rows.
# Note that this overwrites df1 with the summary table.
df1 <- distinct(
  select(ungroup(df1), time, starts_with("mean"))
)
输出:
# A tibble: 3 x 10
time mean_1_2 mean_2_3 mean_3_4 mean_4_5 mean_5_6 mean_6_7 mean_7_8 mean_8_9 mean_9_10
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
2 2017 0.263 0.0625 0.213 0.237 0.0875 -1.06 0.0500 0.150 0.25
3 2018 0.600 0.433 0.433 0.433 0.4 0.633 0.45 0.450 0.9