我有3列数据,需要按第2列对第3列求和。第2列是取值范围为1到1000的离散值,应先将其按宽度为10的区间分组,即10、20、30、40、50 ... 1000。步骤1是将第2列转换为对应的区间,步骤2是按区间对第3列求和。提供的数据如下:
structure(list(date3 = c(20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190913L, 20190913L, 20190913L, 20190913L,
20190913L, 20190913L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190914L, 20190914L, 20190914L, 20190914L,
20190914L, 20190914L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190915L, 20190915L, 20190915L, 20190915L,
20190915L, 20190915L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190916L, 20190916L, 20190916L, 20190916L,
20190916L, 20190916L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190917L, 20190917L, 20190917L, 20190917L,
20190917L, 20190917L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190918L, 20190918L, 20190918L, 20190918L,
20190918L, 20190918L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190919L, 20190919L, 20190919L, 20190919L,
20190919L, 20190919L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L, 20190920L, 20190920L, 20190920L, 20190920L,
20190920L, 20190920L), length = c(7L, 13L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L,
71L, 72L, 8L, 11L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 7L, 8L, 9L,
11L, 12L, 13L, 14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 67L, 68L, 4L, 11L, 12L, 13L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L,
2L, 6L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 5L, 7L, 8L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 2L, 3L, 4L, 5L, 7L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 1L, 3L, 7L,
11L, 12L, 13L, 14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 67L, 68L), talib = c(2, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 2, 2, 0, 3, 2, 1, 2, 3, 3, 5, 2, 1, 3, 1, 5, 1, 5, 4,
3, 6, 3, 4, 5, 8, 4, 4, 2, 18, 2, 3, 4, 7, 7, 2, 8, 3, 3, 10,
6, 7, 8, 9, 9, 3, 5, 10, 6, 8, 2, 0, 0, 0, 0, 1, 1, 3, 2, 4,
2, 5, 2, 9, 5, 1, 5, 6, 4, 5, 4, 16, 6, 11, 8, 11, 12, 7, 14,
12, 18, 19, 14, 18, 18, 22, 16, 18, 11, 18, 24, 39, 13, 26, 22,
21, 18, 31, 24, 31, 28, 27, 26, 43, 25, 26, 31, 37, 39, 28, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 5, 3, 5, 5, 6, 4, 6, 3, 8, 10, 2,
8, 17, 8, 8, 6, 17, 20, 9, 11, 13, 16, 10, 9, 24, 29, 25, 14,
14, 20, 20, 28, 24, 29, 27, 29, 24, 19, 29, 27, 30, 24, 30, 80,
32, 65, 26, 45, 40, 46, 0, 0, 0, 0, 0, 2, 4, 5, 5, 4, 1, 1, 2,
6, 7, 5, 2, 10, 3, 5, 9, 11, 6, 9, 6, 5, 12, 17, 10, 16, 11,
12, 9, 6, 12, 17, 14, 22, 10, 13, 18, 11, 33, 8, 9, 9, 20, 18,
12, 23, 20, 46, 22, 35, 26, 21, 12, 27, 26, 27, 0, 0, 1, 0, 0,
2, 3, 1, 3, 1, 3, 3, 6, 3, 1, 4, 7, 2, 4, 10, 6, 8, 11, 13, 7,
13, 5, 7, 10, 10, 13, 8, 13, 9, 15, 7, 14, 22, 16, 19, 20, 17,
13, 21, 19, 29, 22, 15, 17, 26, 20, 26, 24, 19, 25, 29, 26, 18,
34, 20, 0, 1, 1, 0, 0, 0, 1, 2, 2, 1, 3, 1, 2, 2, 1, 0, 1, 3,
4, 5, 5, 5, 1, 5, 9, 7, 9, 8, 14, 5, 11, 11, 12, 13, 10, 9, 11,
16, 198, 13, 13, 17, 8, 9, 26, 14, 19, 16, 17, 10, 22, 24, 20,
22, 33, 17, 26, 16, 35, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
3, 6, 10, 5, 4, 0, 3, 0, 3, 7, 5, 7, 7, 6, 6, 7, 7, 8, 11, 7,
9, 14, 15, 24, 11, 13, 13, 13, 16, 28, 20, 9, 15, 11, 13, 5,
11, 14, 12, 12, 20, 16, 20, 23, 30, 18, 20, 20, 33, 0, 0, 0,
1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 2, 7, 1, 1, 4, 3, 9, 6,
4, 6, 5, 11, 7, 7, 16, 8, 7, 7, 8, 7, 12, 11, 12, 4, 5, 17, 13,
14, 5, 15, 9, 6, 6, 15, 24, 8, 8, 13, 9, 11, 20, 7, 18, 16)), row.names = c(NA,
-480L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), groups = structure(list(date3 = 20190913:20190920, .rows = list(
1:60, 61:120, 121:180, 181:240, 241:300, 301:360, 361:420,
421:480)), row.names = c(NA, -8L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE))
这里的数据对第1列中的每个唯一值各截取了60行,因此第2列的取值没有达到1000;要覆盖完整的取值范围,就必须在此发布全部500000行数据,而这并不可行。
答案 0 :(得分:1)
我们可以使用 cut 或 findInterval 将数据划分为多个组,然后使用 sum 对每组求和。另外请注意,共享的 dput 数据已按 date3 分组,因此我们首先需要 ungroup。
library(dplyr)

# Sum talib per date3 and per 10-wide bin of length.
# The breaks are fixed at 1, 11, ..., 991 so the bins are always
# 1-10, 11-20, ..., 991-1000 over the stated 1..1000 range. Anchoring them at
# min(length)/max(length) would shift every bin whenever the smallest
# observed length is not 1.
df %>%
  # The dput() data arrives grouped by date3; drop that grouping before
  # re-grouping by date3 and bin below.
  ungroup() %>%
  # temp is the 1-based bin index; temp * 10 is the bin's upper bound.
  mutate(temp = findInterval(length, seq(1, 1000, by = 10))) %>%
  group_by(date3, temp) %>%
  dplyr::summarise(sum_talib = sum(talib))
# date3 temp sum_talib
# <int> <int> <dbl>
# 1 20190913 1 2
# 2 20190913 2 3
# 3 20190913 3 10
# 4 20190913 4 26
# 5 20190913 5 46
# 6 20190913 6 56
# 7 20190913 7 70
# 8 20190913 8 14
# 9 20190914 1 2
#10 20190914 2 11
# … with 47 more rows
在基础 R 中,我们可以使用 transform 添加新列,然后使用 aggregate 进行汇总:
# Base R equivalent: transform() adds the 1-based bin index `temp`, then
# aggregate() totals talib for every date3/temp combination.
# The breaks are fixed at 1, 11, ..., 991 (bins 1-10, 11-20, ..., 991-1000)
# rather than derived from min(length)/max(length), so the bins do not shift
# when the smallest observed length is not 1.
aggregate(
  talib ~ date3 + temp,
  data = transform(df, temp = findInterval(length, seq(1, 1000, by = 10))),
  FUN = sum
)