我有一张数据表,如下所示:
+----------+------------+-------------+-------+
| CUSTOMER | BILL_DT | DELIVERY_DT | UNITS |
+----------+------------+-------------+-------+
| A | 2007-12-07 | 2008-11-04 | 1 |
| A | 2012-11-21 | 2013-01-31 | 1 |
| A | 1992-12-11 | 1993-05-18 | 1 |
| A | 2018-09-06 | 2019-05-28 | 1 |
| A | 2004-11-29 | 2005-10-07 | 1 |
| B | 2003-02-27 | 2004-03-22 | 1 |
| B | 2017-11-13 | 2018-10-19 | 1 |
| B | 2018-04-07 | 2018-05-18 | 2 |
| B | 2019-06-18 | 2019-11-25 | 1 |
| C | 2018-05-18 | 2018-08-11 | 1 |
| C | 2018-03-16 | 2018-05-30 | 1 |
| C | 2006-12-22 | 2007-08-17 | 1 |
+----------+------------+-------------+-------+
我希望基于 BILL_DT 和 DELIVERY_DT 汇总 UNITS。
例如,对于 2020 年 2 月,我的筛选条件是:
# Units still outstanding at the end of February 2020: billed between
# 2009-01-01 and 2020-02-29 (inclusive) and either delivered on/after
# 2020-02-29 or not delivered at all (missing DELIVERY_DT).
df %>%
  filter(
    BILL_DT >= "2009-01-01",
    BILL_DT <= "2020-02-29",
    is.na(DELIVERY_DT) | DELIVERY_DT >= "2020-02-29"
  ) %>%
  group_by(CUSTOMER) %>%
  summarise(
    sumiv = sum(UNITS),
    # Label the result with the reporting month, i.e. "2020-02".
    DATE = format(as.Date("2020-02-01", "%Y-%m-%d"), "%Y-%m")
  )
对于 2020 年 1 月,则应该是:
# Units still outstanding at the end of January 2020: billed between
# 2009-01-01 and 2020-01-31 (inclusive) and either delivered on/after
# 2020-01-31 or not delivered at all (missing DELIVERY_DT).
df %>%
  filter(
    BILL_DT >= "2009-01-01",
    BILL_DT <= "2020-01-31",
    is.na(DELIVERY_DT) | DELIVERY_DT >= "2020-01-31"
  ) %>%
  group_by(CUSTOMER) %>%
  summarise(
    sumiv = sum(UNITS),
    # Label the result with the reporting month, i.e. "2020-01".
    DATE = format(as.Date("2020-01-01", "%Y-%m-%d"), "%Y-%m")
  )
我希望逐月重复此操作,一直回溯到 DATE = 2019-02,然后将所有结果合并在一起。
是否有办法通过循环得到这些结果?提前感谢您的帮助。
答案 0 :(得分:0)
library(tidyverse)
# Month-end cutoff dates: the day before the 1st of every month from
# 2009-01 through 2020-03, i.e. 2008-12-31, 2009-01-31, ..., 2020-02-29.
date.end.month <- seq(
  as.Date("2009-01-01"),
  as.Date("2020-03-01"),
  by = "month"
) - 1
# Sum, per customer, the units that are billed but not yet delivered as of
# a cutoff date.
#
# Args:
#   theDate: a single Date used as the cutoff (callers here pass month-end
#     dates).
#   data: data frame with CUSTOMER, BILL_DT, DELIVERY_DT and UNITS columns.
#     Defaults to the global `df`, so existing one-argument calls such as
#     lapply(dates, bill_dev) keep working.
#
# Returns:
#   One row per CUSTOMER that has at least one qualifying row, with `sumiv`
#   (the sum of UNITS) and `DATE` (`theDate` formatted as "%Y-%m").
bill_dev <- function(theDate, data = df) {
  data %>%
    filter(
      BILL_DT >= "2009-01-01",  # bills before 2009 are out of scope
      BILL_DT <= theDate,       # already billed at the cutoff
      # not yet delivered at the cutoff, or never delivered
      DELIVERY_DT >= theDate | is.na(DELIVERY_DT)
    ) %>%
    group_by(CUSTOMER) %>%
    summarize(
      sumiv = sum(UNITS),
      DATE = format(theDate, "%Y-%m")
    )
}
# Evaluate bill_dev() at every month-end cutoff, stack the per-month
# summaries into a single table, and list them by customer and month.
lapply(date.end.month, bill_dev) %>%
  bind_rows() %>%
  arrange(CUSTOMER, DATE) %>%
  print(n = 32)  # print all 32 result rows rather than a truncated view
输出
# A tibble: 32 x 3
CUSTOMER sumiv DATE
<chr> <int> <chr>
1 A 1 2012-11
2 A 1 2012-12
3 A 1 2013-01
4 A 1 2018-09
5 A 1 2018-10
6 A 1 2018-11
7 A 1 2018-12
8 A 1 2019-01
9 A 1 2019-02
10 A 1 2019-03
11 A 1 2019-04
12 B 1 2017-11
13 B 1 2017-12
14 B 1 2018-01
15 B 1 2018-02
16 B 1 2018-03
17 B 3 2018-04
18 B 1 2018-05
19 B 1 2018-06
20 B 1 2018-07
21 B 1 2018-08
22 B 1 2018-09
23 B 1 2019-06
24 B 1 2019-07
25 B 1 2019-08
26 B 1 2019-09
27 B 1 2019-10
28 C 1 2018-03
29 C 1 2018-04
30 C 1 2018-05
31 C 1 2018-06
32 C 1 2018-07
数据
# Sample data: one row per order with customer id, billing date, delivery
# date and unit count. Dates are stored as day counts since 1970-01-01
# (e.g. 13854 is 2007-12-07).
df <- data.frame(
  CUSTOMER = rep(c("A", "B", "C"), times = c(5L, 4L, 3L)),
  BILL_DT = as.Date(
    c(13854, 15665, 8380, 17780, 12751, 12110,
      17483, 17628, 18065, 17669, 17606, 13504),
    origin = "1970-01-01"
  ),
  DELIVERY_DT = as.Date(
    c(14187, 15736, 8538, 18044, 13063, 12499,
      17823, 17669, 18225, 17754, 17681, 13742),
    origin = "1970-01-01"
  ),
  UNITS = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L),
  stringsAsFactors = FALSE
)
答案 1 :(得分:0)
我们可以使用 map2 同时遍历两个向量,并对每一对元素执行 filter:
library(dplyr)
library(purrr)
library(lubridate)
# The date vectors must exist before map2_dfr() is called.
# dates1: the 1st of each month, 2019-02-01 .. 2020-03-01 (14 values).
# dates2: one day earlier, i.e. the preceding month ends,
#         2019-01-31 .. 2020-02-29.
dates1 <- seq(as.Date("2019-02-01"), length.out = 14, by = "1 month")
dates2 <- dates1 - days(1)

# head(dates1, -1) and tail(dates2, -1) pair each month start (.x) with the
# end of that same month (.y), for 2019-02 through 2020-02. The question's
# criteria compare against the month-end date, so .y is the cutoff.
map2_dfr(head(dates1, -1), tail(dates2, -1), ~
  df %>%
    group_by(CUSTOMER) %>%
    filter(
      BILL_DT >= "2009-01-01" & BILL_DT <= .y,
      DELIVERY_DT >= .y | is.na(DELIVERY_DT)
    ) %>%
    summarize(
      sumiv = sum(UNITS),
      DATE = format(.y, "%Y-%m")
    )) %>%
  arrange(CUSTOMER, DATE)
答案 2 :(得分:0)
我们可以使用 data.table 软件包:
# Sample data: one row per order with customer id, billing date, delivery
# date and unit count. Dates are stored as day counts since 1970-01-01
# (e.g. 13854 is 2007-12-07).
df <- data.frame(
  CUSTOMER = rep(c("A", "B", "C"), times = c(5L, 4L, 3L)),
  BILL_DT = as.Date(
    c(13854, 15665, 8380, 17780, 12751, 12110,
      17483, 17628, 18065, 17669, 17606, 13504),
    origin = "1970-01-01"
  ),
  DELIVERY_DT = as.Date(
    c(14187, 15736, 8538, 18044, 13063, 12499,
      17823, 17669, 18225, 17754, 17681, 13742),
    origin = "1970-01-01"
  ),
  UNITS = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L),
  stringsAsFactors = FALSE
)
library(data.table)

# Lookup vector: names are the month-end dates 2009-01-31 .. 2019-02-28
# (as text), values are the matching "%Y-%m" labels.
start_end <- local({
  month_ends <- seq(as.Date("2009-02-01"), as.Date("2019-03-01"), by = "months") - 1
  setNames(format(month_ends, "%Y-%m"), month_ends)
})

# setDT() converts df to a data.table by reference; bills dated before
# 2009-01-01 are dropped once, up front.
df2 <- setDT(df)[BILL_DT >= '2009-01-01']

# For each month end: keep rows billed by that date and not yet delivered
# (or never delivered), sum UNITS per customer, then stack every month.
rbindlist(
  lapply(names(start_end), function(month_end) {
    outstanding <- df2[
      BILL_DT <= month_end & (is.na(DELIVERY_DT) | DELIVERY_DT >= month_end)
    ]
    outstanding[
      ,
      .(sumiv = sum(UNITS), DATE = start_end[month_end]),
      by = CUSTOMER
    ]
  })
)
# CUSTOMER sumiv DATE
# 1: A 1 2012-11
# 2: A 1 2012-12
# 3: A 1 2013-01
# 4: B 1 2017-11
# 5: B 1 2017-12
# 6: B 1 2018-01
# 7: B 1 2018-02
# 8: B 1 2018-03
# 9: C 1 2018-03
# 10: B 3 2018-04
# 11: C 1 2018-04
# 12: B 1 2018-05
# 13: C 1 2018-05
# 14: B 1 2018-06
# 15: C 1 2018-06
# 16: B 1 2018-07
# 17: C 1 2018-07
# 18: B 1 2018-08
# 19: A 1 2018-09
# 20: B 1 2018-09
# 21: A 1 2018-10
# 22: A 1 2018-11
# 23: A 1 2018-12
# 24: A 1 2019-01
# 25: A 1 2019-02