有人可以帮我解决以下问题吗?附图中的这个data.frame是我需要处理的数据。
对每个客户,从交易代码为1的交易开始,必须跟踪该客户的所有交易,并汇总30天时间范围内的金额。之后计数器针对该客户重新启动,以便为下一笔交易代码为1的交易创建新的聚合记录。交易代码为0或1的独立交易仍然是输出数据框的一部分,但作为未聚合的行保留。
我尝试了dplyr的group_by和summarise函数,但卡在了如何在函数中加入条件以得到正确答案这一步。
如果我在控制台中输入dput(df),会得到以下输出:
> dput(df) structure(list(Customer_ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3), date = structure(c(1L, 2L, 3L, 4L, 7L, 10L, 11L, 5L, 6L, 8L, 9L, 12L), .Label = c("01/01/2016", "02/01/2016", "02/15/2016", "02/30/2016", "04/01/2016", "04/20/2016", "05/01/2016", "05/05/2016", "06/01/2016", "07/01/2016", "07/15/2016", "10/01/2016"), class = "factor"), Amount = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Transaction_Code = c(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1)), .Names = c("Customer_ID", "date", "Amount", "Transaction_Code"), row.names = c(NA, -12L ), class = "data.frame")
答案 0 :(得分:0)
# Sample data from the question. The invalid date "02/30/2016" was changed to
# "02/28/2016" so that every value parses as a real calendar date.
x <- data.frame(
  Customer_ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3),
  date = c(
    "01/01/2016", "02/01/2016", "02/15/2016", "02/28/2016",
    "05/01/2016", "07/01/2016", "07/15/2016",
    "04/01/2016", "04/20/2016", "05/05/2016",
    "06/01/2016", "10/01/2016"
  ),
  Amount = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Transaction_Code = c(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1),
  stringsAsFactors = FALSE
)
x$date <- as.Date(x$date, format = "%m/%d/%Y")

library(dplyr)

# Sum Amount per customer within consecutive 30-day periods, counted from the
# customer's first transaction. The result has one row per
# (Customer_ID, date3) pair, where date3 is the 0-based index of the period.
x <- x %>%
  # Sort first so lag() compares each row with the previous one in time.
  arrange(Customer_ID, date) %>%
  group_by(Customer_ID) %>%
  mutate(
    # Days since the customer's previous transaction. The unit is spelled out
    # because difftime()'s default "auto" falls back to seconds when the
    # smallest gap is 0 (two transactions on the same date).
    date2 = as.numeric(difftime(date, lag(date, 1), units = "days")),
    # The first row of each customer has no predecessor; use 0 so that
    # cumsum() below does not propagate NA.
    date2 = ifelse(is.na(date2), 0, date2),
    # Running day count divided into 30-day periods, truncated to an index.
    date3 = trunc(cumsum(date2) / 30)
  ) %>%
  # Re-group by customer and period, then total the amounts per period.
  # summarise() drops only the last grouping level, so the result is still
  # grouped by Customer_ID.
  group_by(Customer_ID, date3) %>%
  summarise(totAmt = sum(Amount))
print(x)
Source: local data frame [8 x 3]
Groups: Customer_ID [?]
Customer_ID date3 totAmt
<dbl> <dbl> <dbl>
1 1 0 1
2 1 1 3
3 1 4 1
4 1 6 2
5 2 0 2
6 2 1 1
7 3 0 1
8 3 4 1