我是R编程的初学者,希望获得有关在数据集上执行日期运算的帮助(希望可以在R中做到这一点),
我已将给定的数据集导入为变量deals:
# Load the deals table from the CSV file into a data frame.
deals <- read.csv(file = "deals_dates.csv")
带有数据(输入),
Deal_Id Deal_Name Start_Date End_Date
1 Samsung Mobile 3/5/2018 6/23/2018
2 Apple Watch 12/21/2017 2/9/2018
我想把每个交易从开始日期到结束日期的区间按月拆分,得到该区间内每个月的开始日期和结束日期,
(预期输出)
Deal_Id Deal_Name Start_Date End_Date
1 Samsung Mobile 3/5/2018 3/31/2018
1 Samsung Mobile 4/1/2018 4/30/2018
1 Samsung Mobile 5/1/2018 5/31/2018
1 Samsung Mobile 6/1/2018 6/23/2018
2 Apple Watch 12/21/2017 12/31/2017
2 Apple Watch 1/1/2018 1/31/2018
2 Apple Watch 2/1/2018 2/9/2018
谢谢!
答案 0 :(得分:1)
您也可以只使用基础R(不依赖任何额外的包)来完成这项工作。例如,定义如下函数:
#' Split a deal's date range into one row per calendar month
#'
#' The first row starts on the deal's start date, the last row ends on the
#' deal's end date, and every row in between covers a full calendar month.
#' Works for any number of months (the period count is derived from the
#' dates rather than being fixed).
#'
#' @param dealName Deal name, repeated on every output row.
#' @param stdate Deal start date as a string in "%m/%d/%Y" format.
#' @param endate Deal end date as a string in "%m/%d/%Y" format.
#' @return A data.frame with columns `DealName`, `Start` and `End`
#'   (`Start` and `End` are of class `Date`), one row per calendar month
#'   touched by the deal.
myDateFunction <- function(dealName, stdate, endate) {
  deal_start <- as.Date(stdate, format = "%m/%d/%Y")
  deal_end <- as.Date(endate, format = "%m/%d/%Y")

  if (length(deal_start) != 1L || length(deal_end) != 1L ||
      is.na(deal_start) || is.na(deal_end)) {
    stop("'stdate' and 'endate' must each be a single date in m/d/Y format",
         call. = FALSE)
  }
  if (deal_start > deal_end) {
    stop("'stdate' must not be later than 'endate'", call. = FALSE)
  }

  # First day of the month containing the start date.
  first_day <- as.Date(cut(deal_start, "month"))

  # First day of every month touched by the deal.
  Start <- seq(first_day, deal_end, by = "month")
  n_periods <- length(Start)

  # Last day of each month = first day of the following month minus one day.
  End <- seq(first_day, by = "month", length.out = n_periods + 1L)[-1L] - 1

  # Clamp the outer periods to the real deal boundaries.
  Start[1L] <- deal_start
  End[n_periods] <- deal_end

  DealName <- rep(dealName, n_periods)
  data.frame(DealName, Start, End)
}
如果我们这样调用它:
# Example: split the Samsung Mobile deal into per-month periods.
myDateFunction(
  dealName = "Samsung Mobile",
  stdate = "3/5/2018",
  endate = "6/23/2018"
)
产生以下输出:
DealName Start End
1 Samsung Mobile 2018-03-05 2018-03-31
2 Samsung Mobile 2018-04-01 2018-04-30
3 Samsung Mobile 2018-05-01 2018-05-31
4 Samsung Mobile 2018-06-01 2018-06-23
答案 1 :(得分:0)
我们可以先将各“日期”列转换为Date类,然后使用seq按“月”生成日期序列,用floor_date将日期向下取整到月初,最后用unnest展开嵌套的结果
library(tidyverse)
# mdy() and floor_date() come from lubridate, which library(tidyverse) only
# attaches from tidyverse 2.0.0 onwards, so load it explicitly.
library(lubridate)

# Split every deal in df1 into one row per calendar month.
# df1 is expected to hold Deal_Id, Deal_Name, Start_Date and End_Date, with
# the dates as m/d/Y strings.
df1 %>%
  mutate_at(vars(ends_with("Date")), mdy) %>%
  group_by(Deal_Id, Deal_Name) %>%
  nest() %>%
  mutate(data = map(data, ~ {
    deal_start <- .x$Start_Date[1]
    deal_end <- last(.x$End_Date)
    # Generate the sequence from the FIRST day of the start month. Stepping
    # from the start date itself skips the final month whenever the end
    # day-of-month is earlier than the start day-of-month
    # (e.g. 12/21 -> 2/9 would yield only two periods).
    month_starts <- seq(floor_date(deal_start, unit = "month"),
                        deal_end,
                        by = "1 month")
    later_starts <- month_starts[-1]
    tibble(
      Start_Date = c(deal_start, later_starts),
      # Each period ends the day before the next one starts; the last
      # period ends on the deal's own end date. The column is named
      # End_Date (not End_date) to match Start_Date and the input columns.
      End_Date = c(later_starts - 1, deal_end)
    )
  })) %>%
  unnest(data) %>%
  ungroup()
答案 2 :(得分:0)
这是一个data.table解决方案,并使用tidyverse做了一些数据准备。实际的区间匹配(重叠连接)由data.table中速度非常快的foverlaps()完成。如果您使用的是大数据集,请尝试一下此解决方案。
library( tidyverse )
library( data.table )
# read data with deals
df2 <- read.table(text=" Deal_Id Deal_Name Start_Date End_Date
1 Samsung_Mobile 3/5/2018 6/23/2018
2 Apple_Watch 12/21/2017 2/9/2018", header = TRUE)

# set the dates as actual dates
dt2 <- df2 %>%
  mutate(start = as.Date(as.character(Start_Date), format = "%m/%d/%Y"),
         end = as.Date(as.character(End_Date), format = "%m/%d/%Y")) %>%
  select(-Start_Date, -End_Date) %>%
  setDT()

# create the calendar of month periods (first and last day of each month).
# It is derived from the deals' own date range instead of a hard-coded
# 2017-2018 window, so deals outside that window are no longer silently
# dropped by the overlap join below.
first_month <- as.Date(format(min(dt2$start), "%Y-%m-01"))
last_month <- as.Date(format(max(dt2$end), "%Y-%m-01"))
month_starts <- seq(first_month, last_month, by = "month")
# last day of a month = first day of the following month minus one day
month_ends <- seq(first_month,
                  by = "month",
                  length.out = length(month_starts) + 1L)[-1L] - 1
dt1 <- data.table(start = month_starts, end = month_ends)

# foverlaps() requires the lookup table (dt2) to be keyed on its interval
setkey(dt1, start, end)
setkey(dt2, start, end)

# create overlap join: one row per (month, deal) pair whose intervals overlap;
# the month's own bounds come back as i.start / i.end
dt3 <- foverlaps(dt1, dt2, type = "any", nomatch = 0L)

# set start: a deal that began before this month starts on the month's first day
dt3[start < i.start, start := i.start]
# set end: a deal that runs past this month ends on the month's last day
dt3[end > i.end, end := i.end]

# print results (drop the helper month-bound columns first)
dt3[, c("i.end", "i.start") := NULL][]
# Deal_Id Deal_Name start end
# 1: 2 Apple_Watch 2017-12-21 2017-12-31
# 2: 2 Apple_Watch 2018-01-01 2018-01-31
# 3: 2 Apple_Watch 2018-02-01 2018-02-09
# 4: 1 Samsung_Mobile 2018-03-05 2018-03-31
# 5: 1 Samsung_Mobile 2018-04-01 2018-04-30
# 6: 1 Samsung_Mobile 2018-05-01 2018-05-31
# 7: 1 Samsung_Mobile 2018-06-01 2018-06-23