我有一个包含以下两列的数据集,它们表示每次病假的开始日期和结束日期。
# Sample sick-leave records: one row per absence, with the first and last
# day of the absence stored as day/month/year strings.
df <- data.frame(
  BEGIN_DT = c(
    "04/12/2013", "14/01/2013", "31/12/2012", "24/09/2013", "31/12/2013"
  ),
  END_DT = c(
    "04/12/2013", "15/01/2013", "04/01/2013", "27/09/2013", "31/12/2013"
  )
)
# Keep the row labels (16 to 20) that the rows carry in the listing below.
row.names(df) <- 16:20
BEGIN_DT END_DT
16 04/12/2013 04/12/2013
17 14/01/2013 15/01/2013
18 31/12/2012 04/01/2013
19 24/09/2013 27/09/2013
20 31/12/2013 31/12/2013
我希望做的是根据BEGIN_DT和END_DT值,再计算五个额外的列(Q1,Q2,Q3,Q4,Total),计算每个季度的病假天数。
例如第18行:
BEGIN_DT END_DT Q1 Q2 Q3 Q4 Total
31/12/2012 04/01/2013 4 0 0 1 5
我看过这里,这似乎算上了总数,但我真的很想知道怎么把它们分成几个部分。
参见这个问题:"calculating number of days between 2 columns of dates in data frame"(计算数据框中两个日期列之间的天数)。
# Two survey rows, each with a survey date and a treatment start date stored
# as year/month/day strings.
survey <- data.frame(
  date = c("2012/07/26", "2012/07/25"),
  tx_start = c("2012/01/01", "2012/01/01 ")
)

# Parse a year/month/day column into Date values.
to_date <- function(x) {
  as.Date(as.character(x), format = "%Y/%m/%d")
}

# Subtracting two Date vectors gives the elapsed time between them, row by row.
survey$date_diff <- to_date(survey$date) - to_date(survey$tx_start)
survey
目前我有以下代码,但它只给了我Q1的总天数,其余各列都是空白。我用结束日期减去了开始日期,但在过去一周花了很多个小时之后,我仍然没有弄清楚(甚至可能离答案更远了)如何填充新的季度列。
# The asker's attempt at filling the quarter columns Q1..Q4 of `sick2`
# (`sick2` is not defined in this snippet; presumably it is the asker's full
# data set with the BEGIN_DT / END_DT columns -- confirm).
#
# Why every column gets the same treatment for all rows:
#   * "/%m/" is a plain string literal, not the month of any row, so each
#     condition below is a constant that never looks at the data.
#   * `if` evaluates a single condition, so one branch is chosen for the
#     whole column rather than row by row.
#   * Comparing a string with a number makes R compare both as strings.
#     NOTE(review): the outcome then depends on the collation order; the
#     asker reports that only the Q1 condition turned out TRUE.
#
# Q1: when the condition holds, the column is END_DT minus BEGIN_DT for every
# row (a plain date difference, so a one-day absence yields 0); otherwise it
# is the character string "0".
sick2$Q1 <- if ("/%m/" < 4) {
as.Date(as.character(sick2$END_DT), format="%d/%m/%Y") -
as.Date(as.character(sick2$BEGIN_DT), format="%d/%m/%Y")
} else { "0" }
# Q2: same constant-condition pattern, intended for months 4 to 6.
sick2$Q2 <- if ("/%m/" > 3 & "/%m/" < 7) {
as.Date(as.character(sick2$END_DT), format="%d/%m/%Y") -
as.Date(as.character(sick2$BEGIN_DT), format="%d/%m/%Y")
} else { "0" }
# Q3: same constant-condition pattern, intended for months 7 to 9.
sick2$Q3 <- if ("/%m/" > 6 & "/%m/" < 10) {
as.Date(as.character(sick2$END_DT), format="%d/%m/%Y") -
as.Date(as.character(sick2$BEGIN_DT), format="%d/%m/%Y")
} else { "0" }
# Q4: same constant-condition pattern, intended for months 10 to 12.
sick2$Q4 <- if ("/%m/" > 9) {
as.Date(as.character(sick2$END_DT), format="%d/%m/%Y") -
as.Date(as.character(sick2$BEGIN_DT), format="%d/%m/%Y")
} else { "0" }
无论如何,如果有人能给我指出正确的方向,我将非常非常感谢。谢谢,阿德里安。
答案 0 :(得分:0)
不漂亮(eww ..循环),但这是我对此的看法:
require(lubridate)
# Sample sick-leave records: first and last day of each absence, given as
# day/month/year strings.
df <- data.frame(
  BEGIN_DT = c(
    "04/12/2013", "14/01/2013", "31/12/2012", "24/09/2013", "31/12/2013"
  ),
  END_DT = c(
    "04/12/2013", "15/01/2013", "04/01/2013", "27/09/2013", "31/12/2013"
  )
)
rownames(df) <- 16:20
df$BEGIN_DT <- as.Date(df$BEGIN_DT, format = "%d/%m/%Y")
df$END_DT <- as.Date(df$END_DT, format = "%d/%m/%Y")

# Fail early with a clear message instead of miscounting: every row needs two
# parsable dates, and an absence cannot end before it starts.
stopifnot(
  !anyNA(df$BEGIN_DT),
  !anyNA(df$END_DT),
  all(df$END_DT >= df$BEGIN_DT)
)

quarter_names <- c("Q1", "Q2", "Q3", "Q4")

# Count the days of one absence (first and last day inclusive) that fall in
# each calendar quarter. Returns four integers ordered Q1..Q4; a quarter the
# absence never touches is reported as 0.
count_quarter_days <- function(begin, end) {
  days <- seq(begin, end, by = "day")
  # quarters() labels each date "Q1".."Q4"; fixing the factor levels makes
  # tabulate() report all four quarters in a stable order.
  tabulate(
    factor(quarters(days), levels = quarter_names),
    nbins = length(quarter_names)
  )
}

# One column of counts per row of df (a 4 x nrow(df) matrix). Iterating over
# row indices keeps the Date class of the elements, and seq_len() copes with
# an empty data frame.
quarter_counts <- vapply(
  seq_len(nrow(df)),
  function(i) count_quarter_days(df$BEGIN_DT[i], df$END_DT[i]),
  integer(length(quarter_names))
)

for (k in seq_along(quarter_names)) {
  df[[quarter_names[k]]] <- quarter_counts[k, ]
}
# Total number of sick days per absence, as requested in the question.
df$Total <- as.integer(colSums(quarter_counts))
第一步是生成每个区间内所有日期组成的向量,然后用 quarters() 得到每个日期所属的季度(注意:quarters() 其实是 base R 自带的函数,lubridate 提供的是 quarter()),最后统计各季度出现的次数。我想不出不用循环的方法,也许其他人可以?
答案 1 :(得分:0)
并不为此感到自豪,但可能会给你一些指导。必须有更好的方式...
library(lubridate)
library(plyr)
# Sample sick-leave records: first and last day of each absence, given as
# day/month/year strings.
df <- data.frame(
  BEGIN_DT = c(
    "04/12/2013", "14/01/2013", "31/12/2012", "24/09/2013", "31/12/2013"
  ),
  END_DT = c(
    "04/12/2013", "15/01/2013", "04/01/2013", "27/09/2013", "31/12/2013"
  )
)

# dmy() (lubridate) parses the day/month/year strings.
df$BEGIN_DT <- dmy(df$BEGIN_DT)
df$END_DT <- dmy(df$END_DT)

# For every row, the run of consecutive days from BEGIN_DT to END_DT
# (both inclusive).
date_seq <- lapply(seq_along(df$BEGIN_DT), function(i) {
  seq(from = df$BEGIN_DT[i], to = df$END_DT[i], by = "1 day")
})

# Per row, a table of how many days carry each quarter label. lapply() always
# returns a list; sapply() would collapse the result into a matrix whenever
# all absences happen to have the same length, which breaks the steps below.
# (lubridate's quarter() would give the quarter as a number instead.)
results <- lapply(date_seq, function(days) table(quarters(days)))
results

# ldply() (plyr) stacks the per-row tables into one data frame; a quarter
# that does not occur in a row shows up as NA.
ldply(results, rbind)
#   Q4 Q1 Q3
# 1  1 NA NA
# 2 NA  2 NA
# 3  1  4 NA
# 4 NA NA  4
# 5  1 NA NA

cbind(df, ldply(results, rbind))
#     BEGIN_DT     END_DT Q4 Q1 Q3
# 1 2013-12-04 2013-12-04  1 NA NA
# 2 2013-01-14 2013-01-15 NA  2 NA
# 3 2012-12-31 2013-01-04  1  4 NA
# 4 2013-09-24 2013-09-27 NA NA  4
# 5 2013-12-31 2013-12-31  1 NA NA
下面是使用 magrittr 管道的写法;仍然需要上面创建的 date_seq 列表对象:
library(magrittr)
# Count, for every element of date_seq (one vector of days per row of df),
# how many days fall in each quarter, and append the counts to df as columns
# Q1..Q4. Quarters an absence never touches are reported as 0.
#
# vapply() with a fixed-size, named template always yields a 4 x n matrix,
# whatever the lengths of the individual absences; chained sapply() calls
# change their return shape when all absences have the same length.
quarter_levels <- c("Q1", "Q2", "Q3", "Q4")
date_seq %>%
  vapply(
    function(days) {
      tabulate(
        factor(quarters(days), levels = quarter_levels),
        nbins = length(quarter_levels)
      )
    },
    FUN.VALUE = setNames(integer(length(quarter_levels)), quarter_levels)
  ) %>%
  t() %>%
  cbind(df, .)
#     BEGIN_DT     END_DT Q1 Q2 Q3 Q4
# 1 2013-12-04 2013-12-04  0  0  0  1
# 2 2013-01-14 2013-01-15  2  0  0  0
# 3 2012-12-31 2013-01-04  4  0  0  1
# 4 2013-09-24 2013-09-27  0  0  4  0
# 5 2013-12-31 2013-12-31  0  0  0  1