按财政季度对R中两个日期之间有效的记录进行计数

时间:2018-11-08 20:21:43

标签: r date group-by count dplyr

head(df,6)

       PositionName       Hire.Date     Termination.Date      
1      CBM                4/22/2002         9/14/2007  
2      CBM                10/5/2005         5/5/2008   
3      SBM                10/31/2005        6/25/2007  
4      CBM                12/1/2005         5/5/2008   
5      SBM                7/6/2006          6/20/2008  
6      CBM                10/6/2006         6/30/2008  



# Asker's attempt: reformat both date columns as "month/year" text.
# NOTE(review): `sm.SL` is not defined anywhere in this snippet; presumably
# `df` was intended -- confirm.
# NOTE(review): format() returns character strings, so after these two lines
# the columns are no longer Date objects.
df$Hire.Date <- format(as.Date(sm.SL$Hire.Date, format = "%m/%d/%Y"), "%m/%Y")
df$Termination.Date <- format(as.Date(sm.SL$Termination.Date, format = "%m/%d/%Y"), "%m/%Y")

    # Stuck here: keep the "CBM" rows, derive a quarter, and count rows per
    # quarter.
    # NOTE(review): the `quarter()` call is incomplete -- `Hire.Date >` has no
    # right-hand operand, so this pipeline does not parse as written.
    # `quarter()` is presumably lubridate::quarter -- confirm.
      WorkedDuring <- df %>% 
      filter(PositionName == "CBM")%>%
      mutate(quarter = quarter(Hire.Date >, with_year = TRUE))%>% 
      group_by(quarter) %>%
      summarise(total = n())

希望得到如下形式的 df:

Year.Quarter     n
2005.1           1
2005.2           1
2005.3           1
2005.4           4
2006.1           4
2006.2           4
2006.3           5
2006.4           6

这个例子不太理想,但我的意思是:员工离职(终止日期)之后,就不应再把他们计入人数。

3 个答案:

答案 0 :(得分:2)

尝试一下:

# Helper: map a Date vector `d` to its calendar quarter number (1 to 4).
date2qtr <- function(d) {
  # Zero-based month (0 = January) so that integer division by 3 yields 0..3.
  month_index <- as.integer(format(d, "%m")) - 1
  1 + month_index %/% 3L
}

library(dplyr)
# library(tidyr)
# library(purrr)
# Count how many employees are on the payroll in each calendar quarter between
# their hire date and their termination date (both quarters included).
# Hire.Date and Termination.Date must already be Date columns.
as_tibble(df) %>%  # tbl_df() is deprecated; as_tibble() is its replacement
  mutate(
    # Start each sequence on the first day of the hire quarter. Stepping three
    # months from the raw hire date can pass the termination date before it
    # reaches the termination quarter (hired 2005-12-01, terminated 2008-05-05:
    # the last step is 2008-03-01, so 2008 Q2 was never counted).
    first_quarter = as.Date(cut(Hire.Date, "quarter")),
    # One date per quarter worked, as a list-column.
    alldates = purrr::map2(first_quarter, Termination.Date,
                           ~ seq.Date(..1, ..2, by = "3 months"))
  ) %>%
  select(-first_quarter) %>%
  # Name the list-column explicitly; calling unnest() without it is deprecated.
  tidyr::unnest(alldates) %>%
  mutate(
    Year = as.integer(format(alldates, "%Y")),
    Quarter = date2qtr(alldates)
  ) %>%
  group_by(Year, Quarter) %>%
  tally()
# # A tibble: 25 x 3
# # Groups:   Year [?]
#     Year Quarter     n
#    <int>   <dbl> <int>
#  1  2002       2     1
#  2  2002       3     1
#  3  2002       4     1
#  4  2003       1     1
#  5  2003       2     1
#  6  2003       3     1
#  7  2003       4     1
#  8  2004       1     1
#  9  2004       2     1
# 10  2004       3     1
# # ... with 15 more rows

数据:

# Sample data: six employees with their position, hire date and termination
# date. Row names are kept as the character labels "1".."6".
df <- data.frame(
  PositionName = c("CBM", "CBM", "SBM", "CBM", "SBM", "CBM"),
  Hire.Date = c(
    "4/22/2002", "10/5/2005", "10/31/2005",
    "12/1/2005", "7/6/2006", "10/6/2006"
  ),
  Termination.Date = c(
    "9/14/2007", "5/5/2008", "6/25/2007",
    "5/5/2008", "6/20/2008", "6/30/2008"
  ),
  row.names = as.character(1:6),
  stringsAsFactors = FALSE
)

# Parse both month/day/year text columns into Date objects.
df$Hire.Date <- as.Date(df$Hire.Date, format = "%m/%d/%Y")
df$Termination.Date <- as.Date(df$Termination.Date, format = "%m/%d/%Y")

答案 1 :(得分:1)

以下方法按照政府会计年度(美国联邦政府的会计年度为 10 月 1 日至次年 9 月 30 日)添加 fiscal quarter 列,您可以根据自己的会计年度进行调整。这段代码要感谢我的同事 Ron Graff:

library(zoo) # good library for dealing with dates, also check out lubridate
library(dplyr)
library(lubridate) # good library for dealing with dates
library(stringr) # for dealing with characters

# Add fiscal-year and fiscal-quarter columns for a fiscal year that runs from
# 1 October to 30 September. `data` is presumably a placeholder for your own
# data frame (`df` in the question); Hire.Date must be a Date column.
data_with_quarter <- data %>%
  # Shifting the year-month back nine months and adding one year rolls
  # October-December into the following fiscal year; %/% 1 drops the fraction.
  mutate(fiscal_year = as.numeric(as.yearmon(Hire.Date) - 9/12 + 1) %/% 1) %>%
  mutate(month = month(Hire.Date)) %>%
  mutate(quarter = case_when(
    month > 9 ~ 1,   # Oct-Dec
    month > 6 ~ 4,   # Jul-Sep
    month > 3 ~ 3,   # Apr-Jun
    month >= 1 ~ 2   # Jan-Mar (was mislabelled as quarter 3); NA months stay NA
  )) %>%
  # The output column keeps its original (misspelled) name so that code already
  # referring to `fiscal_qurater` keeps working.
  mutate(fiscal_qurater = str_c(as.character(fiscal_year), ".", as.character(quarter)))

之后如果要用这一列绘图,可能需要先把会计季度列转换为 factor。

答案 2 :(得分:1)

可以这样看待这个问题:在每个雇用日期把在职人数加 1,在每个终止日期把在职人数减 1,然后求累计和。

library(tidyverse)
library(lubridate)

# Parse the month/day/year text in both date columns into Date values
# (lubridate::mdy).
df <- mutate(
  df,
  Hire.Date = mdy(Hire.Date),
  Termination.Date = mdy(Termination.Date)
)

# Arrival events: one row per employee, stamped with the first day of the
# quarter in which they were hired and counted as +1.
arrive <- df %>%
  transmute(
    time = floor_date(Hire.Date, 'quarter'),
    type = 1
  )

# Departure events: one row per employee, stamped with the first day of the
# quarter in which they were terminated and counted as -1.
depart <- df %>%
  transmute(
    time = floor_date(Termination.Date, 'quarter'),
    type = -1
  )

# Stack arrivals and departures, net them per quarter, then take the running
# total to get the headcount.
# Without the per-quarter aggregation every hire/termination keeps its own row,
# so a quarter with several events appears several times with partial running
# counts (and is duplicated again by any later join on `time`).
# NOTE: a departure is applied in the termination quarter itself, so an
# employee is no longer counted in the quarter in which they leave.
arrive_depart <- arrive %>%
  bind_rows(depart) %>%
  group_by(time) %>%
  summarise(type = sum(type)) %>%  # net change per quarter, one row each
  arrange(time) %>%
  mutate(volume = cumsum(type))

# Build the complete run of quarters between the first and last event, so that
# quarters without any hire or termination still get a row.
start_time <- min(arrive_depart$time)
end_time <- max(arrive_depart$time)
full_time <- tibble(
  time = seq(from = start_time, to = end_time, by = 'quarter')
)

# Attach the known headcounts and carry the last known value forward into the
# quarters that had no events.
arrive_depart <- left_join(full_time, arrive_depart, by = 'time') %>%
  fill(volume)