在R中按间隔分割日期

时间:2014-11-01 03:24:34

标签: r split break cut as.date

我有一个日期专栏:

   Date
 6/1/2010
 6/1/2010
 6/1/2010  
 9/1/2010 
 9/1/2010  
 9/1/2010  
12/1/2010   
12/1/2010 
12/1/2010 
 3/1/2011   
 3/1/2011 
 3/1/2011 
 6/1/2011   
 6/1/2011 
 6/1/2011  
 9/1/2011  
 9/1/2011  
 9/1/2011  
12/1/2011 
12/1/2011 
12/1/2011 
 3/1/2012    
 3/1/2012  
 3/1/2012  

如何按财政年度 - 从6月到3月分割这些日期。 2010年6月至2011年3月 - 2010财年;从2011年6月到2012年3月 - 2011财年。并为每一行添加财政年度的名称。

我能用功能切割吗?这样的事情:

cut(Date, c(6/1/2010,3/1/2011,6/1/2011,3/1/2012,
            6/1/2012,3/1/2013,6/1/2013,3/1/2014))

3 个答案:

答案 0 :(得分:2)

你可以尝试

f1 <- function(dat){
    date <- as.Date(dat$Date, "%m/%d/%Y")
    y1 <- as.numeric(format(date, '%Y'))
    m1  <- as.numeric(format(date, '%m'))
    indx <- y1 - min(y1) + (m1!=3)
    dat$Fiscal <- indx
    dat$Fiscal[!indx] <- paste('FY', min(y1-1))
    dat$Fiscal[!!indx] <- paste('FY', sort(unique(y1))[indx])  
    dat1 <- dat[order(y1,m1),]
    row.names(dat1) <- NULL
    dat1
   }

res1 <- f1(df)

res1
#       Date  Fiscal
#1   6/1/2010 FY 2010
#2   6/1/2010 FY 2010
#3   6/1/2010 FY 2010
#4   9/1/2010 FY 2010
#5   9/1/2010 FY 2010
#6   9/1/2010 FY 2010
#7  12/1/2010 FY 2010
#8  12/1/2010 FY 2010
#9  12/1/2010 FY 2010
#10  3/1/2011 FY 2010
#11  3/1/2011 FY 2010
#12  3/1/2011 FY 2010
#13  6/1/2011 FY 2011
#14  6/1/2011 FY 2011
#15  6/1/2011 FY 2011
#16  9/1/2011 FY 2011
#17  9/1/2011 FY 2011
#18  9/1/2011 FY 2011
#19 12/1/2011 FY 2011
#20 12/1/2011 FY 2011
#21 12/1/2011 FY 2011
#22  3/1/2012 FY 2011
#23  3/1/2012 FY 2011
#24  3/1/2012 FY 2011

更新

上述方法也适用于无序数据集

  res2 <- f1(dfN)
  headres2 <-do.call(rbind,lapply(split(res2,res2$Fiscal),head,2))
  row.names(headres2) <- NULL
  headres2
  #       Date  Fiscal
  #1  2009-03-01 FY 2008
  #2  2009-03-01 FY 2008
  #3  2009-09-01 FY 2009
  #4  2009-09-01 FY 2009
  #5  2010-06-01 FY 2010
  #6  2010-09-01 FY 2010
  #7  2011-06-01 FY 2011
  #8  2011-06-01 FY 2011
  #9  2012-09-01 FY 2012
  #10 2012-12-01 FY 2012
  #11 2013-06-01 FY 2013
  #12 2013-06-01 FY 2013
  #13 2014-09-01 FY 2014
  #14 2014-09-01 FY 2014

数据

 df <-   structure(list(Date = c("6/1/2010", "6/1/2010", "6/1/2010", "9/1/2010", 
 "9/1/2010", "9/1/2010", "12/1/2010", "12/1/2010", "12/1/2010", 
 "3/1/2011", "3/1/2011", "3/1/2011", "6/1/2011", "6/1/2011", "6/1/2011", 
 "9/1/2011", "9/1/2011", "9/1/2011", "12/1/2011", "12/1/2011", 
 "12/1/2011", "3/1/2012", "3/1/2012", "3/1/2012")), .Names = "Date", class =
 "data.frame", row.names = c(NA, -24L))

  set.seed(42)
  dfN <- data.frame(Date = sample(seq(as.Date('2009-03-01'), by='3 month',
          length.out=25), 50, replace=TRUE))

答案 1 :(得分:2)

这是我之前学到的东西。

library(dplyr)
years <- 2010:2012

foo %>%
    mutate(fiscal = cut(as.Date(Date, format = "%m/%d/%Y"),
                        breaks = as.Date(paste(years, "-06-01", sep="")),
                        labels = paste("FY", years[-length(years)],sep=" ")))

#        Date  fiscal
#1   6/1/2010 FY 2010
#2   6/1/2010 FY 2010
#3   6/1/2010 FY 2010
#4   9/1/2010 FY 2010
#5   9/1/2010 FY 2010
#6   9/1/2010 FY 2010
#7  12/1/2010 FY 2010
#8  12/1/2010 FY 2010
#9  12/1/2010 FY 2010
#10  3/1/2011 FY 2010
#11  3/1/2011 FY 2010
#12  3/1/2011 FY 2010
#13  6/1/2011 FY 2011
#14  6/1/2011 FY 2011
#15  6/1/2011 FY 2011
#16  9/1/2011 FY 2011
#17  9/1/2011 FY 2011
#18  9/1/2011 FY 2011
#19 12/1/2011 FY 2011
#20 12/1/2011 FY 2011
#21 12/1/2011 FY 2011
#22  3/1/2012 FY 2011
#23  3/1/2012 FY 2011
#24  3/1/2012 FY 2011

答案 2 :(得分:1)

尝试:

> dd = data.frame(t(sapply(strsplit(as.character(ddf$Date), '/'), c)))
> dd = data.frame(sapply(dd, function(x) as.numeric(as.character(x))))
> names(dd) = c('month','date','year')
> dd$fiscal=1
> for(i in 2:nrow(dd))  dd$fiscal[i] = with(dd,ifelse(month[i]==6 & month[i-1]==3, fiscal[i-1]+1, fiscal[i-1]))
> dd
   month date year fiscal
1      6    1 2010      1
2      6    1 2010      1
3      6    1 2010      1
4      9    1 2010      1
5      9    1 2010      1
6      9    1 2010      1
7     12    1 2010      1
8     12    1 2010      1
9     12    1 2010      1
10     3    1 2011      1
11     3    1 2011      1
12     3    1 2011      1
13     6    1 2011      2
14     6    1 2011      2
15     6    1 2011      2
16     9    1 2011      2
17     9    1 2011      2
18     9    1 2011      2
19    12    1 2011      2
20    12    1 2011      2
21    12    1 2011      2
22     3    1 2012      2
23     3    1 2012      2
24     3    1 2012      2

数据:

ddf = structure(list(Date = structure(c(5L, 5L, 5L, 7L, 7L, 7L, 1L, 
1L, 1L, 3L, 3L, 3L, 6L, 6L, 6L, 8L, 8L, 8L, 2L, 2L, 2L, 4L, 4L, 
4L), .Label = c("12/1/2010", "12/1/2011", "3/1/2011", "3/1/2012", 
"6/1/2010", "6/1/2011", "9/1/2010", "9/1/2011"), class = "factor")), .Names = "Date", class = "data.frame", row.names = c(NA, 
-24L))

ddf
        Date
1   6/1/2010
2   6/1/2010
3   6/1/2010
4   9/1/2010
5   9/1/2010
6   9/1/2010
7  12/1/2010
8  12/1/2010
9  12/1/2010
10  3/1/2011
11  3/1/2011
12  3/1/2011
13  6/1/2011
14  6/1/2011
15  6/1/2011
16  9/1/2011
17  9/1/2011
18  9/1/2011
19 12/1/2011
20 12/1/2011
21 12/1/2011
22  3/1/2012
23  3/1/2012
24  3/1/2012

两者都可以绑定在一起,也可以添加四分之一:

dd2= cbind(ddf, dd)
dd2$quarter = dd2$month/3 -1
dd2$quarter = with(dd2, ifelse(quarter==0, 4, quarter))
dd2
        Date month date year fiscal quarter
1   6/1/2010     6    1 2010      1       1
2   6/1/2010     6    1 2010      1       1
3   6/1/2010     6    1 2010      1       1
4   9/1/2010     9    1 2010      1       2
5   9/1/2010     9    1 2010      1       2
6   9/1/2010     9    1 2010      1       2
7  12/1/2010    12    1 2010      1       3
8  12/1/2010    12    1 2010      1       3
9  12/1/2010    12    1 2010      1       3
10  3/1/2011     3    1 2011      1       4
11  3/1/2011     3    1 2011      1       4
12  3/1/2011     3    1 2011      1       4
13  6/1/2011     6    1 2011      2       1
14  6/1/2011     6    1 2011      2       1
15  6/1/2011     6    1 2011      2       1
16  9/1/2011     9    1 2011      2       2
17  9/1/2011     9    1 2011      2       2
18  9/1/2011     9    1 2011      2       2
19 12/1/2011    12    1 2011      2       3
20 12/1/2011    12    1 2011      2       3
21 12/1/2011    12    1 2011      2       3
22  3/1/2012     3    1 2012      2       4
23  3/1/2012     3    1 2012      2       4
24  3/1/2012     3    1 2012      2       4