计算与上月末的差值

时间:2012-12-19 14:01:54

标签: r data.table

每天(TRADEDATE)我都有一条由BOOK和COMMODITY唯一标识的记录。每条记录都有一个每日变化的现值(PV)。我想新增一列,表示当日PV与上个月最后一个日历日的PV之差。我已经用循环解决了这个问题,但想知道是否有人能给出更优雅(也更快)的解决方案:

library(data.table)

bwTab
COMMODITY       BOOK   TRADEDATE PV Desired Column
   1:      CASH HS_OPT_GEN 2012-09-30 66669.68  NA
   2:      CASH HS_OPT_GEN 2012-10-01 76333.83  9664.15
   3:      CASH HS_OPT_GEN 2012-10-02 76333.83  9664.15
   4:      CASH HS_OPT_GEN 2012-10-03 76333.83  9664.15
   5:      CASH HS_OPT_GEN 2012-10-04 76333.83  9664.15
  ---     
3050:       OIL HO_OIL_FIN 2012-09-30 21330.55  NA
  ---     
3066:       OIL HO_OIL_FIN 2012-10-26 42661.28  21330.73
3067:       OIL HO_OIL_FIN 2012-10-27 21330.69  0.14
3068:       OIL HO_OIL_FIN 2012-10-28 21330.68  0.13
3069:       OIL HO_OIL_FIN 2012-10-29 21330.78  0.23

# Here is my solution

# Last calendar day of the month preceding `date` ("previous month end").
#
# date: a Date, POSIXct/POSIXlt or date-like character vector (vectorised).
# Returns a Date vector of the same length as `date`.
pme <- function(date) {
  # Build the first day of date's own month, then step back one day.
  # as.POSIXlt() keeps a POSIXct's own time zone when picking the month,
  # and only base R is needed (no month()/year() helpers from a package).
  first_of_month <- format(as.POSIXlt(date), "%Y-%m-01")
  as.Date(first_of_month) - 1
}

# Difference between a row's PV and the PV of the same COMMODITY/BOOK entry
# on the last calendar day of the previous month.
#
# a: a one-row data.table taken from bwTab (needs the columns COMMODITY,
#    BOOK, TRADEDATE and PV).
# Returns a[, PV] minus the month-end PV, or NA when bwTab holds no entry for
# that COMMODITY/BOOK on the previous month end.
# Reads the global table bwTab and uses pme() for the month-end date.
difftopme <- function(a) {
  # The date column is TRADEDATE throughout; the existence check used to read
  # a different column name (STICHTAG) than the subtraction did.
  prev_month_end <- pme(a[, TRADEDATE])

  # Look the month-end PV up once and reuse it for both the existence check
  # and the subtraction.
  prev_pv <- bwTab[
    COMMODITY == a[, COMMODITY] & BOOK == a[, BOOK] & TRADEDATE == prev_month_end,
    PV
  ]

  if (length(prev_pv) == 0) {
    NA
  } else {
    a[, PV] - prev_pv
  }
}

# Compute the difference to the previous month end for every row of bwTab.
# The result is built in row order, so diffPVme[i] belongs to bwTab[i, ]
# (prepending each new value would return the vector reversed).
# seq_len() copes with an empty table, and vapply() avoids regrowing the
# vector on every iteration; it also stops with an error if a row yields
# anything other than a single value (e.g. duplicate month-end entries).
diffPVme <- vapply(
  seq_len(nrow(bwTab)),
  function(i) difftopme(bwTab[i, ]),
  numeric(1)
)


#########################
dput(bwTab[1000:1010,])
structure(list(COMMODITY = c("ELEC", "ELEC", "ELEC", "ELEC", 
"ELEC", "ELEC", "ELEC", "ELEC", "ELEC", "ELEC", "ELEC"), BOOK = c("HS_OUK_MKT", 
"HS_OUK_MKT", "HS_OUK_MKT", "HS_OUK_MKT", "HS_OUV_EVO", "HS_OUV_EVO", 
"HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO"
), STICHTAG = structure(c(1353798000, 1353970800, 1354057200, 
1354143600, 1348956000, 1349042400, 1349128800, 1349215200, 1349301600, 
1349388000, 1349474400), class = c("POSIXct", "POSIXt"), tzone = ""), 
    BROKERAGE = c(123406.66, 61791.17, 62229.17, 62492.57, 0, 
    0, 0, 0, 0, 0, 0), DV = c(72873524.86, 38096138.75, 38283589.07, 
    38236199.05, 23171721.81, 23178889.59, 23187553.93, 23187426.98, 
    23173154.67, 23149439.13, 23149469.88), REALIZED = c(47002372.1, 
    23501186.05, 23501186.05, 23501186.05, 22961528, 22961528, 
    22961528, 22961528, 22961528, 22961528, 22961528), PV = c(25871152.76, 
    14594952.7, 14782403.02, 14735013, 210193.81, 217361.59, 
    226025.93, 225898.98, 211626.67, 187911.13, 187941.88), PV_ND = c(25973196.64, 
    14654807.46, 14843080.44, 14795220.35, 210222.01, 217386.44, 
    226048.76, 225920.76, 211641.41, 187919.95, 187949.85), BROKER_R = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CREDIT_R = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), STRUCTURE_R = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), BROKER_UR_D = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ), CREDIT_UR_D = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), STRUCTURE_UR_D = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BROKER_UN_UND = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), CREDIT_UN_UND = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), STRUCTURE_UN_UND = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0)), .Names = c("COMMODITY", "BOOK", "STICHTAG", 
"BROKERAGE", "DV", "REALIZED", "PV", "PV_ND", "BROKER_R", "CREDIT_R", 
"STRUCTURE_R", "BROKER_UR_D", "CREDIT_UR_D", "STRUCTURE_UR_D", 
"BROKER_UN_UND", "CREDIT_UN_UND", "STRUCTURE_UN_UND"), sorted = c("COMMODITY", 
"BOOK", "STICHTAG"), class = c("data.table", "data.frame"), row.names = c(NA, 
-11L), .internal.selfref = <pointer: 0x014024a0>)

1 个答案:

答案 0 :(得分:2)

# the zoo library has a year-month class,
# which makes it easy to find the month's end
library(zoo)

# Example data: the first eight rows of mtcars stand in for the real table.
x <- head(mtcars, 8)

# Attach an arbitrary set of trade dates (kept as character strings here).
x[["TRADEDATE"]] <- c(
  "2012-10-31", "2012-11-17", "2012-11-30", "2012-12-15",
  "2012-12-13", "2012-12-15", "2012-08-31", "2012-09-22"
)

# For every trade date, find the last day of the previous month:
# convert to a zoo year-month, step back one month (1/12 of a year), then
# convert back to a Date. frac = 1 asks for the end of that month
# (frac = 0 would give its first day).
month.ends <- as.Date(as.yearmon(x$TRADEDATE) - 1/12, frac = 1)

# For each record, find the row whose TRADEDATE equals that record's
# previous-month-end date.
# match() returns, for every element of `month.ends`, the index of the first
# row of x with that date, or NA when no row has it. The result is an integer
# vector with one entry per row of x, and it keeps working when the same
# month-end date appears in more than one row.
month.end.rows <- match(month.ends, as.Date(x$TRADEDATE))

# Sanity check: there must be exactly one month-end row index per row of x.
stopifnot(nrow(x) == length(month.end.rows))

# Subtract the value found on the previous month end from each row's own
# value; rows without a matching month-end row (NA index) come out as NA.
x$desired.column <- x$carb - x$carb[month.end.rows]