data.table

时间:2016-02-26 20:14:54

标签: r data.table

我想在 data.table 中按月份将对应的行相减。这是示例表:

   monthly_date sector_order Retail Sales Trend Sales
1:   2014-12-01            1     42123.87    42279.64
2:   2015-11-01            1     44181.69    43620.22
3:   2015-12-01            1     43207.97    43605.21
4:   2014-12-01           30     14972.60    15025.74
5:   2015-11-01           30     15969.98    15685.36
6:   2015-12-01           30     15478.42    15675.09

是否有一种优雅的方式,得到一个 3 行的表,即从 sector_order == 1 的行中减去 sector_order == 30 的行?我显然可以用两个数据框来硬算。是否有更通用的 data.table 方式?

1 个答案:

答案 0 :(得分:1)

这是一个选项

library(data.table) 
# For each month, subtract the sector 30 figures from the sector 1 figures.
# Rows are selected by their sector_order value rather than by position
# ([1L] / [.N]), so the result does not depend on how the table is sorted
# or on each month holding exactly two rows. A month that lacks either
# sector yields zero-length columns and therefore contributes no row.
data[, .(RetailSales = RetailSales[sector_order == 1L] -
                       RetailSales[sector_order == 30L],
         TrendSales  = TrendSales[sector_order == 1L] -
                       TrendSales[sector_order == 30L]),
     by = monthly_date]

#   monthly_date RetailSales TrendSales
#1:   2014-12-01    27151.27   27253.90
#2:   2015-11-01    28211.71   27934.86
#3:   2015-12-01    27729.55   27930.12

或@MichaelChirico提出了更优雅的解决方案

# Sort by descending sector_order so that, within a month, the sector 30 row
# comes before the sector 1 row; diff() then yields
# (sector 1 value) - (sector 30 value) for each column.
data[
  order(sector_order, decreasing = TRUE),
  .(
    RetailSales = diff(RetailSales),
    TrendSales  = diff(TrendSales)
  ),
  by = monthly_date
]

或者@Frank建议

# Same subtraction expressed on .SD: sort by descending sector_order, then
# take the second row minus the first row of the selected columns per month.
data[
  order(-sector_order),
  .SD[2L] - .SD[1L],  # lapply(.SD, diff) also works here
  by = monthly_date,
  .SDcols = c("RetailSales", "TrendSales")
]

数据

# Sample data: three months for each of two sectors (1 and 30).
# monthly_date is kept as a factor and grp as an integer month index,
# matching the original dput() output.
data <- data.table(
  monthly_date = factor(
    rep(c("2014-12-01", "2015-11-01", "2015-12-01"), times = 2L)
  ),
  sector_order = rep(c(1L, 30L), each = 3L),
  RetailSales = c(42123.87, 44181.69, 43207.97,
                  14972.6, 15969.98, 15478.42),
  TrendSales = c(42279.64, 43620.22, 43605.21,
                 15025.74, 15685.36, 15675.09),
  grp = rep(1:3, times = 2L)
)