考虑以下data.frame
:
data_frame = structure(list(AGREEDTIME = structure(c(1451785888.76968, 1451785945.59156,
1451786041.22135, 1451786089.17713, 1451786671.55922, 1451786853.52841,
1451787231.03475, 1451787641.43011, 1451787999.77345, 1451788571.08314,
1451788695.76539, 1451788769.29787, 1451788891.90181, 1451789206.47645,
1451789653.27497, 1451789740.52194, 1451789875.7906, 1451789971.7024,
1451790030.94949, 1451790681.31701), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), EA = c(2383, 1064, 848, 68, 2665, 277, 175,
2761, 773, 426, 1164, 600, 413, 371, 733, 259, 976, 297, 1973,
1022), FW = structure(c(2L, 2L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 3L,
1L, 3L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 1L), .Label = c("NZNR", "WZNI",
"HSCW"), class = "factor"), CP = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "OZU/NQV", class = "factor")), .Names = c("AGREEDTIME",
"EA", "FW", "CP"), row.names = c(NA, -20L), class = "data.frame")
或以更易读的形式:
AGREEDTIME EA FW CP
1 2016-01-03 01:51:28 2383 WZNI OZU/NQV
2 2016-01-03 01:52:25 1064 WZNI OZU/NQV
3 2016-01-03 01:54:01 848 NZNR OZU/NQV
4 2016-01-03 01:54:49 68 NZNR OZU/NQV
5 2016-01-03 02:04:31 2665 HSCW OZU/NQV
6 2016-01-03 02:07:33 277 NZNR OZU/NQV
7 2016-01-03 02:13:51 175 WZNI OZU/NQV
8 2016-01-03 02:20:41 2761 WZNI OZU/NQV
9 2016-01-03 02:26:39 773 WZNI OZU/NQV
10 2016-01-03 02:36:11 426 HSCW OZU/NQV
11 2016-01-03 02:38:15 1164 NZNR OZU/NQV
12 2016-01-03 02:39:29 600 HSCW OZU/NQV
13 2016-01-03 02:41:31 413 HSCW OZU/NQV
14 2016-01-03 02:46:46 371 NZNR OZU/NQV
15 2016-01-03 02:54:13 733 NZNR OZU/NQV
16 2016-01-03 02:55:40 259 HSCW OZU/NQV
17 2016-01-03 02:57:55 976 WZNI OZU/NQV
18 2016-01-03 02:59:31 297 HSCW OZU/NQV
19 2016-01-03 03:00:30 1973 HSCW OZU/NQV
20 2016-01-03 03:11:21 1022 NZNR OZU/NQV
现在,我的目标是,对于这个矩阵的每一行,
对满足以下所有行的EA值求和:
使用天真的R代码,实际结果应为:
slow_function <- function(ind, data_frame){
index = data_frame[ind, ]
index_set = which(data_frame$AGREEDTIME > index$AGREEDTIME - 3600 &
data_frame$AGREEDTIME < index$AGREEDTIME - 1800 &
data_frame$FW == index$FW &
data_frame$CP == index$CP)
sum(data_frame$EA[index_set])
}
data_frame_results = data.frame(data_frame, results = sapply(1:nrow(data_frame), slow_function, data_frame = data_frame))
现在,我正在尝试使用data.table
来获得相同的结果。
我的第一个尝试是:
data_table <- data.table(data_frame)
setkey(data_table, FW, CP, AGREEDTIME)
less_slow_function <- function(ind, data_table){
ATm = data_table$AGREEDTIME[ind] - 3600
ATM = data_table$AGREEDTIME[ind] - 1800
fw = data_table$FW[ind]
cp = data_table$CP[ind]
as.numeric(data_table[J(fw, cp)][AGREEDTIME > ATm & AGREEDTIME < ATM, 'EA'][,lapply(.SD, sum)])
}
data_table_results = data.table(data_table, results = sapply(1:nrow(data_table), less_slow_function, data_table = data_table))
给出了所需的数字。我的问题是:使用data.table
魔法获得相同结果的方法是否比我天真的解决方案更好?