我有两个 data.table 对象:
entext.new
和 transit.service1
(下面以数据框的形式贴出)。
> dput(data.frame(entext.new))
structure(list(person = c(1701L, 1701L), vehicle = c("tr_71_3",
"tr_5_7"), atstop = c(108557L, 108536L), time1 = c(31931, 32560
), acttype1 = structure(c(1L, 1L), .Label = c("PersonEntersVehicle",
"PersonLeavesVehicle", "waitingForPt"), class = "factor"), person2 = c(1701L,
1701L), vehicle2 = c("tr_71_3", "tr_5_7"), deststop = c(108558L,
100905L), time2 = c(31998, 32620), acttype2 = structure(c(2L,
2L), .Label = c("PersonEntersVehicle", "PersonLeavesVehicle",
"waitingForPt"), class = "factor")), .Names = c("person", "vehicle",
"atstop", "time1", "acttype1", "person2", "vehicle2", "deststop",
"time2", "acttype2"), row.names = c(NA, -2L), class = "data.frame")
> dput(data.frame(entext.new))
structure(list(person = c(1701L, 1701L), vehicle = c("tr_71_3",
"tr_5_7"), atstop = c(108557L, 108536L), time1 = c(31931, 32560
), acttype1 = structure(c(1L, 1L), .Label = c("PersonEntersVehicle",
"PersonLeavesVehicle", "waitingForPt"), class = "factor"), person2 = c(1701L,
1701L), vehicle2 = c("tr_71_3", "tr_5_7"), deststop = c(108558L,
100905L), time2 = c(31998, 32620), acttype2 = structure(c(2L,
2L), .Label = c("PersonEntersVehicle", "PersonLeavesVehicle",
"waitingForPt"), class = "factor")), .Names = c("person", "vehicle",
"atstop", "time1", "acttype1", "person2", "vehicle2", "deststop",
"time2", "acttype2"), row.names = c(NA, -2L), class = "data.frame")
> dput(data.frame(transit.service1))
structure(list(id = c(725531L, 725532L, 726871L, 728273L, 728274L,
728825L, 728826L, 729489L, 729490L, 730106L, 730109L, 730315L,
730316L, 732297L, 732298L, 734989L, 734990L, 735945L, 735948L,
736878L, 736879L, 737807L, 737808L, 737834L, 737835L, 738292L,
738293L, 738314L, 738315L, 739275L, 739276L, 740407L, 740408L,
741248L, 741249L, 700159L, 700160L, 700244L, 700245L, 700292L,
700490L, 700526L, 700527L, 702052L, 702053L, 702725L, 702726L,
702812L, 702815L, 702872L, 702991L), vehicle = c("tr_5_7", "tr_5_7",
"tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7",
"tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7",
"tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7",
"tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7",
"tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_5_7", "tr_71_3",
"tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3",
"tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3", "tr_71_3",
"tr_71_3", "tr_71_3", "tr_71_3"), time = c(32542, 32542, 32563,
32584, 32584, 32594, 32594, 32604, 32604, 32613, 32613, 32617,
32617, 32648, 32648, 32691, 32691, 32706, 32706, 32721, 32721,
32736, 32736, 32737, 32737, 32744, 32744, 32745, 32745, 32760,
32760, 32778, 32778, 32793, 32793, 31927, 31927, 31929, 31929,
31930, 31935, 31936, 31936, 31977, 31977, 31994, 31994, 31996,
31996, 31997, 32000), link = c(200016105L, NA, NA, 200016105L,
61056124L, 61056124L, 61246144L, 61246144L, 61446158L, NA, NA,
61446158L, 61589049L, 61589049L, 90496198L, 90496198L, 61986249L,
NA, NA, 61986249L, 62496295L, NA, NA, 62496295L, 62956316L, NA,
NA, 62956316L, 63166350L, NA, NA, 63166350L, 63506404L, 63506404L,
64046472L, 61176131L, 613120013L, 613120013L, 200136131L, NA,
NA, 200136131L, 61316194L, 61316194L, 61946230L, 61946230L, 623020014L,
623020014L, 200146230L, NA, NA), facility = c(NA, 108536L, 108536L,
NA, NA, NA, NA, NA, NA, 100905L, 100905L, NA, NA, NA, NA, NA,
NA, 100979L, 100979L, NA, NA, 101017L, 101017L, NA, NA, 101075L,
101075L, NA, NA, 101098L, 101098L, NA, NA, NA, NA, NA, NA, NA,
NA, 108557L, 108557L, NA, NA, NA, NA, NA, NA, NA, NA, 108558L,
108558L), acttype = structure(c(3L, 4L, 5L, 2L, 1L, 2L, 1L, 2L,
1L, 4L, 5L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 5L, 2L, 1L, 4L, 5L, 2L,
1L, 4L, 5L, 2L, 1L, 4L, 5L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L,
5L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 5L), .Label = c("entered link",
"left link", "vehicle enters traffic", "VehicleArrivesAtFacility",
"VehicleDepartsAtFacility"), class = "factor")), .Names = c("id",
"vehicle", "time", "link", "facility", "acttype"), row.names = c(NA,
-51L), class = "data.frame")
我需要根据 entext.new
中每一行给出的条件(车辆和时间区间),对 transit.service1
取子集。为此,我写了一个小函数,如下所示。但是,这个函数运行得很慢。
# Pick the rows of the global `transit.service1` table that belong to one
# vehicle and whose `time` lies in the closed interval [z, k], then prepend
# `x` to them as an extra column.
#
# x: value bound as the first column of the result (the caller passes the
#    person id).
# y: vehicle id compared against the `vehicle` column.
# z: lower bound on `time` (inclusive).
# k: upper bound on `time` (inclusive).
#
# Returns a list of length one whose only element is the combined table.
func_subset1 <- function(x, y, z, k) {
  # Bare column names inside `[` rely on transit.service1 being a data.table,
  # as the surrounding text states.
  matching_rows <- transit.service1[vehicle == y & time >= z & time <= k]
  list(cbind(x, matching_rows))
}
# Call func_subset1 once per row of entext.new, feeding it that row's person,
# vehicle and the [time1, time2] window. Each call returns a one-element list
# wrapping the matching rows of transit.service1.
list1 <- with(
  entext.new,
  mapply(func_subset1, person, vehicle, time1, time2)
)
# Stack the per-row results into a single table.
df.final <- do.call(rbind.data.frame, list1)
我尝试像下面这样对代码做了性能分析(虽然我并不完全理解输出的含义):
> summaryRprof(tmp)
$by.self
self.time self.pct total.time total.pct
"cat" 0.02 100 0.02 100
$by.total
total.time total.pct self.time self.pct
"cat" 0.02 100 0.02 100
".rs.valueContents" 0.02 100 0.00 0
".rs.valueFromStr" 0.02 100 0.00 0
".rs.withTimeLimit" 0.02 100 0.00 0
"<Anonymous>" 0.02 100 0.00 0
"capture.output" 0.02 100 0.00 0
"do.call" 0.02 100 0.00 0
"doTryCatch" 0.02 100 0.00 0
"eval" 0.02 100 0.00 0
"evalVis" 0.02 100 0.00 0
"NextMethod" 0.02 100 0.00 0
"str" 0.02 100 0.00 0
"str.data.frame" 0.02 100 0.00 0
"str.default" 0.02 100 0.00 0
"strSub" 0.02 100 0.00 0
"try" 0.02 100 0.00 0
"tryCatch" 0.02 100 0.00 0
"tryCatchList" 0.02 100 0.00 0
"tryCatchOne" 0.02 100 0.00 0
"withVisible" 0.02 100 0.00 0
$sample.interval
[1] 0.02
$sampling.time
[1] 0.02
您对加速这个取子集的函数有什么建议吗?