我正在阅读从Elastic到R的一些记录,结果却变得非常缓慢。
背景 - 数据适用于房地产市场。我正在阅读的表是属性视图(即当有人点击某个属性以查看详细信息时)。我需要捕获属性ID并查看日期和时间。分析的时间。
这就是我在做的事情:
将数据从Elastic提取到列表中
query <- sprintf('{"query":{"range":{"time":{"gte":"%s","lte":"%s"}}}}',start_date,end_date)
view_list <- elastic::Search(index = "organised",type = "PROPERTY_VIEW",size = 10000000,body=query)$hits$hits
将列表提取到数据框中的字段
number_of_views <- length(view_list)
a <- 1
view_data <- data.frame(view_date=as.Date(character()),
propertyId=character())
view_date = vector(mode = "character",length = 0)
propertyId = vector(mode = "character",length = 0)
while (a <= number_of_views) {
view_date[a] <- as.Date(as.POSIXct(view_list[[a]][[6]]$time/1000, origin="1970-01-01"))
propertyId <- as.character(view_list[[a]][[6]]$propertyId)
a <- a+1
}
view_list_df <- data.frame(trackId,userId,viewId,view_date,requestId,propertyId)
速度 - 读取1周的数据(500k记录)需要7-8分钟。这太慢了。
列出样本:
> head(dput(view_list[1:10]))
list(structure(list(`_index` = "organised", `_type` = "PROPERTY_VIEW",
`_id` = "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "8e370fe75121cda0cccda2f1934c7051", userId = "ff8081814ea04efe014ea6843b7f3d13",
id = "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9",
time = 1437351878754, requestId = "1437351878754", propertyId = "ff8081814e5cc5af014e5ce7dff202a9"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "6b2eaf11c2e4ba6be7c3bd109d8905aa", userId = NULL,
id = "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78",
time = 1437351694070, requestId = "1437351694070", propertyId = "ff8081814d5bcfaa014d62b622352d78"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "84879c86112f53f1124a1f9cb83d6b37", userId = NULL,
id = "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b",
time = 1437351931929, requestId = "1437351931929", propertyId = "ff8081814d28f714014d31eb92a2210b"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "84879c86112f53f1124a1f9cb83d6b37", userId = NULL,
id = "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571",
time = 1437351964188, requestId = "1437351964188", propertyId = "ff8081814ca1cc06014ca2b9823c0571"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "38212cb8dbd60c10d356fe30257932b4", userId = NULL,
id = "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161",
time = 1437353794879, requestId = "1437353794879", propertyId = "ff8081814d42cfff014d473b7f071161"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "91bbfe57428a47ce2233da1b5517b9a1", userId = "ff8081814ea04efe014ea6843b7f3d13",
id = "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535",
time = 1437353798036, requestId = "1437353798036", propertyId = "ff8081814d891540014d8eac50142535"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "830f0fe6d53c58938d876175ceca357c:ff8081814e71b801014e72a340300b74",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "830f0fe6d53c58938d876175ceca357c", userId = NULL,
id = "830f0fe6d53c58938d876175ceca357c:ff8081814e71b801014e72a340300b74",
time = 1437355401546, requestId = "1437355401546", propertyId = "ff8081814e71b801014e72a340300b74"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "88200b3e1bc22cb4b270f89810cd8f32:ff8081814db00698014db29fb80d0614",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "88200b3e1bc22cb4b270f89810cd8f32", userId = NULL,
id = "88200b3e1bc22cb4b270f89810cd8f32:ff8081814db00698014db29fb80d0614",
time = 1437355324426, requestId = "1437355324426", propertyId = "ff8081814db00698014db29fb80d0614"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "2e9afe62406bedbf90d14fd22a6296de:ff80808149f1ccdc0149f4dd916500e2",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "2e9afe62406bedbf90d14fd22a6296de", userId = NULL,
id = "2e9afe62406bedbf90d14fd22a6296de:ff80808149f1ccdc0149f4dd916500e2",
time = 1437355340320, requestId = "1437355340320", propertyId = "ff80808149f1ccdc0149f4dd916500e2"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")), structure(list(
`_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "357c4a9917b47635a6dba600805861f4:ff80808149e225cd0149e2ba48d5009c",
`_version` = 1L, `_score` = 1, `_source` = structure(list(
trackId = "357c4a9917b47635a6dba600805861f4", userId = NULL,
id = "357c4a9917b47635a6dba600805861f4:ff80808149e225cd0149e2ba48d5009c",
time = 1437355340832, requestId = "1437355340832", propertyId = "ff80808149e225cd0149e2ba48d5009c"), .Names = c("trackId",
"userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index",
"_type", "_id", "_version", "_score", "_source")))
[[1]]
[[1]]$`_index`
[1] "organised"
[[1]]$`_type`
[1] "PROPERTY_VIEW"
[[1]]$`_id`
[1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9"
[[1]]$`_version`
[1] 1
[[1]]$`_score`
[1] 1
[[1]]$`_source`
[[1]]$`_source`$trackId
[1] "8e370fe75121cda0cccda2f1934c7051"
[[1]]$`_source`$userId
[1] "ff8081814ea04efe014ea6843b7f3d13"
[[1]]$`_source`$id
[1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9"
[[1]]$`_source`$time
[1] 1437351878754
[[1]]$`_source`$requestId
[1] "1437351878754"
[[1]]$`_source`$propertyId
[1] "ff8081814e5cc5af014e5ce7dff202a9"
[[2]]
[[2]]$`_index`
[1] "organised"
[[2]]$`_type`
[1] "PROPERTY_VIEW"
[[2]]$`_id`
[1] "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78"
[[2]]$`_version`
[1] 1
[[2]]$`_score`
[1] 1
[[2]]$`_source`
[[2]]$`_source`$trackId
[1] "6b2eaf11c2e4ba6be7c3bd109d8905aa"
[[2]]$`_source`$userId
NULL
[[2]]$`_source`$id
[1] "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78"
[[2]]$`_source`$time
[1] 1437351694070
[[2]]$`_source`$requestId
[1] "1437351694070"
[[2]]$`_source`$propertyId
[1] "ff8081814d5bcfaa014d62b622352d78"
[[3]]
[[3]]$`_index`
[1] "organised"
[[3]]$`_type`
[1] "PROPERTY_VIEW"
[[3]]$`_id`
[1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b"
[[3]]$`_version`
[1] 1
[[3]]$`_score`
[1] 1
[[3]]$`_source`
[[3]]$`_source`$trackId
[1] "84879c86112f53f1124a1f9cb83d6b37"
[[3]]$`_source`$userId
NULL
[[3]]$`_source`$id
[1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b"
[[3]]$`_source`$time
[1] 1437351931929
[[3]]$`_source`$requestId
[1] "1437351931929"
[[3]]$`_source`$propertyId
[1] "ff8081814d28f714014d31eb92a2210b"
[[4]]
[[4]]$`_index`
[1] "organised"
[[4]]$`_type`
[1] "PROPERTY_VIEW"
[[4]]$`_id`
[1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571"
[[4]]$`_version`
[1] 1
[[4]]$`_score`
[1] 1
[[4]]$`_source`
[[4]]$`_source`$trackId
[1] "84879c86112f53f1124a1f9cb83d6b37"
[[4]]$`_source`$userId
NULL
[[4]]$`_source`$id
[1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571"
[[4]]$`_source`$time
[1] 1437351964188
[[4]]$`_source`$requestId
[1] "1437351964188"
[[4]]$`_source`$propertyId
[1] "ff8081814ca1cc06014ca2b9823c0571"
[[5]]
[[5]]$`_index`
[1] "organised"
[[5]]$`_type`
[1] "PROPERTY_VIEW"
[[5]]$`_id`
[1] "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161"
[[5]]$`_version`
[1] 1
[[5]]$`_score`
[1] 1
[[5]]$`_source`
[[5]]$`_source`$trackId
[1] "38212cb8dbd60c10d356fe30257932b4"
[[5]]$`_source`$userId
NULL
[[5]]$`_source`$id
[1] "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161"
[[5]]$`_source`$time
[1] 1437353794879
[[5]]$`_source`$requestId
[1] "1437353794879"
[[5]]$`_source`$propertyId
[1] "ff8081814d42cfff014d473b7f071161"
[[6]]
[[6]]$`_index`
[1] "organised"
[[6]]$`_type`
[1] "PROPERTY_VIEW"
[[6]]$`_id`
[1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535"
[[6]]$`_version`
[1] 1
[[6]]$`_score`
[1] 1
[[6]]$`_source`
[[6]]$`_source`$trackId
[1] "91bbfe57428a47ce2233da1b5517b9a1"
[[6]]$`_source`$userId
[1] "ff8081814ea04efe014ea6843b7f3d13"
[[6]]$`_source`$id
[1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535"
[[6]]$`_source`$time
[1] 1437353798036
[[6]]$`_source`$requestId
[1] "1437353798036"
[[6]]$`_source`$propertyId
[1] "ff8081814d891540014d8eac50142535"