R从Elastic读取数据

时间:2015-09-14 07:50:03

标签: r elasticsearch

我正在阅读从Elastic到R的一些记录,结果却变得非常缓慢。

背景 - 数据适用于房地产市场。我正在阅读的表是属性视图(即当有人点击某个属性以查看详细信息时)。我需要捕获属性ID并查看日期和时间。分析的时间。

这就是我在做的事情:

  1. 将数据从Elastic提取到列表中

    query <- sprintf('{"query":{"range":{"time":{"gte":"%s","lte":"%s"}}}}',start_date,end_date)
    view_list <- elastic::Search(index = "organised",type = "PROPERTY_VIEW",size = 10000000,body=query)$hits$hits
    
  2. 将列表提取到数据框中的字段

    number_of_views <- length(view_list)
    a <- 1 
    view_data <- data.frame(view_date=as.Date(character()),
                             propertyId=character())
    view_date = vector(mode = "character",length = 0)
    propertyId = vector(mode = "character",length = 0)
    
    while (a <= number_of_views) {
      view_date[a] <- as.Date(as.POSIXct(view_list[[a]][[6]]$time/1000, origin="1970-01-01"))                      
      propertyId <- as.character(view_list[[a]][[6]]$propertyId)
      a <- a+1
    }
    view_list_df <- data.frame(trackId,userId,viewId,view_date,requestId,propertyId)
    
  3. 速度 - 读取1周的数据(500k记录)需要7-8分钟。这太慢了。

    列出样本:

    > head(dput(view_list[1:10]))
    list(structure(list(`_index` = "organised", `_type` = "PROPERTY_VIEW", 
        `_id` = "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "8e370fe75121cda0cccda2f1934c7051", userId = "ff8081814ea04efe014ea6843b7f3d13", 
            id = "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9", 
            time = 1437351878754, requestId = "1437351878754", propertyId = "ff8081814e5cc5af014e5ce7dff202a9"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "6b2eaf11c2e4ba6be7c3bd109d8905aa", userId = NULL, 
            id = "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78", 
            time = 1437351694070, requestId = "1437351694070", propertyId = "ff8081814d5bcfaa014d62b622352d78"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "84879c86112f53f1124a1f9cb83d6b37", userId = NULL, 
            id = "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b", 
            time = 1437351931929, requestId = "1437351931929", propertyId = "ff8081814d28f714014d31eb92a2210b"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "84879c86112f53f1124a1f9cb83d6b37", userId = NULL, 
            id = "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571", 
            time = 1437351964188, requestId = "1437351964188", propertyId = "ff8081814ca1cc06014ca2b9823c0571"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "38212cb8dbd60c10d356fe30257932b4", userId = NULL, 
            id = "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161", 
            time = 1437353794879, requestId = "1437353794879", propertyId = "ff8081814d42cfff014d473b7f071161"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "91bbfe57428a47ce2233da1b5517b9a1", userId = "ff8081814ea04efe014ea6843b7f3d13", 
            id = "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535", 
            time = 1437353798036, requestId = "1437353798036", propertyId = "ff8081814d891540014d8eac50142535"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "830f0fe6d53c58938d876175ceca357c:ff8081814e71b801014e72a340300b74", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "830f0fe6d53c58938d876175ceca357c", userId = NULL, 
            id = "830f0fe6d53c58938d876175ceca357c:ff8081814e71b801014e72a340300b74", 
            time = 1437355401546, requestId = "1437355401546", propertyId = "ff8081814e71b801014e72a340300b74"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "88200b3e1bc22cb4b270f89810cd8f32:ff8081814db00698014db29fb80d0614", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "88200b3e1bc22cb4b270f89810cd8f32", userId = NULL, 
            id = "88200b3e1bc22cb4b270f89810cd8f32:ff8081814db00698014db29fb80d0614", 
            time = 1437355324426, requestId = "1437355324426", propertyId = "ff8081814db00698014db29fb80d0614"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "2e9afe62406bedbf90d14fd22a6296de:ff80808149f1ccdc0149f4dd916500e2", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "2e9afe62406bedbf90d14fd22a6296de", userId = NULL, 
            id = "2e9afe62406bedbf90d14fd22a6296de:ff80808149f1ccdc0149f4dd916500e2", 
            time = 1437355340320, requestId = "1437355340320", propertyId = "ff80808149f1ccdc0149f4dd916500e2"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")), structure(list(
        `_index` = "organised", `_type` = "PROPERTY_VIEW", `_id` = "357c4a9917b47635a6dba600805861f4:ff80808149e225cd0149e2ba48d5009c", 
        `_version` = 1L, `_score` = 1, `_source` = structure(list(
            trackId = "357c4a9917b47635a6dba600805861f4", userId = NULL, 
            id = "357c4a9917b47635a6dba600805861f4:ff80808149e225cd0149e2ba48d5009c", 
            time = 1437355340832, requestId = "1437355340832", propertyId = "ff80808149e225cd0149e2ba48d5009c"), .Names = c("trackId", 
        "userId", "id", "time", "requestId", "propertyId"))), .Names = c("_index", 
    "_type", "_id", "_version", "_score", "_source")))
    [[1]]
    [[1]]$`_index`
    [1] "organised"
    
    [[1]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[1]]$`_id`
    [1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9"
    
    [[1]]$`_version`
    [1] 1
    
    [[1]]$`_score`
    [1] 1
    
    [[1]]$`_source`
    [[1]]$`_source`$trackId
    [1] "8e370fe75121cda0cccda2f1934c7051"
    
    [[1]]$`_source`$userId
    [1] "ff8081814ea04efe014ea6843b7f3d13"
    
    [[1]]$`_source`$id
    [1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814e5cc5af014e5ce7dff202a9"
    
    [[1]]$`_source`$time
    [1] 1437351878754
    
    [[1]]$`_source`$requestId
    [1] "1437351878754"
    
    [[1]]$`_source`$propertyId
    [1] "ff8081814e5cc5af014e5ce7dff202a9"
    
    
    
    [[2]]
    [[2]]$`_index`
    [1] "organised"
    
    [[2]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[2]]$`_id`
    [1] "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78"
    
    [[2]]$`_version`
    [1] 1
    
    [[2]]$`_score`
    [1] 1
    
    [[2]]$`_source`
    [[2]]$`_source`$trackId
    [1] "6b2eaf11c2e4ba6be7c3bd109d8905aa"
    
    [[2]]$`_source`$userId
    NULL
    
    [[2]]$`_source`$id
    [1] "6b2eaf11c2e4ba6be7c3bd109d8905aa:ff8081814d5bcfaa014d62b622352d78"
    
    [[2]]$`_source`$time
    [1] 1437351694070
    
    [[2]]$`_source`$requestId
    [1] "1437351694070"
    
    [[2]]$`_source`$propertyId
    [1] "ff8081814d5bcfaa014d62b622352d78"
    
    
    
    [[3]]
    [[3]]$`_index`
    [1] "organised"
    
    [[3]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[3]]$`_id`
    [1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b"
    
    [[3]]$`_version`
    [1] 1
    
    [[3]]$`_score`
    [1] 1
    
    [[3]]$`_source`
    [[3]]$`_source`$trackId
    [1] "84879c86112f53f1124a1f9cb83d6b37"
    
    [[3]]$`_source`$userId
    NULL
    
    [[3]]$`_source`$id
    [1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814d28f714014d31eb92a2210b"
    
    [[3]]$`_source`$time
    [1] 1437351931929
    
    [[3]]$`_source`$requestId
    [1] "1437351931929"
    
    [[3]]$`_source`$propertyId
    [1] "ff8081814d28f714014d31eb92a2210b"
    
    
    
    [[4]]
    [[4]]$`_index`
    [1] "organised"
    
    [[4]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[4]]$`_id`
    [1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571"
    
    [[4]]$`_version`
    [1] 1
    
    [[4]]$`_score`
    [1] 1
    
    [[4]]$`_source`
    [[4]]$`_source`$trackId
    [1] "84879c86112f53f1124a1f9cb83d6b37"
    
    [[4]]$`_source`$userId
    NULL
    
    [[4]]$`_source`$id
    [1] "84879c86112f53f1124a1f9cb83d6b37:ff8081814ca1cc06014ca2b9823c0571"
    
    [[4]]$`_source`$time
    [1] 1437351964188
    
    [[4]]$`_source`$requestId
    [1] "1437351964188"
    
    [[4]]$`_source`$propertyId
    [1] "ff8081814ca1cc06014ca2b9823c0571"
    
    
    
    [[5]]
    [[5]]$`_index`
    [1] "organised"
    
    [[5]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[5]]$`_id`
    [1] "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161"
    
    [[5]]$`_version`
    [1] 1
    
    [[5]]$`_score`
    [1] 1
    
    [[5]]$`_source`
    [[5]]$`_source`$trackId
    [1] "38212cb8dbd60c10d356fe30257932b4"
    
    [[5]]$`_source`$userId
    NULL
    
    [[5]]$`_source`$id
    [1] "38212cb8dbd60c10d356fe30257932b4:ff8081814d42cfff014d473b7f071161"
    
    [[5]]$`_source`$time
    [1] 1437353794879
    
    [[5]]$`_source`$requestId
    [1] "1437353794879"
    
    [[5]]$`_source`$propertyId
    [1] "ff8081814d42cfff014d473b7f071161"
    
    
    
    [[6]]
    [[6]]$`_index`
    [1] "organised"
    
    [[6]]$`_type`
    [1] "PROPERTY_VIEW"
    
    [[6]]$`_id`
    [1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535"
    
    [[6]]$`_version`
    [1] 1
    
    [[6]]$`_score`
    [1] 1
    
    [[6]]$`_source`
    [[6]]$`_source`$trackId
    [1] "91bbfe57428a47ce2233da1b5517b9a1"
    
    [[6]]$`_source`$userId
    [1] "ff8081814ea04efe014ea6843b7f3d13"
    
    [[6]]$`_source`$id
    [1] "ff8081814ea04efe014ea6843b7f3d13:ff8081814d891540014d8eac50142535"
    
    [[6]]$`_source`$time
    [1] 1437353798036
    
    [[6]]$`_source`$requestId
    [1] "1437353798036"
    
    [[6]]$`_source`$propertyId
    [1] "ff8081814d891540014d8eac50142535"
    

0 个答案:

没有答案