在R中并行化API调用(OSRM)

时间:2019-06-11 14:40:49

标签: r foreach httr osrm

我正在尝试使用R和foreach向本地OSRM服务器发送请求,以获取大量源/目的地对(超过一百万)的路线。这花费了很长时间(每秒只能完成约10次请求),我想知道可以采取什么措施来加快速度。

到目前为止,我的代码如下。这是在8核Linux机器上。

library(doParallel)
library(foreach)
library(tidyverse)

# Build every origin/destination pairing of US cities, together with the OSRM
# route URL that will be requested for each pair.
citiesdf <- maps::us.cities

# Both sides carry the same constant `key`, so joining on it produces the full
# cross product of origins and destinations.
origindf <- citiesdf %>%
  dplyr::select(
    origin_name = name,
    origin_longitude = long,
    origin_latitude = lat
  ) %>%
  dplyr::mutate(key = "k")

destinationdf <- citiesdf %>%
  dplyr::select(
    dest_name = name,
    dest_longitude = long,
    dest_latitude = lat
  ) %>%
  dplyr::mutate(key = "k")

od_df <- origindf %>%
  # Name the join column explicitly instead of relying on dplyr guessing the
  # shared columns (which also prints a "Joining, by = ..." message).
  dplyr::inner_join(destinationdf, by = "key") %>%
  dplyr::mutate(
    # Coordinates are written as longitude,latitude with ";" between the
    # origin and the destination.
    api_call_text = paste0(
      "http://1.1.1.1:5000/route/v1/driving/",
      origin_longitude, ",", origin_latitude, ";",
      dest_longitude, ",", dest_latitude,
      "?steps=false"
    )
  ) %>%
  dplyr::mutate(rownum = dplyr::row_number()) %>%
  # Sentinel values; they are overwritten once a response has been parsed.
  dplyr::mutate(statuscode = 9999, distance = 9999, time = 9999) %>%
  # Used to limit the number of rows while performance testing.
  dplyr::filter(rownum <= 9999999)

# Start the worker processes and register them as the %dopar% backend.
cl <- makeCluster(10)
registerDoParallel(cl)

starttime <- Sys.time()

# Issue one GET per URL; the result is a list of httr responses in the same
# order as the rows of od_df.
# * Iterating over the URL vector itself (rather than indexing od_df inside the
#   loop body) avoids referencing the whole data frame from the workers.
# * `.packages` is an argument of foreach(), not of httr::GET(); it was
#   previously placed inside the GET() call, where foreach never saw it.
api_out <- foreach(
  api_url = od_df$api_call_text,
  .packages = "httr"
) %dopar% {
  httr::GET(url = api_url)
}

apiendtime <- Sys.time()

# Shut the workers down so the background R processes do not linger after the
# requests are finished.
stopCluster(cl)

# Extract list values into the original data frame.
#
# Results are gathered in plain vectors and written back to od_df once at the
# end: assigning od_df$col[i] inside a loop modifies the data frame on every
# iteration, which is very slow for large numbers of rows.

# as.numeric() keeps the column a double, matching its 9999 placeholder.
status_codes <- vapply(
  api_out,
  function(resp) as.numeric(resp$status_code),
  numeric(1)
)

# Start from the existing placeholder columns so rows whose request did not
# return 200 keep their sentinel values.
distances_mi <- od_df$distance
durations_min <- od_df$time

# which() yields an empty vector when nothing succeeded (or od_df has no rows),
# so the loop is simply skipped in that case.
for (i in which(status_codes == 200)) {
  osrm_content <- jsonlite::fromJSON(base::rawToChar(api_out[[i]]$content))
  # Use the first route only, so a response listing several routes still
  # yields a single value per row.
  # Distance is in meters by default. Multiply by 0.000621371 to convert to miles.
  distances_mi[i] <- osrm_content$routes$distance[1] * 0.000621371
  # Time is in seconds by default; convert to minutes.
  durations_min[i] <- osrm_content$routes$duration[1] * (1 / 60)
}

od_df$statuscode <- status_codes
od_df$distance <- distances_mi
od_df$time <- durations_min

endtime <- Sys.time()

0 个答案:

没有答案