我正在尝试用 R 和 foreach 向本地 OSRM 服务器发送请求,为大量的起点/终点对(超过一百万对)查询路线。目前速度非常慢(每秒只能完成约 10 个请求),想知道可以采取哪些措施来加快速度。
我目前的代码如下,运行在一台 8 核的 Linux 机器上。
library(doParallel)
library(foreach)
library(tidyverse)
# Build every origin/destination pairing of the cities in maps::us.cities,
# together with the OSRM route URL to request for each pair.
citiesdf <- maps::us.cities

# The constant `key` column lets an ordinary join act as a cross join.
origindf <- citiesdf %>%
  dplyr::select(
    origin_name = name,
    origin_longitude = long,
    origin_latitude = lat
  ) %>%
  dplyr::mutate(key = "k")

destinationdf <- citiesdf %>%
  dplyr::select(
    dest_name = name,
    dest_longitude = long,
    dest_latitude = lat
  ) %>%
  dplyr::mutate(key = "k")

od_df <- origindf %>%
  # Name the join column explicitly: no "Joining with ..." message, and the
  # join cannot silently pick up another shared column later on.
  dplyr::inner_join(destinationdf, by = "key") %>%
  dplyr::mutate(
    # Coordinates are sent as longitude,latitude pairs separated by ";".
    api_call_text = paste0(
      "http://1.1.1.1:5000/route/v1/driving/",
      origin_longitude, ",", origin_latitude, ";",
      dest_longitude, ",", dest_latitude,
      "?steps=false"
    )
  ) %>%
  dplyr::mutate(rownum = dplyr::row_number()) %>%
  # Placeholder values, overwritten once a response has been processed.
  dplyr::mutate(statuscode = 9999, distance = 9999, time = 9999) %>%
  # Used to limit the number of rows while performance testing.
  dplyr::filter(rownum <= 9999999)
# Fetch every route from the OSRM server using a pool of worker processes.
# NOTE(review): the worker count is hard-coded; tune it to the host and to
# what the OSRM server can handle.
cl <- makeCluster(10)
registerDoParallel(cl)
starttime <- Sys.time()
# Iterate over the URL vector itself, so the loop body never references od_df
# and the whole data frame is not exported to the workers. `.packages` is an
# argument of foreach(), not of httr::GET().
api_out <- tryCatch(
  foreach(
    request_url = od_df$api_call_text,
    .packages = c("httr")
  ) %dopar% httr::GET(url = request_url),
  # Always shut the workers down, even if a request fails, and fall back to
  # the sequential backend so a later %dopar% does not hit the dead cluster.
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)
apiendtime <- Sys.time()
# Extract status code, distance and duration from the responses into od_df.
# Vectorised on purpose: assigning od_df$col[i] inside a row loop is very slow
# for a million rows, and 1:nrow(od_df) would iterate over c(1, 0) when the
# frame is empty.
stopifnot(length(api_out) == nrow(od_df))
od_df$statuscode <- vapply(
  api_out,
  function(resp) as.numeric(resp$status_code),
  numeric(1)
)

# Only successful responses carry a route; all other rows keep the values
# already present in od_df.
ok_rows <- which(od_df$statuscode == 200)
if (length(ok_rows) > 0) {
  route_stats <- vapply(
    api_out[ok_rows],
    function(resp) {
      routes <- jsonlite::fromJSON(base::rawToChar(resp$content))$routes
      # Use the first route only; NA when the body carries no route at all.
      c(
        if (length(routes$distance) > 0) routes$distance[[1]] else NA_real_,
        if (length(routes$duration) > 0) routes$duration[[1]] else NA_real_
      )
    },
    numeric(2)
  )
  # Distance is in meters by default. Multiply by 0.000621371 to get miles.
  od_df$distance[ok_rows] <- route_stats[1, ] * 0.000621371
  # Time is in seconds by default. Multiply by 1/60 to get minutes.
  od_df$time[ok_rows] <- route_stats[2, ] * (1 / 60)
}
endtime <- Sys.time()