我有一个大数据集(> 40 000行),我想使用API通过发送带有2个参数的查询来获取每一行的代码。
一切正常,但速度很慢(每行约1.5秒),到达第3401行时出现错误消息:
Error in curl::curl_fetch_memory(url, handle = handle) :
Timeout was reached: Operation timed out after 10969 milliseconds with 0 out of 0 bytes received
library(tidyverse)
library(jsonlite)
library(httr)
# example dataset ---------------------------------------------------------
data_example = tibble(ADRESSE = c("20 Rue de la Tour d'Auvergne",
"21 Rue Lucien Sampaix",
"1 Parvis Notre-Dame"),
CP = c("75009",
"75010",
"75004"))
# function ----------------------------------------------------------------
get_my_iris = function(adresse , cp ) {
print(Sys.time())
tt = httr::VERB(
verb = "GET", url = "https://pyris.datajazz.io/api/search/",
httr::add_headers(accept = "application/json"),
query = list(
geojson = "false",
q = adresse, postcode = cp
)
)
text = content(tt,as="text",encoding = "UTF-8")
df = fromJSON(text, flatten = TRUE) %>% data.frame()
if(length(df) > 0){
return(as.character(df$complete_code))
} else {
return("NO CODE")
}
}
# purrr -------------------------------------------------------------------
map2_chr(data_example$ADRESSE,data_example$CP, ~get_my_iris(adresse = .x, cp =.y))
有没有办法加快这项任务?