使用 rvest 进行网页抓取时无法获得结果

时间:2019-12-19 14:03:21

标签: r rvest

我想从VISA ATM locator获取西班牙ATM的位置。西班牙的结果显示在表格中,但我不知道如何访问这些元素。我尝试过:

# Address of the VISA ATM locator results page; the search term ("Spain") is
# carried in the part of the URL after "#".
link <- "https://www.visa.com/atmlocator/mobile/index.jsp#(page:results,params:(query:Spain))"

# Fetch the page and parse the HTML returned for that address.
visa_webpage <- read_html(link)

# Look up nodes with the selector "visaATMResultListItem" and print their text.
html_text(html_nodes(visa_webpage, "visaATMResultListItem"))

1 个答案:

答案 0 :(得分:1)

这些数据是由浏览器发出的 jQuery(JSONP)请求动态加载的,因此不会出现在 read_html() 获取到的静态 HTML 中。您可以直接发送该请求的简化版本来获取数据:

library(httr)
library(stringr)
library(jsonlite)

# Minimal browser-like header sent with the request.
headers <- c("User-Agent" = "Mozilla/5.0")

# JSONP callback name. The reply is expected to look like `<callback>(<json>)`,
# so the same name is sent in the query and reused below to unwrap the reply.
callback <- "jQuery112403101782845756018_1577837576284"

# Query parameters copied from the request the locator page issues.
# NOTE(review): inside "request", "placeName"/"geocodes" appear to select the
# search location and "range" (start 0, count 8) appears to cap the number of
# ATMs returned -- confirm against the service before changing them.
params <- list(
  "callback" = callback,
  "request" = '{"wsRequestHeaderV2":{"requestTs":"","applicationId":"VATMLOC","requestMessageId":"test12345678","userId":"CDISIUserID","userBid":"10000108","correlationId":"909420141104053819418"},"requestData":{"culture":"en-US","distance":"60","distanceUnit":"mi","metaDataOptions":0,"location":{"address":null,"placeName":"Spain","geocodes":{"latitude":"40.227949660000036","longitude":"-3.6460631049999392"}},"options":{"sort":{"primary":"distance","direction":"asc"},"range":{"start":0,"count":8},"operationName":"and","findFilters":[{"filterName":"PLACE_NAME","filterValue":""},{"filterName":"CARD_ACCEPT","filterValue":""},{"filterName":"OPER_HRS","filterValue":""},{"filterName":"AIRPORT_CD","filterValue":""},{"filterName":"WHEELCHAIR","filterValue":""},{"filterName":"BRAILLE_AUDIO","filterValue":""},{"filterName":"BALANCE_INQUIRY","filterValue":""},{"filterName":"CHIP_CAPABLE","filterValue":""},{"filterName":"PIN_CHANGE","filterValue":""},{"filterName":"RESTRICTED","filterValue":""},{"filterName":"PLUS_ALLIANCE_NO_SURCHARGE_FEE","filterValue":""},{"filterName":"ACCEPTS_PLUS_SHARED_DEPOSIT","filterValue":""},{"filterName":"V_PAY_CAPABLE","filterValue":""},{"filterName":"READY_LINK","filterValue":""}],"useFirstAmbiguous":true}}}',
  "_" = "1577837576288"
)

r <- httr::GET(
  url = "https://www.visa.com/atmlocator_services/rest/findNearByATMs",
  httr::add_headers(.headers = headers),
  query = params
)

# Fail here with the HTTP status rather than with a confusing parse error below.
httr::stop_for_status(r)

# Read the body explicitly as text instead of relying on implicit coercion of
# the response object.
response_text <- httr::content(r, as = "text", encoding = "UTF-8")

# Strip the JSONP wrapper `<callback>( ... )` to obtain the bare JSON payload.
# `dotall = TRUE` lets `.` span line breaks in case the payload is multi-line.
jsonp_pattern <- stringr::regex(paste0(callback, "\\((.*)\\)"), dotall = TRUE)
json_text <- stringr::str_match(response_text, jsonp_pattern)[1, 2]
if (is.na(json_text)) {
  stop(
    "Response is not wrapped in the expected JSONP callback: ", callback,
    call. = FALSE
  )
}

data <- jsonlite::fromJSON(json_text)
locations <- data.frame(data$responseData$foundATMLocations[1])