我试图通过在页面上逐级导航来下载多个PDF。我可以使用下拉菜单浏览页面并最终下载PDF,但随后会收到以下错误:
元素命令失败,因为所引用的元素不再附加到DOM。
下面是我的代码:
library(RSelenium)
library(stringr)
# Walk the state -> district -> block dropdowns on secc.gov.in and click
# every PDF link in the resulting table.
#
# Each dropdown selection (re)loads the page, so any WebElement obtained
# before the selection becomes stale ("no longer attached to the DOM").
# To avoid that, elements are never cached across a selection: they are
# looked up again by id/xpath right before every interaction.
rd <- rsDriver()
remDr <- rd[["client"]]
remDr$navigate("http://secc.gov.in/lgdStateList")

# Return the option labels of the <select> with the given id, without the
# first entry (the placeholder option).
dropdown_labels <- function(id) {
  ele <- remDr$findElement("id", id)
  labels <- ele$getElementText()[[1]] %>%
    strsplit(., "\\n") %>%
    unlist() %>%
    str_trim("left")
  labels[-1]
}

# Select `label` in the <select> with the given id. The element is located
# afresh on every call so a handle from a previous page load is never reused.
select_dropdown <- function(id, label) {
  ele <- remDr$findElement("id", id)
  ele$clickElement()
  ele$sendKeysToElement(list(label))
  ele$clickElement()
}

# First drop down
states <- dropdown_labels("lgdState")
for (i in seq_along(states)) {
  select_dropdown("lgdState", states[i])

  # Second drop down
  districts <- dropdown_labels("lgdDistrict")
  for (j in seq_along(districts)) {
    select_dropdown("lgdDistrict", districts[[j]])

    # Third drop down
    block <- dropdown_labels("lgdBlock")
    for (k in seq_along(block)) {
      select_dropdown("lgdBlock", block[[k]])

      # Count the <td> cells of every table row up front, before any link
      # is clicked, so the row handles are not needed again afterwards.
      gpEle <- remDr$findElements("class", "statesrow")
      cell_counts <- vapply(gpEle, function(row) {
        h <- unlist(row$getElementAttribute("innerHTML"))
        length(unlist(strsplit(h, "<td>"))[-1])
      }, integer(1))

      # seq_along()/seq_len() skip empty tables and empty rows instead of
      # iterating over c(1, 0) as 1:length(x) would.
      for (m in seq_along(cell_counts)) {
        for (n in seq_len(cell_counts[[m]])) {
          xpath1 <- paste0('//*[@id="example"]/tbody/tr[', m, ']/td[', n, ']/a')
          # The link is re-located by xpath on every iteration.
          pdfEle <- remDr$findElement("xpath", xpath1)
          pdfEle$clickElement()
          # Give the browser time to start the download.
          Sys.sleep(5)
        }
      }
    }
  }
}
答案 0 :(得分:1)
根据您的要求
library(rvest)
library(stringr)

# Download every Gram Panchayat PDF from secc.gov.in by replaying the form
# POSTs the site issues for state -> district -> block -> GP, without a browser.
#
# NOTE(review): html_session() and the internal rvest:::request_POST() are
# kept as in the original; they depend on the installed rvest version --
# confirm they exist before running.
url <- "http://secc.gov.in/lgdStateList"
page <- html_session(url)

# Message the site shows instead of a PDF when no file exists.
error <- "PDF File for this Gram Panchayat is not available."

# Return the labels and values of the <option> nodes matched by `css`,
# dropping the first (placeholder) option.
select_options <- function(doc, css) {
  nodes <- html_nodes(doc, css = css)
  list(
    text = html_text(nodes)[-1],
    value = html_attr(nodes, "value")[-1]
  )
}

# Pull the digits of the 4th comma-separated argument out of an onclick
# handler's first "(...)" group. Returns NA when there is no such group.
extract_gp_code <- function(onclick) {
  args <- str_extract_all(onclick, "\\([^()]+\\)")[[1]]
  if (length(args) == 0 || is.na(args[1])) {
    return(NA_character_)
  }
  inner <- substring(args[1], 2, nchar(args[1]) - 1)
  str_extract(strsplit(inner, ",")[[1]][4], "[[:digit:]]+")
}

## STATE LOOP ##
states <- select_options(page, "#lgdState > option")
state <- states$text
state_id <- states$value
for (i in seq_along(state_id)) {
  page1 <- rvest:::request_POST(
    page,
    url = "http://secc.gov.in/lgdDistrictList",
    body = list(
      "stateCode" = state_id[i]
    ),
    encode = "form"
  )

  ## DISTRICT LOOP ##
  districts <- select_options(page1, "#lgdDistrict > option")
  district <- districts$text
  district_id <- districts$value
  for (j in seq_along(district_id)) {
    page2 <- rvest:::request_POST(
      page1,
      url = "http://secc.gov.in/lgdBlockList",
      body = list(
        "stateCode" = state_id[i],
        "districtCode" = district_id[j]
      ),
      encode = "form"
    )

    ## BLOCK LOOP ##
    blocks <- select_options(page2, "#lgdBlock > option")
    block <- blocks$text
    block_id <- blocks$value
    for (k in seq_along(block_id)) {
      page3 <- rvest:::request_POST(
        page2,
        url = "http://secc.gov.in/lgdGpList",
        body = list(
          "stateCode" = state_id[i],
          "districtCode" = district_id[j],
          "blockCode" = block_id[k]
        ),
        encode = "form"
      )

      txt <- html_nodes(page3, css = "#example a") %>% html_attr("onclick")
      # vapply() always yields a character vector, even when `txt` is empty.
      gpcode <- vapply(txt, extract_gp_code, character(1), USE.NAMES = FALSE)

      ## GP CODE LOOP to download file ##
      for (l in seq_along(gpcode)) {
        # Links whose onclick carries no usable code cannot be requested.
        if (is.na(gpcode[l])) {
          next
        }
        page4 <- rvest:::request_POST(
          page3,
          url = "http://secc.gov.in/downloadLgdwisePdfFile",
          body = list(
            "stateCode" = state_id[i],
            "districtCode" = district_id[j],
            "blockCode" = block_id[k],
            "gpCode" = gpcode[l]
          ),
          encode = "form"
        )

        # If the response cannot be parsed as HTML, treat it as carrying no
        # error message, so the download is still attempted.
        error_displayed <- tryCatch(
          html_nodes(page4, css = ".error") %>% html_text(),
          error = function(e) character(0)
        )

        # %in% yields a single TRUE/FALSE whether zero, one or several
        # ".error" nodes were found, so the if() condition is always valid.
        if (!(error %in% error_displayed)) {
          disposition <- page4$response$headers$`content-disposition`
          if (is.null(disposition)) {
            warning(
              "No content-disposition header for gpCode ", gpcode[l],
              "; skipping.",
              call. = FALSE
            )
            next
          }
          filename <- gsub("attachment;filename=", "", disposition)
          filename <- str_replace_all(filename, '"', "")
          writeBin(page4$response$content, filename)
        }
      }
    }
  }
}
而且这种方法完全不需要RSelenium。 :)