我正在为以下网站编写爬虫:https://www.oddsportal.com。由于页面是用 JavaScript 渲染的,我决定使用 RSelenium。我的目标是获取今年每场比赛的赔率。我通过页面上的登录表单登录,因为我在账户中设置了自己关注的博彩公司,登录后比赛页面才会显示这些公司的赔率。我已经抓取了这些比赛的 5 万个 URL,现在用 RSelenium 逐个打开每个 URL 并抓取所需的数据。我想知道是否有更好的解决方案,因为脚本的大部分时间都花在 remDr$navigate(url) 这一步上。我还尝试过速度更快的程序包,但无法登录,也就看不到我需要的博彩公司。我也尝试过 webdriver 程序包,但无法设置 user agent 来避免 404 错误。目前我使用的是 findElements;如果能在导航到 URL、页面渲染完成之后改用 html_nodes 来抓取,也许可以节省一些时间。我还尝试过禁用 CSS,但在 R 中找不到适用于 phantomjs 或无头 chromedriver 的有效方法。提前感谢您的答复。下面是我目前用于测试 20 个 URL 的脚本:
# Start PhantomJS through wdman; the handle is kept in `pjs`.
pjs <- wdman::phantomjs()

# Extra PhantomJS capabilities: a desktop Firefox user-agent string, image
# loading and cookies switched off, JavaScript switched on.
eCap <- list(
  phantomjs.page.settings.userAgent =
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
  phantomjs.page.settings.loadImages = FALSE,
  phantomjs.phantom.cookiesEnabled = FALSE,
  phantomjs.phantom.javascriptEnabled = TRUE
)

# Create a PhantomJS remote driver on port 4567 and open the browser session.
remDr <- remoteDriver(
  browserName = "phantomjs",
  port = 4567L,
  extraCapabilities = eCap
)
remDr$open()
# Log in to the website.
remDr$navigate("https://www.oddsportal.com/results/#soccer")

# Click the element named "login-submit" (presumably opens the login form).
remDr$findElement(
  using = "name",
  value = "login-submit"
)$clickElement()

# Type the credentials into the username and password inputs.
remDr$findElement(
  using = "css selector",
  value = "#login-username1"
)$sendKeysToElement(list("*****"))
remDr$findElement(
  using = "css selector",
  value = "#login-password1"
)$sendKeysToElement(list("*****"))

# Click the button inside the form to submit it.
remDr$findElement(
  using = "css selector",
  value = paste0(
    "#col-content > div:nth-child(3) > div > form > ",
    "div:nth-child(3) > button"
  )
)$clickElement()
# Loop through the URL addresses and collect the odds with the results.
#
# For each of the first 20 URLs in `links1$links` (fewer if fewer exist),
# row `i` of `odds` receives the 1 / X / 2 odds of a fixed set of bookmakers
# plus the match name, country, league and result.
#
# Uses objects defined outside this block: `remDr` (the RSelenium session),
# `links1` and `odds`.
#
# NOTE(review): every findElements()/getElementText() call is a separate
# WebDriver round trip. If the per-page time is still too high, consider
# fetching remDr$getPageSource() once per page and parsing it locally --
# confirm first that the odds table is present in the returned source.

# Column prefix in `odds` -> bookmaker name exactly as it appears on the page.
# The columns written are <prefix>1, <prefix>x and <prefix>2.
bookmakers <- c(
  bet = "18bet",
  xBet = "1xBet",
  Asianodds = "Asianodds",
  betathome = "bet-at-home",
  Bet365 = "bet365",
  bwin = "bwin",
  Chance = "Chance.cz",
  iFortuna = "iFortuna.sk",
  Marathonbet = "Marathonbet",
  MAXITIP = "MAXITIP.cz",
  Pinnacle = "Pinnacle",
  SAZKAbet = "SAZKAbet.cz",
  Tipsport = "Tipsport.sk"
)

# Number of URLs handled in this test run; never more than are available.
n_urls <- min(20L, length(links1$links))

# Text of the first element matching `xpath`, or `default` when nothing
# matches. findElements() returns an empty list instead of raising an error,
# so one call serves as both the existence check and the lookup.
first_text <- function(xpath, default = NULL) {
  hits <- remDr$findElements(using = "xpath", value = xpath)
  if (length(hits) == 0L) {
    return(default)
  }
  # getElementText() returns a list; take the string so that `odds` columns
  # stay atomic instead of silently becoming list columns.
  hits[[1]]$getElementText()[[1]]
}

# XPath of the `cell`-th <td> in the odds row of `bookmaker`.
odds_cell_xpath <- function(bookmaker, cell) {
  paste0(
    '//a[@class="name" and .="', bookmaker,
    '"]/ancestor::tr[contains(@class, "lo")]//td[', cell, "]"
  )
}

# Always start at 1: the loop no longer depends on a counter `i` that has to
# be initialised (and reset between runs) somewhere else.
for (i in seq_len(n_urls)) {
  match_url <- as.character(links1$links[i])
  remDr$navigate(match_url)

  for (prefix in names(bookmakers)) {
    bookmaker <- bookmakers[[prefix]]

    # td[4] doubles as the "is this bookmaker listed?" probe, which saves the
    # separate existence query per bookmaker.
    odd_2 <- first_text(odds_cell_xpath(bookmaker, 4L))
    if (is.null(odd_2)) {
      # Bookmaker not listed for this match: keep the 0 placeholder.
      odd_1 <- 0
      odd_x <- 0
      odd_2 <- 0
    } else {
      odd_1 <- first_text(odds_cell_xpath(bookmaker, 2L), default = 0)
      odd_x <- first_text(odds_cell_xpath(bookmaker, 3L), default = 0)
    }

    odds[[paste0(prefix, "1")]][i] <- odd_1
    odds[[paste0(prefix, "x")]][i] <- odd_x
    odds[[paste0(prefix, "2")]][i] <- odd_2
  }

  # Country, league, match, result. A page without the expected heading or
  # breadcrumb yields NA instead of aborting the whole run.
  odds$match[i] <- first_text(
    '//*[@id="col-content"]/h1',
    default = NA_character_
  )
  odds$krajina[i] <- first_text(
    '//*[@id="breadcrumb"]/a[3]',
    default = NA_character_
  )
  odds$liga[i] <- first_text(
    '//*[@id="breadcrumb"]/a[4]',
    default = NA_character_
  )
  odds$result[i] <- first_text(
    '//*[@id="event-status"]/p/strong',
    default = 0
  )
}