比 Selenium 更快的替代方案

时间:2019-04-08 05:56:40

标签: r web-scraping phantomjs rselenium

我正在为以下页面编写爬虫:https://www.oddsportal.com。由于页面使用 JavaScript 渲染,因此我决定使用 RSelenium。我的目标是抓取今年每场比赛的赔率。我需要先通过登录表单登录,因为我在账户中设置了自己的博彩公司列表,只有登录后页面才会为这些比赛显示这些博彩公司。我已经抓取了这些比赛的 5 万个 URL,现在用 RSelenium 逐个打开每个 URL 并抓取特定数据。我想知道是否有更好的解决方案,因为脚本的大部分时间都花在 remDr$navigate(url) 这一步上。我还尝试过一个速度更快的包,但无法登录,也就看不到我需要的博彩公司。我也尝试过 webdriver 包,但无法设置 useragent,因而无法避免 404 错误。我目前使用 findElements;如果能在导航到 URL、页面渲染完成后改用 html_nodes 抓取,也许可以节省一些时间。我还尝试过禁用 CSS,但在 R 中没有找到适用于 phantomjs 或无头 chromedriver 的有效方案。提前感谢您的回答。以下是我目前用于测试 20 个 URL 的脚本:

# Start a PhantomJS server via wdman; `pjs` is the handle to that process.
pjs <- wdman::phantomjs()

# Extra capabilities passed to PhantomJS: a desktop Firefox user agent,
# image loading switched off, cookies disabled and JavaScript enabled.
eCap <- list(
  phantomjs.page.settings.userAgent =
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
  phantomjs.page.settings.loadImages = FALSE,
  phantomjs.phantom.cookiesEnabled = FALSE,
  phantomjs.phantom.javascriptEnabled = TRUE
)

# Connect a remote driver to that server and open a browser session.
# NOTE(review): port 4567 has to be the port the PhantomJS server started
# above listens on -- confirm against the wdman default.
remDr <- remoteDriver(
  browserName = "phantomjs",
  port = 4567L,
  extraCapabilities = eCap
)
remDr$open()

# Log in to the web page.
# Steps: open the results page, click the element named "login-submit"
# (presumably this brings up the login form -- confirm on the live page),
# type the user name and password, then press the form's submit button.
# The "*****" strings are the placeholder credentials from the script.
remDr$navigate("https://www.oddsportal.com/results/#soccer")

login_link <- remDr$findElement(using = "name", value = "login-submit")
login_link$clickElement()

username_box <- remDr$findElement(
  using = "css selector",
  value = "#login-username1"
)
username_box$sendKeysToElement(list("*****"))

password_box <- remDr$findElement(
  using = "css selector",
  value = "#login-password1"
)
password_box$sendKeysToElement(list("*****"))

submit_button <- remDr$findElement(
  using = "css selector",
  value = "#col-content > div:nth-child(3) > div > form > div:nth-child(3) > button"
)
submit_button$clickElement()


# Loop through the URL addresses and get the odds with results.
#
# Reads:  `links1$links` (match URLs) and `remDr` (the open browser session).
# Writes: element i of the `odds` columns -- three odds columns per bookmaker
#         (<prefix>1, <prefix>x, <prefix>2) plus match, krajina, liga, result.
# A bookmaker that is not listed on a page, or a page without a result, is
# stored as 0, as before.

# Bookmaker name as shown on the page -> column-name prefix used in `odds`.
bookmakers <- c(
  "18bet"       = "bet",
  "1xBet"       = "xBet",
  "Asianodds"   = "Asianodds",
  "bet-at-home" = "betathome",
  "bet365"      = "Bet365",
  "bwin"        = "bwin",
  "Chance.cz"   = "Chance",
  "iFortuna.sk" = "iFortuna",
  "Marathonbet" = "Marathonbet",
  "MAXITIP.cz"  = "MAXITIP",
  "Pinnacle"    = "Pinnacle",
  "SAZKAbet.cz" = "SAZKAbet",
  "Tipsport.sk" = "Tipsport"
)

# Text of the first element matching `xpath`; errors if nothing matches.
# getElementText() returns a one-element list, so it is unwrapped here to
# keep the `odds` columns from being coerced into list columns.
element_text <- function(driver, xpath) {
  driver$findElement(using = "xpath", value = xpath)$getElementText()[[1L]]
}

# Odds (1, x, 2) of one bookmaker on the current page as a list of three
# values, or list(0, 0, 0) when the bookmaker's row is absent.
bookmaker_odds <- function(driver, bookmaker) {
  row_xpath <- paste0(
    '//a[@class="name" and .="', bookmaker,
    '"]/ancestor::tr[contains(@class, "lo")]'
  )

  # Probe for the 4th cell; its first hit is reused below instead of being
  # looked up a second time, which saves one WebDriver round-trip.
  last_cell <- driver$findElements(
    using = "xpath",
    value = paste0(row_xpath, "//td[4]")
  )
  if (length(last_cell) == 0L) {
    return(list(0, 0, 0))
  }

  list(
    element_text(driver, paste0(row_xpath, "//td[2]")),
    element_text(driver, paste0(row_xpath, "//td[3]")),
    last_cell[[1L]]$getElementText()[[1L]]
  )
}

# Test run on the first 20 URLs (fewer if `links1$links` is shorter).
# A `for` loop supplies `i` itself; the previous `while (i <= 20)` relied on
# `i` having been initialised somewhere else.
n_urls <- min(20L, length(links1$links))

for (i in seq_len(n_urls)) {
  # as.character() is a guard in case `links` is stored as a factor.
  url <- as.character(links1$links[i])
  remDr$navigate(url)

  # Odds for every bookmaker in the table above.
  for (bookmaker in names(bookmakers)) {
    values <- bookmaker_odds(remDr, bookmaker)
    columns <- paste0(bookmakers[[bookmaker]], c("1", "x", "2"))
    for (k in seq_along(columns)) {
      odds[[columns[k]]][i] <- values[[k]]
    }
  }

  # Country, league, match, results.
  odds$match[i] <- element_text(remDr, '//*[@id="col-content"]/h1')
  odds$krajina[i] <- element_text(remDr, '//*[@id="breadcrumb"]/a[3]')
  odds$liga[i] <- element_text(remDr, '//*[@id="breadcrumb"]/a[4]')

  result_nodes <- remDr$findElements(
    using = "xpath",
    value = '//*[@id="event-status"]/p/strong'
  )
  if (length(result_nodes) != 0L) {
    odds$result[i] <- result_nodes[[1L]]$getElementText()[[1L]]
  } else {
    odds$result[i] <- 0
  }
}

0 个答案:

没有答案