还记得上一页的信息吗?

时间:2018-10-18 22:03:56

标签: python selenium web-scraping

我正在尝试刮除Bet365,因为这是现场足球比赛的赔率。我正在遍历所有可用的现场比赛。对于每场比赛,我都需要单击比赛,然后将转到一些新内容,其中包含所有详细的oddsinfo。从这里开始,当我继续进行迭代时,它崩溃了。

它抛出此错误:

Traceback (most recent call last):
  File "/Users/christian/Google Drev/Data Science/Bet365/main.py", line 32, in <module>
    getScoreH = game.find_element_by_css_selector(scoreH).text
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 430, in find_element_by_css_selector
    return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 654, in find_element
    {"using": by, "value": value})['value']
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 628, in _execute
    return self._parent.execute(command, params)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 320, in execute
    self.error_handler.check_response(response)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: chrome=69.0.3497.100)
  (Driver info: chromedriver=2.42.591059 (a3d9684d10d61aa0c45f6723b327283be1ebaad8),platform=Mac OS X 10.14.0 x86_64)

错误来自我main.py中的最后一个代码:

# HERE IT BREAKS!: 

# Redirects to a games detailed odds page 
game.find_element_by_css_selector(oddsBtn).click() 
time.sleep(5)

# Go back and keep choose the click the next games details.
obj.find_element(overview).click()
time.sleep(5)

下面是我的程序。但是正如我说的那样,问题出在main.py的最后几行代码中,我需要返回到迭代。似乎不记得我从哪里离开了。

有人可以帮助我吗?

cls_scraper.py:

"""
    Class to find element(s) by css selector
"""
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
import platform
import time
import os


class Scraper():
    def __init__(self, driver):
        self.driver = driver

    def wait(self, element):
        return WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, element)))

    def element_exist_css(self, element):
        try:
            self.driver.find_element_by_css_selector(element)
        except NoSuchElementException:
            print('Element doesnt exist')
            return False
        return True

    def element_css(self, element):

        try:
            time.sleep(2)
            return WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable(
                                                (By.CSS_SELECTOR, element)))
        except StaleElementReferenceException:
            print("XB: StaleElementReferenceException")
        except WebDriverException:
            print("XB: WebDriverException")

    def find_elements(self, element):

        time.sleep(2)
        return self.driver.find_elements_by_css_selector(element)

    def find_element(self, element):

        time.sleep(2)
        return self.driver.find_element_by_css_selector(element)

str_elements.py:

"""
    String library to have relevant css selector elements in one place.
"""

""" BET 365 - Overview-page """

# Enter the page
enterPage   = '#TopPromotionMainArea'

# Page with live odds
inPlay      = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Primary > div.hm-BigButtons > nav > a:nth-child(2)'

# Element containing relevent games and info about time, score etc, se below.
games       = 'div.ipo-FixtureRenderer.ipo-Competition_Container > div'
# For each game in games, these elements can be found:
teamH       = 'div.ipo-TeamStack > div:nth-child(1)'
teamA       = 'div.ipo-TeamStack > div:nth-child(2)'
scoreH      = 'div.ipo-TeamPoints_TeamScore.ipo-TeamPoints_TeamScore-teamone'
scoreA      = 'div.ipo-TeamPoints_TeamScore.ipo-TeamPoints_TeamScore-teamtwo'
gameTime    = 'div.ipo-InPlayTimer'

# The redirection in order to get all kinds of odds from a match
# Main overview-page only show a part of it.
oddsBtn     = 'div.ipo-FixtureEventCountButton_EventCountWrapper'

# The overview tab to see all the live games
overview    = 'div.ip-ControlBar > span.ip-ControlBar_ButtonBar > div:nth-child(1)'

# Choose english language             
langTab     = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Secondary > div.hm-HeaderModule_Menus > div.hm-LanguageDropDownSelections.hm-DropDownSelections > a'
pickEng     = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Secondary > div.hm-HeaderModule_Menus > div.hm-LanguageDropDownSelections.hm-DropDownSelections.hm-DropDownSelections_Selected > div > div > a:nth-child(1)'

# Get a better overview
allMarkets  = 'body > div:nth-child(1) > div > div.wc-PageView > div.wc-PageView_Main.wc-InPlayPage_MainContainer > div > div > div.ipo-OverViewView > div > div > div > div.ipo-OverViewDetail > div.ipo-OverViewDetail_Container.ipo-Classification > div.ipo-ClassificationHeader_Header.ipo-ClassificationHeader_Header-1.ipo-ClassificationHeader_Header-lightgreenborder.ipo-ClassificationHeader_Header-moremarkets > div.ipo-ClassificationHeader_MarketsButtonOuterWrapper > div > div.ipo-ClassificationHeader_MarketsButton.ipo-ClassificationHeader_MarketsButton-transparent'

""" BET 365 - Odds-page """
# Collect all the odds from the redirection page.

main.py:

""" Run program from here """
from str_elements import *
from cls_scraper  import *
from browser.path import *
import time

if __name__ == '__main__':

    print("Welcome \n")
    # Open website
    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(driver_path, chrome_options=options)
    driver.get('https://www.bet365.dk/#/HO/')

    """ Click relevant elements """
    print("Bet365: Pressing buttons ...")
    obj = Scraper(driver)
    obj.element_css(enterPage).click()  # Enters the bet365 main page
    obj.element_css(inPlay).click()     # Presses the in-play tab
    obj.element_css(langTab).click()    # Choose languages
    obj.element_css(pickEng).click()    # Choose english
    obj.element_css(overview).click()   # Shows all live games
    obj.element_css(allMarkets).click() # Better overview

    print("Bet365: Collecting game data ...")

    # All live games
    liveContainer = obj.find_elements(games)        # Contains a list of running games

    for game in liveContainer:

        getScoreH = game.find_element_by_css_selector(scoreH).text
        getScoreA = game.find_element_by_css_selector(scoreA).text
        getTeamH  = game.find_element_by_css_selector(teamH).text
        getTeamA  = game.find_element_by_css_selector(teamA).text
        getTime   = game.find_element_by_css_selector(gameTime).text

        print("Score:   ", getScoreH, "-", getScoreA)
        print("GameTime:", getTime)
        print("HomeTeam:", getTeamH)
        print("AwayTeam:", getTeamA)
        print("")

        ## HERE IT BREAKS!: 

        ## Redirects to a games detailed odds page 
        # game.find_element_by_css_selector(oddsBtn).click() 
        # time.sleep(5)

        ## Go back and keep choose the click the next games details.
        # obj.find_element(overview).click()
        # time.sleep(5)

0 个答案:

没有答案