Job won't write to CSV or print rows from the scrape

Time: 2017-10-29 12:06:06

Tags: python python-3.x csv selenium web-scraping

I'm trying to extract data with the code below and write it to a CSV. As far as I can tell, Selenium prints every element it finds, except that it never seems to print the rows or write anything to the CSV. I've run into this before and could usually fix it just by rewriting the job. I'm on Windows. Unfortunately I don't know why it behaves this way, because otherwise the job works.

My code is as follows:

import csv
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()

try:
    os.remove('vtg121.csv')
except OSError:
    pass

driver.get('https://www.topbetta.com.au/sports/football/')

SCROLL_PAUSE_TIME = 0.5


last_height = driver.execute_script("return document.body.scrollHeight")
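# keep scrolling until the page height stops changing, so all lazy-loaded content is in the DOM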

while True:

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")


    time.sleep(SCROLL_PAUSE_TIME)


    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

time.sleep(1)

url = "https://www.topbetta.com.au/sports/football/"
driver.get(url)

counter = 0
for link in range(len(wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, '//a[@href="/sports" and ./div[@class="name"]]'))))):
    wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask")))
    link = wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, '//a[@href="/sports" and ./div[@class="name"]]')))[counter]
    link.location_once_scrolled_into_view
    link = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '(//a[@href="/sports" and ./div[@class="name"]])[%s]' % str(counter + 1))))
    wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask")))
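    # hide the page header and another overlay element so they cannot block the click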
    header = driver.find_element_by_tag_name('header')
    header = driver.execute_script('arguments[0].hidden="true";', header)
    header = driver.find_element_by_xpath('//*[@id="js_body-region"]/div/div[1]/div[2]/div[2]/div/div[3]')
    header = driver.execute_script('arguments[0].hidden="true";', header)
    link.click()
    print(driver.current_url)
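    # wait until the competition-events module present at click time has gone stale (new page loaded)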
    wait(driver, 10).until(EC.staleness_of(driver.find_element(By.XPATH, '//div[@class="competition-events-module"]')))
    time.sleep(2)

    time.sleep(2)

    # link


    # Team ODDS
    langs = driver.find_elements_by_css_selector(".team-container.home div")
    langs_text = []

    for lang in langs:
        print(lang.text)
        langs_text.append(lang.text)

    time.sleep(0)

    # BACK TEAM
    #langs1 = driver.find_elements_by_xpath("//ul[@class='runners']//li[2]")
    langs1 = driver.find_elements_by_css_selector(".home .price")
    langs1_text = []

    for lang in langs1:
        print(lang.text)
        langs1_text.append(lang.text)


    # Draw Odds
    #langs2 = driver.find_elements_by_xpath("//ul[@class='runners']//li[1]")
    langs2 = driver.find_elements_by_css_selector("td.draw.odds > div > a > span")
    langs2_text = []

    for lang in langs2:
        print(lang.text)
        langs2_text.append(lang.text)

    with open('vtg121.csv', 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for row in zip(langs_text, langs1_text, langs2_text):
            print(row)
            writer.writerow(row)
    counter += 1
    driver.get(url)
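
To narrow this down, I'm thinking of adding a quick check right before the with open(...) block (indented to match the loop) to see how many elements each selector actually matches and how many rows zip() produces. The selector strings are the same ones used above; the names selectors, label, css, found and rows are only for this sketch:

# diagnostic only: report how many elements each selector matched on the current page
selectors = {
    "home team": ".team-container.home div",
    "home price": ".home .price",
    "draw odds": "td.draw.odds > div > a > span",
}
for label, css in selectors.items():
    found = driver.find_elements_by_css_selector(css)
    print(label, "->", len(found), "elements")

# zip() stops at the shortest of the three lists, so one empty list means no rows at all
rows = list(zip(langs_text, langs1_text, langs2_text))
print("rows to write:", len(rows))  # 0 here would explain the empty CSV

If any of those counts comes back as 0, zip() truncates everything to zero rows, which would match what I'm seeing: the earlier prints work, but nothing ever reaches writer.writerow().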

0 Answers:

No answers yet