无法使用python

时间:2017-11-23 08:50:19

标签: python selenium ubuntu

我无法打开浏览器网站,导致整个代码无法运行。我无法弄清楚问题,因为终端上也没有打印出错误。 我的代码需要使用xPath从网站中提取事件详细信息,并需要遍历主页面并获取所有事件详细信息的列表。

非常感谢您的帮助!

    import sys
    import os
    import selenium 
    import csv
    import contextlib
    import time
    from selenium import webdriver
    from time import sleep
    from collections import defaultdict
    from selenium.common.exceptions import NoSuchElementException,TimeoutException
    from selenium.webdriver.support.ui import WebDriverWait

    try:
       from urlparse import urljoin
       from urllib import urlretrieve
    except ImportError:
       from urllib.parse import urljoin
       from urllib.request import urlretrieve

    # Landing page of the Singapore Sports Hub ticketing calendar.
    host = "http://www.sportshub.com.sg/sportshubtix/Pages/Home.aspx"

    #xPath & CSS selectors
    # Toggle that expands the calendar list to show more events.
    xpath_load_more_button = "//*[@id='togglercal-CalListslide1']/span[1]"
    # Detail fields of a calendar entry. NOTE(review): these are absolute
    # paths anchored at div[1], so each matches at most one node -- confirm
    # against the live page if more than one event must be scraped.
    name = '//*[@id="eventCalendarScroll"]/div[2]/div[2]/ul/div[1]/div/dl/dt/a'
    date = '//*[@id="eventCalendarScroll"]/div[2]/div[2]/ul/div[1]/div/dl/dd[1]/div'
    venue = '//*[@id="eventCalendarScroll"]/div[2]/div[2]/ul/div[1]/div/dl/dd[2]/div'
    description = '//*[@id="eventCalendarScroll"]/div[2]/div[2]/ul/div[1]/div/dl/dd[3]/div'

    class Crawler(object):
        """Scrape event details from the Sports Hub ticketing calendar.

        Uses a Selenium-driven Firefox session. Call ``start()`` before any
        of the ``load_*`` / ``get_*`` methods, since they rely on
        ``self.driver``.
        """

        def __init__(self):
            # Maps a field name (e.g. "event_names") to the list of values
            # scraped so far.
            self.data = defaultdict(list)

        def start(self):
            """Launch the Firefox browser session."""
            self.driver = webdriver.Firefox()

        def load_main_page(self):
            """Navigate to the calendar landing page (``host``)."""
            self.driver.get(host)

        def load_page(self, url):
            """Navigate to an arbitrary URL."""
            self.driver.get(url)

        def load_more_page(self):
            """Click the 'load more' toggle.

            Returns:
                True if the button was found and clicked, False otherwise.
            """
            # Bug fix: the original try/except was mis-indented and would
            # not even parse; `return True` must sit inside the try body.
            try:
                self.driver.find_element_by_xpath(xpath_load_more_button).click()
                return True
            except NoSuchElementException:
                return False

        def get_name(self):
            """Collect the visible event names into ``self.data``."""
            try:
                # Bug fix: `find_element_xpath` does not exist; the plural
                # `find_elements_by_xpath` is needed to get an iterable of
                # matching nodes.
                event_name = [n.text for n in
                              self.driver.find_elements_by_xpath(name)]
                self.data["event_names"] = self.remove_duplicate(
                    self.data["event_names"] + event_name)
            except Exception:
                # Bug fix: bare `except:` narrowed; still best-effort so a
                # missing element doesn't abort the crawl.
                print("event invalid")

        def remove_duplicate(self, x):
            """Return ``x`` with empty strings and duplicates removed.

            Preserves first-seen order of the remaining items.
            """
            result = []
            for item in x:
                if item != "" and item not in result:
                    result.append(item)
            # Bug fix: the original built `result` but never returned it,
            # so callers always received None.
            return result

        def crawler(self, event):
            """Record the events being crawled and open the main page."""
            self.data["event_names"] = event
            print('Crawling Events = {}'.format(event))
            self.load_main_page()

def main():
    """Entry point: start the browser and open the calendar page."""
    crawler = Crawler()
    crawler.start()
    # Bug fix (per the accepted answer): the original never navigated
    # anywhere after launching the driver, so the site never opened.
    crawler.load_main_page()

# Guard so importing this module does not launch a browser.
if __name__ == "__main__":
    main()

1 个答案:

答案 0 :(得分:0)

在 start 函数中添加 self.crawler() 调用。这样在初始化 driver 变量之后就会开始爬取,浏览器随后会打开该 URL。