无法从谷歌浏览器中抓取链接

时间:2021-06-07 15:38:50

标签: python html hyperlink scrape

我的代码正在打开标签页,搜索主题并关闭,但它没有向我发送它应该收集的链接。

from selenium import webdriver


def get_results(search_term):
    """Search StartPage for *search_term* and return the result-link hrefs.

    Opens a Chrome window, submits the query, collects the ``href`` of every
    anchor in the results list, prints each one, and returns them as a list.
    """
    url = "https://www.startpage.com"
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        search_box = driver.find_element_by_id("q")
        search_box.send_keys(search_term)
        search_box.submit()
        # Without a wait, the anchors are queried before the results page has
        # loaded, so nothing is found. implicitly_wait makes every subsequent
        # find_* call poll up to 10 s for matches.
        driver.implicitly_wait(10)
        # NOTE: find_elements_* returns an empty list when nothing matches —
        # it never raises — so the original try/except fallback was dead code.
        # Test for an empty list explicitly instead.
        links = driver.find_elements_by_xpath(
            "//ol[@class='web_regular_results']//div//a"
        )
        if not links:
            # Fallback: grab any anchor on the page.
            links = driver.find_elements_by_xpath("//div//a")
        results = []
        for link in links:
            href = link.get_attribute("href")
            print(href)
            results.append(href)
        return results
    finally:
        # quit() shuts the browser down even if scraping raised; close() would
        # be skipped on an exception and leak the Chrome process.
        driver.quit()


if __name__ == "__main__":
    pesquisa = input("o que você quer pesquisar: ")
    get_results(pesquisa)

1 个答案:

答案 0 :(得分:0)

from selenium import webdriver


def get_results(search_term):
    """Submit *search_term* on StartPage and return the scraped result links.

    Returns a list of ``href`` strings, printing each as it is collected.
    """
    url = "https://www.startpage.com"
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        search_box = driver.find_element_by_id("q")
        search_box.send_keys(search_term)
        search_box.submit()
        # The results are rendered after submit(); poll up to 10 s so the
        # anchor query below does not run against a half-loaded page.
        driver.implicitly_wait(10)
        # find_elements_* never raises for zero matches — it returns [] —
        # so a try/except (with or without `pass`) cannot trigger the
        # fallback. Check for an empty list instead.
        links = driver.find_elements_by_xpath(
            "//ol[@class='web_regular_results']//div//a"
        )
        if not links:
            links = driver.find_elements_by_xpath("//div//a")
        results = []
        for link in links:
            href = link.get_attribute("href")
            print(href)
            results.append(href)
        return results
    finally:
        # Always release the browser, even when scraping fails.
        driver.quit()


if __name__ == "__main__":
    pesquisa = input("o que você quer pesquisar: ")
    get_results(pesquisa)

说明:`except` 块中只要已经有语句(如这里的赋值),再写 `pass` 是多余的,它不会影响后续语句的执行。实际上 `find_elements_*` 找不到元素时返回空列表而不会抛出异常,因此 `except` 分支根本不会触发;真正的问题是提交搜索后没有等待结果页面加载就去抓取链接。