focus_Search = raw_input("Focus Search ")
url = "https://www.google.com/search?q="
res = requests.get(url + focus_Search)
print("You Just Searched")
res_String = res.text
#Now I must get ALL the sections of code that start with "<a href" and end with "/a>"
我试图从谷歌搜索网页上抓取所有链接。我可以一次提取一个链接,但我确信这是一个更好的方法。
答案 0 :(得分:0)
这将创建搜索页面中所有链接的列表,其中包含一些代码,而无需进入BeautifulSoup
import requests
import lxml.html
focus_Search = input("Focus Search ")
url = "https://www.google.com/search?q="
#focus_Search
res = requests.get(url + focus_Search).content
# res
dom = lxml.html.fromstring(res)
links = [x for x in dom.xpath('//a/@href')] # Borrows from cheekybastard in link below
# http://stackoverflow.com/questions/1080411/retrieve-links-from-web-page-using-python-and-beautifulsoup
links