def custom_scrape(e1, master):
    """Print every link found in the references list of a Wikipedia page.

    The page URL is read from ``e1.get()``. When it does not contain the
    substring "wikipedia", an error message is printed and no request is made.

    Args:
        e1: Object whose ``get()`` returns the URL text (presumably a Tkinter
            Entry widget -- confirm against the caller).
        master: Not used by this function; kept so existing callers still work.

    Returns:
        None. Results are written to standard output, one "Link: ..." line
        per anchor that carries an ``href`` attribute.
    """
    page_url = e1.get()

    # Validate before touching the network so bad input costs nothing.
    if "wikipedia" not in page_url:  # TODO: turn this into a regular expression
        print("Error: Please enter a valid Wikipedia URL")
        return

    # Fetching a page is a read, so issue a GET rather than a POST. The
    # context manager releases the session's pooled connections, and the
    # timeout keeps an unresponsive server from blocking the caller forever.
    with requests.Session() as session:
        response = session.get(page_url, timeout=10)

    soup = BeautifulSoup(response.text, "html.parser")

    # Isolate the references section of the page; find() returns None when
    # the page has no <ol class="references"> element.
    references = soup.find("ol", {"class": "references"})
    if references is None:
        return

    # Search the tag directly instead of re-parsing its string form.
    for anchor in references.find_all("a", href=True):
        print("Link: " + anchor["href"])
正如您所看到的,我已经能够提取出维基百科页面的参考文献(references)部分,但我不确定如何让程序逐个访问提取到的这些链接,然后执行标题中提到的操作。
答案 0 :(得分:0)
您可以使用 Selenium WebDriver 来模拟点击这些链接。例如,假设页面中有如下链接:
<body>
<a href="hello.html" target="_self">Hello</a>
</body>
要点击指向 hello.html 的链接,请按其链接文本查找该元素,然后调用 click():
# Locate the anchor by its visible text. It must match the link text in the
# page ("Hello" in the HTML example), not an unrelated label.
# find_element with the "link text" strategy replaces the removed
# find_element_by_link_text helper and needs no extra import.
link = driver.find_element("link text", "Hello")
link.click()