我想抓取网页中的所有链接，但我运行的代码似乎只找到了其中的一部分。
def main(url=None):
    """Fetch a web page and print the ``href`` of every ``<a>`` tag in it.

    Args:
        url: Address of the page to scan. When omitted, the Indeed search
            page this script was originally written against is used.

    Anchors that carry no ``href`` attribute are skipped instead of being
    printed as ``None``.
    """
    # urllib2 lives at urllib.request on Python 3; fall back to the Python 2
    # module name only when that import genuinely fails.
    try:
        import urllib.request as urllib2
    except ImportError:
        import urllib2
    from bs4 import BeautifulSoup

    if url is None:
        url = "http://www.indeed.com/q-biomedical-Engineer-l-boston,-MA-jobs.html"

    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body raises.
        response.close()

    soup = BeautifulSoup(html, 'html.parser')

    # NOTE(review): only anchors present in the HTML returned by the server
    # are visible here; links a browser adds later via JavaScript would not
    # be found -- presumably why fewer links show up than expected; confirm.
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href is not None:
            print(href)
# Run the scraper only when executed as a script, so that importing this
# module does not trigger a network request.
if __name__ == "__main__":
    main()