我想从 Coursera 网站上抓取课程名称和评分。
这是我的代码：
# Step 1
# Import libraries
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Step 2
# Create Generic URL retrieval function
def get_site_file(url):
    """Fetch *url* and return the course-list element of the page.

    Args:
        url: Address of the web page to download.

    Returns:
        The first ``<ul class="ais-InfiniteHits-list">`` element found in
        the parsed page, or ``None`` when the page could not be fetched or
        no such element exists in the returned HTML.
    """
    try:
        # The context manager closes the response once it has been parsed.
        with urlopen(url) as html:
            bs = BeautifulSoup(html, 'html.parser')
    except URLError as e:
        # HTTPError is a subclass of URLError, so this covers bad status
        # codes as well as connection-level failures.  Return early:
        # without a parsed document there is nothing to search.
        print(e)
        return None

    # find() returns None (it does not raise) when nothing matches.
    discovery_course = bs.find("ul", {'class': 'ais-InfiniteHits-list'})
    if discovery_course is None:
        # NOTE(review): the list may be built client-side by JavaScript, in
        # which case it never appears in the raw HTML - confirm by
        # inspecting the downloaded page source.
        print('Something seems to be missing with the tag')
    return discovery_course
# Step 3
# Assign variable to Generic URL
# Coursera courses
page_content = get_site_file('https://www.coursera.org/courses')
# Step 5
# Parse Data
if page_content is None:
    # get_site_file() hands back None when the
    # <ul class="ais-InfiniteHits-list"> element is not in the fetched HTML.
    print('The file could not be found')
else:
    courses = page_content.find_all('li', {'class': 'ais-InfiniteHits-item'})
    for course in courses:
        # Only the two lookups can hit a missing tag (None has no
        # get_text), so keep the try body limited to them.
        try:
            course_title = course.h2.get_text()
            course_rating = course.find(
                'span', {'class': 'ratings-text'}
            ).get_text()
        except AttributeError as e:
            print(e)
            continue
        print(f"Course Title: \t {course_title}")
        print(f"Course Rating: \t {course_rating}")
        print('\n|<<<------- New Course ------->>>|\n')
这段代码有一些错误……它输出的是“The file could not be found”（找不到此文件）。请帮我得到正确的输出。