我正在尝试弄清楚如何遍历多个页面,以获取 Indeed 上的公司评级。有什么想法吗?下面是我目前完成的代码,它只能提取单个页面的数据。
def _review_text(container, tag, css_class):
    """Return the text of the first matching child with newlines removed.

    Returns None when ``container`` has no ``tag`` element carrying
    ``css_class``, so a missing field becomes an empty cell instead of
    raising.
    """
    node = container.find(tag, {'class': css_class})
    if node is None:
        return None
    return node.text.replace('\n', '')


def parse(base_url='https://www.indeed.com/cmp/Google/reviews'):
    """Scrape one reviews page into a DataFrame, one row per review.

    Args:
        base_url: URL of the reviews page to download. Defaults to the
            Google reviews page that was previously hard-coded, so
            calling ``parse()`` keeps fetching that page. Pass a
            different URL per call to walk over several pages.

    Returns:
        A ``pandas.DataFrame`` with the columns ``rating``,
        ``rating_title``, ``rating_description``, ``rating_pros`` and
        ``rating_cons``. A field that is absent from a review is None.
        The frame is empty (columns only) when the page contains no
        ``cmp-review-container`` elements.

    Raises:
        Whatever ``requests.get`` raises (e.g. on timeout after 5 s).
        The HTTP status code is not checked, so an error page simply
        yields an empty frame.
    """
    # (output column, HTML tag, CSS class) for every scraped field.
    fields = (
        ('rating', 'div', 'cmp-ratingNumber'),
        ('rating_title', 'div', 'cmp-review-title'),
        ('rating_description', 'span', 'cmp-review-text'),
        ('rating_pros', 'div', 'cmp-review-pro-text'),
        ('rating_cons', 'div', 'cmp-review-con-text'),
    )

    # Fetch the page the caller asked for; the argument used to be
    # overwritten by a hard-coded URL and was therefore ignored.
    response = requests.get(base_url, timeout=5)
    page_content = BeautifulSoup(response.content, 'lxml')
    containers = page_content.find_all('div', {'class': 'cmp-review-container'})

    # Collect plain dicts and build the frame once: DataFrame.append
    # copied the whole frame per row and no longer exists in pandas 2.x.
    rows = [
        {column: _review_text(item, tag, css_class)
         for column, tag, css_class in fields}
        for item in containers
    ]
    return pd.DataFrame(rows, columns=[column for column, _, _ in fields])