# Scrape review HTML and star ratings from pages 1-59 of one Amazon product's
# review listing, collecting them into two parallel lists.
headers = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html))'}
reviews = []  # str() of each review-body <div>, one entry per review block
ratings = []  # star digit for the review at the same index ('' when not found)
amazon_url = 'https://www.amazon.com/Apple-MacBook-13-inch-128GB-Storage/product-reviews/B07V398CVJ/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews&pageNumber='

# Heading of the robot-check page Amazon may serve in place of the reviews.
_CAPTCHA_MARKER = 'Enter the characters you see below'
# The star count is encoded in a CSS class of the form "a-star-N".
_STAR_CLASS_PREFIX = 'a-star-'


def _star_digit(rating_tag):
    """Return the star digit from the tag's ``a-star-N`` class as a string.

    Returns '' when the tag is None or has no such class. Reading the class
    avoids depending on a fixed character offset in the serialized tag.
    """
    if rating_tag is None:
        return ''
    for css_class in rating_tag.get('class') or []:
        if css_class.startswith(_STAR_CLASS_PREFIX):
            return css_class[len(_STAR_CLASS_PREFIX):][:1]
    return ''


for i in range(1, 60):
    url = amazon_url + str(i)
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept from the original; remove it unless the environment requires it.
    page = requests.get(url, headers=headers, verify=False, timeout=30)
    response = page.text

    # A blocked request carries no review blocks, so continuing would only
    # send more requests and silently collect nothing. Stop and say why.
    if page.status_code != 200 or _CAPTCHA_MARKER in response:
        print('Stopping at page {}: HTTP {} / robot check returned instead of reviews'.format(
            i, page.status_code))
        break

    soup = BeautifulSoup(response, "html.parser")
    review_blocks = soup.find_all('div', attrs={'data-hook': 'review'})
    for b in review_blocks:
        review = b.find('div', attrs={'class': 'a-row a-spacing-small review-data'})
        rating = b.find('i', attrs={'data-hook': 'review-star-rating'})
        # Always append to both lists so they stay index-aligned, even when
        # one of the two elements is missing from a block.
        reviews.append(str(review))
        ratings.append(_star_digit(rating))
    print(len(reviews))  # running total after each page
print(len(reviews))  # final total
答案 0 :(得分:1)
如果您打印亚马逊返回的任意一个响应,就会看到它返回的并不是评论页面,而是如下的机器人验证页面:
<h4>Enter the characters you see below</h4>
<p class="a-last">Sorry, we just need to make sure you're not a robot. For best results, please make sure your browser is accepting cookies.</p>
To discuss automated access to Amazon data please contact api-services-support@amazon.com.
For information about migrating to our APIs refer to our Marketplace APIs at https://developer.amazonservices.com/ref=rm_c_sv, or our Product Advertising API at https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html/ref=rm_c_ac for advertising use cases.