我写了一个代码,用于从airbnb.com抓取数据,实际上我想抓取所有带有详细信息的评论,例如列表名称,总评论,评论,评论者姓名,日期,评论,但我的代码不执行直接执行的try部分除了一部分。请指导我如何解决此问题。谢谢! 这是我的代码:
import requests
from bs4 import BeautifulSoup
#import pandas as pd
import csv
def get_page(url):
response = requests.get(url)
if not response.ok:
print('server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'html.parser') # 1. html , 2. parser
return soup
def get_detail_data(soup):
try:
title = soup.find_all('span',class_="_18hrqvin",id=False).text
except:
title = 'empty'
print(title)
try:
reviews = soup.find_all('div',class_="_10za72m2",id=False).text
except:
reviews = 'empty revies'
print(reviews)
try:
total_reviews = soup.find_all('span',class_="_krjbj",id=False).text
except:
total_reviews = 'empty total revies'
print(total_reviews)
try:
total_reviews = soup.find_all('span',class_="_krjbj",id=False).text
except:
total_reviews = 'empty total revies'
print(total_reviews)
try:
commenter_name = soup.find_all('div',class_="_1p3joamp",id=False).text
except:
commenter_name = 'empty commenter_name'
print(commenter_name)
try:
comment_date = soup.find_all('span',class_="_1jlnvra2",id=False).text
except:
comment_date = 'empty comment_date'
print(comment_date)
try:
comment_date = soup.find_all('span',class_="_1jlnvra2",id=False).text
except:
comment_date = 'empty comment_date'
print(comment_date)
try:
comment = soup.find_all('div',class_="_czm8crp",id=False).text
except:
comment = 'empty comment'
print(comment)
def main():
url = "https://www.airbnb.com/rooms/34826867?source_impression_id=p3_1584615891_nVK823DKHNHuFWCQ"
get_detail_data(get_page(url))
if __name__ == '__main__':
main()
答案 0 :(得分:0)
如@arcticsanto所建议,如果发现响应不正确,则get_page
返回None
,因此get_detail_data
无法获得BeautifulSoup,因此只需在get_detail_data
中添加汤的验证即可
def get_page(url):
response = requests.get(url)
if not response.ok:
print('server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'html.parser') # 1. html , 2. parser
return soup
def get_detail_data(soup):
if not soup:
return
----