我有以下 Python 脚本:
from bs4 import BeautifulSoup
import requests
# Accumulates one dict per table row found on every monthly report page.
home_dict = []

for year in range(2005, 2021):
    if year == 2020:
        # For 2020 only January through May are scraped.
        for month in range(1, 6):
            url = ('https://www.rebgv.org/market-watch/MLS-HPI-home-price-comparison.hpi.all.all.'
                   + str(year) + '-' + str(month) + '-1.html')
            response = requests.get(url)
            page = BeautifulSoup(response.text, 'html.parser')
            wrapper = page.find('div', class_="table-wrapper")
            for body in wrapper.find_all('tbody'):
                for row in body.find_all('tr'):
                    # Look the cells up once; columns are read by position.
                    cells = row.find_all('td')
                    home_dict.append({
                        "Area": row.find('td').text,
                        "Benchmark": cells[1].text,
                        "Price Index": cells[2].text,
                        "1 Month +/-": cells[3].text,
                        "6 Month +/-": cells[4].text,
                        "1 Year +/-": cells[5].text,
                        "3 Year +/-": cells[6].text,
                        "5 Year +/-": cells[7].text,
                        "Property Type": cells[8].text,
                        "Report Month": month,
                        "Report Year": year
                    })
    else:
        # Every other year is scraped for all twelve months.
        for month in range(1, 13):
            url = ('https://www.rebgv.org/market-watch/MLS-HPI-home-price-comparison.hpi.all.all.'
                   + str(year) + '-' + str(month) + '-1.html')
            response = requests.get(url)
            page = BeautifulSoup(response.text, 'html.parser')
            wrapper = page.find('div', class_="table-wrapper")
            for body in wrapper.find_all('tbody'):
                for row in body.find_all('tr'):
                    # Look the cells up once; columns are read by position.
                    cells = row.find_all('td')
                    home_dict.append({
                        "Area": row.find('td').text,
                        "Benchmark": cells[1].text,
                        "Price Index": cells[2].text,
                        "1 Month +/-": cells[3].text,
                        "6 Month +/-": cells[4].text,
                        "1 Year +/-": cells[5].text,
                        "3 Year +/-": cells[6].text,
                        "5 Year +/-": cells[7].text,
                        "Property Type": cells[8].text,
                        "Report Month": month,
                        "Report Year": year
                    })

print(home_dict)
此脚本用于从网站上抓取数据。如果年份是 2020 年,则只抓取 1 月到 5 月;其他年份则抓取 1 月到 12 月。
目前我在 if-else 条件语句的两个分支中重复了脚本的主体。请问是否有更简单的方法来编写此脚本,使其看起来更整洁、不再重复自身?
答案 0 :(得分:1)
也许可以尝试使用 try 子句?
# Attempt every month of every year and skip the ones that fail, instead of
# special-casing 2020.
for year in range(2005, 2021):
    for month in range(1, 13):
        try:
            ...  # <your code>: fetch and parse the page for this year/month
        except Exception:
            # Presumably this fires for months with no usable report page
            # (e.g. the later months of 2020) - verify against the site.
            # `Exception` rather than a bare `except:` so that Ctrl-C
            # (KeyboardInterrupt) can still stop the scraper.
            continue
答案 1 :(得分:1)
只需定义一个 dict,将年份作为键、将月份范围作为值:
# Years that are only partially scraped, mapped to the (start, stop)
# arguments handed to range(); stop is exclusive, so (1, 6) means months 1-5.
filter_ = {2020: (1, 6)}

for year in range(2005, 2021):
    # Years absent from filter_ fall back to the full range of months 1-12.
    start, stop = filter_.get(year, (1, 13))
    for month in range(start, stop):
        url = ('https://www.rebgv.org/market-watch/MLS-HPI-home-price-comparison.hpi.all.all.'
               + str(year) + '-' + str(month) + '-1.html')
        r = requests.get(url)
        ...  # the rest of the loop body continues here
答案 2 :(得分:0)
因为每一年都需要抓取前几个月(即 range(1, 6) 所对应的月份),您可以先对所有年份抓取这些月份;然后,如果年份不等于 2020,再抓取其余的月份。