def proba():
    """Scrape the debt-ratio table for ticker 06N from biznesradar.pl.

    Each ``<tr>`` that carries a ``data-field`` attribute names one
    indicator. The numbers found in ``span.value`` elements inside the
    ``td.h`` cells of those rows are gathered into one flat list, which is
    then cut into ``len(fields)`` chunks with ``np.array_split``.

    Returns:
        dict: maps ``"zadl_<data-field>"`` to a float NumPy array. Empty
        when the page has no ``<tr data-field=...>`` rows.

    Raises:
        ValueError: if a ``span.value`` text is not a valid float once
            spaces are removed.

    Note:
        A cell without a ``span.value`` contributes nothing to the flat
        list, so when cells are empty the chunks no longer line up with the
        table rows. Substitute ``np.nan`` for such cells if aligned rows are
        required.
    """
    url = 'https://www.biznesradar.pl/wskazniki-zadluzenia/06N'
    # Parse inside the ``with`` so the HTTP response is closed right after
    # the document has been read.
    with urllib.request.urlopen(url) as html_file:
        soup = BeautifulSoup(html_file, 'lxml')

    fields = [
        item['data-field']
        for item in soup.find_all('tr', attrs={'data-field': True})
    ]
    if not fields:
        # np.array_split rejects a section count of zero.
        return {}

    row_data = []
    for field in fields:
        row = soup.find(attrs={'data-field': field})
        for cell in row.find_all('td', class_='h'):
            # Thousands are separated by spaces on the page; strip them
            # before converting.
            row_data.extend(
                float(span.text.replace(' ', ''))
                for span in cell.find_all('span', class_='value')
            )

    # Split once instead of once per field. The builtin ``float`` replaces
    # the ``np.float`` alias, which newer NumPy releases no longer provide.
    chunks = np.array_split(row_data, len(fields))
    return {
        f"zadl_{field}": chunk.astype(float)
        for field, chunk in zip(fields, chunks)
    }
我想把表格中这些空的单元格抓取为 None 或 np.nan,使抓取到的每一行数据长度相等。HTML 看起来像这样:
当我查找一个不存在的属性时,例如:
# Look up the element whose data-field is 'LDER', then use attribute-style
# access to get its first nested <tr>; BeautifulSoup yields None for `.tr`
# when no such descendant exists.
x = soup.find(attrs = {f'data-field': 'LDER'}).tr
这时 Python 会返回 NoneType 对象,因此我尝试了如下的条件判断:
# Attempted fix: append np.nan when the cell has no <span>. The check sits
# inside the loop over the matching spans, so a cell with no matching span
# produces zero iterations and the `else` branch never runs for it.
[row_data.append(float(i.text.replace(' ', ''))) if war.span is not None else row_data.append(np.nan) for i in war.find_all('span', class_ = 'value')]
但是它不能正常工作 :(。有什么想法吗?
答案 0 :(得分:0)
# Module-level result store filled in by proba().
dict_of_list = {}


def proba():
    """Scrape the debt-ratio table for ticker 06N, keeping empty cells as NaN.

    Each ``<tr>`` that carries a ``data-field`` attribute names one
    indicator. For every ``td.h`` cell of such a row, the text of the cell's
    first ``<span>`` is converted to a float; a cell with no ``<span>`` is
    recorded as ``np.nan`` so that the cell keeps its position in the row.

    The results are stored in the module-level ``dict_of_list`` under the
    keys ``"zadl_<data-field>"`` and the same dict is returned.

    Returns:
        dict: the module-level ``dict_of_list``, mapping each key to a float
        NumPy array with one entry per ``td.h`` cell of that row.

    Raises:
        ValueError: if a span's text is not a valid float once spaces are
            removed.
    """
    url = 'https://www.biznesradar.pl/wskazniki-zadluzenia/06N'
    # Parse inside the ``with`` so the HTTP response is closed right after
    # the document has been read.
    with urllib.request.urlopen(url) as html_file:
        soup = BeautifulSoup(html_file, 'lxml')

    fields = [
        item['data-field']
        for item in soup.find_all('tr', attrs={'data-field': True})
    ]

    # Collect values per row rather than flattening and re-splitting: each
    # key then always holds its own row's cells, even if rows differ in
    # length.
    scraped = {}
    for field in fields:
        row = soup.find(attrs={'data-field': field})
        values = []
        for cell in row.find_all('td', class_='h'):
            span = cell.span
            if span is None:
                values.append(np.nan)
            else:
                # Thousands are separated by spaces on the page.
                values.append(float(span.text.replace(' ', '')))
        # The builtin ``float`` replaces the ``np.float`` alias, which newer
        # NumPy releases no longer provide.
        scraped[f"zadl_{field}"] = np.array(values, dtype=float)

    # Publish only after every row parsed successfully.
    dict_of_list.update(scraped)
    return dict_of_list