我正在从一个电话簿网站抓取数据,问题是输出的 HTML 链接不完整:链接在遇到 č、ć、ž、š、đ 这些字母之前就中断了。我的问题是,如何让生成的 HTML 链接包含 čćžšđ 这些字母?
from urllib.parse import quote
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Search-results page to scrape; the town name is already percent-encoded
# as UTF-8 ("vi%C5%A1njan").
url = 'http://www.imenik.hr/imenik/trazi/1/Osobe/sve' \
    '/sve/vaznost/mjesto:vi%C5%A1njan.html'

# Prefix that is prepended to every href found on the page.
base_url = 'http://www.imenik.hr'

# Characters that quote() must leave untouched: URL delimiters, plus '%'
# so that hrefs which are already percent-encoded are not encoded twice.
_URL_SAFE_CHARS = "/:?=&%#;@+,"


def build_request_url(href, prefix=base_url):
    """Return ``prefix + href`` with the href percent-encoded.

    Spaces become ``%20`` and non-ASCII letters (for example the Croatian
    letters "čćžšđ") are encoded as their UTF-8 percent-escapes, so the
    resulting URL consists of ASCII characters only.

    Args:
        href: The raw ``href`` attribute value taken from an ``<a>`` tag.
        prefix: Scheme and host to prepend; defaults to ``base_url``.

    Returns:
        The concatenated, percent-encoded URL as a string.
    """
    return prefix + quote(href, safe=_URL_SAFE_CHARS)


def main():
    """Fetch the search page and print every result link, raw and encoded."""
    # The context manager closes the HTTP response once the body is read.
    with urlopen(url) as response:
        html = response.read()

    page_soup = BeautifulSoup(html, 'html.parser')
    for container in page_soup.find_all('div', class_='list_naslov'):
        for link in container.find_all('a', href=True):
            print('href:', link['href'])
            print(build_request_url(link['href']))


if __name__ == '__main__':
    main()