我想解析一个网站,所以我写了两个版本的解析器(一个带有asyncio,另一个没有)。但是异步代码执行的代码与没有异步解析器的代码更多或相同。俄语代码中的一些细节认为没有问题。我今天开始使用asyncio。帮我。
此代码带有asyncio:
import asyncio
from bs4 import BeautifulSoup
from urllib.request import *
import pprint
import time
url = "https://besmart.kz"
def get_html(url):
req = Request(url)
html = urlopen(req).read()
return html
async def get_stock_data(i):
html = get_html(url + i['href'])
soup = BeautifulSoup(html, 'html.parser')
stock_data = {}
try:
stock_data["Old price"] = soup.find('span', class_='line-through red').find('span', class_='text-muted greyColor').text.strip().replace('\u2009','')
except:
stock_data["Old price"] = "Отсутствует"
try:
stock_data["Price"] = soup.find('div', id='calc-price', class_='price').text.strip().replace('\u2009','')
except:
stock_data["Price"] = "Ошибка"
try:
stock_data["Title"] = soup.find('div', class_='title').find('h1', itemprop='name').text.strip().replace('\u2009','')
except:
stock_data["Title"] = "Ошибка"
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(stock_data)
if __name__ == "__main__":
opener = build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
install_opener(opener)
stock_list = []
for i in range(1,4):
html = get_html(url + "/?page=" + str(i))
soup = BeautifulSoup(html, 'html.parser')
stock_list.extend(soup.find_all('a', class_='deal__discount-kz'))
ioloop = asyncio.get_event_loop()
try:
start = time.time()
coroutines = [ioloop.create_task(get_stock_data(i)) for i in stock_list]
ioloop.run_until_complete(asyncio.wait(coroutines))
finally:
ioloop.close()
print(f"Время выполнения: {time.time() - start}")
这种情况没有:
import asyncio
from bs4 import BeautifulSoup
from urllib.request import *
import pprint
import time
url = "https://besmart.kz"
def get_html(url):
req = Request(url)
html = urlopen(req).read()
return html
if __name__ == "__main__":
opener = build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
install_opener(opener)
stock_list = []
for i in range(1,4):
html = get_html(url + "/?page=" + str(i))
soup = BeautifulSoup(html, 'html.parser')
stock_list.extend(soup.find_all('a', class_='deal__discount-kz'))
start = time.time()
for i in stock_list:
html = get_html(url + i['href'])
soup = BeautifulSoup(html, 'html.parser')
stock_data = {}
try:
stock_data["Old price"] = soup.find('span', class_='line-through red').find('span', class_='text-muted greyColor').text.strip()
except:
stock_data["Old price"] = "Отсутствует"
try:
stock_data["Price"] = soup.find('div', id='calc-price', class_='price').text.strip()
except:
stock_data["Price"] = "Ошибка"
try:
stock_data["Title"] = soup.find('div', class_='title').find('h1', itemprop='name').text.strip()
except:
stock_data["Title"] = "Ошибка"
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(stock_data)
print(f"Время выполнения: {time.time() - start}")
答案 0 :(得分:2)
您可以使用n = n;
模块来简化事情。例如:
aiohttp