我尝试使用 aiohttp 和 asyncio 发送请求,但是出现了如下错误:
'TypeError: An asyncio.Future, a coroutine or an awaitable is required'(需要 asyncio.Future、协程或可等待的对象)
这是我的代码,我该如何解决?
import requests
from bs4 import BeautifulSoup
import asyncio
import aiohttp
# Fetch the "top" index page synchronously and collect an absolute URL for
# every link matched by the selector below.
res = requests.get('https://www.rottentomatoes.com/top/')
soup = BeautifulSoup(res.text,'lxml')
movie_list=[]
for link in soup.select('section li a[href]'):
    # The href is appended to the site root by plain string concatenation.
    movie_list.append('https://www.rottentomatoes.com'+link.get('href'))
async def request(url):
    """Fetch *url* with aiohttp and collect the href of each matched link.

    Opens a new ClientSession for the call, reads the response body as
    UTF-8 text, parses it with BeautifulSoup and returns the list of hrefs.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            body = await resp.text(encoding='utf-8')
            soup =BeautifulSoup(body,'lxml')
            movie = []
            # NOTE(review): `async for` and `await` are applied here to
            # ordinary synchronous calls (soup.select / list.append); the
            # corrected version of this script uses a plain `for` loop and
            # no `await` on append.
            async for link in soup.select('tbody tr td a '):
                await movie.append(link.get('href'))
            return movie
async def main():
    """Run request() for every URL in movie_list via asyncio.gather.

    Prints the gathered results and also returns them.
    """
    results = await asyncio.gather(*[request(url) for url in movie_list])
    print(results)
    return results
# Show the collected URLs, then hand the work to the asyncio event loop.
print(movie_list)
loop = asyncio.get_event_loop()
# NOTE(review): this passes the function object `main`, not the coroutine
# `main()`; this is the call that produces the reported
# "asyncio.Future, a coroutine or an awaitable is required" error.
results = loop.run_until_complete(main)
答案 0 :(得分:0)
您需要调用 `loop.run_until_complete(main())`,即传入协程对象 `main()`,而不仅仅是函数 `main`(不带括号)。其次,遍历 `soup.select()` 的结果时不需要 `async` 关键字(`movie.append(...)` 前也不需要 `await`),因为它返回的是普通列表。我还更改了选择器字符串以解析内容:
import requests
from bs4 import BeautifulSoup
import asyncio
import aiohttp
# Download the "top" index page once (synchronously) and turn every matched
# link into an absolute URL; these are the pages fetched concurrently later.
res = requests.get('https://www.rottentomatoes.com/top/')
soup = BeautifulSoup(res.text, 'lxml')
movie_list = [
    'https://www.rottentomatoes.com' + link.get('href')
    for link in soup.select('section li a[href]')
]
async def request(url):
    """Fetch *url* and return the href of every link in its top-movies table.

    A new aiohttp ClientSession is opened for this call. The response body
    is decoded as UTF-8 and parsed with BeautifulSoup using the 'lxml'
    parser.

    Returns:
        A list with the ``href`` attribute of each element matched by
        ``section#top_movies_main table a``, in document order.
    """
    async with aiohttp.ClientSession() as session, session.get(url) as resp:
        html = await resp.text(encoding='utf-8')
        page = BeautifulSoup(html, 'lxml')
        # Parsing is ordinary synchronous code, so a plain comprehension is
        # used here -- no ``async for`` and no ``await`` on the list.
        return [
            anchor['href']
            for anchor in page.select('section#top_movies_main table a')
        ]
async def main():
    """Fetch every URL in movie_list concurrently and return the results.

    One request() coroutine is created per URL and all of them are awaited
    together with asyncio.gather, so the returned list has one entry per
    URL in the same order as movie_list. The list is printed before being
    returned.
    """
    pending = [request(page_url) for page_url in movie_list]
    results = await asyncio.gather(*pending)
    print(results)
    return results
# Show the collected URLs, then run main() to completion on the event loop
# and keep its return value.
print(movie_list)
loop = asyncio.get_event_loop()
# NOTE(review): on Python 3.7+ asyncio.run(main()) is the usual high-level
# entry point -- confirm the target Python version before switching.
results = loop.run_until_complete(main()) # main is called here so the loop receives a coroutine object
打印:
['https://www.rottentomatoes.com/top/bestofrt/top_100_action__adventure_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_animation_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_art_house__international_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_classics_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_comedy_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_documentary_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_drama_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_horror_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_kids__family_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_musical__performing_arts_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_mystery__suspense_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_romance_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_science_fiction__fantasy_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_special_interest_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_sports__fitness_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_television_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_western_movies/']
[['/m/mad_max_fury_road', '/m/1013775-metropolis', '/m/wonder_woman_2017', '/m/logan_2017', '/m/1011615-king_kong', '/m/zootopia', '/m/1000355-adventures_of_robin_hood', '/m/star_wars_episode_vii_the_force_awakens',
... and so on