我正在尝试使用 grequests 和 BeautifulSoup 抓取多个页面。我能够抓取单个页面,但是当我把代码改为迭代多个页面时,就收到了标题中列出的错误。代码:
from bs4 import BeautifulSoup
import pandas as pd
# City/state slug typed by the user (e.g. "miami-fl"); parse() stores it in
# the "City&State" column of every scraped row.
_city = input(
    'Enter the name of the City and State, example format(miami-fl): '
)

# Browser-like User-Agent header intended for the outgoing HTTP requests.
headers = {
    'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
}
def get_urls(city='miami-fl', first_page=1, last_page=1):
    """Build the apartments.com listing URLs for a range of result pages.

    Args:
        city: City/state slug used in the URL path, e.g. ``'miami-fl'``.
        first_page: Number of the first result page (inclusive).
        last_page: Number of the last result page (inclusive).

    Returns:
        A list of page URLs, one per page number. With the defaults this is
        the single URL the previously hard-coded version produced; an empty
        list is returned when ``last_page < first_page``.
    """
    return [
        f'https://www.apartments.com/{city}/{page}/'
        for page in range(first_page, last_page + 1)
    ]
def get_data(urls):
    """Fetch all *urls* concurrently and return the responses.

    The module-level ``headers`` dict is sent with every request; it was
    defined but never used before, so requests went out with the default
    client User-Agent.

    NOTE(review): ``grequests`` is not imported in the visible part of this
    file. It needs an ``import grequests`` at the top of the module
    (presumably before other network libraries, since it relies on gevent
    monkey-patching - confirm against the grequests README).

    Args:
        urls: Iterable of URL strings to request.

    Returns:
        The list produced by ``grequests.map``, in the same order as *urls*.
        Per the grequests documentation an entry is ``None`` when its
        request failed, so callers must be prepared for ``None`` items.
    """
    reqs = [grequests.get(link, headers=headers) for link in urls]
    return grequests.map(reqs)
def _text_or_none(item, tag, css_class, strip=False):
    """Return the text of the first matching descendant of *item*, or None."""
    node = item.find(tag, {'class': css_class})
    if node is None:
        return None
    return node.text.strip() if strip else node.text


def parse(resp):
    """Extract one record per apartment listing from the fetched pages.

    Args:
        resp: Iterable of response objects exposing a ``.text`` attribute
            with the page HTML. ``None`` entries (failed requests) are
            skipped instead of raising ``AttributeError``.

    Returns:
        A list of dicts, one per ``li`` element with class
        ``mortar-wrapper``. A field whose element is missing from a listing
        is stored as ``None`` rather than aborting the whole run.
    """
    apartments = []
    for r in resp:
        if r is None:
            # The request for this page failed; nothing to parse.
            continue
        soup = BeautifulSoup(r.text, 'lxml')
        for item in soup.find_all('li', {'class': 'mortar-wrapper'}):
            phone = item.find('a', {'class': 'phone-link js-phone'})
            apartments.append({
                'Property_name': _text_or_none(item, 'span', 'js-placardTitle title'),
                # NOTE(review): the selector is an empty string - looks like a
                # placeholder; confirm which element holds the unit name.
                'Unit_name': item.find(''),
                'Formatted_address': _text_or_none(item, 'div', 'property-address js-url'),
                'City&State': _city,
                'Bedrooms': _text_or_none(item, 'div', 'bed-range'),
                'Price_Range': _text_or_none(item, 'div', 'price-range'),
                'Availability': _text_or_none(item, 'div', 'availability'),
                'Property_Amenities': _text_or_none(item, 'div', 'property-amenities', strip=True),
                'Phone_Number': phone.attrs.get('href') if phone is not None else None,
            })
    # Debug output: show everything that was scraped, once, after parsing.
    print(apartments)
    return apartments
def output(apartments, path='apts.csv'):
    """Write the scraped apartment records to a CSV file.

    The ``def`` line had been commented out while its body (including a
    bare ``return``) was left at module level, which is a syntax error;
    the function is restored here.

    Args:
        apartments: Records accepted by ``pandas.DataFrame`` (e.g. a list
            of dicts as returned by ``parse``).
        path: Destination file; defaults to ``'apts.csv'``.

    Returns:
        None.
    """
    aptdf = pd.DataFrame(apartments)
    aptdf.to_csv(path, index=False)
    print('Saved to CSV')
if __name__ == '__main__':
    # Script entry point: build the page URLs, download them, parse the
    # listings and save the result as apts.csv in the working directory.
    urls = get_urls()
    resp = get_data(urls)
    df = pd.DataFrame(parse(resp))
    df.to_csv('apts.csv', index=False)
Edited the code to correct the formatting, but it still won't run or debug.