我几天前学习了python crawler,然后做了一些练习。但我遇到了一个问题,我不知道如何解决它。
from bs4 import BeautifulSoup
import requests
import time
def crawler(url, data=None):
wb_data = requests.get(url)
time.sleep(2)
soup = BeautifulSoup(wb_data.text, 'lxml')
headers = soup.select('div.pho_info > h4 > em')
addresses = soup.select('div.pho_info > p > span.pr5')
prices = soup.select('#pricePart > div.day_l > span')
images1 = soup.select('#curBigImage')
images2 = soup.select('div.member_pic > a > img')
names = soup.select('div.w_240 > h6 > a')
genders = soup.select('div.member_pic > div')
def crawl_gender(a):
if a == 'member_ico1':
return 'man'
elif a == 'member_ico':
return 'woman'
if data == None:
for header, address, price, image1, image2, name, gender in zip(headers, addresses, prices, images1, images2, names, genders):
data = {
'header': header.get('header'),
'address': address.get('adderss'),
'price': price.get('price'),
'image1': image1.get('image1'),
'image2': images2.get('src'),
'name': name.get('name'),
'gender': crawl_gender(genders)
}
print(data)
crawler('http://bj.xiaozhu.com/fangzi/1115060277.html')
以下是编译器返回的内容:
Traceback (most recent call last):
File "/Users/duanshulan/PycharmProjects/untitled1/Crawler.py", line 36, in <module>
crawler('http://bj.xiaozhu.com/fangzi/1115060277.html')
File "/Users/duanshulan/PycharmProjects/untitled1/Crawler.py", line 30, in crawler
'image2': images2.get('src'),
AttributeError: 'list' object has no attribute 'get'
答案 0 :(得分:0)
这意味着您的images2
变量中有超过1张图片。尝试:
'image2': images2[0].get('src'),
仅访问第一个。