我正在尝试废弃一个页面并获取标签中的hrefs数据,但我无法获得结果。这是我学校的任务。 请问有人帮我解决这个问题吗?
这是我的代码:
from bs4 import BeautifulSoup
import re
import requests
for i in range (1,5):
base_url = 'https://www.leboncoin.fr/locations'
url = 'https://www.leboncoin.fr/locations/1152669519.htm?ca=22_s'
res = requests.get(url)
soup = BeautifulSoup(res.text)
locations = []
links = soup.find_all(['li','a'], href=re.compile('.*\/locations\/+(i)+'))
for l in links:
full_link = base_url + l['href']
titre = l[['li', 'section', 'h2']].strip()
res = requests.get(full_link)
soup = BeautifulSoup(res.text)
loyer = soup.find(['h2','span'], attrs={"class": "value\^((?![A-Z]).)*$"})
loyers = loyer.text
ville = soup.find(['h2','span'], attrs={"class": "value", "itemprop": "adresse"})
villes = ville.text
surface = soup.find(['h2','span'], attrs={"class": "clearfix", "calss": "value"})
surfaces = surface.text
description = soup.find('p', attrs={"class": "value", "itemprop": "description"})
descriptions = description.text
shops.append(titre, loyers, villes, surfaces, descriptions)
print(locations)
我得到了这样的结果:
[]
[]
[]
[]
提前感谢您的回答。
答案 0 :(得分:0)
你还没有在任何地方定义商店,我假设你想要一本字典。所以为此:
for l in links:
shops = {}
full_link = base_url + l['href']
shops['titre'] = l[['li', 'section', 'h2']].strip()
res = requests.get(full_link)
soup = BeautifulSoup(res.text)
loyer = soup.find(['h2','span'], attrs={"class": "value\^((?![A-Z]).)*$"})
shops['loyers'] = loyer.text
ville = soup.find(['h2','span'], attrs={"class": "value", "itemprop": "adresse"})
shops['villes'] = ville.text
surface = soup.find(['h2','span'], attrs={"class": "clearfix", "calss": "value"})
shops['surfaces'] = surface.text
description = soup.find('p', attrs={"class": "value", "itemprop": "description"})
shops['descriptions'] = description.text
locations.append(shops)
print(locations)