嗨,我能得到一些帮助吗,我得到滴度不是定义错误
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
url = "https://www.centris.ca/Property/GetInscriptions"
headers = {
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101
Firefox/89.0",
}
json_data = {"startPosition": 0}
with requests.session() as s:
Centris = []
# load cookies:
s.get(
"https://www.centris.ca/en/commercial-units~for-rent?uc=0",
headers=headers,
)
for page in range(0, 100, 20): # <-- increase number of pages here
json_data["startPosition"] = page
data = s.post(url, headers=headers, json=json_data).json()
soup = BeautifulSoup(data["d"]["Result"]["html"], "html.parser")
for a in soup.find_all('div', attrs={'id':'divMainResult'}):
titre = a.find('span', attrs={'data-id': 'PageTitle'}).get_text(strip=True)
emplacement = a.find_all('h2', attrs={'class': 'pt-1'}).get_text(strip=True, separator="\n")
lien = "https://www.centris.ca" + a["href"]
prix = a.find_all('span', attrs={'class':'text-nowrap'}).get_text(strip=True)
superficie = a.find('div', attrs={'class': 'carac-value'}, string=re.compile('sqft')).get_text(strip=True)
Centris.append((titre, emplacement, lien, prix))
df = pd.DataFrame(Centris, columns={'Titre':titre, 'Emplacement':emplacement, 'Lien':lien,
'Prix':prix, 'Superficie': superficie })
writer = pd.ExcelWriter('Centris4.xlsx')
df.to_excel(writer)
writer.save()
print( 'Data Saved To excel' )
当我运行这段代码时,我得到了 name titre not defined 我找不到错误在哪里。 任何帮助将不胜感激,此代码是否能够抓取多个页面?