嗨,我正在尝试抓取医生及其地址的列表,但是得到的却是空列表。我已经为这个过程所需的字段、页面和类设置了定位器。
这是我的代码:
import requests
from bs4 import BeautifulSoup
# Download the raw bytes of the listing page. An explicit timeout is passed
# because requests does not apply one by default, so an unresponsive server
# would otherwise block the script indefinitely.
page_content = requests.get(
    'https://easy.co.il/list/Orthopedics/?c2=5405',
    timeout=30,
).content
#assign locators
class QuoteLocators:
    """CSS selectors for the fields inside a single listing element."""

    # Matches a <p> element with the ``biz-list-name`` class.
    DOCTOR_LOCATOR = 'p.biz-list-name'
    # Matches a <p> element with the ``biz-list-address`` class. The
    # attribute keeps its existing spelling so current references still work.
    ADRESS_LOCATOR = 'p.biz-list-address'
class QuotesPageLocators:
    """CSS selectors that apply to the listing page as a whole."""

    # Matches each <div> with the ``biz-item`` class. The selector string
    # (including its trailing space) is kept exactly as it was.
    QUOTE = 'div.biz-item '
#assign pages
class QuotesPage:
    """Parsed listing page that exposes its entries as ``QuoteParser`` objects."""

    def __init__(self, page):
        """Parse *page* (HTML markup) with the built-in ``html.parser`` backend."""
        self.soup = BeautifulSoup(page, 'html.parser')

    @property
    def quotes(self):
        """Return one ``QuoteParser`` per element matching ``QuotesPageLocators.QUOTE``.

        The document is queried on every access. The returned list is empty
        when no element in the parsed markup matches the selector.
        """
        return [
            QuoteParser(element)
            for element in self.soup.select(QuotesPageLocators.QUOTE)
        ]
#set parser
class QuoteParser:
    """Read the doctor name and address out of one listing element."""

    def __init__(self, parent):
        """Store *parent*, the element whose descendants are searched."""
        self.parent = parent

    def __repr__(self):
        """Return a short human-readable summary of the entry."""
        return f'<Doctor {self.name}, at {self.adress}>'

    def _text(self, locator):
        """Return ``.string`` of the first match for *locator*.

        Returns ``None`` when nothing under ``self.parent`` matches, so one
        incomplete entry does not abort processing with an ``AttributeError``.
        """
        element = self.parent.select_one(locator)
        if element is None:
            return None
        return element.string

    @property
    def name(self):
        """Text of the element matched by ``QuoteLocators.DOCTOR_LOCATOR``, or ``None``."""
        return self._text(QuoteLocators.DOCTOR_LOCATOR)

    @property
    def adress(self):
        """Text of the element matched by ``QuoteLocators.ADRESS_LOCATOR``, or ``None``."""
        return self._text(QuoteLocators.ADRESS_LOCATOR)
# Parse the downloaded page, print every entry on its own line, then print the
# whole list.
page = QuotesPage(page_content)
# Read the property once: each access runs the selector query again.
quotes = page.quotes
for quote in quotes:
    print(quote)
print(quotes)
答案 0 :(得分:3)
您在页面上看到的数据是通过JavaScript从外部URL加载的,因此 requests 取回的原始HTML中并不包含这些条目,选择器匹配不到任何元素,结果就是空列表。您可以使用此示例获取医生的姓名和地址:
// Register a handler for the wizard's "beforeNext" event
// (presumably fired before moving to the next page; confirm against the wizard library).
wizard.on("beforeNext", function (currentWizard) {
    // Form validation belongs here. As written, the step is stopped
    // unconditionally on every invocation.
    currentWizard.stop();
});
打印:
import re
import json
import requests
from bs4 import BeautifulSoup
# Fetch the listing through the site's JSON endpoint and print each entry's
# name and address.
url = 'https://easy.co.il/list/Orthopedics/?c2=5405'

# The category id is read from the ``var catid="..."`` assignment in the page
# source. Fail with a clear message if the page no longer contains it.
match = re.search(r'var catid="(\d+)"', requests.get(url, timeout=30).text)
if match is None:
    raise RuntimeError('could not find catid in the page source of ' + url)
c = match.group(1)

# The second id is whatever follows the last '=' in the page URL.
c2 = url.split('=')[-1]

api_url = 'https://easy.co.il/json/list.json?v=1.1&c={}&c2={}&listpage=1&lat=32.059925&lng=34.785126&rad=8905&mapid=0&viewport=desktop'.format(c, c2)
data = requests.get(api_url, timeout=30).json()

# uncomment this to print all data:
# print(json.dumps(data, indent=4))

for biz in data['bizlist']['list']:
    print(biz['bizname'])
    print(biz['address'])
    # Separator line between consecutive entries.
    print('-' * 80)