试图从“查看电话号码”弹出窗口中提取数据,但出现错误
from bs4 import BeautifulSoup
import requests
# Walk the general-physician listing pages, follow each doctor's profile link,
# and print the text of the first three rows of the profile's details table.
# According to the accompanying note, the third row is the "view phone number"
# row whose real value is only shown in a popup.
for count in range(1, 2):
    r = requests.get(
        'https://www.docmed360.com/mumbai/general-physician?page=' + str(count),
        headers={'User-Agent': 'Googleboat'},
    )
    soup = BeautifulSoup(r.text, 'lxml')
    for data in soup.find_all('div', class_='listing-item text-left'):
        # find() returns None when nothing matches; skip the listing instead
        # of crashing with AttributeError on the next attribute access.
        link = data.find('div', class_='col-md-12 col-xs-12 col-sm-12 title')
        link1 = link.find('a') if link is not None else None
        href = link1.get('href') if link1 is not None else None
        if not href:
            continue
        links = "https://www.docmed360.com" + href

        r1 = requests.get(links)
        soup1 = BeautifulSoup(r1.text, 'lxml')
        data1 = soup1.find('div', class_='container-fluid tab-pane')
        if data1 is None:
            continue
        data2 = data1.find('table', class_='table details-table')
        if data2 is None:
            continue

        # Slicing tolerates tables with fewer than three rows.
        for row in data2.find_all('tr')[:3]:
            print(row.text)
在上面的代码中,“tr3”对应页面上的“查看电话号码”一行;点击它之后,电话号码会在一个弹出窗口中显示。我想从该弹出窗口中提取电话号码。
答案 0 :(得分:1)
您可以直接从列表页中 type="application/ld+json" 的脚本标签里提取姓名和电话号码:
import requests, json
from bs4 import BeautifulSoup as bs
# Print the name and telephone of every JSON-LD record found on the listing
# pages. A single Session is used for all page requests.
with requests.Session() as s:
    for count in range(1, 3):
        r = s.get(f'https://www.docmed360.com/mumbai/general-physician?page={count}')
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed (and to avoid bs4's warning).
        soup = bs(r.content, 'html.parser')
        for tag in soup.select('[type="application/ld+json"]'):
            # Prefer .string: on some Beautiful Soup releases .text does not
            # include the contents of <script> tags.
            raw = tag.string or tag.text
            try:
                person = json.loads(raw)
            except json.JSONDecodeError:
                # Skip malformed or empty script bodies instead of aborting.
                continue
            # JSON-LD may also be a list or scalar; only dict records can
            # carry a 'telephone' key.
            if isinstance(person, dict) and 'telephone' in person:
                # .get() prints None for a record with no name instead of
                # raising KeyError.
                print(person.get('name'), person['telephone'])